#!/usr/bin/env python3
"""
PEPC V2 - Gemini ile Tam Otomatik PDF Çeviri
Koordinat çıkarma + Çeviri tek API çağrısında
"""

import base64
import json
import os
import sys
import tempfile

import fitz
import google.generativeai as genai

# API configuration.
# SECURITY: the key was previously hard-coded in this file. It is now read
# from the GEMINI_API_KEY environment variable; the key that was committed
# should be treated as leaked and revoked. An unset variable yields "".
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

# Instruction text sent to Gemini together with the rendered page image.
# It asks (in Turkish) for every text element on the page, its Turkish
# translation and a bounding box expressed in percent of the page size,
# returned as JSON shaped like
# {"items": [{"original": ..., "turkish": ..., "bbox_percent": [x0, y0, x1, y1]}]}.
# The text is runtime data: changing any character changes what the model sees.
# NOTE(review): "ATLAMA" near the end reads as "do NOT skip", which looks at
# odds with the following line ("list only items with meaningful text") -
# confirm whether "ATLA" (skip) was intended.
PROMPT = """Bu teknik doküman sayfasını analiz et ve çevir.

GÖREV:
1. Sayfadaki TÜM metinleri bul (başlık, paragraf, tablo hücreleri)
2. Her metin için koordinat ve Türkçe çeviri ver

KURALLAR:
- Teknik terimler: engine=motor, pump=pompa, valve=valf, brake=fren, sensor=sensör, controller=kontrol ünitesi, actuator=aktüatör, displacement=deplasman, traction=çekiş
- Hata kodları (111000, 111103 gibi) ve referansları (T2-2, T/M) ÇEVİRME
- min⁻¹ veya min-1 = dev/dk olarak çevir
- Section, Group gibi başlıkları çevirme (SECTION 5 TROUBLESHOOTING aynen kalsın)
- "-" işaretini çevirme

ÇIKTI FORMATI (sadece JSON, başka bir şey yazma):
{
  "items": [
    {
      "original": "orijinal İngilizce metin",
      "turkish": "Türkçe çeviri",
      "bbox_percent": [x0, y0, x1, y1]
    }
  ]
}

bbox_percent: Sayfa boyutuna göre yüzde (0-100). 
Örnek: Sol üst köşe [0,0], sağ alt köşe [100,100]

ÖNEMLİ: 
- Tablo hücrelerini AYRI AYRI listele (her hücre ayrı item)
- Boş hücreleri ve "-" olan hücreleri ATLAMA
- Sadece anlamlı metin içeren öğeleri listele"""


def pdf_page_to_image(pdf_path: str, page_num: int = 0, dpi: int = 150) -> bytes:
    """Render one page of a PDF to PNG and return the encoded bytes.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render.
        dpi: Target resolution; the page is scaled by ``dpi / 72``.

    Returns:
        The rendered page as PNG-encoded bytes.

    Errors raised while opening the file, indexing the page or rendering
    propagate to the caller; the document is closed in every case.
    """
    doc = fitz.open(pdf_path)
    try:
        page = doc[page_num]

        # Same zoom factor on both axes keeps the page aspect ratio.
        zoom = dpi / 72
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

        return pix.tobytes("png")
    finally:
        # Previously the handle leaked when page lookup or rendering failed.
        doc.close()


def _parse_model_json(text: str) -> dict:
    """Decode the JSON payload contained in a model reply.

    The reply may be bare JSON or JSON wrapped in a Markdown code fence.
    Returns a dict whose ``"items"`` value is always a list. Raises
    ``ValueError`` when no usable JSON object can be decoded.
    """
    # Unwrap a Markdown code fence if the model added one.
    if "```json" in text:
        json_str = text.split("```json")[1].split("```")[0].strip()
    elif "```" in text:
        json_str = text.split("```")[1].split("```")[0].strip()
    else:
        json_str = text.strip()

    try:
        # Try the payload untouched first: unconditionally rewriting \" to "
        # (as was done before) breaks every valid string that contains an
        # escaped quote.
        parsed = json.loads(json_str)
    except ValueError:
        # Fallback for replies that arrive with stray escaping; this is the
        # legacy cleanup, now applied only when strict parsing fails.
        repaired = json_str.replace('\\"', '"').replace('\\n', ' ')
        parsed = json.loads(repaired)

    # Tolerate a bare list of items instead of the requested wrapper object.
    if isinstance(parsed, list):
        parsed = {"items": parsed}
    if not isinstance(parsed, dict):
        raise ValueError(f"unexpected JSON root type: {type(parsed).__name__}")
    if not isinstance(parsed.get("items"), list):
        parsed["items"] = []
    return parsed


def extract_and_translate(image_bytes: bytes) -> dict:
    """Send a page image to Gemini and return the extracted translations.

    Args:
        image_bytes: PNG-encoded image of the page.

    Returns:
        A dict with an ``"items"`` list as requested by ``PROMPT``. When the
        reply cannot be parsed, a message is printed and ``{"items": []}``
        is returned.

    Raises:
        RuntimeError: If ``GEMINI_API_KEY`` is empty.

    Errors from the Gemini client itself (including reading
    ``response.text``) are not caught here.
    """
    if not GEMINI_API_KEY:
        raise RuntimeError("GEMINI_API_KEY is not set")

    genai.configure(api_key=GEMINI_API_KEY)
    model = genai.GenerativeModel('gemini-2.0-flash')

    # The image is passed as a base64 string, as in the original call.
    img_base64 = base64.b64encode(image_bytes).decode()

    response = model.generate_content([
        {"mime_type": "image/png", "data": img_base64},
        PROMPT
    ])

    text = response.text

    # Debug aid: keep the raw reply on disk. Best effort only - a failed
    # write must not abort the translation.
    debug_path = "/tmp/gemini_raw_response.txt"
    try:
        with open(debug_path, "w", encoding="utf-8") as f:
            f.write(text)
        print(f"   → Raw response kaydedildi: {debug_path}")
    except OSError as e:
        print(f"   ⚠️ Raw response kaydedilemedi: {e}")

    try:
        return _parse_model_json(text)
    except ValueError as e:
        print(f"   ⚠️ JSON parse hatası: {e}")
        print(f"   İlk 500 karakter:\n{text[:500]}")
        return {"items": []}


_MIN_FIT_FONTSIZE = 4  # smallest size tried when shrinking text to fit its box


def _bbox_percent_to_points(bbox_pct, page_width, page_height):
    """Convert ``[x0, y0, x1, y1]`` percentages to page coordinates.

    Returns a 4-tuple of floats, or ``None`` when the value is not a
    sequence of four numbers.
    """
    if not isinstance(bbox_pct, (list, tuple)) or len(bbox_pct) != 4:
        return None
    try:
        px0, py0, px1, py1 = (float(v) for v in bbox_pct)
    except (TypeError, ValueError):
        return None
    return (
        px0 * page_width / 100,
        py0 * page_height / 100,
        px1 * page_width / 100,
        py1 * page_height / 100,
    )


def _insert_fitted_text(page, mask_rect, text_rect, text, fontsize, font_path):
    """Mask ``mask_rect`` in white and write ``text`` into ``text_rect``.

    The font size is reduced in 0.5 pt steps (down to ``_MIN_FIT_FONTSIZE``)
    until the text fits. Mask and text are committed together, so when the
    text never fits the page is left untouched. Returns ``True`` on success.
    """
    size = fontsize
    while True:
        # A fresh, uncommitted shape per attempt: nothing reaches the page
        # unless the text actually fits.
        shape = page.new_shape()
        shape.draw_rect(mask_rect)
        shape.finish(color=(1, 1, 1), fill=(1, 1, 1))
        leftover = shape.insert_textbox(
            text_rect,
            text,
            fontsize=size,
            fontname="dejavu",
            fontfile=font_path,
            color=(0.14, 0.14, 0.14),
            align=fitz.TEXT_ALIGN_LEFT,
        )
        # A negative result means the text did not fit and was not written.
        if leftover >= 0:
            shape.commit()
            return True
        if size <= _MIN_FIT_FONTSIZE:
            return False
        size = max(_MIN_FIT_FONTSIZE, size - 0.5)


def apply_translations(pdf_path: str, translations: dict, output_path: str, page_num: int = 0):
    """Write translated text over the original text of one PDF page.

    For each item the original area is covered with a white rectangle and
    the Turkish text is written into it. The whole document is then saved
    to ``output_path``.

    Args:
        pdf_path: Source PDF.
        translations: Dict with an ``"items"`` list; each item carries
            ``"original"``, ``"turkish"`` and ``"bbox_percent"``
            (``[x0, y0, x1, y1]`` in percent of the page size).
        output_path: Where the modified document is saved.
        page_num: Zero-based index of the page to modify.

    Returns:
        Number of items actually written to the page.

    Items are skipped when the translation is empty or equal to the
    original, when the original contains "SECTION" or "Group", when the
    bounding box is malformed or too small, or when the text cannot be made
    to fit (a warning is printed in that case).
    """
    doc = fitz.open(pdf_path)
    try:
        page = doc[page_num]

        page_width = page.rect.width
        page_height = page.rect.height

        font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"

        applied = 0

        for item in translations.get("items") or []:
            if not isinstance(item, dict):
                continue

            # JSON null arrives as None; treat it like a missing value.
            original = item.get("original") or ""
            turkish = item.get("turkish") or ""
            if not isinstance(original, str) or not isinstance(turkish, str):
                continue

            # Skip when there is no translation or it equals the original.
            if not turkish or turkish == original:
                continue

            # Skip headings (Section, Group).
            if "SECTION" in original or "Group" in original:
                continue

            points = _bbox_percent_to_points(
                item.get("bbox_percent"), page_width, page_height
            )
            if points is None:
                continue
            x0, y0, x1, y1 = points

            box_height = y1 - y0
            box_width = x1 - x0

            # The text box is inset by 2 pt horizontally and 1 pt vertically;
            # anything not larger than that leaves an empty rectangle, which
            # insert_textbox rejects with an exception.
            if box_width <= 4 or box_height <= 2:
                continue

            # Estimate the number of lines and derive a starting font size
            # clamped to 6..10 pt.
            chars_per_line = max(10, int(box_width / 5))
            lines_needed = max(1, (len(turkish) // chars_per_line) + 1)
            fontsize = min(10, max(6, (box_height / lines_needed) - 2))

            mask_rect = fitz.Rect(x0, y0, x1, y1)
            text_rect = fitz.Rect(x0 + 2, y0 + 1, x1 - 2, y1 - 1)

            if _insert_fitted_text(
                page, mask_rect, text_rect, turkish, fontsize, font_path
            ):
                applied += 1
            else:
                print(f"   ⚠️ Metin kutuya sığmadı, atlandı: {turkish[:50]}")

        doc.save(output_path)
        return applied
    finally:
        doc.close()


def translate_pdf_page(input_path: str, output_path: str, page_num: int = 0):
    """Translate a single page end to end (main entry point).

    Renders the page to an image, asks Gemini for text items and
    translations, writes them into the PDF saved at ``output_path`` and
    returns the translations dict. Progress is printed along the way.
    """
    human_page = page_num + 1
    print(f"📄 Sayfa işleniyor: {input_path} (sayfa {human_page})")

    # Step 1: render the PDF page to an image.
    print("   → Görüntü oluşturuluyor...")
    page_png = pdf_page_to_image(input_path, page_num)

    # Step 2: let Gemini analyse and translate the image.
    print("   → Gemini API çağrılıyor...")
    translations = extract_and_translate(page_png)

    found_items = translations.get("items", [])
    print(f"   → {len(found_items)} metin öğesi bulundu")

    # Step 3: write the translations into the PDF.
    print("   → PDF'e yazılıyor...")
    written = apply_translations(
        input_path,
        translations,
        output_path,
        page_num,
    )

    print(f"✅ Tamamlandı! {written} öğe çevrildi")
    print(f"   Çıktı: {output_path}")

    return translations


def translate_full_pdf(input_path: str, output_path: str):
    """Translate every page of a PDF and save the result to ``output_path``.

    Each page is processed by ``translate_pdf_page`` into its own temporary
    file; the translated pages are then collected into one document.

    Args:
        input_path: Source PDF.
        output_path: Destination of the fully translated PDF.
    """
    doc = fitz.open(input_path)
    try:
        total_pages = len(doc)
    finally:
        doc.close()

    print(f"📚 Toplam {total_pages} sayfa işlenecek\n")

    # A private temporary directory avoids clashes between concurrent runs
    # and is removed automatically, unlike fixed names under /tmp.
    with tempfile.TemporaryDirectory(prefix="pepc_tr_") as temp_dir:
        temp_docs = []

        for page_num in range(total_pages):
            temp_output = os.path.join(temp_dir, f"page_{page_num}_tr.pdf")
            translate_pdf_page(input_path, temp_output, page_num)
            temp_docs.append(temp_output)
            print()

        # Merge.
        result = fitz.open()
        try:
            for page_num, temp_path in enumerate(temp_docs):
                temp_doc = fitz.open(temp_path)
                try:
                    # Every temporary file holds the WHOLE document with only
                    # page `page_num` translated, so take just that page.
                    # Inserting the full file would yield N*N pages.
                    result.insert_pdf(
                        temp_doc, from_page=page_num, to_page=page_num
                    )
                finally:
                    temp_doc.close()

            result.save(output_path)
        finally:
            result.close()

    print(f"\n🎉 Tüm PDF çevrildi: {output_path}")


# Test / command-line entry point
if __name__ == "__main__":
    # Optional overrides: <script> [input.pdf [output.pdf]].
    # With no arguments the original sample paths are used; with only an
    # input path the output is written next to it with a "_GEMINI" suffix.
    cli_args = sys.argv[1:]
    if cli_args:
        input_pdf = cli_args[0]
        if len(cli_args) > 1:
            output_pdf = cli_args[1]
        else:
            output_pdf = os.path.splitext(input_pdf)[0] + "_GEMINI.pdf"
    else:
        input_pdf = "/var/www/html/PEPCVSON/public/katalog/api/output/ZW140-5B_sayfa_315.pdf"
        output_pdf = "/var/www/html/PEPCVSON/public/katalog/api/output/ZW140-5B_sayfa_315_GEMINI.pdf"

    result = translate_pdf_page(input_pdf, output_pdf)

    # Debug: show the first 5 translations.
    print("\n📋 Örnek çeviriler:")
    for item in result.get("items", [])[:5]:
        if not isinstance(item, dict):
            continue
        # `or ''` guards against JSON null values before slicing.
        print(f"   EN: {str(item.get('original') or '')[:50]}...")
        print(f"   TR: {str(item.get('turkish') or '')[:50]}...")
        print()