#!/usr/bin/env python3
"""
Google Cloud Vision API - WORD Seviyesinde Çeviri
Her kelime ayrı koordinat, satır bazlı gruplama
"""

import base64
import json
import os
from collections import defaultdict

import fitz
import google.generativeai as genai
import requests

# API keys.
# SECURITY: these credentials were committed to source control and should be
# treated as compromised. The VISION_API_KEY / GEMINI_API_KEY environment
# variables take precedence; the literal fallbacks are kept only so existing
# deployments keep working and must be removed once the key has been rotated.
VISION_API_KEY = os.environ.get("VISION_API_KEY") or "AIzaSyCkYVKLxWDPNbQZr0-HM0nlSneEeM9KMjs"
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "AIzaSyCkYVKLxWDPNbQZr0-HM0nlSneEeM9KMjs"
VISION_API_URL = f"https://vision.googleapis.com/v1/images:annotate?key={VISION_API_KEY}"

# Resolution used when rasterizing a page for OCR, and the factor that maps
# coordinates measured on that raster back to page units (72 per inch).
DPI = 200
SCALE = 72 / DPI


def pdf_page_to_image(pdf_path: str, page_num: int = 0):
    """Render one PDF page to a PNG image.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render.

    Returns:
        A tuple ``(png_bytes, page_width, page_height)`` where the width and
        height come from the page rectangle (page units, not pixels).
    """
    doc = fitz.open(pdf_path)
    try:
        page = doc.load_page(page_num)

        # Scale the default 72-per-inch rendering up to the configured DPI.
        zoom = DPI / 72
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

        img_bytes = pix.tobytes("png")
        page_rect = page.rect
        return img_bytes, page_rect.width, page_rect.height
    finally:
        # Release the file handle even when loading or rendering fails.
        doc.close()


def vision_ocr(image_bytes: bytes, timeout: float = 60.0) -> "dict | None":
    """Run DOCUMENT_TEXT_DETECTION on an image through the Cloud Vision REST API.

    Args:
        image_bytes: Encoded image content to send (base64-encoded here).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        server answers with a non-200 status, or the body is not valid JSON.
    """
    payload = {
        "requests": [{
            "image": {"content": base64.b64encode(image_bytes).decode()},
            "features": [{"type": "DOCUMENT_TEXT_DETECTION"}]
        }]
    }

    try:
        response = requests.post(
            VISION_API_URL,
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Only the exception class is printed: the message of a connection
        # error can contain the request URL, which carries the API key.
        print(f"Vision API Hatası: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"Vision API Hatası: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # A 200 answer with a non-JSON body is treated like any other failure.
        print(f"Vision API Hatası: {type(exc).__name__}")
        return None


def extract_words(vision_response: dict, scale: "float | None" = None):
    """Extract word-level text and bounding boxes from a Vision response.

    Only the first entry of ``responses`` is read.

    Args:
        vision_response: Decoded Vision API JSON (may be ``None`` or empty).
        scale: Factor applied to every vertex coordinate. Defaults to the
            module-level ``SCALE``.

    Returns:
        A list of dicts with keys ``text``, ``bbox`` (``[x0, y0, x1, y1]``,
        scaled), ``y_center`` and ``has_break``. Words whose bounding box has
        fewer than four vertices are skipped. An empty list is returned when
        there is no usable annotation.
    """
    responses = (vision_response or {}).get("responses")
    if not responses:
        # Covers a missing key as well as an empty list (no IndexError).
        return []

    annotation = responses[0].get("fullTextAnnotation")
    if not annotation:
        return []

    factor = SCALE if scale is None else scale
    break_types = ("LINE_BREAK", "EOL_SURE_SPACE", "SPACE")

    words = []
    for page in annotation.get("pages", []):
        for block in page.get("blocks", []):
            for para in block.get("paragraphs", []):
                for word in para.get("words", []):
                    vertices = word.get("boundingBox", {}).get("vertices", [])
                    if len(vertices) < 4:
                        continue

                    symbols = word.get("symbols", [])
                    text = "".join(s.get("text", "") for s in symbols)

                    # A vertex may lack "x" or "y"; a missing value counts as 0.
                    xs = [v.get("x", 0) for v in vertices]
                    ys = [v.get("y", 0) for v in vertices]
                    x0, x1 = min(xs) * factor, max(xs) * factor
                    y0, y1 = min(ys) * factor, max(ys) * factor

                    # The break marker, if any, sits on the word's last symbol.
                    break_type = ""
                    if symbols:
                        break_type = (
                            symbols[-1]
                            .get("property", {})
                            .get("detectedBreak", {})
                            .get("type", "")
                        )

                    words.append({
                        "text": text,
                        "bbox": [x0, y0, x1, y1],
                        "y_center": (y0 + y1) / 2,
                        "has_break": break_type in break_types
                    })

    return words


def group_words_by_line(words: list, tolerance: float = 5.0):
    """Cluster words into text lines using their vertical centers.

    Words are visited in ascending ``y_center`` order. A word joins the
    current line while its center stays within ``tolerance`` of the center of
    the word that opened that line; otherwise it opens a new line. Each
    returned line is ordered left to right by ``bbox[0]``.
    """
    if not words:
        return []

    ordered = sorted(words, key=lambda w: (w["y_center"], w["bbox"][0]))

    rows = []
    anchor_y = None  # y_center of the word that started the current row
    for entry in ordered:
        if rows and abs(entry["y_center"] - anchor_y) <= tolerance:
            rows[-1].append(entry)
            continue
        rows.append([entry])
        anchor_y = entry["y_center"]

    # Left-to-right order inside every row.
    return [sorted(row, key=lambda w: w["bbox"][0]) for row in rows]


def _parse_translation_array(raw: str) -> list:
    """Decode the JSON array in a model reply, tolerating Markdown code fences."""
    if "```json" in raw:
        payload = raw.split("```json")[1].split("```")[0]
    elif "```" in raw:
        payload = raw.split("```")[1].split("```")[0]
    else:
        payload = raw

    parsed = json.loads(payload.strip())
    if not isinstance(parsed, list):
        raise ValueError("yanıt bir JSON dizisi değil")
    return parsed


def translate_lines(lines: list) -> list:
    """Translate each line to Turkish with Gemini and attach the result.

    For every line that receives a string translation, a metadata dict
    ``{"_translation": str, "_bbox": [x0, y0, x1, y1]}`` is appended to that
    line's list (the lists are mutated in place). ``_bbox`` is the union of
    the line's word boxes.

    Args:
        lines: Lists of word dicts carrying ``text`` and ``bbox``.

    Returns:
        The same ``lines`` object. Lines are returned without translations
        when the input is empty or the model reply cannot be read or parsed.
    """
    if not lines:
        # Nothing to translate: avoid a pointless API round trip.
        return lines

    # Only real word entries contribute text; metadata dicts appended by an
    # earlier call carry no "text" key and are ignored.
    line_texts = [
        " ".join(w["text"] for w in line if "text" in w)
        for line in lines
    ]

    prompt = f"""Bu teknik doküman satırlarını Türkçe'ye çevir.

KURALLAR:
- Teknik terimler: engine=motor, pump=pompa, valve=valf, brake=fren, sensor=sensör, controller=kontrol ünitesi, actuator=aktüatör
- Hata kodları (111000, 111103 gibi) ve referansları (T2-2, T/M, T5-6-3) AYNEN BIRAK
- min⁻¹ veya min-1 = dev/dk
- Kısa kelimeler (Item, Note, Parts, Function) çevir
- "-" tek başına ise AYNEN BIRAK

SATIRLAR:
{json.dumps(line_texts, ensure_ascii=False, indent=2)}

ÇIKTI: Sadece JSON array, her satır için çeviri (aynı sırada):
["çeviri1", "çeviri2", ...]
"""

    genai.configure(api_key=GEMINI_API_KEY)
    model = genai.GenerativeModel('gemini-2.0-flash')
    response = model.generate_content(prompt)

    try:
        # ValueError covers malformed JSON, a non-list reply, and a reply
        # whose text cannot be read.
        translations = _parse_translation_array(response.text)
    except ValueError as e:
        print(f"Çeviri parse hatası: {e}")
        return lines

    if len(translations) != len(lines):
        # Translations are matched by position, so a count mismatch means
        # some lines may be paired with the wrong text.
        print(f"⚠️ Çeviri sayısı uyuşmuyor: {len(translations)} çeviri / {len(lines)} satır")

    for line, translation in zip(lines, translations):
        line_words = [w for w in line if "text" in w]
        if not line_words or not isinstance(translation, str):
            continue

        line.append({
            "_translation": translation,
            "_bbox": [
                min(w["bbox"][0] for w in line_words),
                min(w["bbox"][1] for w in line_words),
                max(w["bbox"][2] for w in line_words),
                max(w["bbox"][3] for w in line_words)
            ]
        })

    return lines


def _find_translation(line: list):
    """Return the first translation metadata dict in a line, or None."""
    for item in line:
        if isinstance(item, dict) and "_translation" in item:
            return item
    return None


def _pick_fontsize(text_len: int, box_width: float, box_height: float) -> float:
    """Estimate a font size that lets text_len characters fit in the box."""
    available_chars = box_width / 4.5  # ~4.5px per char at 7pt
    if text_len > available_chars:
        # The text will wrap: share the box height between the wrapped lines.
        lines_needed = (text_len / available_chars) + 1
        return min(7, max(4, box_height / lines_needed - 1))
    return min(8, max(5, box_height - 3))


def apply_translations(
    pdf_path: str,
    lines: list,
    output_path: str,
    page_num: int = 0,
    font_path: str = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
):
    """Write line translations onto a PDF page and save the result.

    For each line carrying translation metadata, the line's box (widened by
    30% and heightened by 50%) is painted white and the translation is
    written into it. Lines are skipped when the translation is missing, blank
    or equal to the original text, when the original contains only digits
    (ignoring spaces, hyphens and commas), when the original is shorter than
    two characters, or when the box has no area.

    Args:
        pdf_path: Source PDF.
        lines: Lines as returned by ``translate_lines``.
        output_path: Destination path for the modified PDF.
        page_num: Zero-based index of the page to modify.
        font_path: TrueType font file used for the inserted text.

    Returns:
        The number of lines whose translation was actually written.
    """
    min_fontsize = 3.0  # floor for the shrink-to-fit retries below
    applied = 0

    doc = fitz.open(pdf_path)
    try:
        page = doc.load_page(page_num)

        for line in lines:
            trans_info = _find_translation(line)
            if not trans_info:
                continue

            translation = trans_info["_translation"]
            if not isinstance(translation, str) or not translation.strip():
                continue

            original = " ".join(w["text"] for w in line if "text" in w)

            # Skip unchanged text, pure numbers/codes and single characters.
            if translation == original:
                continue
            if original.replace(" ", "").replace("-", "").replace(",", "").isdigit():
                continue
            if len(original) < 2:
                continue

            x0, y0, x1, y1 = trans_info["_bbox"]
            if x1 <= x0 or y1 <= y0:
                # A box without area cannot hold text and would cause a
                # division by zero in the font-size estimate.
                continue

            # Enlarge the box: the Turkish text is usually longer.
            padding_x = (x1 - x0) * 0.3
            padding_y = (y1 - y0) * 0.5
            target_rect = fitz.Rect(x0, y0, x1 + padding_x, y1 + padding_y)

            fontsize = _pick_fontsize(
                len(translation),
                x1 - x0 + padding_x,
                y1 - y0 + padding_y,
            )

            try:
                print(f"   Yazılıyor: [{x0:.0f},{y0:.0f},{x1:.0f},{y1:.0f}] → {translation[:30]}...")

                # Mask the original text before writing over it.
                page.draw_rect(target_rect, color=(1, 1, 1), fill=(1, 1, 1))

                # insert_textbox writes nothing when the text does not fit
                # (negative return value), so shrink the font and retry
                # rather than leaving a blank masked area.
                size = fontsize
                while True:
                    rc = page.insert_textbox(
                        target_rect,
                        translation,
                        fontsize=size,
                        fontname="dejavu",
                        fontfile=font_path,
                        color=(0, 0, 0),
                        align=fitz.TEXT_ALIGN_LEFT
                    )
                    if rc >= 0 or size <= min_fontsize:
                        break
                    size = max(min_fontsize, size - 0.5)

                if rc < 0:
                    print(f"      ⚠️ Overflow! rc={rc}")
                else:
                    # Count only lines that were really written.
                    applied += 1
            except Exception as e:
                # Best effort per line: report and continue with the rest.
                print(f"      ❌ Hata: {e}")

        doc.save(output_path)
    finally:
        doc.close()

    return applied


def main(
    input_pdf: str = "/var/www/html/PEPCVSON/public/katalog/api/output/ZW140-5B_sayfa_315.pdf",
    output_pdf: str = "/var/www/html/PEPCVSON/public/katalog/api/output/ZW140-5B_sayfa_315_WORD.pdf",
    page_num: int = 0,
):
    """Run the full pipeline: render, OCR, group, translate, write back.

    Args:
        input_pdf: PDF to translate.
        output_pdf: Where the translated PDF is saved.
        page_num: Zero-based index of the page to process.

    Progress and a short sample of the translated lines are printed to
    stdout. Returns ``None``; stops early when the OCR request fails.
    """
    print("📄 1/5 PDF görüntüye dönüştürülüyor...")
    img_bytes, _, _ = pdf_page_to_image(input_pdf, page_num)

    print("🔍 2/5 Vision API ile OCR yapılıyor...")
    vision_result = vision_ocr(img_bytes)

    if not vision_result:
        print("❌ Vision API başarısız")
        return

    print("📊 3/5 Kelimeler çıkarılıyor...")
    words = extract_words(vision_result)
    print(f"   → {len(words)} kelime bulundu")

    print("📝 4/5 Satırlara gruplama ve çeviri...")
    lines = group_words_by_line(words, tolerance=5.0)
    print(f"   → {len(lines)} satır oluşturuldu")

    if lines:
        # No OCR text means nothing to send to the translation model; the
        # output file is still written below.
        lines = translate_lines(lines)

    print("✍️ 5/5 PDF'e yazılıyor...")
    applied = apply_translations(input_pdf, lines, output_pdf, page_num)

    print(f"\n✅ Tamamlandı! {applied} satır çevrildi")
    print(f"   Çıktı: {output_pdf}")

    # Show a short preview of original/translated pairs.
    print("\n📋 Örnek satırlar:")
    for number, line in enumerate(lines[:8], start=1):
        original = " ".join(w["text"] for w in line if "text" in w)
        trans = ""
        for item in line:
            if isinstance(item, dict) and "_translation" in item:
                trans = item["_translation"]
        print(f"   {number}. EN: {original[:50]}...")
        # str() keeps the preview from failing on a non-string translation.
        print(f"      TR: {str(trans)[:50]}...")

# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()