#!/usr/bin/env python3
"""
Google Cloud Vision API + Gemini Çeviri
Vision: Kesin koordinatlar
Gemini: Türkçe çeviri
"""

import base64
import json
import os
from typing import Optional

import fitz
import google.generativeai as genai
import requests

# API keys
# SECURITY: a live credential should not be committed to source control.
# The VISION_API_KEY / GEMINI_API_KEY environment variables take precedence;
# the embedded literal below is kept only as a backward-compatible fallback
# and should be rotated and then removed from this file.
_EMBEDDED_API_KEY = "AIzaSyCkYVKLxWDPNbQZr0-HM0nlSneEeM9KMjs"
# `or` (rather than a .get() default) so that an empty variable also falls back.
VISION_API_KEY = os.environ.get("VISION_API_KEY") or _EMBEDDED_API_KEY
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or _EMBEDDED_API_KEY
# NOTE: the key is part of the URL, so never print or log this value.
VISION_API_URL = f"https://vision.googleapis.com/v1/images:annotate?key={VISION_API_KEY}"


def pdf_page_to_image(pdf_path: str, page_num: int = 0, dpi: int = 200) -> tuple:
    """Render one PDF page to PNG bytes.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render.
        dpi: Render resolution. The page is scaled by ``dpi / 72`` in both
            directions.

    Returns:
        A ``(png_bytes, page_width, page_height)`` tuple. Width and height
        come from ``page.rect``, i.e. they are the unscaled page dimensions,
        not the pixel size of the rendered image.
    """
    # The context manager closes the document even when loading or rendering
    # the page raises, so the file handle is never leaked.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(page_num)
        page_rect = page.rect

        zoom = dpi / 72
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

        return pix.tobytes("png"), page_rect.width, page_rect.height


def vision_ocr(image_bytes: bytes, timeout: float = 60) -> Optional[dict]:
    """Run Google Cloud Vision DOCUMENT_TEXT_DETECTION on an image.

    Args:
        image_bytes: Encoded image data; it is sent base64-encoded in the
            request body.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        server answers with a non-200 status, or the body is not valid JSON.
    """
    img_base64 = base64.b64encode(image_bytes).decode()

    request_body = {
        "requests": [{
            "image": {"content": img_base64},
            "features": [{"type": "DOCUMENT_TEXT_DETECTION"}]
        }]
    }

    try:
        response = requests.post(
            VISION_API_URL,
            headers={"Content-Type": "application/json"},
            json=request_body,
            timeout=timeout,  # without a timeout the call can block forever
        )
    except requests.RequestException as e:
        # Only the exception type is printed: the exception text can contain
        # the request URL, which carries the API key.
        print(f"Vision API Hatası: {type(e).__name__}")
        return None

    if response.status_code != 200:
        print(f"Vision API Hatası: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError:
        print("Vision API Hatası: geçersiz JSON yanıtı")
        return None


def extract_blocks(vision_response: dict, dpi: int = 200):
    """Extract text blocks and their bounding boxes from a Vision response.

    Args:
        vision_response: Decoded JSON returned by the Vision API (may be
            ``None`` or empty).
        dpi: Resolution the OCR image was rendered at. Pixel coordinates are
            multiplied by ``72 / dpi`` to bring them back to page units.

    Returns:
        A list of ``{"text": str, "bbox": [x0, y0, x1, y1]}`` dicts, one per
        block whose text is longer than one character. Returns ``[]`` when
        the response has no usable ``fullTextAnnotation``.
    """
    if not vision_response:
        return []

    # An empty "responses" list must not raise IndexError.
    responses = vision_response.get("responses")
    if not responses:
        return []

    annotation = (responses[0] or {}).get("fullTextAnnotation")
    if not annotation:
        return []

    blocks = []
    scale = 72 / dpi

    for page in annotation.get("pages", []):
        for block in page.get("blocks", []):
            vertices = block.get("boundingBox", {}).get("vertices", [])
            if len(vertices) < 4:
                continue

            # A vertex without an "x" or "y" key is treated as coordinate 0.
            xs = [v.get("x", 0) for v in vertices]
            ys = [v.get("y", 0) for v in vertices]
            bbox = [min(xs) * scale, min(ys) * scale,
                    max(xs) * scale, max(ys) * scale]

            # Rebuild each word from its symbols, then join words with spaces.
            words = [
                "".join(symbol.get("text", "") for symbol in word.get("symbols", []))
                for paragraph in block.get("paragraphs", [])
                for word in paragraph.get("words", [])
            ]
            text = " ".join(words)

            # Skip blank blocks and single-character blocks.
            if text.strip() and len(text) > 1:
                blocks.append({"text": text, "bbox": bbox})

    return blocks


def _parse_translation_array(reply: str) -> list:
    """Return the JSON array found in a model reply, or ``[]`` if unusable.

    The array may be wrapped in a Markdown code fence, with or without a
    ``json`` language tag; in that case only the fenced part is parsed.
    """
    if "```json" in reply:
        payload = reply.split("```json")[1].split("```")[0]
    elif "```" in reply:
        payload = reply.split("```")[1]
    else:
        payload = reply

    try:
        parsed = json.loads(payload.strip())
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print(f"Çeviri parse hatası: {e}")
        return []

    if not isinstance(parsed, list):
        print("Çeviri parse hatası: JSON array bekleniyordu")
        return []

    return parsed


def translate_texts(blocks: list) -> list:
    """Translate the text of every block to Turkish with one Gemini request.

    Each block dict receives a ``"turkish"`` key. The translation is matched
    to its block by position in the returned array. A block keeps its
    original text as ``"turkish"`` whenever no usable translation exists for
    it: the request failed, the reply was not a JSON array, the array was too
    short, or the entry was not a string.

    Args:
        blocks: List of dicts that each contain a ``"text"`` key. The dicts
            are modified in place.

    Returns:
        The same ``blocks`` list that was passed in.
    """
    if not blocks:
        # Nothing to translate: skip the API round trip entirely.
        return blocks

    texts = [b["text"] for b in blocks]

    prompt = f"""Bu teknik doküman metinlerini Türkçe'ye çevir.

KURALLAR:
- Teknik terimler: engine=motor, pump=pompa, valve=valf, brake=fren, sensor=sensör, controller=kontrol ünitesi
- Hata kodları (111000 gibi) ve referansları (T2-2, T/M) ÇEVİRME
- min⁻¹ = dev/dk
- SECTION, Group başlıklarını çevir

METİNLER:
{json.dumps(texts, ensure_ascii=False, indent=2)}

ÇIKTI: Sadece JSON array döndür, her metin için çeviri:
["çeviri1", "çeviri2", ...]
"""

    translations = []
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel('gemini-2.0-flash')
        reply = model.generate_content(prompt).text
    except Exception as e:
        # Best-effort boundary: any failure of the request, or of reading
        # its text, degrades to "untranslated" instead of aborting the run.
        print(f"Gemini API hatası: {e}")
    else:
        translations = _parse_translation_array(reply)
        if translations and len(translations) != len(blocks):
            # Positional matching is unreliable when the counts differ.
            print(
                f"Çeviri sayısı uyuşmuyor: {len(translations)} çeviri / "
                f"{len(blocks)} blok"
            )

    for i, block in enumerate(blocks):
        candidate = translations[i] if i < len(translations) else None
        block["turkish"] = candidate if isinstance(candidate, str) else block["text"]

    return blocks


def _write_masked_text(page, mask_rect, text_rect, content, fontsize, min_fontsize, font_path):
    """Mask ``mask_rect`` in white and write ``content`` into ``text_rect``.

    Mask and text are collected in one shape and committed together, and only
    when the text fits. If it does not fit, the font size is reduced by one
    point per attempt down to ``min_fontsize``.

    Returns:
        ``True`` if the mask and text were committed to the page, ``False``
        if the text did not fit even at ``min_fontsize`` (page left untouched).
    """
    size = fontsize
    while True:
        # A fresh shape per attempt, so a failed attempt leaves nothing behind.
        shape = page.new_shape()
        shape.draw_rect(mask_rect)
        shape.finish(color=(1, 1, 1), fill=(1, 1, 1))
        spare = shape.insert_textbox(
            text_rect,
            content,
            fontsize=size,
            fontname="dejavu",
            fontfile=font_path,
            color=(0.1, 0.1, 0.1),
            align=fitz.TEXT_ALIGN_LEFT,
        )
        # A negative result means the text did not fit and was not stored.
        if spare >= 0:
            shape.commit()
            return True
        if size <= min_fontsize:
            return False
        size = max(min_fontsize, size - 1)


def apply_translations(pdf_path: str, blocks: list, output_path: str, page_num: int = 0):
    """Overlay the Turkish translations on one page and save a new PDF.

    For each block the bounding box is painted white and the translation is
    written into it. The original text is only masked when the translation
    was actually written, so a block that cannot be rendered (text does not
    fit, font file missing, degenerate box) stays as it was in the source PDF.

    Blocks are skipped when they have no translation, when the translation
    equals the original text, when the bbox is not four values, or when the
    original text is shorter than 20 characters and contains a digit (treated
    as an error code or reference).

    Args:
        pdf_path: Source PDF path.
        blocks: Dicts with ``"text"``, ``"turkish"`` and ``"bbox"`` keys.
        output_path: Path the modified PDF is saved to.
        page_num: Zero-based index of the page to modify.

    Returns:
        Number of blocks whose translation was written to the page.
    """
    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
    min_fontsize = 5
    applied = 0

    # The context manager closes the document even if saving raises.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(page_num)

        for block in blocks:
            text = block.get("text", "")
            turkish = block.get("turkish", text)
            bbox = block.get("bbox", [])

            if not isinstance(turkish, str) or not turkish:
                continue
            if turkish == text or len(bbox) != 4:
                continue

            # Skip error codes and references
            if any(c.isdigit() for c in text) and len(text) < 20:
                continue

            x0, y0, x1, y1 = bbox
            mask_rect = fitz.Rect(x0, y0, x1, y1)
            # Inset by one unit so the text does not touch the mask border.
            text_rect = fitz.Rect(x0 + 1, y0 + 1, x1 - 1, y1 - 1)

            # Font size heuristic: assume about 5 units of width per
            # character, estimate the number of lines, then clamp to 5..10.
            box_height = y1 - y0
            box_width = x1 - x0
            chars_per_line = max(10, int(box_width / 5))
            lines_needed = max(1, (len(turkish) // chars_per_line) + 1)
            fontsize = min(10, max(min_fontsize, (box_height / lines_needed) - 1))

            try:
                written = _write_masked_text(
                    page, mask_rect, text_rect, turkish,
                    fontsize, min_fontsize, font_path,
                )
            except Exception as e:
                # Best effort per block: report the failure and carry on.
                print(f"   ⚠️ Blok yazılamadı: {e}")
                continue

            if written:
                applied += 1
            else:
                print(f"   ⚠️ Metin kutuya sığmadı, blok atlandı: {turkish[:40]}")

        doc.save(output_path)

    return applied


def main(
    input_pdf: str = "/var/www/html/PEPCVSON/public/katalog/api/output/ZW140-5B_sayfa_315.pdf",
    output_pdf: str = "/var/www/html/PEPCVSON/public/katalog/api/output/ZW140-5B_sayfa_315_VISION.pdf",
    page_num: int = 0,
    dpi: int = 200,
):
    """Run the full pipeline: render, OCR, translate, write back.

    Args:
        input_pdf: PDF to translate.
        output_pdf: Path the translated PDF is written to.
        page_num: Zero-based index of the page to process.
        dpi: Resolution used for rendering. The same value is passed to
            ``extract_blocks`` so OCR coordinates map back onto the page.

    Returns:
        ``None``. Progress and a few sample translations are printed.
    """
    print("📄 1/4 PDF görüntüye dönüştürülüyor...")
    # The page dimensions in the result are not needed here.
    img_bytes, _, _ = pdf_page_to_image(input_pdf, page_num, dpi=dpi)

    print("🔍 2/4 Vision API ile OCR yapılıyor...")
    vision_result = vision_ocr(img_bytes)

    if not vision_result:
        print("❌ Vision API başarısız")
        return

    print("📊 3/4 Metin blokları çıkarılıyor ve çevriliyor...")
    blocks = extract_blocks(vision_result, dpi=dpi)
    print(f"   → {len(blocks)} blok bulundu")

    blocks = translate_texts(blocks)

    print("✍️ 4/4 PDF'e yazılıyor...")
    applied = apply_translations(input_pdf, blocks, output_pdf, page_num)

    print(f"\n✅ Tamamlandı! {applied} blok çevrildi")
    print(f"   Çıktı: {output_pdf}")

    # Show a few samples
    print("\n📋 Örnek çeviriler:")
    for block in blocks[:5]:
        # str(... or '') keeps the preview from failing on a non-string value.
        turkish_preview = str(block.get('turkish') or '')[:40]
        print(f"   EN: {block['text'][:40]}...")
        print(f"   TR: {turkish_preview}...")
        print()


if __name__ == "__main__":
    main()

