#!/usr/bin/env python3
"""
Google Cloud Vision API - OCR + Bounding Box Test
"""

import base64
import json
import os
from typing import Optional

import fitz
import requests

# API key: taken from the GOOGLE_VISION_API_KEY environment variable when it
# is set to a non-empty value, so the credential can be supplied without
# editing this file.
# SECURITY: the literal fallback below is a credential committed to source
# control. It is kept only so existing runs keep working; rotate the key and
# remove the fallback.
VISION_API_KEY = (
    os.environ.get("GOOGLE_VISION_API_KEY")
    or "AIzaSyCkYVKLxWDPNbQZr0-HM0nlSneEeM9KMjs"
)
# The key travels as a query parameter, so this URL must be treated as a
# secret too (do not log it).
VISION_API_URL = f"https://vision.googleapis.com/v1/images:annotate?key={VISION_API_KEY}"


def pdf_page_to_image(pdf_path: str, page_num: int = 0, dpi: int = 200):
    """Render one page of a PDF to PNG bytes.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Index of the page to load (passed to ``load_page``).
        dpi: Render resolution; the page is scaled by ``dpi / 72`` on both
            axes before rasterising.

    Returns:
        A tuple ``(img_bytes, page_width, page_height)``: the PNG-encoded
        rendering, and the width/height of ``page.rect`` (the unscaled page
        rectangle, not the pixel size of the image).
    """
    # The context manager closes the document even when loading or rendering
    # the page raises (e.g. page_num out of range); the original only closed
    # it on the success path.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(page_num)

        # Page size is read before leaving the block, while the document
        # is still open.
        rect = page.rect
        page_width = rect.width
        page_height = rect.height

        zoom = dpi / 72
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        img_bytes = pix.tobytes("png")

    return img_bytes, page_width, page_height


def vision_ocr(image_bytes: bytes, timeout: float = 60.0) -> Optional[dict]:
    """Run Google Cloud Vision document OCR on an image.

    Args:
        image_bytes: Encoded image bytes; they are base64-encoded and sent
            inline in the request body.
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds),
            so a stalled connection cannot block the caller forever.

    Returns:
        The decoded JSON response, or ``None`` when the request could not be
        completed, the server answered with a non-200 status, or the body
        was not valid JSON. A diagnostic is printed in each failure case.
    """
    img_base64 = base64.b64encode(image_bytes).decode()

    request_body = {
        "requests": [
            {
                "image": {
                    "content": img_base64
                },
                "features": [
                    {
                        "type": "DOCUMENT_TEXT_DETECTION",
                        "maxResults": 50
                    }
                ]
            }
        ]
    }

    try:
        response = requests.post(
            VISION_API_URL,
            headers={"Content-Type": "application/json"},
            json=request_body,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Print only the exception type: the message may embed the request
        # URL, and the URL carries the API key as a query parameter.
        print(f"API Hatası: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"API Hatası: {response.status_code}")
        print(response.text)
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not JSON is treated like any other
        # failed call instead of crashing the caller.
        print("API Hatası: geçersiz JSON yanıtı")
        return None


def _block_text(block: dict) -> str:
    """Return the words of one Vision block joined by single spaces."""
    words = []
    for paragraph in block.get("paragraphs", []):
        for word in paragraph.get("words", []):
            # A word is the concatenation of its symbols' text.
            words.append(
                "".join(symbol.get("text", "") for symbol in word.get("symbols", []))
            )
    return " ".join(words)


def extract_text_blocks(vision_response: dict, page_width: float, page_height: float, dpi: int = 200):
    """Extract text blocks and their bounding boxes from a Vision API response.

    Only the first entry of ``vision_response["responses"]`` is examined.

    Args:
        vision_response: Decoded JSON of an ``images:annotate`` call. ``None``,
            an empty dict, or a missing/empty ``responses`` list yields ``[]``.
        page_width: Not used by this function; kept for interface
            compatibility.
        page_height: Not used by this function; kept for interface
            compatibility.
        dpi: Resolution the image was rendered at. Pixel coordinates are
            multiplied by ``72 / dpi`` to convert them back to page units.

    Returns:
        A list of dicts, one per block with non-blank text, with keys
        ``"text"`` (words joined by spaces), ``"bbox"`` (``[x0, y0, x1, y1]``,
        the scaled min/max of the block's vertices) and ``"confidence"``
        (the block's ``confidence`` field, ``0`` when absent).
    """
    # The original indexed responses[0] unconditionally and raised IndexError
    # on an empty "responses" list; treat that like a missing response.
    if not vision_response or not vision_response.get("responses"):
        return []

    first = vision_response["responses"][0]

    if "error" in first:
        print(f"API Hatası: {first['error']}")
        return []

    if "fullTextAnnotation" not in first:
        print("Metin bulunamadı")
        return []

    blocks = []
    scale = 72 / dpi  # pixel -> PDF coordinate conversion

    for page in first["fullTextAnnotation"].get("pages", []):
        for block in page.get("blocks", []):
            vertices = block.get("boundingBox", {}).get("vertices", [])
            if len(vertices) < 4:
                continue

            # A vertex may lack "x" or "y"; a missing coordinate counts as 0.
            xs = [v.get("x", 0) for v in vertices]
            ys = [v.get("y", 0) for v in vertices]

            text = _block_text(block)
            if not text.strip():
                continue

            blocks.append({
                "text": text,
                "bbox": [min(xs) * scale, min(ys) * scale, max(xs) * scale, max(ys) * scale],
                "confidence": block.get("confidence", 0),
            })

    return blocks


def main():
    """Run the OCR demo on a fixed sample PDF page.

    Renders page 0 of the sample PDF, sends it to the Vision API, saves the
    raw JSON response under /tmp, and prints the first ten extracted text
    blocks with their bounding boxes.
    """
    render_dpi = 200  # same resolution for rendering and coordinate conversion
    input_pdf = "/var/www/html/PEPCVSON/public/katalog/api/output/ZW140-5B_sayfa_315.pdf"

    print("📄 PDF işleniyor...")
    img_bytes, page_width, page_height = pdf_page_to_image(input_pdf, 0, dpi=render_dpi)

    print(f"   Sayfa boyutu: {page_width} x {page_height} pt")
    print(f"   Görüntü boyutu: {len(img_bytes)} bytes")

    print("\n🔍 Vision API çağrılıyor...")
    vision_result = vision_ocr(img_bytes)

    if not vision_result:
        print("❌ API çağrısı başarısız")
        return

    # Keep the raw response on disk for later inspection.
    with open("/tmp/vision_raw_response.json", "w", encoding="utf-8") as raw_file:
        json.dump(vision_result, raw_file, indent=2, ensure_ascii=False)
    print("   Raw response: /tmp/vision_raw_response.json")

    print("\n📊 Metin blokları çıkarılıyor...")
    blocks = extract_text_blocks(vision_result, page_width, page_height, dpi=render_dpi)

    print(f"\n✅ {len(blocks)} metin bloğu bulundu:")
    print("-" * 60)

    # Show at most the first ten blocks, truncating long text to 50 chars.
    for number, entry in enumerate(blocks[:10], start=1):
        full_text = entry["text"]
        if len(full_text) > 50:
            preview = full_text[:50] + "..."
        else:
            preview = full_text
        x0, y0, x1, y1 = entry["bbox"]
        print(f"{number}. \"{preview}\"")
        print(f"   Koordinat: [{x0:.1f}, {y0:.1f}, {x1:.1f}, {y1:.1f}]")
        print()


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()