#!/usr/bin/env python3
"""
Create a DeepL glossary.

- Selects the highest-scoring terms (up to 10,000)
- Creates the glossary through the DeepL API
"""

import json
import os
import sqlite3
from datetime import datetime

import requests

# Configuration
# SECURITY: supply the DeepL key through the DEEPL_API_KEY environment variable.
# The literal below is kept only as a backward-compatible fallback; a key that
# has been committed to source control should be treated as leaked -- rotate it
# and delete the fallback once every deployment sets the variable.
DEEPL_API_KEY = os.environ.get("DEEPL_API_KEY") or "b121dc7e-8e98-427f-8984-54c4d4f0851e"
# Base URL that every DeepL request in this script is built from.
DEEPL_API_URL = "https://api.deepl.com/v2"
# SQLite database holding the `quality_terms` table read by get_top_terms().
DB_PATH = "/mnt/pdfs/dictionary_quality.db"
# Name given to the created glossary; main() deletes existing glossaries with
# this exact name before creating a new one.
GLOSSARY_NAME = "PEPC_Technical_Terms"
# Upper bound on the number of rows pulled from the database.
MAX_TERMS = 10000

def get_top_terms(limit=MAX_TERMS):
    """Fetch the highest-scoring term pairs from the quality database.

    Args:
        limit: Maximum number of rows to return. Defaults to ``MAX_TERMS``.

    Returns:
        A list of ``(canonical_en, canonical_tr, quality_score)`` tuples from
        the ``quality_terms`` table, ordered by ``quality_score`` descending.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.execute(
            '''
            SELECT canonical_en, canonical_tr, quality_score
            FROM quality_terms
            ORDER BY quality_score DESC
            LIMIT ?
            ''',
            (limit,),
        )
        return cursor.fetchall()
    finally:
        # Release the connection even when the query raises; the sqlite3
        # connection context manager only handles transactions, not closing.
        conn.close()

def _prepare_glossary_entries(terms):
    """Clean, filter and de-duplicate raw term rows into ``(en, tr)`` pairs."""
    # Characters that would break the tab-separated, one-entry-per-line payload.
    forbidden = ('\t', '\n', '\r')
    entries = []
    seen = set()  # lower-cased source terms already emitted

    for en, tr, _score in terms:
        # NULL columns arrive as None; skip them instead of crashing on strip().
        if not isinstance(en, str) or not isinstance(tr, str):
            continue

        en = en.strip()
        tr = tr.strip()

        # Skip empty or single-character terms.
        if len(en) < 2 or len(tr) < 2:
            continue

        # Skip terms containing a TSV field or line separator.
        if any(ch in en or ch in tr for ch in forbidden):
            continue

        # Keep only the first occurrence of each source term (case-insensitive);
        # rows are expected in priority order, so the first one wins.
        key = en.lower()
        if key in seen:
            continue
        seen.add(key)

        entries.append((en, tr))

    return entries


def create_glossary(terms):
    """Create an EN->TR DeepL glossary from the given term rows.

    Rows are stripped, filtered (too short, non-string, containing tab or
    line-break characters) and de-duplicated case-insensitively on the English
    term before being sent as a TSV payload.

    Args:
        terms: Iterable of ``(english, turkish, score)`` rows. The score is
            not used here.

    Returns:
        The ``glossary_id`` reported by the API on success, or ``None`` when
        no usable entries remain, the request fails, or the API does not
        answer with HTTP 201.
    """
    entries = _prepare_glossary_entries(terms)

    print(f"📊 Hazırlanan terim sayısı: {len(entries):,}")

    # Nothing to upload: avoid a request that can only be rejected.
    if not entries:
        print("⚠️ Geçerli terim bulunamadı; glossary oluşturulmadı.")
        return None

    print("📝 İlk 5 örnek:")
    for en, tr in entries[:5]:
        print(f"   {en:<30} → {tr}")

    # One "source<TAB>target" entry per line.
    entries_tsv = "\n".join(f"{en}\t{tr}" for en, tr in entries)

    url = f"{DEEPL_API_URL}/glossaries"

    data = {
        "name": GLOSSARY_NAME,
        "source_lang": "EN",
        "target_lang": "TR",
        "entries": entries_tsv,
        "entries_format": "tsv"
    }

    headers = {
        "Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}",
        "Content-Type": "application/json"
    }

    print("\n🚀 Glossary oluşturuluyor...")

    try:
        # Without a timeout, requests may block indefinitely on a stalled server.
        response = requests.post(url, headers=headers, json=data, timeout=60)
    except requests.RequestException as exc:
        print(f"\n❌ HATA: {exc}")
        return None

    if response.status_code != 201:
        print(f"\n❌ HATA: {response.status_code}")
        print(response.text)
        return None

    result = response.json()
    print("\n✅ GLOSSARY OLUŞTURULDU!")
    print("═" * 50)
    print(f"   ID: {result['glossary_id']}")
    print(f"   İsim: {result['name']}")
    print(f"   Terim sayısı: {result['entry_count']:,}")
    print(f"   Kaynak: {result['source_lang']} → Hedef: {result['target_lang']}")
    print(f"   Oluşturma: {result['creation_time']}")
    print("═" * 50)
    return result['glossary_id']

def list_glossaries():
    """List the glossaries that exist for the configured API key.

    Prints one line per glossary (name, ID, entry count) when any exist.

    Returns:
        The list of glossary dicts from the ``"glossaries"`` field of the API
        response, or an empty list when the request fails or the API does not
        answer with HTTP 200.
    """
    url = f"{DEEPL_API_URL}/glossaries"
    headers = {"Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}"}

    try:
        # Without a timeout, requests may block indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"\n⚠️ Glossary listesi alınamadı: {exc}")
        return []

    if response.status_code != 200:
        # Report the failure so an empty result is not mistaken for "none exist".
        print(f"\n⚠️ Glossary listesi alınamadı: {response.status_code}")
        print(response.text)
        return []

    glossaries = response.json().get("glossaries", [])
    if glossaries:
        print("\n📚 Mevcut Glossary'ler:")
        for g in glossaries:
            print(f"   - {g['name']} (ID: {g['glossary_id']}, {g['entry_count']} terim)")
    return glossaries

def test_translation(glossary_id, test_text):
    """Translate ``test_text`` EN->TR without and with the glossary and print both.

    This is a manual smoke check against the live API, not a unit test: it
    prints the results and returns nothing.

    Args:
        glossary_id: ID of the glossary to apply in the second request.
        test_text: English sentence to translate.
    """
    url = f"{DEEPL_API_URL}/translate"

    headers = {
        "Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}",
        "Content-Type": "application/json"
    }

    base_payload = {
        "text": [test_text],
        "source_lang": "EN",
        "target_lang": "TR"
    }

    # (label for failure messages, prefix for the success line, request payload)
    attempts = [
        ("Glossary'siz", "❌ Glossary'siz: ", base_payload),
        ("Glossary'li", "✅ Glossary'li:  ", {**base_payload, "glossary_id": glossary_id}),
    ]

    print("\n🧪 ÇEVİRİ TESTİ")
    print("═" * 60)
    print(f"📝 Orijinal: {test_text}")
    print("─" * 60)

    for label, prefix, payload in attempts:
        try:
            # Without a timeout, requests may block indefinitely.
            response = requests.post(url, headers=headers, json=payload, timeout=30)
        except requests.RequestException as exc:
            print(f"⚠️ {label} çeviri başarısız: {exc}")
            continue

        if response.status_code == 200:
            translated = response.json()["translations"][0]["text"]
            print(f"{prefix}{translated}")
        else:
            # Surface the failure instead of silently printing nothing.
            print(f"⚠️ {label} çeviri başarısız: {response.status_code}")
            print(response.text)

    print("═" * 60)

def _delete_glossaries_named(glossaries, name):
    """Delete every glossary in ``glossaries`` whose name equals ``name``."""
    headers = {"Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}"}
    for g in glossaries:
        if g['name'] != name:
            continue
        print(f"\n⚠️ Mevcut glossary siliniyor: {g['glossary_id']}")
        del_url = f"{DEEPL_API_URL}/glossaries/{g['glossary_id']}"
        try:
            response = requests.delete(del_url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f"❌ Silme başarısız: {exc}")
            continue
        if not response.ok:
            # Report a failed delete rather than assuming the glossary is gone.
            print(f"❌ Silme başarısız: {response.status_code}")
            print(response.text)


def main():
    """Rebuild the glossary end to end.

    Steps: list existing glossaries, read the top terms from the database,
    delete glossaries already named ``GLOSSARY_NAME``, create the new glossary
    and, on success, run a few sample translations with it.
    """
    print("=" * 60)
    print("🔧 DeepL GLOSSARY OLUŞTURMA")
    print("=" * 60)
    print(f"⏰ Başlangıç: {datetime.now().strftime('%H:%M:%S')}")

    # Check which glossaries already exist.
    existing = list_glossaries()

    # Read the terms BEFORE deleting anything, so a missing or broken database
    # cannot leave the account without its current glossary.
    print(f"\n📥 En yüksek skorlu {MAX_TERMS:,} terim alınıyor...")
    terms = get_top_terms(MAX_TERMS)
    print(f"✅ {len(terms):,} terim alındı")

    if not terms:
        print("⚠️ Veritabanından terim alınamadı; mevcut glossary korunuyor.")
    else:
        # Remove glossaries that carry the same name as the one being created.
        _delete_glossaries_named(existing, GLOSSARY_NAME)

        glossary_id = create_glossary(terms)

        if glossary_id:
            # Sample sentences used to compare output with and without the glossary.
            test_texts = [
                "Check the hydraulic pump belt tension before starting the engine.",
                "The travel motor requires regular maintenance every 500 hours.",
                "Replace the fuel filter and check the transmission fluid level.",
                "Inspect the undercarriage and track tension on the excavator.",
                "The boom cylinder seal needs replacement due to oil leakage."
            ]

            for text in test_texts:
                test_translation(glossary_id, text)

            print(f"\n💾 Glossary ID'yi kaydedin: {glossary_id}")

    print(f"\n⏰ Bitiş: {datetime.now().strftime('%H:%M:%S')}")

# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

