"""
Glossary Exporter Module
========================
Converters for DeepL CSV, Google AutoML TSV and other glossary output formats.
"""

import csv
import json
import sys
import os
from typing import List, Dict, Optional
from datetime import datetime

# Config import
sys.path.insert(0, '..')
try:
    import config
except ImportError:
    from .. import config


class GlossaryExporter:
    """
    Writes extracted terminology to several glossary output formats.

    Supported outputs: the internal candidates CSV, a DeepL glossary CSV,
    a Google AutoML TSV, TMX and JSON, plus a plain-text processing log and
    a JSON checkpoint used to resume an interrupted run.

    Every ``terms`` argument is a list of dicts. All exporters read the
    ``'source'`` and ``'target'`` keys; the candidates CSV additionally
    reads ``'frequency'`` and ``'confidence'``.
    """

    # Single-pass translation table for XML text/attribute values: the five
    # predefined entities are escaped, and code points that XML 1.0 does not
    # allow in a document (C0 controls other than TAB/LF/CR, U+FFFE, U+FFFF)
    # are dropped so the generated TMX stays parseable.
    _XML_TRANSLATION = {
        ord('&'): '&amp;',
        ord('<'): '&lt;',
        ord('>'): '&gt;',
        ord('"'): '&quot;',
        ord("'"): '&apos;',
        0xFFFE: None,
        0xFFFF: None,
    }
    _XML_TRANSLATION.update(dict.fromkeys(
        cp for cp in range(0x20) if cp not in (0x09, 0x0A, 0x0D)
    ))

    # Characters that would break the one-entry-per-line, tab-separated
    # layout of the TSV export; each is replaced by a single space.
    _TSV_TRANSLATION = {ord('\t'): ' ', ord('\n'): ' ', ord('\r'): ' '}

    def __init__(self, output_dir: Optional[str] = None):
        """
        Args:
            output_dir: Output directory; falls back to ``config.OUTPUT_DIR``
                when omitted or empty. The directory is created if missing.
        """
        self.output_dir = output_dir or config.OUTPUT_DIR

        # Ensure output directory exists
        os.makedirs(self.output_dir, exist_ok=True)

    def export_glossary_candidates(self, terms: List[Dict],
                                   filepath: Optional[str] = None) -> str:
        """
        Save term candidates in the internal CSV format.

        Format: source_term,target_term,frequency,confidence (with header);
        confidence is written with four decimal places.

        Args:
            terms: List of term dicts
            filepath: Output file path
                (default: ``config.OUTPUT_GLOSSARY_CANDIDATES``)

        Returns:
            Path of the written file
        """
        filepath = filepath or config.OUTPUT_GLOSSARY_CANDIDATES

        # newline='' lets the csv module control line endings itself.
        with open(filepath, 'w', encoding='utf-8', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['source_term', 'target_term', 'frequency', 'confidence'])
            writer.writerows(
                (
                    term['source'],
                    term['target'],
                    term['frequency'],
                    f"{term['confidence']:.4f}",
                )
                for term in terms
            )

        print(f"✅ Glossary candidates saved: {filepath} ({len(terms)} terms)")
        return filepath

    def export_deepl_glossary(self, terms: List[Dict],
                              filepath: Optional[str] = None) -> str:
        """
        Save terms as a DeepL glossary CSV.

        Format: source,target (no header)

        Args:
            terms: List of term dicts
            filepath: Output file path
                (default: ``config.OUTPUT_DEEPL_GLOSSARY``)

        Returns:
            Path of the written file
        """
        filepath = filepath or config.OUTPUT_DEEPL_GLOSSARY

        with open(filepath, 'w', encoding='utf-8', newline='') as f:
            writer = csv.writer(f)
            # DeepL glossary format: NO HEADER
            writer.writerows((term['source'], term['target']) for term in terms)

        print(f"✅ DeepL glossary saved: {filepath} ({len(terms)} terms)")
        return filepath

    def export_google_automl(self, terms: List[Dict],
                             filepath: Optional[str] = None,
                             source_lang: str = 'en',
                             target_lang: str = 'tr') -> str:
        """
        Save terms as a TSV file for Google Cloud Translation AutoML.

        Format: TSV with the language codes in the first row, then one
        ``source<TAB>target`` pair per line. Tabs, line feeds and carriage
        returns inside a term are replaced by spaces so that every entry
        stays on exactly one row.

        NOTE(review): whether the consuming Google service expects the
        language-code header row should be confirmed against its docs.

        Args:
            terms: List of term dicts
            filepath: Output file path
                (default: ``config.OUTPUT_GOOGLE_AUTOML``)
            source_lang: Source language code
            target_lang: Target language code

        Returns:
            Path of the written file
        """
        filepath = filepath or config.OUTPUT_GOOGLE_AUTOML

        with open(filepath, 'w', encoding='utf-8') as f:
            # Google AutoML format: language codes in first row
            f.write(f"{source_lang}\t{target_lang}\n")
            f.writelines(
                f"{self._tsv_field(term['source'])}\t{self._tsv_field(term['target'])}\n"
                for term in terms
            )

        print(f"✅ Google AutoML TSV saved: {filepath} ({len(terms)} terms)")
        return filepath

    def export_tmx(self, terms: List[Dict],
                   filepath: Optional[str] = None,
                   source_lang: str = 'en',
                   target_lang: str = 'tr') -> str:
        """
        Save terms as a TMX (Translation Memory eXchange) 1.4 file.

        Each term becomes one ``<tu>`` holding a source and a target
        ``<tuv>``. Term text and language codes are XML-escaped.

        Args:
            terms: List of term dicts
            filepath: Output file path
                (default: ``glossary.tmx`` inside ``self.output_dir``)
            source_lang: Source language code
            target_lang: Target language code

        Returns:
            Path of the written file
        """
        filepath = filepath or os.path.join(self.output_dir, 'glossary.tmx')

        # The language codes land inside attribute values, so they need the
        # same escaping as the segment text.
        src_attr = self._escape_xml(source_lang)
        tgt_attr = self._escape_xml(target_lang)

        # TMX XML header
        tmx_header = f'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE tmx SYSTEM "tmx14.dtd">
<tmx version="1.4">
  <header
    creationtool="TerminologyExtractor"
    creationtoolversion="1.0"
    datatype="plaintext"
    segtype="sentence"
    adminlang="{src_attr}"
    srclang="{src_attr}"
    o-tmf="terminology-extractor">
  </header>
  <body>
'''

        tmx_footer = '''  </body>
</tmx>
'''

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(tmx_header)

            for term in terms:
                # Escape XML special chars
                source = self._escape_xml(term['source'])
                target = self._escape_xml(term['target'])

                f.write(f'''    <tu>
      <tuv xml:lang="{src_attr}">
        <seg>{source}</seg>
      </tuv>
      <tuv xml:lang="{tgt_attr}">
        <seg>{target}</seg>
      </tuv>
    </tu>
''')

            f.write(tmx_footer)

        print(f"✅ TMX file saved: {filepath} ({len(terms)} terms)")
        return filepath

    def export_json(self, terms: List[Dict],
                    filepath: Optional[str] = None,
                    source_lang: str = 'en',
                    target_lang: str = 'tr') -> str:
        """
        Save terms as JSON together with a small metadata header.

        The document has a ``metadata`` object (generation timestamp, term
        count, language codes) and a ``terms`` array holding the term dicts
        unchanged.

        Args:
            terms: List of term dicts (must be JSON-serializable)
            filepath: Output file path
                (default: ``glossary.json`` inside ``self.output_dir``)
            source_lang: Source language code recorded in the metadata
            target_lang: Target language code recorded in the metadata

        Returns:
            Path of the written file
        """
        filepath = filepath or os.path.join(self.output_dir, 'glossary.json')

        output = {
            'metadata': {
                'generated_at': datetime.now().isoformat(),
                'total_terms': len(terms),
                'source_lang': source_lang,
                'target_lang': target_lang
            },
            'terms': terms
        }

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(output, f, ensure_ascii=False, indent=2)

        print(f"✅ JSON file saved: {filepath} ({len(terms)} terms)")
        return filepath

    def export_all(self, terms: List[Dict]) -> Dict[str, str]:
        """
        Save the terms in the standard set of formats using default paths.

        Writes the candidates CSV, the DeepL CSV, the Google AutoML TSV and
        the JSON file. TMX is not part of this batch; call ``export_tmx``
        separately when it is needed.

        Args:
            terms: List of term dicts

        Returns:
            Dict of format name (``'candidates'``, ``'deepl'``, ``'google'``,
            ``'json'``) -> written file path
        """
        return {
            'candidates': self.export_glossary_candidates(terms),
            'deepl': self.export_deepl_glossary(terms),
            'google': self.export_google_automl(terms),
            'json': self.export_json(terms),
        }

    def _escape_xml(self, text: str) -> str:
        """
        Escape XML special characters in ``text``.

        ``& < > " '`` become their predefined entities, and characters that
        are illegal in XML 1.0 (control characters other than tab, line feed
        and carriage return, plus U+FFFE/U+FFFF) are removed. The whole
        replacement happens in a single pass, so already-produced ``&`` of
        an entity is never escaped twice.
        """
        return text.translate(GlossaryExporter._XML_TRANSLATION)

    @staticmethod
    def _tsv_field(text: str) -> str:
        """Replace tabs and line breaks with spaces so ``text`` fits one TSV cell."""
        return text.translate(GlossaryExporter._TSV_TRANSLATION)

    def write_log(self, stats: Dict, filepath: Optional[str] = None) -> str:
        """
        Write a plain-text processing log.

        The log contains a banner, the generation timestamp and one
        ``key: value`` line per entry of ``stats``.

        Args:
            stats: Statistics dict
            filepath: Log file path (default: ``config.OUTPUT_LOG``)

        Returns:
            Path of the written file
        """
        filepath = filepath or config.OUTPUT_LOG

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write("=" * 60 + "\n")
            f.write("TERMINOLOGY EXTRACTOR - Processing Log\n")
            f.write("=" * 60 + "\n\n")
            f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            f.write("STATISTICS\n")
            f.write("-" * 40 + "\n")

            for key, value in stats.items():
                f.write(f"{key}: {value}\n")

            f.write("\n" + "=" * 60 + "\n")

        print(f"✅ Log saved: {filepath}")
        return filepath

    def save_checkpoint(self, data: Dict, filepath: Optional[str] = None) -> str:
        """
        Save a checkpoint so an interrupted run can be resumed.

        The checkpoint is ``data`` plus a ``'saved_at'`` ISO timestamp. The
        caller's dict is not modified. The file is written to a temporary
        sibling first and then moved into place, so an interruption during
        the write cannot destroy the previous checkpoint.

        Args:
            data: Checkpoint data (must be JSON-serializable)
            filepath: Checkpoint file path
                (default: ``config.OUTPUT_CHECKPOINT``)

        Returns:
            Path of the written file
        """
        filepath = filepath or config.OUTPUT_CHECKPOINT

        # Work on a copy: stamping the timestamp must not leak into the
        # caller's state dict.
        payload = {**data, 'saved_at': datetime.now().isoformat()}

        tmp_path = f"{filepath}.tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
            # os.replace swaps the file in one step on the same filesystem.
            os.replace(tmp_path, filepath)
        finally:
            # After a successful replace the temp file is gone; this only
            # cleans up the partial file left behind by a failed write.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        return filepath

    def load_checkpoint(self, filepath: Optional[str] = None) -> Optional[Dict]:
        """
        Load a previously saved checkpoint.

        Args:
            filepath: Checkpoint file path
                (default: ``config.OUTPUT_CHECKPOINT``)

        Returns:
            The checkpoint data, or None when the file does not exist or
            cannot be read/parsed (a warning is printed in the latter case).
        """
        filepath = filepath or config.OUTPUT_CHECKPOINT

        if not os.path.exists(filepath):
            return None

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # OSError: unreadable file; ValueError: invalid JSON or bad
            # UTF-8 (JSONDecodeError and UnicodeDecodeError both derive
            # from it). A broken checkpoint means "start fresh", not crash.
            print(f"⚠️ Failed to load checkpoint: {e}")
            return None


# Test
if __name__ == "__main__":
    # Manual smoke test: export a tiny sample glossary with the default
    # (config-provided) output locations and list the files produced.
    sample_terms = [
        dict(source='hydraulic pump', target='hidrolik pompa', frequency=45, confidence=0.95),
        dict(source='safety valve', target='emniyet valfi', frequency=32, confidence=0.88),
        dict(source='oil filter', target='yağ filtresi', frequency=28, confidence=0.92),
    ]

    # Constructing the exporter creates the output directory if needed.
    glossary_exporter = GlossaryExporter()

    print("Test Terms:")
    for entry in sample_terms:
        src_text, tgt_text = entry['source'], entry['target']
        print(f"  {src_text} -> {tgt_text}")

    # Write every default format in one go
    print("\nExporting...")
    written_files = glossary_exporter.export_all(sample_terms)

    print("\nExported files:")
    for format_name in written_files:
        print(f"  {format_name}: {written_files[format_name]}")

