o
    48i(                     @   s8  d Z ddlZddlZddlZddlZddlmZmZmZ ddl	m	Z	 ej
dd zddlZW n ey>   ddlmZ Y nw G dd	 d	Zed
kre ZdddddddddddddddgZed eD ]Zeded  ded   qged eeZed e D ]\ZZ
ede de
  qdS dS ) uv   
Glossary Exporter Module
========================
DeepL CSV, Google AutoML TSV ve diğer format dönüştürücüler
    N)ListDictOptional)datetimez..   )configc                   @   sF  e Zd ZdZd$defddZ	d$dee dedefd	d
Z	d$dee dedefddZ				d%dee dedededef
ddZ
			d%dee dedededef
ddZ	d$dee dedefddZdee deeef fddZdedefddZd$dededefddZd$dededefd d!Zd$dedee fd"d#ZdS )&GlossaryExporterur   
    Glossary çıktı formatları oluşturucu.
    DeepL CSV, Google AutoML TSV, ve dahili format destekler.
    N
output_dirc                 C   s    |pt j| _tj| jdd dS )uU   
        Args:
            output_dir: Çıktı dizini (default: config'den)
        T)exist_okN)r   
OUTPUT_DIRr	   osmakedirs)selfr	    r   </var/www/html/PEPCVSON/terminology-extractor/src/exporter.py__init__   s   zGlossaryExporter.__init__termsfilepathreturnc              	   C   s   |pt j}t|dddd,}t|}|g d |D ]}||d |d |d |d	 d
g qW d   n1 s<w   Y  td| dt| d |S )u)  
        Terim adaylarını dahili formatta kaydet.
        
        Format: source_term,target_term,frequency,confidence
        
        Args:
            terms: List of term dicts
            filepath: Çıktı dosya yolu
            
        Returns:
            Kaydedilen dosya yolu
        wutf-8 encodingnewline)source_termtarget_term	frequency
confidencesourcetargetr   r   z.4fNu   ✅ Glossary candidates saved:  ( terms))r   OUTPUT_GLOSSARY_CANDIDATESopencsvwriterwriterowprintlenr   r   r   fr&   termr   r   r   export_glossary_candidates&   s   


z+GlossaryExporter.export_glossary_candidatesc                 C   s   |pt j}t|dddd}t|}|D ]}||d |d g qW d   n1 s-w   Y  td| d	t| d
 |S )u  
        DeepL Glossary formatında kaydet.
        
        Format: source,target (no header)
        
        Args:
            terms: List of term dicts
            filepath: Çıktı dosya yolu
            
        Returns:
            Kaydedilen dosya yolu
        r   r   r   r   r   r    Nu   ✅ DeepL glossary saved: r!   r"   )r   OUTPUT_DEEPL_GLOSSARYr$   r%   r&   r'   r(   r)   r*   r   r   r   export_deepl_glossaryE   s   

z&GlossaryExporter.export_deepl_glossaryentrsource_langtarget_langc           	      C   s   |pt j}t|ddd3}|| d| d |D ]}|d dd}|d dd}|| d| d qW d	   n1 sBw   Y  td
| dt| d |S )u{  
        Google Cloud Translation AutoML formatında kaydet.
        
        Format: TSV with language codes in first row
        
        Args:
            terms: List of term dicts
            filepath: Çıktı dosya yolu
            source_lang: Kaynak dil kodu
            target_lang: Hedef dil kodu
            
        Returns:
            Kaydedilen dosya yolu
        r   r   r   	
r    r    Nu   ✅ Google AutoML TSV saved: r!   r"   )r   OUTPUT_GOOGLE_AUTOMLr$   writereplacer(   r)   )	r   r   r   r2   r3   r+   r,   r   r    r   r   r   export_google_automl^   s   

z%GlossaryExporter.export_google_automlc                 C   s   |p	t j| jd}d| d| d}d}t|ddd7}|| |D ]"}| |d	 }	| |d
 }
|d| d|	 d| d|
 d	 q$|| W d   n1 sVw   Y  td| dt| d |S )u?  
        TMX (Translation Memory eXchange) formatında kaydet.
        
        Args:
            terms: List of term dicts
            filepath: Çıktı dosya yolu
            source_lang: Kaynak dil kodu
            target_lang: Hedef dil kodu
            
        Returns:
            Kaydedilen dosya yolu
        zglossary.tmxz<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE tmx SYSTEM "tmx14.dtd">
<tmx version="1.4">
  <header
    creationtool="TerminologyExtractor"
    creationtoolversion="1.0"
    datatype="plaintext"
    segtype="sentence"
    adminlang="z"
    srclang="z:"
    o-tmf="terminology-extractor">
  </header>
  <body>
z  </body>
</tmx>
r   r   r4   r   r    z    <tu>
      <tuv xml:lang="z">
        <seg>z)</seg>
      </tuv>
      <tuv xml:lang="z</seg>
      </tuv>
    </tu>
Nu   ✅ TMX file saved: r!   r"   )	r   pathjoinr	   r$   r9   _escape_xmlr(   r)   )r   r   r   r2   r3   
tmx_header
tmx_footerr+   r,   r   r    r   r   r   
export_tmx   s2   	

zGlossaryExporter.export_tmxc                 C   s   |p	t j| jd}t  t|ddd|d}t|ddd}t	j
||d	d
d W d   n1 s5w   Y  td| dt| d |S )u   
        JSON formatında kaydet.
        
        Args:
            terms: List of term dicts
            filepath: Çıktı dosya yolu
            
        Returns:
            Kaydedilen dosya yolu
        zglossary.jsonr0   r1   )generated_attotal_termsr2   r3   )metadatar   r   r   r4   Fr   ensure_asciiindentNu   ✅ JSON file saved: r!   r"   )r   r<   r=   r	   r   now	isoformatr)   r$   jsondumpr(   )r   r   r   outputr+   r   r   r   export_json   s   

zGlossaryExporter.export_jsonc                 C   s@   i }|  ||d< | ||d< | ||d< | ||d< |S )u   
        Tüm formatlarda kaydet.
        
        Args:
            terms: List of term dicts
            
        Returns:
            Dict of format -> filepath
        
candidatesdeeplgooglerJ   )r-   r/   r;   rM   )r   r   resultsr   r   r   
export_all   s   
zGlossaryExporter.export_alltextc                 C   s@   | dd}| dd}| dd}| dd}| d	d
}|S )u#   XML özel karakterlerini escape et.&z&amp;<z&lt;>z&gt;"z&quot;'z&apos;)r:   )r   rS   r   r   r   r>      s   zGlossaryExporter._escape_xmlstatsc                 C   s   |pt j}t|dddH}|d |d |d |dt d d	 |d
 |d | D ]\}}|| d| d q8|d W d   n1 sWw   Y  td|  |S )u   
        İşlem logunu kaydet.
        
        Args:
            stats: İstatistik dict'i
            filepath: Log dosya yolu
            
        Returns:
            Kaydedilen dosya yolu
        r   r   r4   z=============================================================
z'TERMINOLOGY EXTRACTOR - Processing Log
z>============================================================

zGenerated: z%Y-%m-%d %H:%M:%Sz

zSTATISTICS
z)----------------------------------------
: r6   z>
============================================================
Nu   ✅ Log saved: )	r   
OUTPUT_LOGr$   r9   r   rH   strftimeitemsr(   )r   rY   r   r+   keyvaluer   r   r   	write_log   s   





zGlossaryExporter.write_logdatac                 C   s`   |pt j}t  |d< t|ddd}tj||ddd W d   |S 1 s)w   Y  |S )	u   
        Checkpoint kaydet (interrupt durumunda devam için).
        
        Args:
            data: Checkpoint verisi
            filepath: Checkpoint dosya yolu
            
        Returns:
            Kaydedilen dosya yolu
        saved_atr   r   r4   Fr   rE   N)r   OUTPUT_CHECKPOINTr   rH   rI   r$   rJ   rK   )r   ra   r   r+   r   r   r   save_checkpoint  s   

z GlossaryExporter.save_checkpointc              
   C   s   |pt j}tj|sdS z t|ddd}t|W  d   W S 1 s&w   Y  W dS  tyG } zt	d|  W Y d}~dS d}~ww )u   
        Checkpoint yükle.
        
        Args:
            filepath: Checkpoint dosya yolu
            
        Returns:
            Checkpoint verisi veya None
        Nrr   r4   u"   ⚠️ Failed to load checkpoint: )
r   rc   r   r<   existsr$   rJ   load	Exceptionr(   )r   r   r+   er   r   r   load_checkpoint(  s   

(z GlossaryExporter.load_checkpoint)N)Nr0   r1   )__name__
__module____qualname____doc__strr   r   r   r-   r/   r;   rA   rM   rR   r>   r`   rd   r   rj   r   r   r   r   r      sd    
 

"
>
	r   __main__zhydraulic pumpzhidrolik pompa-   gffffff?)r   r    r   r   zsafety valvezemniyet valfi    g)\(?z
oil filteru   yağ filtresi   gq=
ףp?zTest Terms:z  r   z -> r    z
Exporting...z
Exported files:rZ   )rn   r%   rJ   sysr   typingr   r   r   r   r<   insertr   ImportErrorr   r   rk   exporter
test_termsr(   r,   rR   rQ   r]   fmtr   r   r   r   <module>   s@      ,
