o
    61i$0                     @   s~   d Z ddlZddlZddlZddlmZmZmZmZ ddl	m
Z
 e
G dd dZG dd dZd	d
 Zedkr=e  dS dS )u  
Query Analyzer - Sorgu Analiz Modülü
====================================
Kullanıcının girdiği metinden Marka, Model ve Doküman Tipi bilgisini çıkarır.

Kullanım:
    from query_analyzer import QueryAnalyzer
    
    analyzer = QueryAnalyzer()
    result = analyzer.analyze("JCB 330 hidrolik pompa arızası")
    # {'brand': 'JCB', 'model': '330', 'doc_type': 'arıza', 'cleaned_query': 'hidrolik pompa arızası'}
    N)DictListOptionalTuple)	dataclassc                   @   s   e Zd ZU dZeed< dZee ed< dZe	e ed< dZ
ee ed< dZee ed< dZeed	< d
Zeed< defddZdefddZdS )QueryAnalysiszSorgu analiz sonucuoriginal_queryNbrandbrand_aliasesmodeldoc_type cleaned_query        
confidencereturnc                 C   s   | j | j| j| j| j| jdS )Nr   r	   r   r   r   r   r   self r   0/var/www/html/PEPCVSON/scripts/query_analyzer.pyto_dict!   s   zQueryAnalysis.to_dictc                 C   s   | j dup	| jduS )u   Filtrelenebilir bilgi var mı?N)r	   r   r   r   r   r   has_filters+   s   zQueryAnalysis.has_filters)__name__
__module____qualname____doc__str__annotations__r	   r   r
   r   r   r   r   r   floatr   r   boolr   r   r   r   r   r      s   
 
r   c                   @   s  e Zd ZdZd#defddZdedefdd	Zd
edefddZ	dede
e fddZdede
e deee ee f fddZdede
e dee deee ee f fddZdedee fddZd
edee dee defddZdedefddZd edeeee f fd!d"ZdS )$QueryAnalyzeru-   Sorgu Analizörü - Marka/Model/Tip çıkarmaNconfig_pathc                 C   s   |du rt jt jt}t j|d}| || _| jdi | _	| jdi | _
i | _| j	 D ]\}}|| j| < |dg D ]	}|| j| < qDq3i | _| j	 D ]#\}}|dg D ]}| }|| jvrqg | j|< | j| | qaqWdS )zN
        Args:
            config_path: brands_models.json dosya yolu
        Nzbrands_models.jsonbrandsdocument_typesaliasesmodels)ospathdirnameabspath__file__join_load_configconfiggetr#   	doc_typesalias_to_branditemslowermodel_to_brandappend)r   r"   
script_dirr	   dataaliasr   model_lowerr   r   r   __init__3   s*   

zQueryAnalyzer.__init__r(   r   c              
   C   s   z t |ddd}t|W  d   W S 1 sw   Y  W dS  ty5   td|  i i d Y S  tjyS } ztd|  i i dW  Y d}~S d}~ww )u   Config dosyasını yüklerzutf-8)encodingNu$   ⚠️ Config dosyası bulunamadı: )r#   r$   u   ⚠️ Config parse hatası: )openjsonloadFileNotFoundErrorprintJSONDecodeError)r   r(   fer   r   r   r-   P   s   (zQueryAnalyzer._load_configqueryc                 C   s   |st ddS |  }| |}t |d}| ||\}}|r0||_| j|i dg |_| 	|||\}}|rZ||_
|jsZ| | jv rZ| j|  }	t|	dkrZ|	d |_| |}
|
rd|
|_| ||||_| ||_|S )u   
        Sorguyu analiz et ve marka/model/tip çıkar.
        
        Args:
            query: Kullanıcı sorgusu
            
        Returns:
            QueryAnalysis nesnesi
        r   )r   r%      r   )r   r3   strip	_tokenize_detect_brandr	   r#   r/   r
   _detect_modelr   r4   len_detect_doc_typer   _clean_queryr   _calculate_confidencer   )r   rE   query_lowertokensresultr	   brand_matchr   model_matchpossible_brandsr   r   r   r   analyze\   s,   





zQueryAnalyzer.analyzetextc                 C   s   t dd|}| S )u   Metni token'lara ayırz[^a-z0-9\s\-] )resubsplit)r   rV   r   r   r   rH      s   zQueryAnalyzer._tokenizerO   rP   c                 C   sZ   t | j tdd}|D ]}dt| d }t||}|r*| j| | f  S qdS )zi
        Marka tespit et.
        
        Returns:
            (brand_name, matched_text) tuple
        Tkeyreverse\bNN)sortedr1   keysrK   rX   escapesearchgroup)r   rO   rP   sorted_aliasesr8   patternmatchr   r   r   rI      s   zQueryAnalyzer._detect_brandr	   c                 C   s   |r2| j |i dg }t|tddD ]}dt|  d }t||}|r1|| f  S qg d}|D ]$}t	||}	|	D ]}|
dd }
|
 | jv r[|
|f    S qBq8dS )	zi
        Model tespit et.
        
        Returns:
            (model_name, matched_text) tuple
        r&   Tr[   r^   )z)\b([a-z]{1,3}\s*\d{2,4}[a-z]?(?:-\d+)?)\bz\b(\d{2,4}[a-z]?(?:-\d+)?)\bz\b([a-z]\d{1,2})\brW   r   r_   )r#   r/   r`   rK   rX   rb   r3   rc   rd   findallreplaceupperr4   )r   rO   rP   r	   brand_modelsr   rf   rg   model_patternsmatchesmatch_cleanr   r   r   rJ      s$   zQueryAnalyzer._detect_modelc                 C   s:   | j  D ]\}}|D ]}| |v r|    S qqdS )u   Doküman tipini tespit etN)r0   r2   r3   )r   rO   r   keywordskeywordr   r   r   rL      s   zQueryAnalyzer._detect_doc_typerR   rS   c                 C   sb   |}|rt t |t j}|d|}|r&t t |t j}|d|}t dd| }|S )u)   Sorgudan marka ve model bilgisini çıkarr   z\s+rW   )rX   compilerb   
IGNORECASErY   rG   )r   rE   rR   rS   cleanedrf   r   r   r   rM      s   zQueryAnalyzer._clean_queryrQ   c                 C   s8   d}|j r	|d7 }|jr|d7 }|jr|d7 }t|dS )u'   Analiz güven skoru hesapla (0.0 - 1.0)r   g?g?g      ?)r	   r   r   min)r   rQ   scorer   r   r   rN      s   
z#QueryAnalyzer._calculate_confidence	file_pathc                 C   s0  dddd}|s
|S | ddd}t|D ]a\}}| }|| jv rg||d< |d t|d k rI||d  }tdd |dd	 D sI||d
< |d t|d k rf||d  }tdd |D rf||d< q| }	|	| jv rx| j|	 |d< qq|d s|r|d }
t	
d|
t	j}|r|d |d< |S )u'  
        Dosya yolundan marka ve model bilgisi çıkar.
        
        Örnek path: pdfs/İŞ MAKİNASI GRUBU/JCB/TELESKOPİK YÜKLEYİCİLER/330/330 SERVİS MANUELİ.pdf
        
        Returns:
            {'brand': 'JCB', 'model': '330', 'category': 'TELESKOPİK YÜKLEYİCİLER'}
        N)r	   r   category\/r	   rF   c                 s       | ]}|  V  qd S Nisdigit.0cr   r   r   	<genexpr>	      z2QueryAnalyzer.extract_from_path.<locals>.<genexpr>   rw      c                 s   rz   r{   r|   r~   r   r   r   r     r   r   z&\b([A-Z]{0,3}\d{2,4}[A-Z]?(?:-\d+)?)\b)ri   rZ   	enumeraterj   r#   rK   anyr3   r1   rX   rc   rr   rd   )r   rv   rQ   partsipart
part_upper	next_partmodel_candidate
part_lowerfilenamerS   r   r   r   extract_from_path   s8   	

zQueryAnalyzer.extract_from_pathr{   )r   r   r   r   r   r:   r   r-   r   rU   r   rH   r   r   rI   rJ   rL   rM   r   rN   r   r   r   r   r   r!   0   s    0*2"""r!   c                  C   s6  t  } g d}td td td |D ]=}| |}td|  td|jp)d  td|jp3d  td|jp=d  td	|j  td
|jd qtd td td g d}|D ]3}| |}td|dd   td|d p~d  td|d pd  td|d pd  qedS )zTest fonksiyonu)u    JCB 330 hidrolik pompa arızasıu#   komatsu pc200 motor yağ kapasitesiu!   Caterpillar 320D parça kataloğuu   hidrolik pompa arıza teşhisu   Liebherr LHM 150 servis bakımu   ZF 4WG94 şanzıman tamiru   Volvo EC210 elektrik şemasızPC400 motor problemiz<============================================================z   QUERY ANALYZER TESTu   
📝 Sorgu: z
   Marka: -z
   Model: z   Tip: z
   Temiz: u      Güven: z.0%z=
============================================================z   PATH EXTRACTION TEST)uV   pdfs/İŞ MAKİNASI GRUBU/JCB/TELESKOPİK YÜKLEYİCİLER/330/330 SERVİS MANUELİ.pdfuR   pdfs/İŞ MAKİNASI GRUBU/KOMATSU/EKSKAVATÖR/PC200-8/PC200-8 PARÇA KATALOĞU.pdfu=   pdfs/FORKLİFT GRUBU/HYUNDAI/HDF50/HDF50 SERVİS MANUELİ.pdfu   
📁 Path: ...iNr	   r   z   Kategori: rw   )	r!   rA   rU   r	   r   r   r   r   r   )analyzertest_queriesrE   rQ   
test_pathsr(   r   r   r   test_analyzer&  s0   

r   __main__)r   r'   rX   r>   typingr   r   r   r   dataclassesr   r   r!   r   r   r   r   r   r   <module>   s    w/
