o
    $d4iO                     @   sL  d Z ddlZddlZddlmZmZmZmZ ddlm	Z	m
Z
 zddlmZ ddlmZmZmZmZ W n ey>   ed  w zddlmZ W n eyS   ed	  w dd
lmZmZ ddlmZ eddZeeddZeddZdZ dZ!dZ"dZ#e	G dd dZ$e	G dd dZ%G dd dZ&dd Z'e(dkre'  dS dS )u  
Smart Search - Akıllı Arama Servisi
===================================
Query Expander + Hybrid Search entegrasyonu.

Akış:
1. Kullanıcı Türkçe sorgu yazar
2. Query Expander düzeltir ve İngilizce terimleri ekler
3. Query Analyzer marka/model çıkarır
4. Qdrant'ta hybrid search (filter + semantic)
5. Sonuçlar döner

Kullanım:
    from smart_search import SmartSearchService
    
    service = SmartSearchService()
    results = service.search("jcb 330 bom silindir kaçırıyor")
    N)DictListOptionalAny)	dataclassfield)QdrantClient)FilterFieldCondition	MatchText
MatchValueu;   ❌ qdrant-client yüklü değil: pip install qdrant-client)OpenAIu-   ❌ openai yüklü değil: pip install openai)QueryAnalyzerQueryAnalysis)SmartTranslatorQDRANT_HOSTz10.10.10.25QDRANT_PORT6333QDRANT_COLLECTIONmachine_docsztext-embedding-3-largei   2   d   c                   @   sx   e Zd ZU dZeed< eed< eed< ee ed< eed< dZ	ee ed< dZ
ee ed	< d
Zeed< defddZdS )SearchResultzTek bir arama sonucuidscorepdf_filenamepage_numbertextNbrandmodel pdf_pathreturnc              	   C   sJ   d}| j | j| j| jt| j|kr| jd | d n| j| j| j| jdS )Ni  ...r   r   r   r   r   r   r   r!   )	r   r   r   r   lenr   r   r   r!   )self
text_limit r(   ./var/www/html/PEPCVSON/scripts/smart_search.pyto_dictD   s   $zSearchResult.to_dict)__name__
__module____qualname____doc__str__annotations__floatr   intr   r   r!   r   r*   r(   r(   r(   r)   r   8   s   
 r   c                   @   st   e Zd ZU dZeed< eeef ed< eed< eed< ee	 ed< e
ed< e
ed< eed	< eed
< defddZdS )SmartSearchResponseu   Akıllı arama yanıtıoriginal_queryrelevant_termsanalysissearch_queryresultstotal_foundsearch_time_msfilter_appliedfallback_usedr"   c              
   C   sN   | j t| j | jj| jj| jjd| jdd | j	D | j
| j| j| jd	S )N)r   r   doc_typec                 S   s   g | ]}|  qS r(   )r*   ).0rr(   r(   r)   
<listcomp>q   s    z/SmartSearchResponse.to_dict.<locals>.<listcomp>)	r4   english_termsdetectedr7   r8   r9   r:   r;   r<   )r4   listr5   valuesr6   r   r   r=   r7   r8   r9   r:   r;   r<   r&   r(   r(   r)   r*   g   s   zSmartSearchResponse.to_dictN)r+   r,   r-   r.   r/   r0   r   r   r   r   r2   boolr*   r(   r(   r(   r)   r3   S   s   
 r3   c                   @   s   e Zd ZdZeeeddfdededededef
dd	Z	d
d Z
dedee fddZd(dededee fddZdee dee dedee fddZeddddfdededededededefddZ	 d)d!ed"ed#edee fd$d%Zd&d' ZdS )*SmartSearchServiceuw   
    Akıllı Arama Servisi
    
    Türkçe giriş -> Sözlük çevirisi -> Marka/Model tespiti -> Hybrid Search
    Nqdrant_hostqdrant_portcollection_nameopenai_api_keydictionary_pathc                 C   sX   |pt d}|stdt|d| _t||dd| _|| _t | _	t
 | _|   d S )NOPENAI_API_KEYzOPENAI_API_KEY gerekli!)api_key   )hostporttimeout)osgetenv
ValueErrorr   openair   qdrantrJ   r   
translatorr   analyzer_verify_connection)r&   rH   rI   rJ   rK   rL   rN   r(   r(   r)   __init__   s   	zSmartSearchService.__init__c              
   C   sP   z| j | j}td|jdd W dS  ty' } ztd| d}~ww )u    Qdrant bağlantısını doğrulau   ✓ Qdrant bağlantısı OK (,u	    vektör)u   Qdrant bağlantı hatası: N)rW   get_collectionrJ   printpoints_count	ExceptionConnectionError)r&   infoer(   r(   r)   rZ      s   z%SmartSearchService._verify_connectionr   r"   c                 C   s   | j jj|td}|jd jS )u   Metni vektöre çevir)inputr   r   )rV   
embeddingscreateEMBEDDING_MODELdata	embedding)r&   r   responser(   r(   r)   _get_embedding   s
   z!SmartSearchService._get_embeddingr6   
pdf_filterc                 C   s   g }|j r|tdt|j dd |jr"|tdt|jdd |r:ddl}|j|}|tdt|dd |s>dS t	|dS )	u   Qdrant filtresi oluşturr!   r   keymatchr   Nr   )valuemust)
r   appendr
   r   r   rS   pathbasenamer   r	   )r&   r6   rl   
conditionsrS   filenamer(   r(   r)   _build_filter   s6   


z SmartSearchService._build_filterquery_vector
filter_objlimitc              
   C   sb   z| j j| j|||dd}dd |jD W S  ty0 } ztd|  g W  Y d}~S d}~ww )zQdrant'ta arama yapT)rJ   queryquery_filterr|   with_payloadc                 S   s"   g | ]}t |j|j|jd qS ))r   r   payload)r/   r   r   r   )r>   hitr(   r(   r)   r@      s    z5SmartSearchService._search_qdrant.<locals>.<listcomp>u   ⚠️ Qdrant arama hatası: N)rW   query_pointsrJ   pointsr`   r^   )r&   rz   r{   r|   r8   rc   r(   r(   r)   _search_qdrant   s    z!SmartSearchService._search_qdrantTr}   
use_filteruse_expansionfallback_on_emptyc           !      C   s  t   }| j|}t| }	| j|}
|}	 td| d |}|g}	d}d}|r:|
	 s2|r:| 
|
|}d}| |}| |||}d}d	| d
| }|sh|rh|rh|shtd | |d|}d}d}n|sp|rptd |r|	rtd|	  |D ]Z}|di dd }|d }d}|	D ]5}|  }|sq||v r|d7 }d|v r| }t|D ]\}}||v r|dkr|d7 }q|d7 }qq|dkrd}|d  |7  < q}|jdd dd g }|D ]_}|di }|dp|dd}|dkr|dp|d d}|rtj|}t|d! |d ||d"|d#|d|d$d|d%|d&|d|d dd'}|| qtt   | d( } t|||
||t|| ||d)	S )*u  
        Akıllı arama yap.
        
        Args:
            query: Kullanıcı sorgusu (Türkçe, hatalar olabilir)
            limit: Maksimum sonuç sayısı
            use_filter: Marka/Model filtreleme kullan
            use_expansion: Sözlük genişletme kullan
            fallback_on_empty: Filtre sonuç vermezse genel aramaya düş
            
        Returns:
            SmartSearchResponse nesnesi
        FzDEBUG SEARCH: Raw='z' -> Corrected EN=''zDEBUG SEARCH: Raw Query Used='z' (No Translation)NTzQuery: z | Vector: uJ   ⚠️ Filtre (Marka/Model) sonuç vermedi, genel aramaya düşülüyor...u_   ℹ️ PDF Filtresi ile sonuç bulunamadı. Genel aramaya DÜŞÜLMÜYOR (Kullanıcı tercihi).u0   🔍 Phrase Boosting V2 uygulanıyor. Terimler: r   r   r    r   g        g? r   g333333?g?g333333?c                 S   s   | d S )Nr   r(   xr(   r(   r)   <lambda>p  s    z+SmartSearchService.search.<locals>.<lambda>)ro   reverser   source	viewer_v2	file_pathr!   r   pager   contentr   r   r$   i  )	r4   r5   r6   r7   r8   r9   r:   r;   r<   )timerX   get_relevant_termsrC   rD   rY   analyzecorrect_terminologyr^   has_filtersry   rk   r   getlowerstripsplit	enumeratesortrS   ru   rv   r   rt   r2   r3   r%   )!r&   r}   r|   r   r   r   rl   
start_timer5   rA   r6   turkish_queryenglish_queryr{   r;   search_vectorraw_resultsr<   r7   r?   text_contentcurrent_scoretotal_boostterm	sub_wordsiwordr8   r   r   ru   result
elapsed_msr(   r(   r)   search   s   




zSmartSearchService.search   r!   r   context_pagesc                 C   s  t d|| }|| }tdt|ddg}t|d}zR| jj| j|dddd	}g }	|d
 D ]4}
|
jdd
}||  kr@|kran q-|		t
t|
jd|
jdd||
jdd|
jddd q-|	jdd d |	W S  ty } ztd|  g W  Y d}~S d}~ww )u9  
        Belirli bir sayfanın çevresindeki sayfaları getir.
        
        Args:
            pdf_path: PDF dosya yolu
            page_number: Sayfa numarası
            context_pages: Kaç sayfa öncesi ve sonrasını getir
            
        Returns:
            Liste halinde sayfa sonuçları
        r   r!   rm   rn   rr   r   TF)rJ   scroll_filterr|   r   with_vectorsr   r   g      ?r   r    r   )r   r   r   r   r   r!   c                 S   s
   | j pdS )Nr   )r   r   r(   r(   r)   r     s   
 z5SmartSearchService.get_page_context.<locals>.<lambda>)ro   u   ⚠️ Sayfa context hatası: N)maxr
   r   r	   rW   scrollrJ   r   r   rt   r   r/   r   r   r`   r^   )r&   r!   r   r   
start_pageend_pagerw   r{   r8   page_resultspointr   rc   r(   r(   r)   get_page_context  sB   
	
z#SmartSearchService.get_page_contextc                 C   s   dS )u   Kaynakları temizleNr(   rE   r(   r(   r)   close  s   zSmartSearchService.close)N)r   )r+   r,   r-   r.   r   r   COLLECTION_NAMEr/   r2   r[   rZ   r   r1   rk   r   r   r	   ry   r   r   DEFAULT_LIMITrF   r3   r   r   r   r   r(   r(   r(   r)   rG   y   s|    
&
 
 1
:rG   c               
   C   s  ddl } | jdd}|jdddd |jd	d
tddd |jdddd |jdddd | }tds?td td dS |js<td td td td t	 }	 zt
d }| dkrfW n|sjW qV|j||j|j |j d}tdd   td!|j  td"t|j   td#|jjpd$  td%|jjpd$  td&|jdd'  d( td)|j d* td+|jrd,nd-  td.|j d/ t|jd0D ])\}}td1| d2|jd3d4|j  td5|j   td6|j!dd7  d( qW n+ t"y   Y n# t#y0 } ztd8|  ddl$}	|	%  W Y d}~nd}~ww qW|&  td9 dS t	 }|j|j|j|j |j d}ddl'}
t|
j(|) d:d;d< |&  dS )=u   CLI arayüzür   NzSmart Search Service)descriptionr}   ?zArama sorgusu)nargshelpz-lz--limit   u   Sonuç limiti)typedefaultr   z--no-filter
store_trueu!   Filtrelemeyi devre dışı bırak)actionr   z--no-expandu+   Sözlük genişletmeyi devre dışı bırakrM   u2   ❌ OPENAI_API_KEY ortam değişkeni ayarlanmalı!z!   export OPENAI_API_KEY='sk-...'zG
======================================================================u      🔍 SMART SEARCH SERVICEu=      Türkçe sorgu yazın, sistem sözlükten çevirir ve ararzF======================================================================Tu   
📝 Sorgu (q=çıkış): q)r|   r   r   
z<============================================================u   🔤 Orijinal: u   🇬🇧 İngilizce Terimler: u   🏭 Marka: -u   🔢 Model: u   🔍 Arama: <   r#   u   ⏱️ Süre: msu   🎯 Filter: Evetu   Hayıru   
📋 Sonuçlar (z):r   z   z. [z.3fz] u         📄 Sayfa: u         📝 r   u
   ❌ Hata: u   
👋 Güle güle!F   )ensure_asciiindent)*argparseArgumentParseradd_argumentr2   
parse_argsrS   rT   r^   r}   rG   rd   r   r   r   r|   	no_filter	no_expandr4   rC   r5   rD   r6   r   r   r7   r:   r;   r9   r   r8   r   r   r   r   KeyboardInterruptr`   	traceback	print_excr   jsondumpsr*   )r   parserargsservicer}   rj   r   r?   rc   r   r   r(   r(   r)   main  s   
 &r   __main__))r.   rS   r   typingr   r   r   r   dataclassesr   r   qdrant_clientr   qdrant_client.modelsr	   r
   r   r   ImportErrorr^   rV   r   query_analyzerr   r   smart_translatorr   rT   r   r2   r   r   rg   EMBEDDING_DIMr   	MAX_LIMITr   r3   rG   r   r+   r(   r(   r(   r)   <module>   sL   %  dT
