#!/usr/bin/env python3
"""
Search Service - Hybrid Search Modülü
=====================================
Vektör araması + Metadata filtreleme ile gelişmiş arama.

Özellikler:
- Query Analysis: Marka/Model otomatik tespit
- Metadata Filtering: Qdrant Filter ile arama uzayını daraltma
- Hybrid Search: Semantik + Exact match kombinasyonu
- Fallback: Filtre başarısız olursa genel aramaya düş

Kullanım:
    from search_service import HybridSearchService
    
    service = HybridSearchService()
    results = service.search("JCB 330 hidrolik pompa")
"""

import os
import time
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, field

try:
    from qdrant_client import QdrantClient
    from qdrant_client.models import Filter, FieldCondition, MatchValue, MatchText
except ImportError:
    print("❌ qdrant-client yüklü değil: pip install qdrant-client")
    raise

try:
    from openai import OpenAI
except ImportError:
    print("❌ openai yüklü değil: pip install openai")
    raise

from query_analyzer import QueryAnalyzer, QueryAnalysis


# ==================== CONFIGURATION ====================

# Qdrant connection settings. Each value can be overridden through the
# environment; the literals below are only the fallbacks used when the
# corresponding variable is unset.
QDRANT_HOST = os.getenv("QDRANT_HOST", "10.10.10.25")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))  # raises ValueError if not an integer
COLLECTION_NAME = os.getenv("QDRANT_COLLECTION", "machine_docs")

# OpenAI embedding model used to vectorise queries.
EMBEDDING_MODEL = "text-embedding-3-large"
# Presumably the vector size produced by EMBEDDING_MODEL and expected by the
# collection — TODO confirm against the collection's vector configuration.
EMBEDDING_DIM = 3072

# DEFAULT_LIMIT is the default `limit` argument of the search methods;
# MAX_LIMIT is the intended upper bound for a caller-supplied limit.
DEFAULT_LIMIT = 10
MAX_LIMIT = 50


@dataclass
class SearchResult:
    """A single search hit, flattened from a Qdrant point and its payload."""
    # Point id, converted to a string by the code that builds the result.
    id: str
    # Similarity score from the vector search; filter-only (scroll) lookups
    # use a fixed 1.0 because no vector comparison takes place.
    score: float
    # Name of the source PDF, read from the point payload.
    pdf_filename: str
    # Page inside the PDF, or None when the payload carries no page field.
    page_number: Optional[int]
    # Text of the matched chunk; the service truncates it to 500 characters.
    text: str
    # Values of the payload's "brand" / "model" keys, when present.
    brand: Optional[str] = None
    model: Optional[str] = None
    # Extra payload-derived details; `HybridSearchService.search` fills in
    # "pdf_path", "language" and "source".
    metadata: Dict = field(default_factory=dict)


@dataclass
class SearchResponse:
    """The outcome of one `HybridSearchService.search` call."""
    # The query string exactly as the caller passed it.
    query: str
    # Result of QueryAnalyzer.analyze(query) (brand / model detection, etc.).
    analysis: QueryAnalysis
    # Converted hits, in the order returned by Qdrant.
    results: List[SearchResult]
    # Number of entries in `results` (not the total number of matches in the
    # collection).
    total_found: int
    # Wall-clock duration of the whole search call, in milliseconds.
    search_time_ms: int
    # True when the returned results come from a metadata-filtered query;
    # reset to False when the fallback to an unfiltered query was used.
    filter_applied: bool
    # True when the filtered query returned nothing and an unfiltered query
    # was run instead.
    fallback_used: bool


class HybridSearchService:
    """
    Hybrid search over a Qdrant collection.

    Combines semantic (embedding) search with a metadata filter derived from
    the query: QueryAnalyzer extracts a brand and/or model from the query, and
    those terms are matched against the ``pdf_path`` payload field to narrow
    the search space. When the filtered search returns nothing, the service
    can fall back to an unfiltered semantic search.
    """

    def __init__(
        self,
        qdrant_host: str = QDRANT_HOST,
        qdrant_port: int = QDRANT_PORT,
        collection_name: str = COLLECTION_NAME,
        openai_api_key: Optional[str] = None
    ):
        """
        Create the OpenAI and Qdrant clients and verify the collection exists.

        Args:
            qdrant_host: Qdrant server host.
            qdrant_port: Qdrant server port.
            collection_name: Name of the collection to search.
            openai_api_key: OpenAI API key; falls back to the
                ``OPENAI_API_KEY`` environment variable when omitted.

        Raises:
            ValueError: No API key was supplied or found in the environment.
            ConnectionError: The collection could not be fetched from Qdrant.
        """
        api_key = openai_api_key or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY gerekli!")
        self.openai = OpenAI(api_key=api_key)

        self.qdrant = QdrantClient(host=qdrant_host, port=qdrant_port, timeout=30)
        self.collection_name = collection_name

        self.analyzer = QueryAnalyzer()

        # Fail fast: surface a bad host/port/collection at construction time
        # rather than on the first search.
        self._verify_connection()

    def _verify_connection(self):
        """
        Check that the configured collection is reachable.

        Raises:
            ConnectionError: ``get_collection`` failed; the original exception
                is chained as the cause.
        """
        try:
            info = self.qdrant.get_collection(self.collection_name)
        except Exception as e:
            raise ConnectionError(f"Qdrant bağlantı hatası: {e}") from e

        # points_count may be None; formatting None with ":," would raise a
        # TypeError that has nothing to do with connectivity.
        points_count = getattr(info, "points_count", None) or 0
        print(f"✓ Qdrant bağlantısı OK ({points_count:,} vektör)")

    def _get_embedding(self, text: str) -> List[float]:
        """Return the EMBEDDING_MODEL embedding vector for *text*."""
        response = self.openai.embeddings.create(
            input=text,
            model=EMBEDDING_MODEL
        )
        return response.data[0].embedding

    @staticmethod
    def _clamp_limit(limit: int) -> int:
        """Restrict a caller-supplied result limit to the range 1..MAX_LIMIT."""
        return max(1, min(limit, MAX_LIMIT))

    @staticmethod
    def _path_condition(term: str) -> FieldCondition:
        """Build a text-match condition for *term* on the ``pdf_path`` payload field."""
        # NOTE(review): how MatchText tokenises/matches depends on whether
        # pdf_path has a full-text index in the collection — confirm.
        return FieldCondition(key="pdf_path", match=MatchText(text=term))

    @staticmethod
    def _to_result(point_id: Any, score: float, payload: Optional[Dict[str, Any]]) -> SearchResult:
        """
        Convert one Qdrant point (id, score, payload) into a SearchResult.

        Tolerates a missing payload and missing/None fields. Falls back to the
        alternative payload keys ``pdf_name``, ``page`` and ``content`` when
        the primary keys are absent. Text is truncated to 500 characters.
        """
        payload = payload or {}

        # Page 0 is a legitimate value, so test for None instead of truthiness.
        page_number = payload.get("page_number")
        if page_number is None:
            page_number = payload.get("page")

        text = payload.get("text") or payload.get("content") or ""

        return SearchResult(
            id=str(point_id),
            score=score,
            pdf_filename=payload.get("pdf_filename") or payload.get("pdf_name") or "",
            page_number=page_number,
            text=str(text)[:500],
            brand=payload.get("brand"),
            model=payload.get("model"),
            metadata={
                "pdf_path": payload.get("pdf_path", ""),
                "language": payload.get("language_guess", ""),
                "source": payload.get("source", "")
            }
        )

    def _build_filter(self, analysis: QueryAnalysis) -> Optional[Filter]:
        """
        Build a Qdrant filter from a query analysis.

        Each detected term (brand, model) becomes a text-match condition on
        the ``pdf_path`` payload field; all conditions are AND-ed together.

        Returns:
            The filter, or None when the analysis has neither brand nor model.
        """
        conditions = [
            self._path_condition(term)
            for term in (analysis.brand, analysis.model)
            if term
        ]
        if not conditions:
            return None
        return Filter(must=conditions)

    def _search_with_filter(
        self,
        query_vector: List[float],
        filter_obj: Optional[Filter],
        limit: int
    ) -> List[Dict[str, Any]]:
        """
        Run a vector search, optionally restricted by *filter_obj*.

        Best-effort: any error from Qdrant is printed and an empty list is
        returned, so callers cannot distinguish "no hits" from "query failed".

        Returns:
            A list of ``{"id", "score", "payload"}`` dicts, one per hit.
        """
        try:
            results = self.qdrant.query_points(
                collection_name=self.collection_name,
                query=query_vector,
                query_filter=filter_obj,
                limit=limit,
                with_payload=True
            )
        except Exception as e:
            print(f"⚠️ Arama hatası: {e}")
            return []

        return [
            {
                "id": str(hit.id),
                "score": hit.score,
                "payload": hit.payload
            }
            for hit in results.points
        ]

    def search(
        self,
        query: str,
        limit: int = DEFAULT_LIMIT,
        use_filter: bool = True,
        fallback_on_empty: bool = True
    ) -> SearchResponse:
        """
        Run a hybrid search.

        Args:
            query: User query.
            limit: Maximum number of results; clamped to 1..MAX_LIMIT.
            use_filter: Apply the brand/model metadata filter when the query
                analysis yields one.
            fallback_on_empty: When the filtered search returns nothing,
                retry without the filter.

        Returns:
            A SearchResponse with the hits, the query analysis, timing, and
            flags describing whether a filter and/or the fallback was used.
        """
        started = time.perf_counter()  # monotonic; immune to wall-clock jumps
        limit = self._clamp_limit(limit)

        # 1. Analyse the query (brand / model detection).
        analysis = self.analyzer.analyze(query)

        # 2. Embed the cleaned query (brand/model removed) when available, so
        #    the semantic part focuses on the remaining words.
        search_text = analysis.cleaned_query or query
        query_vector = self._get_embedding(search_text)

        # 3. Build the filter. has_filters() may be true for criteria that
        #    _build_filter does not translate, so "applied" is decided by
        #    whether a filter object actually exists.
        filter_obj = None
        if use_filter and analysis.has_filters():
            filter_obj = self._build_filter(analysis)
        filter_applied = filter_obj is not None

        # 4. Search.
        raw_results = self._search_with_filter(query_vector, filter_obj, limit)
        fallback_used = False

        # 5. Fallback: the filter produced nothing, so retry unfiltered.
        if not raw_results and filter_applied and fallback_on_empty:
            print("⚠️ Filtre sonuç vermedi, genel aramaya düşülüyor...")
            raw_results = self._search_with_filter(query_vector, None, limit)
            fallback_used = True
            filter_applied = False

        # 6. Convert the raw hits.
        results = [
            self._to_result(r["id"], r["score"], r.get("payload"))
            for r in raw_results
        ]

        elapsed_ms = int((time.perf_counter() - started) * 1000)

        return SearchResponse(
            query=query,
            analysis=analysis,
            results=results,
            total_found=len(results),
            search_time_ms=elapsed_ms,
            filter_applied=filter_applied,
            fallback_used=fallback_used
        )

    def search_by_brand_model(
        self,
        brand: str,
        model: Optional[str] = None,
        doc_type: Optional[str] = None,
        limit: int = DEFAULT_LIMIT
    ) -> List[SearchResult]:
        """
        List points whose ``pdf_path`` matches a brand (and optionally a model).

        No vector search is involved: points are fetched with a filtered
        scroll, and every result gets a fixed score of 1.0.

        Args:
            brand: Brand term matched against ``pdf_path``.
            model: Optional model term, AND-ed with the brand.
            doc_type: Accepted for interface compatibility; currently not
                applied to the filter.
            limit: Maximum number of results; clamped to 1..MAX_LIMIT.

        Returns:
            The matching results, or an empty list when the scroll fails
            (the error is printed).
        """
        conditions = [self._path_condition(brand)]
        if model:
            conditions.append(self._path_condition(model))
        filter_obj = Filter(must=conditions)

        try:
            # scroll returns (points, next_page_offset); only one page is read.
            points, _next_offset = self.qdrant.scroll(
                collection_name=self.collection_name,
                scroll_filter=filter_obj,
                limit=self._clamp_limit(limit),
                with_payload=True,
                with_vectors=False
            )
        except Exception as e:
            print(f"⚠️ Scroll hatası: {e}")
            return []

        return [self._to_result(point.id, 1.0, point.payload) for point in points]


# ==================== CLI INTERFACE ====================

def main():
    """
    Command-line interface.

    With a positional query, run a single search and print the results.
    Without one, start an interactive prompt that keeps searching until the
    user enters ``q``, presses Ctrl-C, or closes stdin (Ctrl-D / EOF).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Hybrid Search Service")
    parser.add_argument("query", nargs="?", help="Arama sorgusu")
    parser.add_argument("-l", "--limit", type=int, default=5, help="Sonuç limiti")
    parser.add_argument("--no-filter", action="store_true", help="Filtrelemeyi devre dışı bırak")
    parser.add_argument("--no-fallback", action="store_true", help="Fallback'i devre dışı bırak")

    args = parser.parse_args()

    def run_search(service, query):
        # Single place that maps the CLI flags onto service.search().
        return service.search(
            query,
            limit=args.limit,
            use_filter=not args.no_filter,
            fallback_on_empty=not args.no_fallback
        )

    if not args.query:
        # Interactive mode
        print("\n" + "=" * 60)
        print("   🔍 HYBRID SEARCH SERVICE")
        print("=" * 60)

        service = HybridSearchService()

        while True:
            try:
                query = input("\n📝 Sorgu (q=çıkış): ").strip()
                if query.lower() == 'q':
                    break
                if not query:
                    continue

                response = run_search(service, query)

                print(f"\n📊 Analiz: Brand={response.analysis.brand}, Model={response.analysis.model}")
                print(f"   Filter: {response.filter_applied}, Fallback: {response.fallback_used}")
                print(f"   Süre: {response.search_time_ms}ms, Sonuç: {response.total_found}")

                print("\n📋 Sonuçlar:")
                for i, r in enumerate(response.results, 1):
                    # Payload-derived fields may be None; never slice None.
                    filename = (r.pdf_filename or "")[:50]
                    path_tail = (r.metadata.get('pdf_path') or '')[-40:]
                    print(f"   {i}. [{r.score:.3f}] {filename}")
                    print(f"      Sayfa: {r.page_number}, Path: ...{path_tail}")

            except (KeyboardInterrupt, EOFError):
                # EOFError must end the loop: once stdin is closed, input()
                # raises it on every call, so treating it as an ordinary
                # error would print the message forever.
                break
            except Exception as e:
                # Keep the prompt alive after a failed search.
                print(f"❌ Hata: {e}")

        print("\n👋 Güle güle!")
    else:
        # Single-query mode
        service = HybridSearchService()
        response = run_search(service, args.query)

        print(f"\nSorgu: {args.query}")
        print(f"Analiz: Brand={response.analysis.brand}, Model={response.analysis.model}")
        print(f"Filter: {response.filter_applied}, Fallback: {response.fallback_used}")
        print(f"Süre: {response.search_time_ms}ms\n")

        for i, r in enumerate(response.results, 1):
            snippet = (r.text or "")[:150]
            print(f"{i}. [{r.score:.3f}] {r.pdf_filename}")
            print(f"   Sayfa: {r.page_number}")
            print(f"   {snippet}...\n")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

