
import sqlite3
import random
import os

# Filesystem location of the SQLite database file that check_glossary_db()
# opens and inspects.
DB_PATH = '/mnt/pdfs/dictionary.db'

def check_glossary_db(db_path=None):
    """Print a quick quality report for the ``technical_terms`` table.

    The report consists of the total row count, a random sample of up to 20
    rows (English term, Turkish term, category), any sampled rows whose
    English or Turkish term is empty, and table-wide counts of NULL/empty
    ``canonical_tr`` and ``canonical_en`` values.

    Args:
        db_path: Path of the SQLite database file to inspect. When omitted
            (``None``), the module-level ``DB_PATH`` is used.

    Returns:
        None. All results are written to standard output. A missing file or
        any error raised while reading the database is reported as a printed
        message instead of being propagated.
    """
    if db_path is None:
        db_path = DB_PATH

    # sqlite3.connect() would silently create a missing file, so bail out first.
    if not os.path.exists(db_path):
        print(f"File not found: {db_path}")
        return

    # Bound before the try so the finally clause is safe even if connect() fails.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        # Get count
        cursor.execute("SELECT COUNT(*) FROM technical_terms")
        total = cursor.fetchone()[0]
        print(f"Total terms in DB: {total}")

        # Sample terms
        print("\n--- Random Sample Inspection ---")
        cursor.execute("SELECT * FROM technical_terms ORDER BY RANDOM() LIMIT 20")

        issues = []
        for row in cursor.fetchall():
            tr = row['canonical_tr']
            en = row['canonical_en']
            category = row['category']

            # NULL columns arrive as None, which rejects the '<40' format
            # spec with a TypeError; render them as blank text instead.
            en_text = "" if en is None else str(en)
            tr_text = "" if tr is None else str(tr)
            print(f"EN: {en_text:<40} | TR: {tr_text:<40} | CAT: {category}")

            if not en or not tr:
                issues.append(f"Empty value for ID: {row['id']}")
            # Identical EN/TR values are deliberately not flagged: some are
            # valid (AdBlue, etc.).

        # Surface the per-row findings collected from the sample.
        if issues:
            print("\n--- Sample Issues ---")
            for issue in issues:
                print(issue)

        print("\n--- Quality Summary ---")
        # Check for empty canonicals
        cursor.execute("SELECT COUNT(*) FROM technical_terms WHERE canonical_tr IS NULL OR canonical_tr = ''")
        empty_tr = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM technical_terms WHERE canonical_en IS NULL OR canonical_en = ''")
        empty_en = cursor.fetchone()[0]

        print(f"Empty TR terms: {empty_tr}")
        print(f"Empty EN terms: {empty_en}")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on.
        print(f"Error analyzing DB: {e}")
    finally:
        if conn is not None:
            conn.close()

# Run the database check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    check_glossary_db()
