
import json
import random
import os

# Absolute path of the glossary JSON file that check_glossary() inspects.
GLOSSARY_PATH = '/var/www/html/PEPCVSON/config/glossary.json'

def _term_text(item, lang):
    """Return the ``lang`` field of a glossary entry as a printable string.

    Missing fields, JSON ``null`` and any other falsy value become ``''``;
    truthy non-string values are converted with ``str()``. An entry that is
    not a JSON object is treated as having no fields at all.
    """
    if not isinstance(item, dict):
        return ''
    value = item.get(lang)
    return str(value) if value else ''


def check_glossary(path=None):
    """Print a quality report for a glossary JSON file.

    The file is expected to hold a JSON object with an optional ``source``
    value and a ``terms`` object whose values are objects carrying ``en``
    and ``tr`` fields. The report consists of the total number of terms, a
    random sample of up to 20 entries, the number of entries with an empty
    ``en`` or ``tr`` field, and the number of entries whose ``en`` and ``tr``
    texts are equal ignoring case.

    Args:
        path: Location of the glossary file. Defaults to ``GLOSSARY_PATH``
            when omitted or ``None``.

    Returns:
        None. All results are written to standard output; load problems are
        reported there as well instead of being raised.
    """
    if path is None:
        path = GLOSSARY_PATH

    # Open directly instead of checking existence first: this avoids the
    # check-then-open race and keeps the error handling in one place.
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"File not found: {path}")
        return
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading JSON: {e}")
        return

    if not isinstance(data, dict):
        print("Error loading JSON: top-level value is not an object")
        return

    terms = data.get('terms', {})
    if not isinstance(terms, dict):
        print("Error loading JSON: 'terms' is not an object")
        return

    total = len(terms)
    print(f"Total terms: {total}")
    print(f"Source: {data.get('source', 'Unknown')}")

    issues = []
    sample_size = 20
    # random.sample rejects a sample larger than the population, so clamp it.
    sample_keys = random.sample(list(terms), min(sample_size, total))

    print("\n--- Random Sample Inspection ---")
    for key in sample_keys:
        en = _term_text(terms[key], 'en')
        tr = _term_text(terms[key], 'tr')
        print(f"EN: {en:<40} | TR: {tr}")

        if not en or not tr:
            issues.append(f"Empty value for key: {key}")
        # Identical EN/TR pairs are deliberately not reported as issues:
        # some technical terms are legitimately the same in both languages.
        # They are only counted in the summary below.

    print("\n--- Quality Check Summary ---")

    empty_count = 0
    identical_count = 0
    for item in terms.values():
        en = _term_text(item, 'en')
        tr = _term_text(item, 'tr')
        if not en or not tr:
            empty_count += 1
        elif en.lower() == tr.lower():
            # Only non-empty pairs count as identical; two missing values
            # are already covered by the empty counter.
            identical_count += 1

    print(f"Empty terms: {empty_count}")
    print(f"Identical terms (EN=TR): {identical_count}")

    if issues:
        print("\nSample Issues Found:")
        for issue in issues:
            print(f"- {issue}")

# Run the glossary check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    check_glossary()
