"""Gradio app for Thirukkural GPT - Valluvar or AI."""
import random
import re

import huggingface_hub

# Compatibility shim: when the installed huggingface_hub has no ``HfFolder``
# attribute, install a minimal stand-in. This must run before the
# ``import gradio`` below.
# NOTE(review): presumably gradio (or one of its dependencies) still looks up
# ``huggingface_hub.HfFolder`` at import time -- confirm against pinned versions.
if not hasattr(huggingface_hub, "HfFolder"):
    class _HfFolder:
        # Stand-in exposing only ``path`` and ``get_token``; both report "nothing".
        path = None

        @staticmethod
        def get_token():
            return None

    huggingface_hub.HfFolder = _HfFolder

import gradio as gr
import torch

from model import GPT, GPTConfig


def load_model(checkpoint_path="checkpoint_final.pt"):
    """Load the trained model and its character vocabulary from a checkpoint.

    Args:
        checkpoint_path: Path of the checkpoint file to read. Defaults to
            ``"checkpoint_final.pt"`` (relative to the working directory),
            which is what the app uses.

    Returns:
        A ``(model, stoi, itos)`` tuple: the ``GPT`` instance in eval mode on
        CPU, plus the ``"stoi"`` and ``"itos"`` entries of the checkpoint
        (used elsewhere in this file as char->id and id->char mappings).

    Raises:
        KeyError: If the checkpoint lacks one of the expected entries
            (``config``, ``stoi``, ``itos``, ``model_state_dict``).
    """
    # ``weights_only=True`` refuses to unpickle arbitrary classes, so GPTConfig
    # has to be allow-listed before loading.
    # NOTE(review): assumes checkpoint["config"] is a GPTConfig instance.
    from model import GPTConfig
    torch.serialization.add_safe_globals([GPTConfig])
    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=True)

    model = GPT(checkpoint["config"])
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()  # inference only

    return model, checkpoint["stoi"], checkpoint["itos"]


def generate(model, prompt, stoi, itos, max_new_tokens=200, temperature=0.8, device="cpu"):
    """Autoregressively extend ``prompt`` one character token at a time.

    Args:
        model: Callable returning ``(logits, _)`` for a ``(1, T)`` tensor of
            token ids, and exposing ``config.block_size`` and ``.to(device)``.
        prompt: Text to continue. Characters missing from ``stoi`` are mapped
            to the id of ``" "`` (or ``0`` if the vocabulary has no space).
            An empty prompt is seeded with that same fallback token, so the
            result then starts with the fallback character.
        stoi: Mapping from character to token id.
        itos: Mapping from token id to character.
        max_new_tokens: Number of tokens to append to the prompt.
        temperature: Softmax temperature for sampling. Values ``<= 0`` select
            the most likely token at every step (greedy decoding) instead of
            dividing by zero.
        device: Torch device the model and the token tensor are placed on.

    Returns:
        The decoded prompt followed by the generated continuation; ids that
        are missing from ``itos`` decode to the empty string.
    """
    model = model.to(device)

    # Encode the prompt; the model cannot be run on a zero-length sequence,
    # so an empty prompt is replaced by a single fallback token.
    fallback_id = stoi.get(" ", 0)
    prompt_tokens = [stoi.get(ch, fallback_id) for ch in prompt]
    if not prompt_tokens:
        prompt_tokens = [fallback_id]
    idx = torch.tensor([prompt_tokens], dtype=torch.long, device=device)

    greedy = temperature <= 0
    block_size = model.config.block_size

    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Only the last ``block_size`` tokens fit in the context window.
            idx_cond = idx[:, -block_size:]

            # Keep just the logits predicted for the final position.
            logits, _ = model(idx_cond)
            logits = logits[:, -1, :]

            if greedy:
                idx_next = torch.argmax(logits, dim=-1, keepdim=True)
            else:
                probs = torch.softmax(logits / temperature, dim=-1)
                idx_next = torch.multinomial(probs, num_samples=1)

            idx = torch.cat((idx, idx_next), dim=1)

    return "".join(itos.get(token, "") for token in idx[0].tolist())


def is_real_kural(text, original_text):
    """Check whether the first kural found in ``text`` occurs in the corpus.

    Header lines are ignored first, using the same heuristics as the
    formatting code in this file: any line containing ``" - "`` and any
    Tamil-only line of at most two words (chapter titles). Without this, the
    chapter title that starts a raw generation would be mistaken for the
    first line of the couplet.

    A kural is considered "real" if:
    1. Its first two Tamil lines appear consecutively (joined by a newline)
       in ``original_text``.
    2. When at least one English line is present, the first English line also
       appears somewhere in ``original_text``.

    Args:
        text: Generated (or formatted) text to inspect.
        original_text: Full reference corpus to search in.

    Returns:
        ``True`` if both conditions hold, ``False`` otherwise (including when
        fewer than two Tamil lines are found).
    """
    tamil_re = re.compile(r"[\u0B80-\u0BFF]")
    latin_re = re.compile(r"[a-zA-Z]")

    tamil_lines = []
    english_lines = []
    for raw_line in text.strip().split("\n"):
        line = raw_line.strip()
        if not line or " - " in line:
            continue  # blank line or "title - translation" style header
        if tamil_re.search(line):
            # Short Tamil-only phrases are chapter titles, not verse lines.
            if len(line.split()) <= 2 and not latin_re.search(line):
                continue
            tamil_lines.append(line)
        else:
            english_lines.append(line)

    if len(tamil_lines) < 2:
        return False

    # Both Tamil lines must exist back to back in the corpus.
    tamil_couplet = tamil_lines[0] + "\n" + tamil_lines[1]
    if tamil_couplet not in original_text:
        return False

    # Plain substring check: the translation only has to occur somewhere in
    # the corpus, not necessarily next to the matched couplet.
    if english_lines:
        return english_lines[0] in original_text

    return True


# Load model and data
# NOTE: this runs at import time -- importing this module calls load_model()
# and reads the corpus file from the current working directory.
print("Loading model...")
model, stoi, itos = load_model()
print(f"Model loaded: {sum(p.numel() for p in model.parameters()) / 1e6:.1f}M params")

# Load original text for verification
# ORIGINAL_TEXT is the reference corpus used by is_real_kural() and by the
# quiz to pick a genuine kural.
with open("thirukkural_clean.txt", "r", encoding="utf-8") as f:
    ORIGINAL_TEXT = f.read()


def generate_kural(prompt, temperature, max_tokens):
    """Generate text for ``prompt`` and return the first kural found in it.

    Args:
        prompt: Theme text the generation starts from.
        temperature: Sampling temperature forwarded to ``generate``.
        max_tokens: Requested token budget; 100 extra tokens are generated so
            the first kural is less likely to be cut off.

    Returns:
        A ``(output, source)`` tuple. ``output`` holds up to two Tamil lines
        followed by up to two English lines; ``source`` is a label saying
        whether that displayed kural was found in ``ORIGINAL_TEXT``.
    """
    output_raw = generate(model, prompt, stoi, itos, int(max_tokens) + 100, temperature)

    # Collect the first two Tamil and first two English non-header lines.
    tamil_lines = []
    english_lines = []
    for line in output_raw.strip().split("\n"):
        line = line.strip()
        if not line or " - " in line:
            continue  # blank line or "title - translation" style header

        if re.search(r"[\u0B80-\u0BFF]", line):
            # Skip short Tamil-only headers (1-2 words, i.e. chapter titles).
            if len(line.split()) <= 2 and not re.search(r"[a-zA-Z]", line):
                continue
            if len(tamil_lines) < 2:
                tamil_lines.append(line)
        elif len(english_lines) < 2:
            english_lines.append(line)

        if len(tamil_lines) == 2 and len(english_lines) == 2:
            break  # nothing further can be added

    formatted_lines = tamil_lines + english_lines
    output = "\n".join(formatted_lines) if formatted_lines else format_kural(output_raw)

    # Verify the kural that is actually shown. The raw generation starts with
    # the prompt (a chapter title) and may contain several kurals, so checking
    # it would label something other than what the user sees.
    is_real = is_real_kural(output, ORIGINAL_TEXT)
    source = "📖 Original Thirukkural" if is_real else "🤖 AI Generated"

    return output, source


def format_kural(text):
    """Format kural text with proper structure (2 Tamil + 2 English lines).

    Header lines are dropped, the remaining lines are split into Tamil lines
    (containing any character in U+0B80-U+0BFF) and English lines (everything
    else), and the first two of each group are kept.

    A line is treated as a header when it contains ``" - "`` or when it is a
    Tamil-only phrase of at most two words (a chapter title). The limit is two
    words, matching ``generate_kural``, because the second line of a kural is
    itself only three words long and must not be discarded.

    Args:
        text: Raw text, one kural line per text line.

    Returns:
        The kept lines joined with newlines: Tamil lines first, then English.
        A group with exactly one line is padded with an empty line; a group
        with no lines contributes nothing.
    """
    tamil_re = re.compile(r"[\u0B80-\u0BFF]")
    latin_re = re.compile(r"[a-zA-Z]")

    def is_header(line):
        if " - " in line:
            return True
        return (
            tamil_re.search(line) is not None
            and len(line.split()) <= 2
            and latin_re.search(line) is None
        )

    stripped = (raw.strip() for raw in text.strip().split("\n"))
    content_lines = [line for line in stripped if line and not is_header(line)]

    tamil_lines = [line for line in content_lines if tamil_re.search(line)]
    english_lines = [line for line in content_lines if not tamil_re.search(line)]

    formatted = []
    for group in (tamil_lines, english_lines):
        if group:
            # Keep two lines, padding a lone line with an empty placeholder.
            formatted.extend((group + [""])[:2])

    return "\n".join(formatted)


def valluvar_or_ai_quiz():
    """Generate a quiz: one real kural and one AI-generated kural.

    The real kural is drawn uniformly from every position in ``ORIGINAL_TEXT``
    where two Tamil lines are immediately followed by two non-empty English
    lines. Requiring that order (rather than just counting line types in a
    four-line window) keeps a couplet from being paired with a neighbouring
    kural's translation. A built-in kural is used when the corpus contains no
    such position, including when it has fewer than four lines.

    Returns:
        A 4-tuple ``(markdown, a_is_real, b_is_real, correct)``: the quiz text
        showing both options in shuffled order, two booleans telling whether
        option A / option B is the real kural, and ``"A"`` or ``"B"`` naming
        the real one.
    """
    tamil_re = re.compile(r"[\u0B80-\u0BFF]")
    lines = ORIGINAL_TEXT.strip().split("\n")

    is_tamil = [tamil_re.search(line) is not None for line in lines]
    is_english = [bool(line.strip()) and not tamil for line, tamil in zip(lines, is_tamil)]

    # Start indices of every Tamil, Tamil, English, English run.
    candidates = [
        start
        for start in range(len(lines) - 3)
        if is_tamil[start]
        and is_tamil[start + 1]
        and is_english[start + 2]
        and is_english[start + 3]
    ]

    if candidates:
        start = random.choice(candidates)
        # The chunk is already a validated 4-line kural, so it is used as-is
        # instead of being re-parsed by the header heuristics.
        real_kural = "\n".join(line.strip() for line in lines[start:start + 4])
    else:
        # Fallback if no proper kural found
        real_kural = "அகர முதல எழுத்தெல்லாம் ஆதி\nபகவன் முதற்றே உலகு\n'A' leads letters; the Ancient Lord\nLeads and lords the entire world"

    # Generate AI kural with random prompt
    prompts = ["கடவுள் வாழ்த்து", "நட்பு", "அறன்", "வான் சிறப்பு", "அரசியல்"]
    prompt = random.choice(prompts)
    ai_kural_raw = generate(model, prompt, stoi, itos, 150, 0.8)
    ai_kural = format_kural(ai_kural_raw)

    # Shuffle so the real kural lands on A or B at random; the position after
    # shuffling decides which option each entry becomes.
    kurals = [(real_kural, True), (ai_kural, False)]
    random.shuffle(kurals)
    (text_a, a_is_real), (text_b, b_is_real) = kurals

    return (
        f"## Option A\n```\n{text_a}\n```\n\n---\n\n## Option B\n```\n{text_b}\n```",
        a_is_real,
        b_is_real,
        "A" if a_is_real else "B",
    )


# Gradio Interface
# Three tabs: free-form generation, the "Valluvar or AI?" quiz, and an About
# page whose model figures are filled in from the loaded model.
with gr.Blocks(title="Valluvar or AI?") as demo:
    gr.Markdown("# 🕉️ Valluvar or AI?")
    gr.Markdown(
        "An AI that writes new Thirukkurals in the style of Thiruvalluvar. "
        "Enter a Tamil theme to generate bilingual wisdom."
    )

    with gr.Tab("✨ Generate Kural"):
        with gr.Row():
            # Left column: generation inputs.
            with gr.Column():
                prompt = gr.Textbox(
                    label="Theme (Tamil)",
                    placeholder="e.g., கடவுள் வாழ்த்து, நட்பு, அரசியல்",
                    value="கடவுள் வாழ்த்து",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=0.8,
                    step=0.1,
                    label="Temperature (Creativity)",
                )
                max_tokens = gr.Slider(
                    minimum=50,
                    maximum=400,
                    value=200,
                    step=50,
                    label="Max Tokens",
                )
                generate_btn = gr.Button("Generate", variant="primary")

            # Right column: generated kural and its source label.
            with gr.Column():
                output = gr.Textbox(
                    label="Generated Kural",
                    lines=10,
                )
                source = gr.Textbox(label="Source")

        generate_btn.click(
            fn=generate_kural,
            inputs=[prompt, temperature, max_tokens],
            outputs=[output, source],
        )

        # Quick theme buttons
        gr.Markdown("### Quick Themes")
        with gr.Row():
            themes = [
                "கடவுள் வாழ்த்து",
                "வான் சிறப்பு",
                "நட்பு",
                "அரசியல்",
                "அறன் வலியுறுத்தல்",
            ]
            for theme in themes:
                btn = gr.Button(theme)
                # ``t=theme`` binds the current theme; a plain closure would
                # make every button return the last theme in the list.
                btn.click(lambda t=theme: t, outputs=prompt)

    with gr.Tab("🎯 Valluvar or AI? Quiz"):
        gr.Markdown("Can you tell which is the original Thirukkural and which is AI-generated?")

        quiz_output = gr.Markdown()
        with gr.Row():
            guess_a = gr.Button("Option A is Real", variant="secondary")
            guess_b = gr.Button("Option B is Real", variant="secondary")
        quiz_result = gr.Markdown()
        new_quiz_btn = gr.Button("New Quiz", variant="primary")

        # Store answers (all three hold None until a quiz has been generated)
        a_is_real = gr.State()
        b_is_real = gr.State()
        correct_answer = gr.State()

        def check_answer(guess, a_real, b_real, correct):
            """Return the result message for ``guess`` ("A" or "B").

            ``correct`` is "A" or "B" once a quiz exists and ``None`` before
            the first quiz. ``a_real`` and ``b_real`` are accepted because the
            click handlers pass them, but they are not needed for the check.
            """
            if correct is None:
                # A guess button was clicked before any quiz was generated;
                # without this guard the string concatenation below fails.
                return "⚠️ Click **New Quiz** first to get a question."
            if guess == correct:
                return "✅ Correct! You identified the original Thirukkural."
            return "❌ Wrong! The original Thirukkural was: " + correct

        new_quiz_btn.click(
            fn=valluvar_or_ai_quiz,
            outputs=[quiz_output, a_is_real, b_is_real, correct_answer],
        )

        guess_a.click(
            fn=lambda a, b, c: check_answer("A", a, b, c),
            inputs=[a_is_real, b_is_real, correct_answer],
            outputs=quiz_result,
        )

        guess_b.click(
            fn=lambda a, b, c: check_answer("B", a, b, c),
            inputs=[a_is_real, b_is_real, correct_answer],
            outputs=quiz_result,
        )

    with gr.Tab("📊 About"):
        gr.Markdown(
            f"""
            ## Model Details

            - **Architecture:** GPT ({model.config.n_layer}L/{model.config.n_head}H/{model.config.n_embd}D)
            - **Parameters:** {sum(p.numel() for p in model.parameters()) / 1e6:.1f}M
            - **Vocabulary:** {len(stoi)} characters (Tamil + English)
            - **Training Data:** Thirukkural (1330 kurals with English translations)
            - **Tokenization:** Character-level

            ## Training

            - Steps: 10,000
            - Device: Apple MPS (Mac Mini)
            - Time: ~5 hours
            - Final Loss: ~1.5

            ## Capabilities

            - ✅ Generate authentic Tamil couplets (2 lines × 4 words)
            - ✅ Produce coherent English translations
            - ✅ Handle traditional themes (virtue, politics, love)
            - ❌ Modern topics (science, technology) - not in training data

            ## Examples of AI vs Original

            The model sometimes generates exact memorized kurals from the 1330,
            and sometimes creates entirely new ones in Thiruvalluvar's style.

            Built with ❤️ using PyTorch and Gradio.
            """
        )


if __name__ == "__main__":
    # Start the web server only when run as a script. "0.0.0.0" binds every
    # network interface, so the app is reachable from other hosts on port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )