import logging
import os
import warnings

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# ==========================
# CONFIG
# ==========================

# Hugging Face model id used for both the tokenizer and the model.
MODEL_NAME = "sshleifer/tiny-gpt2"
# Upper bound on the number of tokens generated per reply.
MAX_NEW_TOKENS = 300
# Longest user message (in characters) that chat() accepts.
MAX_INPUT_CHARS = 4000
# Number of most recent history entries included in each prompt.
MAX_HISTORY = 5

# ==========================
# SETTINGS
# ==========================

warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Inference only: gradients are never needed in this app.
torch.set_grad_enabled(False)
# os.cpu_count() may return None when the count cannot be determined;
# fall back to 1 so the floor division cannot raise TypeError at import time.
torch.set_num_threads(max(1, (os.cpu_count() or 1) // 2))

device = "cuda" if torch.cuda.is_available() else "cpu"

print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

print("🔄 Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=True
).to(device)

model.eval()

# The tokenizer may ship without a pad token; reuse EOS so padding-aware
# code paths have a valid id.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("✅ Model loaded successfully.")

# ==========================
# PROMPTS
# ==========================

# System prompts, one per UI mode. Each starts and ends with a newline so it
# can be concatenated directly with the conversation text that follows.

# Used for the code-generation mode.
GENERATOR_PROMPT = (
    "\n"
    "You are a Senior Software Engineer.\n"
    "Generate clean, secure, optimized production-ready code.\n"
    "Explain briefly what the code does.\n"
)

# Used for the strict code-review mode; lists the report sections to return.
ULTRA_REVIEW_PROMPT = (
    "\n"
    "You are a Principal Software Architect and Security Engineer.\n"
    "Perform a strict professional review.\n"
    "\n"
    "Return:\n"
    "\n"
    "## Executive Summary\n"
    "## Critical Bugs\n"
    "## Security Issues\n"
    "## Architecture Problems\n"
    "## Performance Issues\n"
    "## Code Quality Problems\n"
    "## Refactored Version\n"
    "## Final Verdict\n"
)

# Used for the architecture-analysis mode; lists the report sections to return.
ARCHITECTURE_ANALYSIS_PROMPT = (
    "\n"
    "You are a Senior Software Architect.\n"
    "Perform deep architectural analysis.\n"
    "\n"
    "Return:\n"
    "\n"
    "## Architecture Overview\n"
    "## Structural Problems\n"
    "## Scalability Evaluation\n"
    "## Resilience Evaluation\n"
    "## Improvements\n"
    "## Final Verdict\n"
)

# ==========================
# PROMPT BUILDER
# ==========================

def _message_text(content):
    """Return a chat message's content as plain text ('' for None)."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # NOTE(review): some Gradio versions appear to deliver content as a
        # list of blocks such as {"type": "text", "text": ...}; confirm
        # against the installed version. Non-text blocks are skipped.
        pieces = []
        for block in content:
            if isinstance(block, str):
                pieces.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                pieces.append(block["text"])
        return "\n".join(pieces)
    return str(content)


def _history_to_turns(history):
    """Normalize chat history into a list of (user_text, assistant_text) pairs.

    Accepts either pair-style entries (``[user, assistant]``) or message
    dicts carrying ``role``/``content`` keys; ``None`` is treated as empty.
    """
    turns = []
    pending_user = None

    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _message_text(entry.get("content"))
            if role == "user":
                # Consecutive user messages are merged into one turn.
                pending_user = text if pending_user is None else f"{pending_user}\n{text}"
            elif role == "assistant":
                turns.append((pending_user or "", text))
                pending_user = None
            # Entries with any other role are ignored.
        else:
            user, assistant = entry
            turns.append((_message_text(user), _message_text(assistant)))

    # A trailing user message without a reply still counts as a turn.
    if pending_user is not None:
        turns.append((pending_user, ""))

    return turns


def build_prompt(mode, user_input, history):
    """Assemble the plain-text prompt sent to the model.

    Args:
        mode: UI mode label. "Generar Código" selects the generator prompt,
            "Revisión Ultra Crítica" the review prompt, and any other value
            the architecture-analysis prompt.
        user_input: The new user message.
        history: Previous conversation, as pair entries or role/content
            dicts; may be None or empty. Only the last MAX_HISTORY turns
            are included.

    Returns:
        The system prompt, the retained turns and the new user message,
        ending with an "Assistant:" cue for the model to continue from.
    """
    if mode == "Generar Código":
        system_prompt = GENERATOR_PROMPT
    elif mode == "Revisión Ultra Crítica":
        system_prompt = ULTRA_REVIEW_PROMPT
        user_input = f"Deep review:\n\n{user_input}"
    else:
        system_prompt = ARCHITECTURE_ANALYSIS_PROMPT
        user_input = f"Deep architectural analysis:\n\n{user_input}"

    recent_turns = _history_to_turns(history)[-MAX_HISTORY:]

    parts = [system_prompt, "\n\n"]
    parts.extend(
        f"User: {user}\nAssistant: {assistant}\n"
        for user, assistant in recent_turns
    )
    parts.append(f"User: {user_input}\nAssistant:")

    return "".join(parts)

# ==========================
# GENERATION
# ==========================

def generate_text(prompt):
    """Generate a greedy completion for ``prompt`` and return only the new text.

    Args:
        prompt: Full prompt text, as produced by build_prompt().

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped. The prompt itself is not included.
    """
    # Prompt tokens plus generated tokens must fit in the model's context
    # window; otherwise generation can index past the position embeddings.
    # Falls back to the previously hard-coded 1024 if the config has no limit.
    context_window = getattr(model.config, "max_position_embeddings", None) or 1024
    prompt_budget = max(1, context_window - MAX_NEW_TOKENS)

    encoded = tokenizer(prompt, return_tensors="pt")

    # Keep the END of an over-long prompt: it holds the latest user message
    # and the "Assistant:" cue, which right-side truncation would cut off.
    inputs = {
        name: tensor[:, -prompt_budget:].to(device)
        for name, tensor in encoded.items()
    }
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        # Greedy decoding. temperature/top_p were dropped because they have
        # no effect when do_sample=False.
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; slice the prompt off.
    completion_ids = outputs[0][prompt_length:]

    return tokenizer.decode(
        completion_ids,
        skip_special_tokens=True
    ).strip()

# ==========================
# CHAT
# ==========================

def chat(user_input, history, mode):
    """Handle one chat request and return the reply text.

    Args:
        user_input: The new user message.
        history: Previous conversation, forwarded to build_prompt().
        mode: Selected UI mode label, forwarded to build_prompt().

    Returns:
        The generated reply, or a short warning string when the input is
        empty/blank, longer than MAX_INPUT_CHARS, or generation fails.
        Exceptions are never propagated to the caller.
    """
    # Whitespace-only messages carry no request; treat them like empty input.
    if not user_input or (isinstance(user_input, str) and not user_input.strip()):
        return "⚠️ Empty input."

    if len(user_input) > MAX_INPUT_CHARS:
        return "⚠️ Input too large."

    try:
        prompt = build_prompt(mode, user_input, history)
        return generate_text(prompt)
    except RuntimeError as e:
        # Log the traceback so failures are diagnosable server-side.
        logging.getLogger(__name__).exception("Generation failed")
        return f"⚠️ Generation error: {str(e)}"
    except Exception:
        # Request boundary: keep the UI responsive, but do not lose the
        # traceback.
        logging.getLogger(__name__).exception("Unexpected error while handling chat request")
        return "⚠️ Unexpected error occurred."

# ==========================
# UI
# ==========================

# Page layout: a title, a mode selector, and the chat panel wired to chat().
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 Federico - Lightweight CTO Assistant")

    # The selected value is passed to chat() as its extra `mode` argument
    # through `additional_inputs` below.
    mode = gr.Radio(
        choices=["Generar Código", "Revisión Ultra Crítica", "Análisis Arquitectura"],
        value="Revisión Ultra Crítica",
        label="Modo",
    )

    chat_window = gr.Chatbot(height=500)
    gr.ChatInterface(fn=chat, chatbot=chat_window, additional_inputs=[mode])

# Serve on port 7860, listening on all network interfaces.
demo.launch(server_port=7860, server_name="0.0.0.0")