"""Model setup, prompt construction and text generation for the chat assistant.

Importing this module loads the tokenizer and causal language model named by
``MODEL_NAME`` and exposes ``chat`` as the callback used by the UI.
"""

import logging
import os
import warnings

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# ==========================
# CONFIG
# ==========================

MODEL_NAME = "sshleifer/tiny-gpt2"
MAX_NEW_TOKENS = 300
MAX_INPUT_CHARS = 4000
MAX_HISTORY = 5

# 1024 was the original tokenizer cap. It is treated here as the total token
# budget (prompt + generated tokens).
# NOTE(review): assumes the model's context window is 1024 positions -- confirm
# if MODEL_NAME changes.
MAX_CONTEXT_TOKENS = 1024
# Leave room for the generated tokens so prompt + output never exceeds the budget.
MAX_PROMPT_TOKENS = MAX_CONTEXT_TOKENS - MAX_NEW_TOKENS

logger = logging.getLogger(__name__)

# ==========================
# SETTINGS
# ==========================

warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

torch.set_grad_enabled(False)
# os.cpu_count() may return None when the count cannot be determined.
torch.set_num_threads(max(1, (os.cpu_count() or 1) // 2))

device = "cuda" if torch.cuda.is_available() else "cpu"

print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

print("🔄 Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=True,
).to(device)
model.eval()

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# When a prompt is too long, drop its beginning rather than its end: the end
# holds the newest user message and the "Assistant:" cue the model continues.
tokenizer.truncation_side = "left"

print("✅ Model loaded successfully.")

# ==========================
# PROMPTS
# ==========================

GENERATOR_PROMPT = """
You are a Senior Software Engineer.
Generate clean, secure, optimized production-ready code.
Explain briefly what the code does.
"""

ULTRA_REVIEW_PROMPT = """
You are a Principal Software Architect and Security Engineer.
Perform a strict professional review.

Return:
## Executive Summary
## Critical Bugs
## Security Issues
## Architecture Problems
## Performance Issues
## Code Quality Problems
## Refactored Version
## Final Verdict
"""

ARCHITECTURE_ANALYSIS_PROMPT = """
You are a Senior Software Architect.
Perform deep architectural analysis.

Return:
## Architecture Overview
## Structural Problems
## Scalability Evaluation
## Resilience Evaluation
## Improvements
## Final Verdict
"""

# ==========================
# PROMPT BUILDER
# ==========================


def _as_text(value):
    """Return *value* as a string, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)


def _history_to_pairs(history):
    """Normalise chat history into a list of ``(user, assistant)`` string pairs.

    Accepts either two-item ``[user, assistant]`` entries or dict entries with
    ``"role"`` / ``"content"`` keys. ``None`` is treated as an empty history.
    Dict entries whose role is neither ``"user"`` nor ``"assistant"`` are
    ignored.
    """
    pairs = []
    pending_user = None
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _as_text(entry.get("content"))
            if role == "user":
                # Two user messages in a row: close the first with an empty reply.
                if pending_user is not None:
                    pairs.append((pending_user, ""))
                pending_user = text
            elif role == "assistant":
                pairs.append((pending_user or "", text))
                pending_user = None
        else:
            user, assistant = entry
            pairs.append((_as_text(user), _as_text(assistant)))
    if pending_user is not None:
        pairs.append((pending_user, ""))
    return pairs


def build_prompt(mode, user_input, history):
    """Assemble the full text prompt sent to the model.

    Args:
        mode: Selected mode label. ``"Generar Código"`` and
            ``"Revisión Ultra Crítica"`` pick their dedicated system prompts;
            any other value falls back to the architecture-analysis prompt.
        user_input: The current user message.
        history: Previous conversation turns (pairs or role/content dicts);
            only the last ``MAX_HISTORY`` turns are included.

    Returns:
        The system prompt, the retained turns and the current message,
        ending with ``"Assistant:"`` so the model continues from there.
    """
    if mode == "Generar Código":
        system_prompt = GENERATOR_PROMPT
    elif mode == "Revisión Ultra Crítica":
        system_prompt = ULTRA_REVIEW_PROMPT
        user_input = f"Deep review:\n\n{user_input}"
    else:
        system_prompt = ARCHITECTURE_ANALYSIS_PROMPT
        user_input = f"Deep architectural analysis:\n\n{user_input}"

    recent_turns = _history_to_pairs(history)[-MAX_HISTORY:]

    parts = [system_prompt, "\n\n"]
    parts.extend(
        f"User: {user}\nAssistant: {assistant}\n"
        for user, assistant in recent_turns
    )
    parts.append(f"User: {user_input}\nAssistant:")
    return "".join(parts)


# ==========================
# GENERATION
# ==========================


def generate_text(prompt):
    """Generate a continuation of *prompt* and return only the new text.

    The prompt is truncated to ``MAX_PROMPT_TOKENS`` tokens so that, together
    with up to ``MAX_NEW_TOKENS`` generated tokens, the sequence stays within
    ``MAX_CONTEXT_TOKENS``.

    Args:
        prompt: Full prompt text, as produced by ``build_prompt``.

    Returns:
        The decoded generated tokens (prompt excluded), stripped of
        surrounding whitespace.
    """
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_PROMPT_TOKENS,
    ).to(device)

    with torch.inference_mode():
        # Greedy decoding. The sampling knobs (temperature / top_p) that were
        # previously passed had no effect because do_sample is False.
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]

    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


# ==========================
# CHAT
# ==========================


def chat(user_input, history, mode):
    """Chat callback: validate the message, build the prompt and generate a reply.

    Args:
        user_input: The message typed by the user.
        history: Previous conversation turns, forwarded to ``build_prompt``.
        mode: Selected mode label, forwarded to ``build_prompt``.

    Returns:
        The generated reply, or a warning string when the input is empty or
        whitespace-only, longer than ``MAX_INPUT_CHARS`` characters, or when
        generation fails. Exceptions are logged and never propagate.
    """
    is_blank = isinstance(user_input, str) and not user_input.strip()
    if not user_input or is_blank:
        return "⚠️ Empty input."

    if len(user_input) > MAX_INPUT_CHARS:
        return "⚠️ Input too large."

    try:
        prompt = build_prompt(mode, user_input, history)
        return generate_text(prompt)
    except RuntimeError as e:
        logger.exception("Generation failed")
        return f"⚠️ Generation error: {e}"
    except Exception:
        # Top-level UI boundary: log the traceback, show a generic message.
        logger.exception("Unexpected error while handling chat request")
        return "⚠️ Unexpected error occurred."
# ==========================
# UI
# ==========================

# Page layout: a title, a mode selector, and the chat panel wired to `chat`.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 Federico - Lightweight CTO Assistant")

    # The selected label is passed to `chat` as its extra `mode` argument.
    mode = gr.Radio(
        choices=[
            "Generar Código",
            "Revisión Ultra Crítica",
            "Análisis Arquitectura",
        ],
        value="Revisión Ultra Crítica",
        label="Modo",
    )

    gr.ChatInterface(
        fn=chat,
        additional_inputs=[mode],
        chatbot=gr.Chatbot(height=500),
    )

# Listen on all interfaces, port 7860.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
)