from flask import Flask, render_template, request, jsonify, session, redirect, url_for, copy_current_request_context
from flask_cors import CORS
import os
import sys
import json
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import re
import numpy as np
from typing import List, Dict, Tuple, Optional
from supabase import create_client, Client
from datetime import datetime
import uuid
import time
from functools import wraps, lru_cache
import threading
from queue import Queue
import gc
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# The OpenAI SDK is used purely as a client for NVIDIA's OpenAI-compatible API.
# NVIDIA_AVAILABLE records whether the SDK could be imported so that callers
# can degrade gracefully instead of hitting a NameError on `OpenAI`.
try:
    from openai import OpenAI
    NVIDIA_AVAILABLE = True
except ImportError:
    NVIDIA_AVAILABLE = False
    print("[WARN] openai package not installed. Run: pip install openai")

# Disable TensorFlow oneDNN custom ops (set before any TF-backed import runs)
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

# Fix Windows console encoding
if sys.platform == 'win32':
    try:
        sys.stdout.reconfigure(encoding='utf-8')
    except Exception:
        # Best effort only: stdout may have been replaced by an object without
        # reconfigure(). Catching Exception (not a bare except) keeps
        # KeyboardInterrupt/SystemExit propagating.
        pass

app = Flask(__name__)
# NOTE(review): CORS is enabled for every origin on every route - confirm this
# is intended given that admin routes rely on the session cookie.
CORS(app)

# Prefer a secret supplied through the environment; the historical hard-coded
# value is kept only as a fallback so existing deployments keep working.
app.secret_key = os.environ.get('FLASK_SECRET_KEY') or 'rngai_secret_key_change_in_production_2025'
if not os.environ.get('FLASK_SECRET_KEY'):
    print("[WARN] FLASK_SECRET_KEY not set; using built-in default secret key")

# ============================================
# CONFIGURATION
# ============================================
# All data/model paths are resolved relative to the directory holding this file.
SCRIPT_DIR, _ = os.path.split(os.path.abspath(__file__))
TEXT_FILE_PATH = os.path.join(SCRIPT_DIR, "data", "link17.txt")
MODEL_CACHE_DIR = os.path.join(SCRIPT_DIR, "model_cache")

# ============================================
# NVIDIA MODEL CONFIGURATION (NVIDIA API)
# ============================================
# Model used until an admin switches to another registry entry.
DEFAULT_NVIDIA_MODEL = "abacusai/dracarys-llama-3.1-70b-instruct"

# Registry of selectable models, keyed by the NVIDIA model identifier.
NVIDIA_MODEL_REGISTRY = {
    DEFAULT_NVIDIA_MODEL: dict(
        name="Dracarys Llama 3.1 70B",
        model_id=DEFAULT_NVIDIA_MODEL,
        context_length=128000,
        max_new_tokens=4096,
        description="Dracarys Llama 3.1 - Powerful 70B Instruct Model",
    ),
}

# Cost tracking (NVIDIA API pricing), expressed per one million tokens.
NVIDIA_COST_PER_1M_INPUT = 0.0  # Check NVIDIA pricing
NVIDIA_COST_PER_1M_OUTPUT = 0.0  # Check NVIDIA pricing

# ============================================
# NVIDIA CONFIGURATION
# ============================================
# Credentials and endpoint come from the environment (populated by load_dotenv).
NVIDIA_API_KEY = os.getenv('NVIDIA_API_KEY')
NVIDIA_API_KEY_2 = os.getenv('NVIDIA_API_KEY_2')  # Backup key
NVIDIA_BASE_URL = os.getenv('NVIDIA_BASE_URL', 'https://integrate.api.nvidia.com/v1')

# Mutable module state: the active client and the model it should call.
nvidia_client = None
current_nvidia_model = DEFAULT_NVIDIA_MODEL

# Build the client eagerly only when both a key and the OpenAI SDK are present.
if not (NVIDIA_API_KEY and NVIDIA_AVAILABLE):
    if not NVIDIA_API_KEY:
        print("[WARN] NVIDIA_API_KEY not set in environment")
else:
    nvidia_client = OpenAI(
        base_url=NVIDIA_BASE_URL,
        api_key=NVIDIA_API_KEY,
    )
    print("[OK] NVIDIA API client initialized (Key 1)")
    if NVIDIA_API_KEY_2:
        print("[OK] Backup NVIDIA key configured")
# Supabase Configuration (loaded from .env)
SUPABASE_URL = os.environ.get('SUPABASE_URL')
SUPABASE_KEY = os.environ.get('SUPABASE_KEY')

# Report each missing setting individually so misconfiguration is easy to spot.
for _setting_name, _setting_value in (('SUPABASE_URL', SUPABASE_URL), ('SUPABASE_KEY', SUPABASE_KEY)):
    if not _setting_value:
        print(f"[WARN] {_setting_name} is missing from environment variables!")

# `supabase` stays None whenever the client cannot be created; the rest of the
# module checks it for truthiness before touching the database.
supabase: Optional[Client] = None
if SUPABASE_URL and SUPABASE_KEY:
    try:
        supabase = create_client(SUPABASE_URL.strip(), SUPABASE_KEY.strip())
        print("[OK] Supabase connected successfully")
    except Exception as e:
        print(f"[WARN] Could not connect to Supabase: {e}")
        supabase = None
else:
    print("[WARN] Skipping Supabase connection due to missing config")

# ============================================
# GLOBAL VARIABLES
# ============================================
# RAG state; all three start unset and are assigned by initialize_rag().
embedding_model = collection = chroma_client = None

# When True, /chat adds a `debug` section to its JSON response.
debug_mode = False

# OPTIMIZATION: Cache for embeddings
# EMBEDDING_CACHE_SIZE bounds the lru_cache on get_cached_embedding().
EMBEDDING_CACHE_SIZE = 200
# NOTE(review): no visible code reads or writes this dict - confirm it is still needed.
embedding_cache = dict()

# ============================================
# EMBEDDING MODEL (Same as original)
# ============================================

def load_embedding_model():
    """Load the sentence-embedding model once and return it.

    The model is stored in the module-level ``embedding_model``; subsequent
    calls return that instance without reloading. When PyTorch reports a CUDA
    device the model is moved to it, otherwise it stays on the CPU.

    Returns:
        The loaded ``SentenceTransformer`` instance.
    """
    global embedding_model

    if embedding_model is not None:
        return embedding_model

    print("[INFO] Loading embedding model (optimized)...")
    model = SentenceTransformer('mixedbread-ai/mxbai-embed-large-v1')

    # Try to place the model on the GPU; any failure falls back to the CPU.
    on_gpu = False
    try:
        import torch
        if torch.cuda.is_available():
            model = model.to('cuda')
            on_gpu = True
    except ImportError:
        # torch is not importable: nothing to move, stay on CPU.
        pass
    except Exception as e:
        # Surface real GPU problems instead of silently hiding them.
        print(f"[WARN] Could not move embedding model to GPU: {e}")

    # Always report the device (previously the CPU message only appeared when
    # an exception was raised, not when CUDA was simply unavailable).
    if on_gpu:
        print("[PERF] Embedding model on GPU")
    else:
        print("[INFO] Embedding model on CPU")

    # Publish the model only once it is fully set up.
    embedding_model = model
    print("[OK] Embedding model loaded!")
    return embedding_model

@lru_cache(maxsize=EMBEDDING_CACHE_SIZE)
def _encode_query_cached(text: str) -> np.ndarray:
    """Encode one query string with the loaded model (memoised per text)."""
    instruction = "Represent this sentence for searching relevant passages: "
    return embedding_model.encode(
        [instruction + text],
        normalize_embeddings=True,
        show_progress_bar=False,
        batch_size=1,
        convert_to_numpy=True
    )[0]


def get_cached_embedding(text: str) -> Optional[np.ndarray]:
    """Return the (cached) query embedding for ``text``.

    The retrieval instruction prefix is prepended before encoding and the
    embedding is normalised. Results are memoised for up to
    ``EMBEDDING_CACHE_SIZE`` distinct texts.

    Args:
        text: The user query to embed.

    Returns:
        The embedding vector, or ``None`` when no embedding model is loaded.
        The "model not loaded" outcome is deliberately *not* cached, so the
        same query succeeds once the model becomes available.
    """
    if embedding_model is None:
        return None
    return _encode_query_cached(text)


# Keep the lru_cache management API available on the public name, since other
# code calls get_cached_embedding.cache_clear().
get_cached_embedding.cache_clear = _encode_query_cached.cache_clear
get_cached_embedding.cache_info = _encode_query_cached.cache_info

# ============================================
# AUTH HELPERS
# ============================================

def login_required(f):
    """Decorator that restricts a view to logged-in admins.

    If the session has no truthy ``admin_logged_in`` flag the request is
    redirected to the ``admin_login`` endpoint; otherwise the wrapped view
    runs with its original arguments.
    """
    @wraps(f)
    def guarded_view(*args, **kwargs):
        if session.get('admin_logged_in'):
            return f(*args, **kwargs)
        return redirect(url_for('admin_login'))
    return guarded_view

def get_or_create_session():
    """Return the chat session id stored in the Flask session, creating one if absent.

    A new id is a random UUID4 string. When Supabase is configured, a matching
    ``chat_sessions`` row (IP address, user agent truncated to 500 characters,
    UTC start time) is inserted; insert failures are logged and otherwise
    ignored. Must be called inside a request context.
    """
    if 'chat_session_id' in session:
        return session['chat_session_id']

    new_session_id = str(uuid.uuid4())
    session['chat_session_id'] = new_session_id

    if supabase:
        try:
            agent = request.user_agent.string
            session_row = {
                'session_id': new_session_id,
                'ip_address': request.remote_addr,
                'user_agent': agent[:500] if agent else None,
                'started_at': datetime.utcnow().isoformat()
            }
            supabase.table('chat_sessions').insert(session_row).execute()
        except Exception as e:
            # Persistence is best effort; the chat still works without the row.
            print(f"[WARN] Could not create session: {e}")

    return new_session_id

def save_chat_to_supabase(session_id: str, user_question: str, ai_response: str, 
                          response_time_ms: int, input_tokens: int = 0, output_tokens: int = 0):
    """Persist one question/answer exchange to Supabase (best effort).

    The call is synchronous: it performs a lookup and an insert before
    returning. Every failure is logged and swallowed so that a database
    problem never breaks the chat response.

    Args:
        session_id: Chat session id as stored in ``chat_sessions.session_id``.
        user_question: The user's message.
        ai_response: The generated answer; stored truncated to 5000 characters.
        response_time_ms: End-to-end response time in milliseconds.
        input_tokens: Estimated prompt tokens.
        output_tokens: Estimated completion tokens.
    """
    if not supabase:
        return

    try:
        # chat_messages references the session row's primary key, not the
        # public session id, so resolve it first.
        result = supabase.table('chat_sessions').select('id').eq('session_id', session_id).execute()

        if not result.data:
            print(f"[WARN] Chat not saved: no chat_sessions row for session {session_id}")
            return

        session_uuid = result.data[0]['id']

        # Derive the cost from the configured per-million-token rates instead
        # of hard-coding it (both rates are currently 0.0).
        total_cost = (
            input_tokens * NVIDIA_COST_PER_1M_INPUT
            + output_tokens * NVIDIA_COST_PER_1M_OUTPUT
        ) / 1_000_000

        supabase.table('chat_messages').insert({
            'session_id': session_uuid,
            'user_question': user_question,
            'ai_response': ai_response[:5000],
            'response_time_ms': response_time_ms,
            'input_tokens': input_tokens,
            'output_tokens': output_tokens,
            'total_cost': total_cost,
            'created_at': datetime.utcnow().isoformat()
        }).execute()
        print(f"[OK] Chat saved (tokens: in={input_tokens}, out={output_tokens})")
    except Exception as e:
        print(f"[WARN] Failed to save chat: {e}")

# ============================================
# TEXT PROCESSING (Same as original)
# ============================================

@lru_cache(maxsize=1)
def load_and_process_data(file_path: str) -> List[Dict]:
    """Read the knowledge-base text file and split it into chunks.

    The most recent result is memoised per ``file_path`` (cache size 1).

    Args:
        file_path: Path of the UTF-8 text file to load.

    Returns:
        The list produced by ``create_chunks``; an empty list when the file
        does not exist or any error occurs while reading/chunking it.
    """
    try:
        print(f"[INFO] Loading data from: {file_path}")

        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as handle:
                raw_text = handle.read()
            print(f"[OK] Loaded {len(raw_text):,} characters")

            pieces = create_chunks(raw_text)
            print(f"[OK] Created {len(pieces)} chunks")
            return pieces

        print(f"[ERROR] File not found: {file_path}")
    except Exception as e:
        print(f"[ERROR] Error loading file: {e}")

    # Reached for a missing file and for any failure above.
    return []

def clean_text(text: str) -> str:
    """Normalise raw text for indexing.

    Steps, in order: tabs become four spaces; runs of characters outside the
    allowed set (word characters, whitespace and common punctuation) become a
    single space; repeated spaces collapse to one; three or more consecutive
    newlines collapse to two; surrounding whitespace is stripped.
    """
    substitutions = (
        (r'[^\w\s.,!?;:()\-\'\"@/&|\[\]#]+', ' '),
        (r' +', ' '),
        (r'\n{3,}', '\n\n'),
    )

    cleaned = text.replace('\t', '    ')
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

def create_chunks(text: str, chunk_size: int = 1000, overlap: int = 200) -> List[Dict]:
    """Split ``text`` into line-aligned chunks with trailing-line overlap.

    The text is split into sections on blank lines or lines of four or more
    dashes, then into stripped, non-empty lines. Lines are accumulated until
    adding the next one would exceed ``chunk_size`` words; the chunk is then
    emitted and the next chunk is seeded with as many whole trailing lines of
    the previous one as fit within ``overlap`` words.

    Args:
        text: Full document text.
        chunk_size: Maximum words per chunk (a single longer line still forms
            its own chunk).
        overlap: Maximum number of words carried over between chunks.

    Returns:
        A list of ``{'text': str, 'id': int}`` dicts with sequential ids from 0.
    """
    chunks = []
    window = []        # lines of the chunk currently being built
    window_words = 0   # word count of `window`

    for section in re.split(r'\n\s*\n|\n-{4,}\n', text):
        for raw_line in section.strip().split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            word_count = len(line.split())

            if window and window_words + word_count > chunk_size:
                chunks.append({
                    'text': '\n'.join(window),
                    'id': len(chunks),
                })

                # Walk backwards to find how many whole trailing lines fit
                # inside the overlap budget.
                carried_words = 0
                keep_from = len(window)
                while keep_from > 0:
                    previous_words = len(window[keep_from - 1].split())
                    if carried_words + previous_words > overlap:
                        break
                    carried_words += previous_words
                    keep_from -= 1

                window = window[keep_from:]
                window_words = carried_words

            window.append(line)
            window_words += word_count

    # Flush whatever is left as the final chunk.
    if window:
        chunks.append({
            'text': '\n'.join(window),
            'id': len(chunks),
        })

    return chunks

# ============================================
# RAG INITIALIZATION (Same as original)
# ============================================

def initialize_rag():
    """Build the retrieval index used by the chat endpoint.

    Loads the embedding model, creates a non-persistent Chroma client and the
    ``rngpit_knowledge`` collection, chunks the knowledge file at
    ``TEXT_FILE_PATH``, embeds every chunk and adds them to the collection.
    The module-level ``embedding_model``, ``chroma_client`` and ``collection``
    are assigned along the way. Returns early (leaving the collection without
    new documents) when no chunks could be loaded.
    """
    global collection, embedding_model, chroma_client

    banner = "=" * 60
    print("\n" + banner)
    print("INITIALIZING RAG SYSTEM")
    print(banner)

    embedding_model = load_embedding_model()

    print("[INFO] Initializing vector database...")
    chroma_settings = Settings(
        anonymized_telemetry=False,
        is_persistent=False
    )
    chroma_client = chromadb.Client(chroma_settings)

    collection = chroma_client.get_or_create_collection(
        name="rngpit_knowledge",
        metadata={"description": "RNG Patel Institute Knowledge Base"}
    )

    chunks = load_and_process_data(TEXT_FILE_PATH)
    if not chunks:
        print("[WARN] No data loaded!")
        return

    print("[INFO] Generating embeddings (batched)...")
    documents = [entry['text'] for entry in chunks]

    # Documents are embedded without the query instruction prefix.
    vectors = embedding_model.encode(
        documents,
        show_progress_bar=True,
        batch_size=64,  # Balanced batch size
        normalize_embeddings=True,
        convert_to_numpy=True
    )

    print("[INFO] Adding to vector database...")
    document_ids = [f"chunk_{position}" for position, _ in enumerate(chunks)]
    collection.add(
        embeddings=vectors.tolist(),
        documents=documents,
        ids=document_ids
    )

    print("\n" + banner)
    print(f"RAG READY! ({len(chunks)} chunks)")
    print(banner + "\n")

def retrieve_context(query: str, top_k: int = 5) -> List[str]:
    """Return the ``top_k`` knowledge chunks most similar to ``query``.

    Args:
        query: The user question.
        top_k: Number of chunks to request from the vector collection.

    Returns:
        The matching document texts, or an empty list when the RAG system is
        not initialised, the query cannot be embedded, nothing matches, or the
        lookup raises.
    """
    if collection is None or embedding_model is None:
        return []

    try:
        # Embedding lookups are memoised per query text.
        query_vector = get_cached_embedding(query)
        if query_vector is None:
            return []

        hits = collection.query(
            query_embeddings=[query_vector.tolist()],
            n_results=top_k
        )

        if not hits or not hits['documents']:
            return []
        # One query was sent, so the first (only) result list is ours.
        return hits['documents'][0]

    except Exception as e:
        print(f"[ERROR] Retrieval error: {e}")
        return []

# ============================================
# NVIDIA RESPONSE GENERATION (OpenAI SDK)
# ============================================

def generate_response_nvidia(query: str, context_chunks: List[str]) -> Dict:
    """Generate an answer to ``query`` from retrieved context via the NVIDIA API.

    The first 10 non-blank context chunks are joined (capped at 40,000
    characters) and sent together with a fixed system prompt to the model named
    by ``current_nvidia_model``. The completion is requested as a stream and
    collected into a single string. If the primary key is rejected with a
    429/quota error and a different backup key is configured, the module
    switches to the backup key (this switch persists for later calls) and
    retries once.

    Args:
        query: The user's question.
        context_chunks: Retrieved knowledge-base passages, most relevant first.

    Returns:
        A dict with keys ``text`` (answer or user-facing error message),
        ``input_tokens`` / ``output_tokens`` (rough estimates: word count
        times 1.3; zero on any failure) and ``model_used`` (the model id,
        ``'none'`` when there was no context, or ``'error'`` on failure).
        The function does not raise; API errors are turned into an error result.
    """
    global NVIDIA_API_KEY, nvidia_client, current_nvidia_model

    def _no_generation(text, model_used='error'):
        # Uniform result shape for every path that did not call the model.
        return {
            'text': text,
            'input_tokens': 0,
            'output_tokens': 0,
            'model_used': model_used
        }

    if not context_chunks:
        return _no_generation(
            "I don't have specific information about that. Could you ask me something else about RNG Patel Institute?",
            model_used='none'
        )

    if not NVIDIA_AVAILABLE:
        return _no_generation("OpenAI SDK not installed. Run: pip install openai")

    if not NVIDIA_API_KEY:
        return _no_generation("NVIDIA API key not configured. Please set NVIDIA_API_KEY environment variable.")

    try:
        # Initialize NVIDIA client if needed
        if nvidia_client is None:
            nvidia_client = OpenAI(
                base_url=NVIDIA_BASE_URL,
                api_key=NVIDIA_API_KEY
            )
            print("[OK] Initialized NVIDIA client")

        # Build context
        context_parts = [chunk.strip() for chunk in context_chunks[:10] if chunk.strip()]
        context = "\n\n".join(context_parts)[:40000]

        # System prompt
        system_prompt = """You are a friendly and knowledgeable student ambassador for RNGPIT (R.N.G. Patel Institute of Technology). Your goal is to help students and visitors by answering their questions warmly and directly.

**ABOUT THIS AI (Important):**
When asked about "who made this AI", "who created you", "who built you", or similar questions about the creators/developers, respond with:

"I was built by **Team InnoCrew**, a talented group of students from RNGPIT:

- **Shis Tushar Maheta** (Lead AI Engineer) - B.Tech Computer Science, Class of 2025
- **Zuveriya Meman** -B.Voc Software development, Class of 2025
- **Karan Chaudhary** - B.Voc Software Development, Class of 2023
- **Sem Surti** - B.Voc Software Development, Class of 2023
- **Shreyansh Vasava** - B.Voc Software Development, Class of 2023

Team InnoCrew developed this AI assistant to help students and visitors learn more about RNG Patel Institute of Technology!"

EXACT FACULTY DETIALS:

#### **1. Information Technology (IT) Department Faculty**

| Name | Designation | Education | Exp. | Email |
| --- | --- | --- | --- | --- |
| **Prof. Vivek C. Joshi** | I/C HOD & Asst. Prof | M.Tech (CSE), Ph.D. (Pursuing) | 13+ Yrs | vcjoshi@rngpit.ac.in |
| **Prof. Hardi A. Patel** | Assistant Professor | M.E. (CSE) | 6+ Yrs | hapatel@rngpit.ac.in |
| **Prof. Krina N. Desai** | Assistant Professor | M.E. (CE) | 3+ Yrs | kndesai@rngpit.ac.in |
| **Prof. Nishtha H. Tandel** | Assistant Professor | M.Tech (IT), GSET Qualified | 4+ Yrs | nhtandel@rngpit.ac.in |
| **Prof. Bhavisha S. Parmar** | Assistant Professor | M.E. (CE), Ph.D. (Pursuing) | 12+ Yrs | bsparmar@rngpit.ac.in |
| **Prof. Foram C. Shukla** | Assistant Professor | M.E. (CE) | 1 Yr | fcshukla@rngpit.ac.in |
| **Prof. Purvaj P. Vaidya** | Assistant Professor | M.Tech (Media Tech - Germany) | 1 Yr | ppvaidya@rngpit.ac.in |
| **Prof. Monali R. Gandhi** | Assistant Professor | M.E. (CE) | 11+ Yrs | mrgandhi@rngpit.ac.in |
| **Prof. Ekta R. Bhatia** | Assistant Professor | M.Tech (CSE) | 4 Yrs | erbhatia@rngpit.ac.in |
| **Prof. Pratik M. Gohil** | Assistant Professor | M.Tech (CE) | 5+ Yrs | pmgohil@rngpit.ac.in |
| **Prof. Zeel R. Bhatt** | Assistant Professor | M.Tech (CE) | 3 Mos | zrbhatt@rngpit.ac.in |
| **Prof. Pooja D. Patel** | Assistant Professor | M.E. (CE) | - | pdpatel@rngpit.ac.in |
| **Prof. Ayushi H. Gandhi** | Assistant Professor | M.E. (CE) | 4+ Yrs | ahgandhi@rngpit.ac.in |
| **Prof. Rinisha S. Patel** | Assistant Professor | M.E. (CE) | - | rspatel@rngpit.ac.in |

#### **2. Mechanical Engineering Department Faculty**

| Name | Designation | Education | Email |
| --- | --- | --- | --- |
| **Dr. Kanti B. Rathod** | HOD & Assoc. Professor | Ph.D. (Mechanical) | kbrathod@rngpit.ac.in |
| **Mr. Hardik B. Nayak** | Assistant Professor | Ph.D. (Pursuing) | hbnayak@rngpit.ac.in |
| **Mr. Niravsinh B. Rathod** | Assistant Professor | Ph.D. (Pursuing) | nbrathod@rngpit.ac.in |
| **Mr. Gaurang K. Champaneri** | Assistant Professor | M.Tech (CIM) | gkchampaneri@rngpit.ac.in |
| **Mr. Chirag K. Balar** | Assistant Professor | M.Tech (Mechanical) | ckbalar@rngpit.ac.in |
| **Mr. Dharmin M. Patel** | Assistant Professor | Ph.D. (Pursuing) | dmpatel@rngpit.ac.in |
| **Mr. Nevilkumar M. Patel** | Assistant Professor | M.E. (Machine Design) | nmpatel@rngpit.ac.in |
| **Mr. Yatin H. Chauhan** | Assistant Professor | M.Tech (ME) | yhchauhan@rngpit.ac.in |
| **Mr. Vikramkumar A. Mistry** | Assistant Professor | M.E. (IC Engine & Automobile) | vamistry@rngpit.ac.in |
| **Mr. Sushant K. Merai** | Assistant Professor | Ph.D. (Pursuing) | skmerai@rngpit.ac.in |
| **Mr. Sapan H. Joshi** | Assistant Professor | M.Tech (Thermal System) | shjoshi@rngpit.ac.in |
| **Dr. Ankursinh P. Solanki** | Assistant Professor | Ph.D., M.E. (Thermal) | apsolanki@rngpit.ac.in |
| **Mr. Nikhil M. Pandya** | Assistant Professor | M.E. (Production) | nmpandya@rngpit.ac.in |
| **Mr. Mehul P. Patel** | Assistant Professor | M.E. (Production) | mppatel@rngpit.ac.in |
| **Mr. Vikesh B. Patel** | Assistant Professor | M.Tech (CAD/CAM) | patelvikesh1988@gmail.com |
| **Mr. Shobhit Y. Varshney** | Assistant Professor | M.Tech (Thermal System) | syvarshney@rngpit.ac.in |

#### **3. Civil Engineering Department Faculty**

| Name                         | Designation                 | Qualification                      | Email                                                     | More Info |
| ---------------------------- | --------------------------- | ---------------------------------- | --------------------------------------------------------- | --------- |
| Dr. Kamalsinh M. Padhiar     | HOD and Associate Professor | —                                  | [kmpadhiar@rngpit.ac.in](mailto:kmpadhiar@rngpit.ac.in)   | —         |
| Mr. Gaurav P. Barot          | Assistant Professor         | M.Tech (Structural Engineering)    | [gpbarot@rngpit.ac.in](mailto:gpbarot@rngpit.ac.in)       | —         |
| Mr. Mohammed Ahmed Qureshi   | Assistant Professor         | M.Tech (Structure), Ph.D. Pursuing | [maqureshi@rngpit.ac.in](mailto:maqureshi@rngpit.ac.in)   | —         |
| Mr. Nirav P. Desai           | Assistant Professor         | M.E (Transportation)               | [npdesai@rngpit.ac.in](mailto:npdesai@rngpit.ac.in)       | —         |
| Mr. Sharukh M. Marfani       | Assistant Professor         | M.E (CE)                           | [smmarfani@rngpit.ac.in](mailto:smmarfani@rngpit.ac.in)   | —         |
| Mr. Hilay N. Prajapati       | Assistant Professor         | M.E (CED)                          | [hnprajapati@ngpit.ac.in](mailto:hnprajapati@ngpit.ac.in) | —         |
| Mr. Ajay B. Patel            | Assistant Professor         | M.E. (Civil Engg.)                 | [ajaybpatel@rngpit.ac.in](mailto:ajaybpatel@rngpit.ac.in) | —         |
| Ms. Srushti U. Joshi         | Assistant Professor         | M.E (Civil Engg.)                  | [sujoshi@rngpit.ac.in](mailto:sujoshi@rngpit.ac.in)       | —         |
| Mr. Priyank H. Patel         | Assistant Professor         | —                                  | [phpatel@rngpit.ac.in](mailto:phpatel@rngpit.ac.in)       | —         |
| Mr. Atish P. More            | Assistant Professor         | Masters in Environmental Engg.     | [apmore@rngpit.ac.in](mailto:apmore@rngpit.ac.in)         | —         |
| Ms. Hetvi J. Kania           | Assistant Professor         | M.E. in Environmental Engg.        | [hjkania@rngpit.ac.in](mailto:hjkania@rngpit.ac.in)       | —         |
| Mr. Pritesh R. Bhandari      | Assistant Professor         | Diploma Civil                      | [prbhandari@rngpit.ac.in](mailto:prbhandari@rngpit.ac.in) | —         |
| Mr. Viral Jagdishbhai Rathod | Assistant Professor         | M.E. (Production)                  | [vjrathod@rngpit.ac.in](mailto:vjrathod@rngpit.ac.in)     | —         |

#### **GIVE THE DETIALS OF THE DEPARTMENT THAT IS ONLY ASKED DONT PROVIDE ANY OTHER RANDOM DATA**

**STRICT FORMATTING RULES:**
1. **ALWAYS USE TABLES FOR DATA**: If you are listing more than 3 items (like faculty names, committee members, fees, courses, placements), you **MUST** use a Markdown table. Do not use bullet points for long lists.
   *Example Table:*
   | Name | Position | Location |
   |------|----------|----------|
   | John Doe | President | Surat |

2. **DIRECT ANSWER**: Answer the question immediately. Do not start with "Okay", "Sure", or "Based on the context".

3. **BEAUTIFUL FORMATTING**: Use **bold** for importance, `code` for emails/numbers, and clear paragraphs.

4. **NO CITATIONS**: Do not say "according to the document". Speak as if you know the facts yourself.

5. **GRACEFUL FALLBACK**: If you don't know, suggest contacting info@rngpit.ac.in."""

        user_prompt = f"""Context Information:
{context}

User Question: {query}

Answer:"""

        def make_api_call(client_to_use, model_to_use):
            return client_to_use.chat.completions.create(
                model=model_to_use,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.5,
                top_p=0.8,
                max_tokens=4096,
                stream=True
            )

        # Attempt 1: Primary Key
        try:
            completion = make_api_call(nvidia_client, current_nvidia_model)
        except Exception as e:
            error_msg = str(e)
            # Check for Rate Limit (429) or Quota issues
            rate_limited = "429" in error_msg or "quota" in error_msg.lower()
            backup_usable = bool(NVIDIA_API_KEY_2) and NVIDIA_API_KEY != NVIDIA_API_KEY_2
            if not (rate_limited and backup_usable):
                # Not a rate limit issue, or no distinct backup key: let the
                # outer handler classify it (bare raise keeps the traceback).
                raise

            print(f"[WARN] detailed error: {error_msg}")
            print(f"[WARN] Primary API key rate limited using fallback key...")

            # Switch to backup key (persists for subsequent requests)
            NVIDIA_API_KEY = NVIDIA_API_KEY_2
            nvidia_client = OpenAI(base_url=NVIDIA_BASE_URL, api_key=NVIDIA_API_KEY)

            # Attempt 2: Backup Key
            completion = make_api_call(nvidia_client, current_nvidia_model)

        # Collect streamed response
        pieces = []
        for chunk in completion:
            # Some stream events carry no choices; skip them instead of
            # failing with IndexError.
            if not chunk.choices:
                continue
            delta_text = chunk.choices[0].delta.content
            if delta_text is not None:
                pieces.append(delta_text)
        response_text = "".join(pieces)

        # Estimate token counts (no usage data is read from the stream)
        input_tokens = int(len((system_prompt + user_prompt).split()) * 1.3)
        output_tokens = int(len(response_text.split()) * 1.3)

        if not response_text:
            response_text = "I couldn't generate a response. Please try again."

        print(f"[DEBUG] NVIDIA generated ~{output_tokens} tokens using {current_nvidia_model}")

        return {
            'text': response_text.strip(),
            'input_tokens': input_tokens,
            'output_tokens': output_tokens,
            'model_used': current_nvidia_model
        }

    except Exception as e:
        error_msg = str(e)
        print(f"[ERROR] NVIDIA API error: {error_msg}")

        lowered = error_msg.lower()
        # The needle must be lowercase too: "API key" could never match a
        # lowercased message, so key errors were reported as generic errors.
        if "api key" in lowered or "authentication" in lowered or "unauthorized" in lowered:
            return _no_generation("Invalid NVIDIA API key. Please check your API key.")
        # A bare "rate" substring also matches words such as "generate", so
        # only explicit rate-limit markers are used here.
        if any(marker in lowered for marker in ("quota", "limit", "429", "too many requests")):
            return _no_generation("API rate limit reached. Please wait a moment and try again.")
        return _no_generation(f"Error processing request: {error_msg}")

# ============================================
# MODEL INFO
# ============================================

def get_model_info():
    """Describe the selectable NVIDIA models and the current selection.

    Returns:
        A dict with ``available_models`` (per model id: name, description,
        context_length, max_new_tokens), ``current_model``, ``is_loaded``
        (True when an API key is configured) and ``provider``.
    """
    exposed_fields = ("name", "description", "context_length", "max_new_tokens")

    catalog = {}
    for model_id, config in NVIDIA_MODEL_REGISTRY.items():
        catalog[model_id] = {field: config[field] for field in exposed_fields}

    return {
        "available_models": catalog,
        "current_model": current_nvidia_model,
        "is_loaded": bool(NVIDIA_API_KEY),
        "provider": "nvidia"
    }

# ============================================
# ROUTES
# ============================================

@app.route('/')
def home():
    """Serve the chat UI (the ``index.html`` template)."""
    landing_template = 'index.html'
    return render_template(landing_template)

@app.route('/chat', methods=['POST'])
def chat():
    """Answer one chat message using retrieval plus NVIDIA generation.

    Expects a JSON body ``{"message": "<text>"}``. Retrieves up to 5 context
    chunks, generates an answer (or returns a canned reply when nothing
    relevant is found), stores the exchange via ``save_chat_to_supabase`` and
    returns ``{"response", "response_time_ms"}``; a ``debug`` section is added
    while ``debug_mode`` is on.

    Returns 400 when the body is missing, not a JSON object, or has no
    non-blank string ``message``; 500 with the error text on unexpected
    failures.
    """
    try:
        start_time = time.time()

        # silent=True yields None for a missing/invalid JSON body instead of
        # raising, so malformed requests get a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        raw_message = data.get('message', '')
        user_message = raw_message.strip() if isinstance(raw_message, str) else ''

        if not user_message:
            return jsonify({'error': 'No message provided'}), 400

        print(f"\n[CHAT] Query: {user_message}")

        # Get session ID within request context
        current_session_id = get_or_create_session()

        # Fast retrieval
        context_chunks = retrieve_context(user_message, top_k=5)

        # Defaults for the "no context" path; always bound so later code never
        # depends on which branch ran.
        input_tokens = 0
        output_tokens = 0
        model_used = current_nvidia_model

        if not context_chunks:
            response_text = "I don't have information about that. Try asking about courses, admissions, fees, placements, or facilities at RNGPIT."
        else:
            print(f"[INFO] Found {len(context_chunks)} relevant chunks")
            print(f"[INFO] Using NVIDIA API ({current_nvidia_model}) for generation...")
            result = generate_response_nvidia(user_message, context_chunks)

            response_text = result['text']
            input_tokens = result['input_tokens']
            output_tokens = result['output_tokens']
            model_used = result.get('model_used', current_nvidia_model)

        response_time_ms = int((time.time() - start_time) * 1000)

        # Save to database (synchronous: adds the database round trips to the
        # request latency; failures are handled inside the helper)
        save_chat_to_supabase(current_session_id, user_message, response_text, response_time_ms, input_tokens, output_tokens)

        print(f"[OK] Response ({len(response_text)} chars) in {response_time_ms}ms [Provider: NVIDIA, model: {model_used}]")

        response_data = {
            'response': response_text,
            'response_time_ms': response_time_ms
        }

        if debug_mode:
            response_data['debug'] = {
                'enabled': True,
                'chunks_used': len(context_chunks),
                'model': model_used,
                'provider': 'nvidia',
                'input_tokens': input_tokens,
                'output_tokens': output_tokens
            }

        return jsonify(response_data)

    except Exception as e:
        print(f"[ERROR] Chat error: {e}")
        import traceback
        traceback.print_exc()
        # NOTE(review): the raw exception text is returned to the client -
        # consider a generic message if this endpoint is public.
        return jsonify({'error': str(e)}), 500

@app.route('/health', methods=['GET'])
def health():
    """Report service status: indexed chunk count, model, client and debug state."""
    # The previously computed-but-unused get_model_info() result was dropped;
    # every field below comes straight from module state.
    return jsonify({
        'status': 'healthy',
        'chunks_loaded': collection.count() if collection else 0,
        'current_model': current_nvidia_model,
        'model_loaded': nvidia_client is not None,
        'provider': 'nvidia',
        'debug_mode': debug_mode,
        'nvidia_configured': bool(NVIDIA_API_KEY)
    })

# ============================================
# MODEL MANAGEMENT API
# ============================================

@app.route('/api/models', methods=['GET'])
def api_get_models():
    """Return the model catalogue and current selection as JSON."""
    model_catalog = get_model_info()
    return jsonify(model_catalog)

@app.route('/api/models/switch', methods=['POST'])
@login_required
def api_switch_model():
    """Switch the active NVIDIA model (admin only).

    Reads ``model_id`` from the JSON body, defaulting to
    ``DEFAULT_NVIDIA_MODEL``. Responds with 400 when the id is not in
    ``NVIDIA_MODEL_REGISTRY``; otherwise updates ``current_nvidia_model`` and
    returns the refreshed model info.
    """
    global current_nvidia_model

    payload = request.json or {}
    requested_model = payload.get('model_id', DEFAULT_NVIDIA_MODEL)

    if requested_model in NVIDIA_MODEL_REGISTRY:
        current_nvidia_model = requested_model
        print(f"[INFO] Switched to NVIDIA model: {requested_model}")

        display_name = NVIDIA_MODEL_REGISTRY[requested_model]['name']
        return jsonify({
            'success': True,
            'message': f"Switched to {display_name}",
            'model_info': get_model_info()
        })

    return jsonify({
        'success': False, 
        'error': f"Model '{requested_model}' not found in NVIDIA registry"
    }), 400

# NOTE(review): unlike the other mutating /api endpoints this route is not
# wrapped in @login_required, so any client can trigger a full re-embedding.
# Confirm whether that is intentional before exposing it publicly.
@app.route('/api/embeddings/regenerate', methods=['POST'])
def api_regenerate_embeddings():
    """Drop the vector collection and rebuild it from the knowledge file.

    Clears the embedding and data-loading caches so the rebuild re-reads the
    file, then re-runs ``initialize_rag``. Returns the new chunk count, or a
    500 JSON error when any step fails.
    """
    global collection

    try:
        if collection is not None:
            chroma_client.delete_collection("rngpit_knowledge")
            # Forget the handle right away: if the rebuild below fails, the
            # module must not keep pointing at a deleted collection.
            collection = None

        # Clear cache
        get_cached_embedding.cache_clear()
        load_and_process_data.cache_clear()

        initialize_rag()

        return jsonify({
            'success': True,
            'message': 'Embeddings regenerated',
            'chunks_loaded': collection.count() if collection else 0
        })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500

@app.route('/api/debug/toggle', methods=['POST'])
def api_toggle_debug():
    """Flip the module-wide debug flag and report its new value."""
    global debug_mode

    new_state = not debug_mode
    debug_mode = new_state
    return jsonify({'success': True, 'debug_mode': new_state})

@app.route('/api/debug/status', methods=['GET'])
def api_debug_status():
    """Report whether debug mode is currently enabled."""
    status = {'debug_mode': debug_mode}
    return jsonify(status)

@app.route('/api/nvidia-key', methods=['POST'])
@login_required
def api_set_nvidia_key():
    """Replace the NVIDIA API key at runtime and rebuild the client (admin only).

    Expects a JSON body ``{"api_key": "<key>"}``. Returns 400 when the key is
    missing, blank or not a string, and 500 when the OpenAI SDK is not
    installed. On success the response contains a masked preview of the key.
    """
    global NVIDIA_API_KEY, nvidia_client

    data = request.json or {}
    raw_key = data.get('api_key', '')
    api_key = raw_key.strip() if isinstance(raw_key, str) else ''

    if not api_key:
        return jsonify({'success': False, 'error': 'API key required'}), 400

    if not NVIDIA_AVAILABLE:
        # `OpenAI` is undefined when the import failed; report it as JSON
        # instead of crashing with a NameError.
        return jsonify({'success': False, 'error': 'OpenAI SDK not installed. Run: pip install openai'}), 500

    # Build the client first so a construction failure leaves the previous
    # key/client pair untouched.
    new_client = OpenAI(base_url=NVIDIA_BASE_URL, api_key=api_key)
    NVIDIA_API_KEY = api_key
    nvidia_client = new_client  # Reset client with new key
    print("[INFO] NVIDIA API key updated")

    return jsonify({
        'success': True,
        'message': 'NVIDIA API key configured',
        'key_preview': api_key[:8] + '...' + api_key[-4:] if len(api_key) > 12 else '***'
    })

@app.route('/api/nvidia-key/status', methods=['GET'])
@login_required
def api_nvidia_key_status():
    """Report whether an NVIDIA API key is configured (admin only).

    When a key is set, a masked preview (first 8 and last 4 characters, or
    ``***`` for keys of 12 characters or fewer) is included.
    """
    if not NVIDIA_API_KEY:
        return jsonify({'configured': False})

    if len(NVIDIA_API_KEY) > 12:
        preview = NVIDIA_API_KEY[:8] + '...' + NVIDIA_API_KEY[-4:]
    else:
        preview = '***'

    return jsonify({
        'configured': True,
        'key_preview': preview
    })

# ============================================
# ADMIN ROUTES
# ============================================

@app.route('/admin/login', methods=['GET', 'POST'])
def admin_login():
    """Render the admin login page (GET) or authenticate an admin (POST).

    POST expects a JSON body with ``username`` and ``password``. On success
    the admin flags are stored in the session and a redirect target is
    returned. Responses: 400 for missing credentials or an unusable body,
    401 for invalid credentials, 500 when the database is unavailable or the
    lookup fails.
    """
    import hmac

    if request.method == 'GET':
        if session.get('admin_logged_in'):
            return redirect(url_for('admin_dashboard'))
        return render_template('login.html')

    # silent=True: a missing or malformed JSON body becomes None instead of
    # raising, so it is answered with the 400 below rather than a crash.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_username = data.get('username', '')
    raw_password = data.get('password', '')
    username = raw_username.strip() if isinstance(raw_username, str) else ''
    password = raw_password.strip() if isinstance(raw_password, str) else ''

    if not username or not password:
        return jsonify({'success': False, 'error': 'Username and password required'}), 400

    if not supabase:
        return jsonify({'success': False, 'error': 'Database not available'}), 500

    try:
        result = supabase.table('admin_users').select('*').eq('username', username).execute()

        if result.data and len(result.data) > 0:
            user = result.data[0]

            # NOTE(review): the submitted password is compared directly with
            # the `password_hash` column, i.e. the column appears to hold the
            # plaintext password. Confirm and migrate to a real password hash.
            stored_secret = user['password_hash']
            # Constant-time comparison avoids leaking how many leading
            # characters matched; bytes are used so non-ASCII input is safe.
            if isinstance(stored_secret, str) and hmac.compare_digest(
                stored_secret.encode('utf-8'), password.encode('utf-8')
            ):
                session['admin_logged_in'] = True
                session['admin_username'] = username
                session['admin_id'] = user['id']

                # Update last login
                supabase.table('admin_users').update({
                    'last_login': datetime.utcnow().isoformat()
                }).eq('id', user['id']).execute()

                return jsonify({'success': True, 'redirect': '/admin/dashboard'})

        return jsonify({'success': False, 'error': 'Invalid credentials'}), 401

    except Exception as e:
        print(f"[ERROR] Login: {e}")
        # Details stay in the server log; unauthenticated callers only get a
        # generic message so database internals are not disclosed.
        return jsonify({'success': False, 'error': 'Login failed due to a server error'}), 500

@app.route('/admin/logout')
def admin_logout():
    """Drop the admin keys from the session and redirect to the login page."""
    for session_key in ('admin_logged_in', 'admin_username', 'admin_id'):
        # A default of None makes the pop a no-op when the key is absent.
        session.pop(session_key, None)
    return redirect(url_for('admin_login'))

@app.route('/admin/dashboard')
@login_required
def admin_dashboard():
    """Render ``admin.html`` with the session's admin username (default 'Admin')."""
    display_name = session.get('admin_username', 'Admin')
    return render_template('admin.html', username=display_name)

# ============================================
# ANALYTICS API (OPTIMIZED)
# ============================================

@app.route('/api/analytics/stats')
@login_required
def get_analytics_stats():
    """Return overall and today-only message, token and cost totals as JSON.

    Reads ``chat_messages`` (all rows, then rows with ``created_at`` on or
    after today's UTC date) and ``chat_sessions``, and derives per-message
    averages from the overall totals.

    Returns:
        200 JSON with the aggregated figures, or 500 JSON ``{'error': ...}``
        when the database is unavailable or a query fails.
    """
    # NOTE(review): the payload labels the provider as 'groq' and reads
    # GROQ_* pricing constants although this file is configured for the
    # NVIDIA API -- confirm this is intended.
    if not supabase:
        return jsonify({'error': 'Database not available'}), 500

    usage_columns = 'id, input_tokens, output_tokens, total_cost'

    def summarize(rows):
        """Return (row count, input tokens, output tokens, cost) for ``rows``."""
        rows = rows or []
        # ``or 0`` maps NULL columns to zero.
        tokens_in = sum(row.get('input_tokens', 0) or 0 for row in rows)
        tokens_out = sum(row.get('output_tokens', 0) or 0 for row in rows)
        spent = sum(row.get('total_cost', 0) or 0 for row in rows)
        return len(rows), tokens_in, tokens_out, spent

    try:
        all_rows = supabase.table('chat_messages').select(usage_columns).execute()
        total_questions, total_input_tokens, total_output_tokens, total_cost = summarize(all_rows.data)

        session_rows = supabase.table('chat_sessions').select('id').execute()
        total_sessions = len(session_rows.data or [])

        today = datetime.utcnow().date().isoformat()
        todays_rows = (
            supabase.table('chat_messages')
            .select(usage_columns)
            .gte('created_at', today)
            .execute()
        )
        today_questions, today_input_tokens, today_output_tokens, today_cost = summarize(todays_rows.data)

        def per_message(amount):
            """Average ``amount`` over all messages; 0 when there are none."""
            if total_questions > 0:
                return amount / total_questions
            return 0

        pricing = {
            'input_per_1m': GROQ_COST_PER_1M_INPUT,
            'output_per_1m': GROQ_COST_PER_1M_OUTPUT
        }

        return jsonify({
            'total_questions': total_questions,
            'total_sessions': total_sessions,
            'today_questions': today_questions,
            'total_input_tokens': total_input_tokens,
            'total_output_tokens': total_output_tokens,
            'total_tokens': total_input_tokens + total_output_tokens,
            'total_cost': round(total_cost, 6),
            'today_input_tokens': today_input_tokens,
            'today_output_tokens': today_output_tokens,
            'today_tokens': today_input_tokens + today_output_tokens,
            'today_cost': round(today_cost, 6),
            'avg_input_tokens': round(per_message(total_input_tokens), 1),
            'avg_output_tokens': round(per_message(total_output_tokens), 1),
            'avg_cost_per_message': round(per_message(total_cost), 6),
            'pricing': pricing,
            'provider': 'groq'
        })
    except Exception as e:
        print(f"[ERROR] Stats: {e}")
        return jsonify({'error': str(e)}), 500

@app.route('/api/analytics/top-questions')
@login_required
def get_top_questions():
    """Return the most frequently asked questions as JSON.

    Questions are grouped case-insensitively after trimming whitespace. Each
    group reports its count, the most recent ``created_at`` and the wording
    of that most recent occurrence.

    Query params:
        limit: maximum number of groups to return (default 10). Negative
            values are treated as 0.

    Returns:
        200 JSON ``{'questions': [...]}`` sorted by descending count, or
        500 JSON ``{'error': ...}`` when the database is unavailable or the
        query fails.
    """
    if not supabase:
        return jsonify({'error': 'Database not available'}), 500

    try:
        # A negative slice bound would silently drop entries from the END of
        # the ranking, so clamp it.
        limit = max(request.args.get('limit', 10, type=int), 0)
        result = supabase.table('chat_messages').select('user_question, created_at').execute()

        if not result.data:
            return jsonify({'questions': []})

        question_counts = {}
        for msg in result.data:
            question = msg.get('user_question')
            # A NULL / non-text question must not take the endpoint down.
            if not isinstance(question, str):
                continue

            asked_at = msg.get('created_at')
            key = question.lower().strip()
            entry = question_counts.get(key)

            if entry is None:
                question_counts[key] = {
                    'count': 1,
                    'last_asked': asked_at,
                    'original': question
                }
                continue

            entry['count'] += 1
            # Timestamps are compared as returned by the query; guard against
            # NULLs so the comparison cannot raise.
            if asked_at is not None and (
                entry['last_asked'] is None or asked_at > entry['last_asked']
            ):
                entry['last_asked'] = asked_at
                entry['original'] = question

        # sorted() is stable, so equally frequent questions keep first-seen order.
        sorted_questions = sorted(
            [{'question': v['original'], 'count': v['count'], 'last_asked': v['last_asked']}
             for v in question_counts.values()],
            key=lambda x: x['count'],
            reverse=True
        )[:limit]

        return jsonify({'questions': sorted_questions})
    except Exception as e:
        print(f"[ERROR] Top questions: {e}")
        return jsonify({'error': str(e)}), 500

@app.route('/api/analytics/all-questions')
@login_required
def get_all_questions():
    """Return one page of chat messages, newest first, with paging metadata.

    Query params:
        page: 1-based page number (default 1; values below 1 become 1).
        per_page: page size (default 20; values below 1 become 1).
        search: optional substring matched case-insensitively against
            ``user_question``.

    Returns:
        200 JSON with ``questions``, ``total``, ``page``, ``per_page`` and
        ``total_pages``, or 500 JSON ``{'error': ...}`` when the database is
        unavailable or a query fails.
    """
    if not supabase:
        return jsonify({'error': 'Database not available'}), 500

    try:
        # Clamp to sane minimums: page <= 0 would give a negative offset, and
        # per_page <= 0 an invalid range (and a division by zero below).
        page = max(request.args.get('page', 1, type=int), 1)
        per_page = max(request.args.get('per_page', 20, type=int), 1)
        search = request.args.get('search', '').strip()

        offset = (page - 1) * per_page

        query = supabase.table('chat_messages').select('id, user_question, ai_response, created_at, response_time_ms')

        if search:
            query = query.ilike('user_question', f'%{search}%')

        # range() bounds are inclusive on both ends.
        result = query.order('created_at', desc=True).range(offset, offset + per_page - 1).execute()

        # Separate count query with the same filter to get the overall total.
        count_query = supabase.table('chat_messages').select('id', count='exact')
        if search:
            count_query = count_query.ilike('user_question', f'%{search}%')
        count_result = count_query.execute()
        total = count_result.count or 0

        return jsonify({
            'questions': result.data or [],
            'total': total,
            'page': page,
            'per_page': per_page,
            # Ceiling division without floats.
            'total_pages': (total + per_page - 1) // per_page
        })
    except Exception as e:
        print(f"[ERROR] All questions: {e}")
        return jsonify({'error': str(e)}), 500

@app.route('/api/analytics/token-usage')
@login_required
def get_token_usage():
    """Return daily and hourly token/cost aggregates plus monthly projections.

    Reads every ``chat_messages`` row, buckets usage by calendar day and by
    hour of the parsed ``created_at``, keeps the last 30 day buckets and the
    last 24 hour buckets, and projects monthly figures as 30x the daily
    averages of the returned days.

    Rows whose timestamp or numeric fields cannot be processed are skipped;
    the number skipped is logged once per request.

    Returns:
        200 JSON with ``daily_usage``, ``hourly_usage`` and ``projections``
        (only the two usage lists when the table is empty), or 500 JSON
        ``{'error': ...}`` when the database is unavailable or the query fails.
    """
    if not supabase:
        return jsonify({'error': 'Database not available'}), 500

    def _accumulate(buckets, label, key, input_tokens, output_tokens, cost):
        """Add one message's usage to ``buckets[key]``, creating it on first use."""
        bucket = buckets.get(key)
        if bucket is None:
            bucket = buckets[key] = {
                label: key,
                'input_tokens': 0,
                'output_tokens': 0,
                'total_tokens': 0,
                'cost': 0,
                'messages': 0
            }
        bucket['input_tokens'] += input_tokens
        bucket['output_tokens'] += output_tokens
        bucket['total_tokens'] += input_tokens + output_tokens
        bucket['cost'] += cost
        bucket['messages'] += 1

    try:
        result = supabase.table('chat_messages').select(
            'created_at, input_tokens, output_tokens, total_cost'
        ).order('created_at', desc=False).execute()

        if not result.data:
            return jsonify({'daily_usage': [], 'hourly_usage': []})

        daily_data = {}
        hourly_data = {}
        skipped = 0

        for msg in result.data:
            try:
                # A trailing 'Z' is rewritten to an explicit offset before
                # parsing, since older fromisoformat() versions reject it.
                created_at = datetime.fromisoformat(msg['created_at'].replace('Z', '+00:00'))
                day_key = created_at.strftime('%Y-%m-%d')
                hour_key = created_at.strftime('%Y-%m-%d %H:00')

                # ``or 0`` maps NULL columns to zero.
                input_tokens = msg.get('input_tokens', 0) or 0
                output_tokens = msg.get('output_tokens', 0) or 0
                cost = msg.get('total_cost', 0) or 0

                _accumulate(daily_data, 'date', day_key, input_tokens, output_tokens, cost)
                _accumulate(hourly_data, 'hour', hour_key, input_tokens, output_tokens, cost)
            except (AttributeError, KeyError, TypeError, ValueError):
                # Best-effort: one malformed row must not fail the report,
                # but only the expected data errors are swallowed.
                skipped += 1

        if skipped:
            print(f"[WARN] Token usage: skipped {skipped} malformed row(s)")

        # Keys are zero-padded, so lexicographic order is chronological.
        daily_usage = sorted(daily_data.values(), key=lambda x: x['date'])[-30:]
        hourly_usage = sorted(hourly_data.values(), key=lambda x: x['hour'])[-24:]

        if daily_usage:
            day_count = len(daily_usage)
            avg_daily_tokens = sum(d['total_tokens'] for d in daily_usage) / day_count
            avg_daily_cost = sum(d['cost'] for d in daily_usage) / day_count
            avg_daily_messages = sum(d['messages'] for d in daily_usage) / day_count
        else:
            avg_daily_tokens = avg_daily_cost = avg_daily_messages = 0

        return jsonify({
            'daily_usage': daily_usage,
            'hourly_usage': hourly_usage,
            'projections': {
                'avg_daily_tokens': round(avg_daily_tokens, 0),
                'avg_daily_cost': round(avg_daily_cost, 4),
                'avg_daily_messages': round(avg_daily_messages, 1),
                'projected_monthly_tokens': round(avg_daily_tokens * 30, 0),
                'projected_monthly_cost': round(avg_daily_cost * 30, 2),
                'projected_monthly_messages': round(avg_daily_messages * 30, 0)
            }
        })
    except Exception as e:
        print(f"[ERROR] Token usage: {e}")
        return jsonify({'error': str(e)}), 500

@app.route('/api/admin/check')
def check_admin():
    """Report the session's admin login flag and username as JSON.

    Returns:
        JSON ``{'is_admin': ..., 'username': ...}``; the flag defaults to
        False and the username to None when absent from the session.
    """
    logged_in = session.get('admin_logged_in', False)
    admin_name = session.get('admin_username', None)
    return jsonify({'is_admin': logged_in, 'username': admin_name})

# ============================================
# MAIN
# ============================================

if __name__ == '__main__':
    # Script entry point: print a startup banner, initialise the RAG
    # pipeline, list the configured models, then start the Flask server.
    rule = "="*60

    print("\n" + rule)
    print("RNGPIT AI ASSISTANT - NVIDIA API (Llama 3.1) VERSION")
    print(rule)
    print(f"Data: {TEXT_FILE_PATH}")
    print(f"Available Models: {list(NVIDIA_MODEL_REGISTRY.keys())}")
    print(f"Default Model: {DEFAULT_NVIDIA_MODEL}")

    supabase_mark = '✓' if supabase else '✗'
    key_status = '✓ Configured' if NVIDIA_API_KEY else '✗ Not set (use NVIDIA_API_KEY env var)'
    print(f"Supabase: {supabase_mark}")
    print(f"NVIDIA API Key: {key_status}")
    print(rule)

    initialize_rag()

    for banner_line in (
        "\n🚀 Server starting...",
        "📱 Chatbot: http://localhost:5000",
        "🔐 Admin: http://localhost:5000/admin/login",
        "\n⚡ NVIDIA API Features:",
        "  ✓ Dracarys Llama 3.1 70B via NVIDIA",
        "  ✓ Available models:",
    ):
        print(banner_line)

    for model_id, config in NVIDIA_MODEL_REGISTRY.items():
        print(f"    - {model_id}: {config['description']}")

    for banner_line in (
        "  ✓ OpenAI-compatible API",
        "  ✓ Powerful 70B parameter model",
        "  ✓ Same RAG pipeline as original",
        "  ✓ Same admin panel and analytics",
        "  ✓ No local GPU required!",
    ):
        print(banner_line)
    print(rule + "\n")

    # Binds to all interfaces on port 5000 with threaded request handling.
    app.run(debug=False, port=5000, host='0.0.0.0', threaded=True)