"""RNGPIT AI assistant.

Flask application that answers questions about RNG Patel Institute of
Technology with a retrieval-augmented pipeline: a sentence-transformers
embedding model, an in-memory ChromaDB collection built from a text file,
and the Groq chat-completions API for generation. Chats are logged to
Supabase, which also backs the admin login and analytics endpoints.
"""

import gc
import hmac
import json
import os
import re
import sys
import threading
import time
import uuid
from datetime import datetime
from functools import lru_cache, wraps
from queue import Queue
from typing import Dict, List, Optional, Tuple

import chromadb
import numpy as np
from chromadb.config import Settings
from flask import (
    Flask,
    copy_current_request_context,
    jsonify,
    redirect,
    render_template,
    request,
    session,
    url_for,
)
from flask_cors import CORS
from sentence_transformers import SentenceTransformer
from supabase import Client, create_client

# Groq SDK for LLM inference
try:
    from groq import Groq
    GROQ_AVAILABLE = True
except ImportError:
    GROQ_AVAILABLE = False
    print("[WARN] groq package not installed. Run: pip install groq")

# Fix Windows console encoding
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
if sys.platform == 'win32':
    try:
        sys.stdout.reconfigure(encoding='utf-8')
    except Exception:
        # Best effort only: some stdout replacements have no reconfigure().
        pass

app = Flask(__name__)
CORS(app)
# SECURITY: the fallback is a publicly known value; set FLASK_SECRET_KEY in
# production so session cookies cannot be forged.
app.secret_key = os.environ.get(
    'FLASK_SECRET_KEY', 'rngai_secret_key_change_in_production_2025'
)

# ============================================
# CONFIGURATION
# ============================================
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
TEXT_FILE_PATH = os.path.join(SCRIPT_DIR, "data", "link17.txt")
MODEL_CACHE_DIR = os.path.join(SCRIPT_DIR, "model_cache")

# ============================================
# GROQ MODEL CONFIGURATION
# ============================================
GROQ_MODEL_REGISTRY = {
    "llama-3.3-70b-versatile": {
        "name": "LLaMA 3.3 70B Versatile",
        "model_id": "llama-3.3-70b-versatile",
        "context_length": 128000,
        "max_new_tokens": 2048,
        "description": "LLaMA 3.3 70B - Fast, versatile, excellent quality",
    },
    "llama-3.1-8b-instant": {
        "name": "LLaMA 3.1 8B Instant",
        "model_id": "llama-3.1-8b-instant",
        "context_length": 131072,
        "max_new_tokens": 8192,
        "description": "LLaMA 3.1 8B - Ultra fast, good for simple queries",
    },
    "llama3-70b-8192": {
        "name": "LLaMA 3 70B",
        "model_id": "llama3-70b-8192",
        "context_length": 8192,
        "max_new_tokens": 2048,
        "description": "LLaMA 3 70B - High quality responses",
    },
    "mixtral-8x7b-32768": {
        "name": "Mixtral 8x7B",
        "model_id": "mixtral-8x7b-32768",
        "context_length": 32768,
        "max_new_tokens": 4096,
        "description": "Mixtral 8x7B - Excellent for complex reasoning",
    },
    "gemma2-9b-it": {
        "name": "Gemma 2 9B",
        "model_id": "gemma2-9b-it",
        "context_length": 8192,
        "max_new_tokens": 2048,
        "description": "Gemma 2 9B - Google's efficient model",
    },
}

DEFAULT_GROQ_MODEL = "llama-3.3-70b-versatile"

# Cost tracking (Groq is free tier with rate limits)
GROQ_COST_PER_1M_INPUT = 0.0  # Free tier
GROQ_COST_PER_1M_OUTPUT = 0.0  # Free tier

# ============================================
# GROQ CONFIGURATION
# ============================================
# SECURITY: a real-looking API key is embedded as the fallback. It is kept so
# existing deployments keep working, but it should be rotated and supplied
# only through the GROQ_API_KEY environment variable or the admin panel.
GROQ_API_KEY = os.environ.get(
    'GROQ_API_KEY', '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'
)
groq_client = None
current_groq_model = DEFAULT_GROQ_MODEL

# Supabase Configuration (environment variables override the built-in values)
SUPABASE_URL = os.environ.get(
    'SUPABASE_URL', "https://kkbaddxfvtfiyzcfwpaw.supabase.co"
)
SUPABASE_KEY = os.environ.get(
    'SUPABASE_KEY', "sb_publishable_WWKffISotOKwynMOilHPng_pA8VIX1B"
)

try:
    supabase: Optional[Client] = create_client(SUPABASE_URL, SUPABASE_KEY)
except Exception as e:
    print(f"[WARN] Could not connect to Supabase: {e}")
    supabase = None

# ============================================
# GLOBAL VARIABLES
# ============================================
embedding_model = None
collection = None
chroma_client = None
debug_mode = False

# OPTIMIZATION: Cache for embeddings
EMBEDDING_CACHE_SIZE = 200
embedding_cache = {}


# ============================================
# EMBEDDING MODEL (Same as original)
# ============================================
def load_embedding_model():
    """Load the sentence-transformers model once and return it.

    Subsequent calls return the already-loaded module-level instance. The
    model is moved to CUDA when torch reports a GPU; otherwise it stays on
    the CPU.
    """
    global embedding_model

    if embedding_model is not None:
        return embedding_model

    print("[INFO] Loading embedding model (optimized)...")
    embedding_model = SentenceTransformer('mixedbread-ai/mxbai-embed-large-v1')

    # Check if GPU is available for embeddings
    try:
        import torch
        if torch.cuda.is_available():
            embedding_model = embedding_model.to('cuda')
            print("[PERF] Embedding model on GPU")
        else:
            print("[INFO] Embedding model on CPU")
    except Exception:
        # torch missing or the device move failed: keep the CPU model.
        print("[INFO] Embedding model on CPU")

    print("[OK] Embedding model loaded!")
    return embedding_model


@lru_cache(maxsize=EMBEDDING_CACHE_SIZE)
def _encode_query(text: str) -> np.ndarray:
    """Encode one query string with the loaded model (memoised per text)."""
    instruction = "Represent this sentence for searching relevant passages: "
    return embedding_model.encode(
        [instruction + text],
        normalize_embeddings=True,
        show_progress_bar=False,
        batch_size=1,
        convert_to_numpy=True,
    )[0]


def get_cached_embedding(text: str) -> Optional[np.ndarray]:
    """Return the (cached) query embedding for *text*.

    Returns None when the embedding model has not been loaded yet. The
    model check happens outside the cache so that a None result is never
    memoised and then served after the model becomes available.
    """
    if embedding_model is None:
        return None
    return _encode_query(text)


# Keep the lru_cache management API that callers used on the original name.
get_cached_embedding.cache_clear = _encode_query.cache_clear
get_cached_embedding.cache_info = _encode_query.cache_info


# ============================================
# AUTH HELPERS
# ============================================
def login_required(f):
    """Decorator: redirect to the admin login page unless an admin is logged in."""
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if not session.get('admin_logged_in'):
            return redirect(url_for('admin_login'))
        return f(*args, **kwargs)
    return decorated_function


def get_or_create_session():
    """Return the chat session id stored in the Flask session.

    On first use a UUID is generated, stored in the session cookie and, when
    Supabase is available, recorded in the ``chat_sessions`` table. A failed
    insert is logged and otherwise ignored.
    """
    if 'chat_session_id' not in session:
        session['chat_session_id'] = str(uuid.uuid4())

        if supabase:
            try:
                user_agent = request.user_agent.string
                supabase.table('chat_sessions').insert({
                    'session_id': session['chat_session_id'],
                    'ip_address': request.remote_addr,
                    'user_agent': user_agent[:500] if user_agent else None,
                    'started_at': datetime.utcnow().isoformat()
                }).execute()
            except Exception as e:
                print(f"[WARN] Could not create session: {e}")

    return session['chat_session_id']


def save_chat_to_supabase(session_id: str, user_question: str, ai_response: str,
                          response_time_ms: int, input_tokens: int = 0,
                          output_tokens: int = 0):
    """Persist one question/answer pair to the ``chat_messages`` table.

    The call is synchronous. It is a no-op when Supabase is unavailable or
    when no ``chat_sessions`` row matches *session_id*; any database error
    is logged and swallowed so chat responses are never blocked by logging.
    The stored answer is truncated to 5000 characters.
    """
    if not supabase:
        return

    try:
        result = supabase.table('chat_sessions').select('id').eq(
            'session_id', session_id
        ).execute()
        total_cost = 0.0  # Groq is free tier

        if result.data:
            session_uuid = result.data[0]['id']
            supabase.table('chat_messages').insert({
                'session_id': session_uuid,
                'user_question': user_question,
                'ai_response': ai_response[:5000],
                'response_time_ms': response_time_ms,
                'input_tokens': input_tokens,
                'output_tokens': output_tokens,
                'total_cost': total_cost,
                'created_at': datetime.utcnow().isoformat()
            }).execute()
            print(f"[OK] Chat saved (tokens: in={input_tokens}, out={output_tokens})")
    except Exception as e:
        print(f"[WARN] Failed to save chat: {e}")


# ============================================
# TEXT PROCESSING (Same as original)
# ============================================
@lru_cache(maxsize=1)
def load_and_process_data(file_path: str) -> List[Dict]:
    """Read *file_path* and split it into chunks (result is cached).

    Returns an empty list when the file is missing or unreadable. Note that
    the empty result is cached too; call ``cache_clear()`` to retry.
    """
    try:
        print(f"[INFO] Loading data from: {file_path}")

        if not os.path.exists(file_path):
            print(f"[ERROR] File not found: {file_path}")
            return []

        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        print(f"[OK] Loaded {len(content):,} characters")
        chunks = create_chunks(content)
        print(f"[OK] Created {len(chunks)} chunks")
        return chunks

    except Exception as e:
        print(f"[ERROR] Error loading file: {e}")
        return []


def clean_text(text: str) -> str:
    """Normalise whitespace and strip characters outside the allowed set."""
    text = text.replace('\t', ' ')
    text = re.sub(r'[^\w\s.,!?;:()\-\'\"@/&|\[\]#]+', ' ', text)
    text = re.sub(r' +', ' ', text)
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()


def create_chunks(text: str, chunk_size: int = 1000, overlap: int = 200) -> List[Dict]:
    """Split *text* into line-aligned chunks measured in words.

    Lines are accumulated until adding the next one would exceed
    *chunk_size* words; the chunk is then emitted and the next chunk is
    seeded with trailing lines of the previous one totalling at most
    *overlap* words.

    Returns a list of ``{'text': str, 'id': int}`` dicts with sequential ids.
    """
    sections = re.split(r'\n\s*\n|\n-{4,}\n', text)
    chunks = []
    chunk_id = 0
    current_chunk = []
    current_length = 0

    for section in sections:
        section = section.strip()
        if not section:
            continue

        for line in section.split('\n'):
            line = line.strip()
            if not line:
                continue

            line_words = len(line.split())

            if current_length + line_words > chunk_size and current_chunk:
                chunks.append({
                    'text': '\n'.join(current_chunk),
                    'id': chunk_id,
                })
                chunk_id += 1

                # Carry the tail of the finished chunk over as overlap.
                overlap_buffer = []
                overlap_len = 0
                for prev_line in reversed(current_chunk):
                    prev_len = len(prev_line.split())
                    if overlap_len + prev_len > overlap:
                        break
                    overlap_buffer.insert(0, prev_line)
                    overlap_len += prev_len

                current_chunk = overlap_buffer
                current_length = overlap_len

            current_chunk.append(line)
            current_length += line_words

    if current_chunk:
        chunks.append({
            'text': '\n'.join(current_chunk),
            'id': chunk_id,
        })

    return chunks


# ============================================
# RAG INITIALIZATION (Same as original)
# ============================================
def initialize_rag():
    """Load the embedding model and (re)build the in-memory vector store.

    Populates the module-level ``embedding_model``, ``chroma_client`` and
    ``collection``. When no chunks can be loaded the collection is left
    empty and a warning is printed.
    """
    global collection, embedding_model, chroma_client

    print("\n" + "=" * 60)
    print("INITIALIZING RAG SYSTEM")
    print("=" * 60)

    embedding_model = load_embedding_model()

    print("[INFO] Initializing vector database...")
    chroma_client = chromadb.Client(Settings(
        anonymized_telemetry=False,
        is_persistent=False
    ))

    collection = chroma_client.get_or_create_collection(
        name="rngpit_knowledge",
        metadata={"description": "RNG Patel Institute Knowledge Base"}
    )

    chunks = load_and_process_data(TEXT_FILE_PATH)

    if not chunks:
        print("[WARN] No data loaded!")
        return

    print("[INFO] Generating embeddings (batched)...")
    chunk_texts = [chunk['text'] for chunk in chunks]

    # Optimized batch encoding
    embeddings = embedding_model.encode(
        chunk_texts,
        show_progress_bar=True,
        batch_size=64,  # Balanced batch size
        normalize_embeddings=True,
        convert_to_numpy=True
    )

    print("[INFO] Adding to vector database...")
    collection.add(
        embeddings=embeddings.tolist(),
        documents=chunk_texts,
        ids=[f"chunk_{i}" for i in range(len(chunks))]
    )

    print("\n" + "=" * 60)
    print(f"RAG READY! ({len(chunks)} chunks)")
    print("=" * 60 + "\n")


def retrieve_context(query: str, top_k: int = 5) -> List[str]:
    """Return up to *top_k* document chunks most similar to *query*.

    Returns an empty list when the RAG system is not initialised, when the
    query cannot be embedded, or when the vector query fails.
    """
    if collection is None or embedding_model is None:
        return []

    try:
        # Use cached embedding
        query_embedding = get_cached_embedding(query)
        if query_embedding is None:
            return []

        results = collection.query(
            query_embeddings=[query_embedding.tolist()],
            n_results=top_k
        )

        if results and results['documents']:
            return results['documents'][0]
        return []

    except Exception as e:
        print(f"[ERROR] Retrieval error: {e}")
        return []


# ============================================
# GROQ RESPONSE GENERATION
# ============================================
# NOTE(review): the line breaks inside this prompt were reconstructed from a
# whitespace-collapsed copy of the file; confirm against the deployed prompt.
SYSTEM_PROMPT = """You are a friendly and knowledgeable student ambassador for RNGPIT (R.N.G. Patel Institute of Technology). Your goal is to help students and visitors by answering their questions warmly and directly.

**IMPORTANT - COMPLETE LIST OF ALL PROGRAMS AT RNGPIT:**
When asked about courses, programs, or departments at RNGPIT, you MUST mention ALL of the following:

**B.Tech/B.E. Programs:**
- Computer Science & Engineering
- Information Technology
- Chemical Engineering
- Mechanical Engineering
- Civil Engineering
- Electrical Engineering

**B.Voc Programs:**
- Software Development
- Solar & Renewable Technology
- Industrial Chemistry
- Production Technology
- Refrigeration & Air Conditioning
- Animation & VFX
- Wealth Management

**MBA Programs:**
- MBA in Logistics & Supply Chain Management
- MBA for Working Professionals
- Online MBA

**Integrated Programs:**
- Integrated M.Sc (IT) - 5 Years

Instructions:
1. **Be Conversational**: Write like a helpful human, not a robot. Use a warm, professional, yet engaging tone.
2. **Direct & Detailed**: Answer the specific question immediately, then provide relevant details.
3. **Use Markdown Tables**: When listing multiple items (faculty members, courses, fees, placements, etc.), ALWAYS format them as proper markdown tables with headers. Example:
   | Name | Designation | Email |
   |------|-------------|-------|
   | Prof. XYZ | Assistant Professor | xyz@rngpit.ac.in |
4. **Use Formatting**: Use **bold** for key terms, bullet points for short lists, and tables for structured data.
5. **No Citations**: NEVER say "according to the context", "based on the information provided", or similar phrases. Just state the facts naturally as if you know them.
6. **Complete Information**: Include all relevant details from the context - don't summarize too much when listing items. When listing programs/departments, ALWAYS include the complete list above.
7. **Graceful Fallback**: If information isn't available, suggest contacting the college at info@rngpit.ac.in or visiting rngpit.ac.in."""


def _groq_result(text: str, input_tokens: int = 0, output_tokens: int = 0) -> Dict:
    """Build the result dict returned by generate_response_groq."""
    return {
        'text': text,
        'input_tokens': input_tokens,
        'output_tokens': output_tokens,
    }


def generate_response_groq(query: str, context_chunks: List[str]) -> Dict:
    """Generate an answer for *query* from *context_chunks* via the Groq API.

    Returns a dict with keys ``text``, ``input_tokens`` and
    ``output_tokens``. Configuration problems and API failures are reported
    as a user-facing message in ``text`` with zero token counts; this
    function does not raise.
    """
    global groq_client

    if not context_chunks:
        return _groq_result(
            "I don't have specific information about that. Could you ask me something else about RNG Patel Institute?"
        )

    if not GROQ_AVAILABLE:
        return _groq_result("Groq SDK not installed. Run: pip install groq")

    if not GROQ_API_KEY:
        return _groq_result(
            "Groq API key not configured. Please set it in admin panel or set GROQ_API_KEY environment variable."
        )

    try:
        # Initialize Groq client if needed
        if groq_client is None:
            groq_client = Groq(api_key=GROQ_API_KEY)

        # Get model config
        model_config = GROQ_MODEL_REGISTRY.get(
            current_groq_model, GROQ_MODEL_REGISTRY[DEFAULT_GROQ_MODEL]
        )
        max_tokens = model_config.get("max_new_tokens", 2048)

        # Build context (at most 10 chunks, capped at 40000 characters)
        context_parts = [chunk.strip() for chunk in context_chunks[:10] if chunk.strip()]
        context = "\n\n".join(context_parts)[:40000]

        user_prompt = f"""Context Information:
{context}

User Question: {query}

Answer:"""

        # Make API call
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            model=model_config["model_id"],
            max_tokens=max_tokens,
            temperature=0.7,
            top_p=0.9,
        )

        response_text = (
            chat_completion.choices[0].message.content
            if chat_completion.choices else ""
        )

        # Get token usage
        usage = chat_completion.usage
        input_tokens = usage.prompt_tokens if usage else 0
        output_tokens = usage.completion_tokens if usage else 0

        if not response_text:
            response_text = "I couldn't generate a response. Please try again."

        print(f"[DEBUG] Groq generated {output_tokens} tokens using {model_config['name']}")

        return _groq_result(response_text.strip(), input_tokens, output_tokens)

    except Exception as e:
        error_msg = str(e)
        print(f"[ERROR] Groq error: {error_msg}")

        # Compare lower-case needles against the lower-cased message; the
        # previous mixed-case "API key" needle could never match.
        lowered = error_msg.lower()
        if "api key" in lowered or "authentication" in lowered:
            return _groq_result("Invalid Groq API key. Please check your API key.")
        if "rate" in lowered or "limit" in lowered:
            return _groq_result("API rate limit reached. Please wait a moment and try again.")
        return _groq_result(f"Error processing request: {error_msg}")


# ============================================
# MODEL INFO
# ============================================
def get_model_info():
    """Return the model registry summary, current model and key status."""
    return {
        "available_models": {
            model_id: {
                "name": config["name"],
                "description": config["description"],
                "context_length": config["context_length"],
                "max_new_tokens": config["max_new_tokens"],
            }
            for model_id, config in GROQ_MODEL_REGISTRY.items()
        },
        "current_model": current_groq_model,
        "is_loaded": bool(GROQ_API_KEY),
        "provider": "groq"
    }


# ============================================
# ROUTES
# ============================================
@app.route('/')
def home():
    """Serve the chatbot page."""
    return render_template('index.html')


@app.route('/chat', methods=['POST'])
def chat():
    """Answer one chat message: retrieve context, generate, log, respond."""
    try:
        start_time = time.time()
        # silent=True: a missing or malformed JSON body becomes a 400 below
        # instead of an unhandled error turned into a 500.
        data = request.get_json(silent=True) or {}
        user_message = str(data.get('message') or '').strip()

        if not user_message:
            return jsonify({'error': 'No message provided'}), 400

        print(f"\n[CHAT] Query: {user_message}")

        # Get session ID within request context
        current_session_id = get_or_create_session()

        # Fast retrieval
        context_chunks = retrieve_context(user_message, top_k=5)

        if not context_chunks:
            response_text = "I don't have information about that. Try asking about courses, admissions, fees, placements, or facilities at RNGPIT."
            input_tokens = 0
            output_tokens = 0
        else:
            print(f"[INFO] Found {len(context_chunks)} relevant chunks")
            print("[INFO] Using Groq API for generation...")
            result = generate_response_groq(user_message, context_chunks)
            response_text = result['text']
            input_tokens = result['input_tokens']
            output_tokens = result['output_tokens']

        response_time_ms = int((time.time() - start_time) * 1000)

        # Save to database (synchronous)
        save_chat_to_supabase(current_session_id, user_message, response_text,
                              response_time_ms, input_tokens, output_tokens)

        print(f"[OK] Response ({len(response_text)} chars) in {response_time_ms}ms [Provider: Groq]")

        response_data = {
            'response': response_text,
            'response_time_ms': response_time_ms
        }

        if debug_mode:
            response_data['debug'] = {
                'enabled': True,
                'chunks_used': len(context_chunks),
                'model': current_groq_model,
                'provider': 'groq',
                'input_tokens': input_tokens,
                'output_tokens': output_tokens
            }

        return jsonify(response_data)

    except Exception as e:
        print(f"[ERROR] Chat error: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


@app.route('/health', methods=['GET'])
def health():
    """Report service health and basic configuration state."""
    model_info = get_model_info()
    return jsonify({
        'status': 'healthy',
        'chunks_loaded': collection.count() if collection else 0,
        'current_model': model_info['current_model'],
        'model_loaded': model_info['is_loaded'],
        'provider': 'groq',
        'debug_mode': debug_mode,
        'groq_configured': bool(GROQ_API_KEY)
    })


# ============================================
# MODEL MANAGEMENT API
# ============================================
@app.route('/api/models', methods=['GET'])
def api_get_models():
    """List the available Groq models and the current selection."""
    return jsonify(get_model_info())


@app.route('/api/models/switch', methods=['POST'])
@login_required
def api_switch_model():
    """Switch the active Groq model (admin only)."""
    global current_groq_model

    data = request.get_json(silent=True) or {}
    model_id = data.get('model_id', DEFAULT_GROQ_MODEL)

    if model_id not in GROQ_MODEL_REGISTRY:
        return jsonify({
            'success': False,
            'error': f"Model '{model_id}' not found in Groq registry"
        }), 400

    current_groq_model = model_id
    print(f"[INFO] Switched to Groq model: {model_id}")

    return jsonify({
        'success': True,
        'message': f"Switched to {GROQ_MODEL_REGISTRY[model_id]['name']}",
        'model_info': get_model_info()
    })


@app.route('/api/embeddings/regenerate', methods=['POST'])
@login_required
def api_regenerate_embeddings():
    """Drop and rebuild the vector store (admin only).

    Requires an admin session, like the other state-changing admin
    endpoints: the rebuild deletes the live collection and re-embeds the
    whole knowledge base.
    """
    try:
        if collection is not None:
            chroma_client.delete_collection("rngpit_knowledge")

        # Clear cache
        get_cached_embedding.cache_clear()
        load_and_process_data.cache_clear()

        initialize_rag()

        return jsonify({
            'success': True,
            'message': 'Embeddings regenerated',
            'chunks_loaded': collection.count() if collection else 0
        })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500


# NOTE(review): this endpoint changes global state without authentication.
# It is left open because the public page may use it; confirm and add
# @login_required if it is admin-only.
@app.route('/api/debug/toggle', methods=['POST'])
def api_toggle_debug():
    """Flip the global debug flag and return its new value."""
    global debug_mode
    debug_mode = not debug_mode
    return jsonify({'success': True, 'debug_mode': debug_mode})


@app.route('/api/debug/status', methods=['GET'])
def api_debug_status():
    """Return the current debug flag."""
    return jsonify({'debug_mode': debug_mode})


@app.route('/api/groq-key', methods=['POST'])
@login_required
def api_set_groq_key():
    """Replace the Groq API key at runtime (admin only)."""
    global GROQ_API_KEY, groq_client

    data = request.get_json(silent=True) or {}
    api_key = str(data.get('api_key') or '').strip()

    if not api_key:
        return jsonify({'success': False, 'error': 'API key required'}), 400

    if not api_key.startswith('gsk_'):
        return jsonify({
            'success': False,
            'error': 'Invalid API key format. Groq API keys start with "gsk_"'
        }), 400

    GROQ_API_KEY = api_key
    groq_client = None  # Reset client to use new key

    print("[INFO] Groq API key updated")

    return jsonify({
        'success': True,
        'message': 'Groq API key configured',
        'key_preview': api_key[:8] + '...' + api_key[-4:]
    })


@app.route('/api/groq-key/status', methods=['GET'])
@login_required
def api_groq_key_status():
    """Report whether a Groq API key is configured, with a masked preview."""
    if GROQ_API_KEY:
        return jsonify({
            'configured': True,
            'key_preview': GROQ_API_KEY[:8] + '...' + GROQ_API_KEY[-4:]
        })
    return jsonify({'configured': False})


# ============================================
# ADMIN ROUTES
# ============================================
@app.route('/admin/login', methods=['GET', 'POST'])
def admin_login():
    """Render the login page (GET) or authenticate an admin (POST, JSON)."""
    if request.method == 'GET':
        if session.get('admin_logged_in'):
            return redirect(url_for('admin_dashboard'))
        return render_template('login.html')

    data = request.get_json(silent=True) or {}
    username = str(data.get('username') or '').strip()
    password = str(data.get('password') or '').strip()

    if not username or not password:
        return jsonify({'success': False, 'error': 'Username and password required'}), 400

    if not supabase:
        return jsonify({'success': False, 'error': 'Database not available'}), 500

    try:
        result = supabase.table('admin_users').select('*').eq('username', username).execute()

        if result.data:
            user = result.data[0]
            stored = user['password_hash']
            # SECURITY NOTE(review): the submitted password is compared
            # directly with the `password_hash` column, which only works if
            # that column holds the plaintext password. Confirm and migrate
            # to a real password hash. compare_digest at least makes the
            # comparison constant-time.
            if isinstance(stored, str) and hmac.compare_digest(
                stored.encode('utf-8'), password.encode('utf-8')
            ):
                session['admin_logged_in'] = True
                session['admin_username'] = username
                session['admin_id'] = user['id']

                # Update last login
                supabase.table('admin_users').update({
                    'last_login': datetime.utcnow().isoformat()
                }).eq('id', user['id']).execute()

                return jsonify({'success': True, 'redirect': '/admin/dashboard'})

        return jsonify({'success': False, 'error': 'Invalid credentials'}), 401

    except Exception as e:
        print(f"[ERROR] Login: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500


@app.route('/admin/logout')
def admin_logout():
    """Clear the admin session and return to the login page."""
    session.pop('admin_logged_in', None)
    session.pop('admin_username', None)
    session.pop('admin_id', None)
    return redirect(url_for('admin_login'))


@app.route('/admin/dashboard')
@login_required
def admin_dashboard():
    """Render the admin dashboard."""
    return render_template('admin.html', username=session.get('admin_username', 'Admin'))


# ============================================
# ANALYTICS API (OPTIMIZED)
# ============================================
def _sum_field(rows, field):
    """Sum *field* over *rows*, treating missing or null values as 0."""
    return sum(row.get(field, 0) or 0 for row in rows)


def _new_usage_bucket(label_key, label):
    """Create an empty token-usage accumulator labelled *label_key*=*label*."""
    return {
        label_key: label,
        'input_tokens': 0,
        'output_tokens': 0,
        'total_tokens': 0,
        'cost': 0,
        'messages': 0
    }


def _add_usage(bucket, input_tokens, output_tokens, cost):
    """Add one message's token counts and cost to *bucket*."""
    bucket['input_tokens'] += input_tokens
    bucket['output_tokens'] += output_tokens
    bucket['total_tokens'] += input_tokens + output_tokens
    bucket['cost'] += cost
    bucket['messages'] += 1


@app.route('/api/analytics/stats')
@login_required
def get_analytics_stats():
    """Return overall and today's question, session, token and cost totals."""
    if not supabase:
        return jsonify({'error': 'Database not available'}), 500

    try:
        columns = 'id, input_tokens, output_tokens, total_cost'

        all_rows = supabase.table('chat_messages').select(columns).execute().data or []
        total_questions = len(all_rows)
        total_input_tokens = _sum_field(all_rows, 'input_tokens')
        total_output_tokens = _sum_field(all_rows, 'output_tokens')
        total_cost = _sum_field(all_rows, 'total_cost')

        sessions_result = supabase.table('chat_sessions').select('id').execute()
        total_sessions = len(sessions_result.data) if sessions_result.data else 0

        today = datetime.utcnow().date().isoformat()
        today_rows = supabase.table('chat_messages').select(columns).gte(
            'created_at', today
        ).execute().data or []
        today_questions = len(today_rows)
        today_input_tokens = _sum_field(today_rows, 'input_tokens')
        today_output_tokens = _sum_field(today_rows, 'output_tokens')
        today_cost = _sum_field(today_rows, 'total_cost')

        if total_questions > 0:
            avg_input_tokens = total_input_tokens / total_questions
            avg_output_tokens = total_output_tokens / total_questions
            avg_cost_per_message = total_cost / total_questions
        else:
            avg_input_tokens = avg_output_tokens = avg_cost_per_message = 0

        return jsonify({
            'total_questions': total_questions,
            'total_sessions': total_sessions,
            'today_questions': today_questions,
            'total_input_tokens': total_input_tokens,
            'total_output_tokens': total_output_tokens,
            'total_tokens': total_input_tokens + total_output_tokens,
            'total_cost': round(total_cost, 6),
            'today_input_tokens': today_input_tokens,
            'today_output_tokens': today_output_tokens,
            'today_tokens': today_input_tokens + today_output_tokens,
            'today_cost': round(today_cost, 6),
            'avg_input_tokens': round(avg_input_tokens, 1),
            'avg_output_tokens': round(avg_output_tokens, 1),
            'avg_cost_per_message': round(avg_cost_per_message, 6),
            'pricing': {
                'input_per_1m': GROQ_COST_PER_1M_INPUT,
                'output_per_1m': GROQ_COST_PER_1M_OUTPUT
            },
            'provider': 'groq'
        })

    except Exception as e:
        print(f"[ERROR] Stats: {e}")
        return jsonify({'error': str(e)}), 500


@app.route('/api/analytics/top-questions')
@login_required
def get_top_questions():
    """Return the most frequently asked questions (case-insensitive).

    Query parameter ``limit`` (default 10) caps the number of results. Each
    entry carries the most recent original wording and timestamp.
    """
    if not supabase:
        return jsonify({'error': 'Database not available'}), 500

    try:
        # A negative limit would silently slice from the end; clamp it.
        limit = max(0, request.args.get('limit', 10, type=int))

        result = supabase.table('chat_messages').select('user_question, created_at').execute()

        if not result.data:
            return jsonify({'questions': []})

        question_counts = {}
        for msg in result.data:
            original = msg['user_question']
            if not original:
                # Skip rows without a question instead of failing the request.
                continue
            key = original.lower().strip()
            entry = question_counts.get(key)
            if entry is None:
                question_counts[key] = {
                    'count': 1,
                    'last_asked': msg['created_at'],
                    'original': original
                }
                continue
            entry['count'] += 1
            if msg['created_at'] > entry['last_asked']:
                entry['last_asked'] = msg['created_at']
                entry['original'] = original

        sorted_questions = sorted(
            [
                {'question': v['original'], 'count': v['count'], 'last_asked': v['last_asked']}
                for v in question_counts.values()
            ],
            key=lambda x: x['count'],
            reverse=True
        )[:limit]

        return jsonify({'questions': sorted_questions})

    except Exception as e:
        print(f"[ERROR] Top questions: {e}")
        return jsonify({'error': str(e)}), 500


@app.route('/api/analytics/all-questions')
@login_required
def get_all_questions():
    """Return a page of logged questions, newest first.

    Query parameters: ``page`` (default 1), ``per_page`` (default 20) and an
    optional case-insensitive ``search`` substring.
    """
    if not supabase:
        return jsonify({'error': 'Database not available'}), 500

    try:
        # Clamp to >= 1: per_page=0 previously caused a division by zero and
        # page<=0 produced a negative row offset.
        page = max(1, request.args.get('page', 1, type=int))
        per_page = max(1, request.args.get('per_page', 20, type=int))
        search = request.args.get('search', '').strip()

        offset = (page - 1) * per_page

        query = supabase.table('chat_messages').select(
            'id, user_question, ai_response, created_at, response_time_ms'
        )

        if search:
            query = query.ilike('user_question', f'%{search}%')

        result = query.order('created_at', desc=True).range(
            offset, offset + per_page - 1
        ).execute()

        count_query = supabase.table('chat_messages').select('id', count='exact')
        if search:
            count_query = count_query.ilike('user_question', f'%{search}%')
        count_result = count_query.execute()
        total = count_result.count if count_result.count else 0

        return jsonify({
            'questions': result.data,
            'total': total,
            'page': page,
            'per_page': per_page,
            'total_pages': (total + per_page - 1) // per_page
        })

    except Exception as e:
        print(f"[ERROR] All questions: {e}")
        return jsonify({'error': str(e)}), 500


@app.route('/api/analytics/token-usage')
@login_required
def get_token_usage():
    """Return daily (last 30) and hourly (last 24) token usage plus projections.

    Rows whose timestamp or counters cannot be processed are skipped.
    """
    if not supabase:
        return jsonify({'error': 'Database not available'}), 500

    try:
        result = supabase.table('chat_messages').select(
            'created_at, input_tokens, output_tokens, total_cost'
        ).order('created_at', desc=False).execute()

        if not result.data:
            return jsonify({'daily_usage': [], 'hourly_usage': []})

        daily_data = {}
        hourly_data = {}

        for msg in result.data:
            try:
                created_at = datetime.fromisoformat(msg['created_at'].replace('Z', '+00:00'))
                day_key = created_at.strftime('%Y-%m-%d')
                hour_key = created_at.strftime('%Y-%m-%d %H:00')

                input_tokens = msg.get('input_tokens', 0) or 0
                output_tokens = msg.get('output_tokens', 0) or 0
                cost = msg.get('total_cost', 0) or 0

                if day_key not in daily_data:
                    daily_data[day_key] = _new_usage_bucket('date', day_key)
                _add_usage(daily_data[day_key], input_tokens, output_tokens, cost)

                if hour_key not in hourly_data:
                    hourly_data[hour_key] = _new_usage_bucket('hour', hour_key)
                _add_usage(hourly_data[hour_key], input_tokens, output_tokens, cost)

            except Exception:
                # Malformed row (bad timestamp or non-numeric counters): skip.
                continue

        daily_usage = sorted(daily_data.values(), key=lambda x: x['date'])[-30:]
        hourly_usage = sorted(hourly_data.values(), key=lambda x: x['hour'])[-24:]

        if daily_usage:
            days = len(daily_usage)
            avg_daily_tokens = sum(d['total_tokens'] for d in daily_usage) / days
            avg_daily_cost = sum(d['cost'] for d in daily_usage) / days
            avg_daily_messages = sum(d['messages'] for d in daily_usage) / days
        else:
            avg_daily_tokens = avg_daily_cost = avg_daily_messages = 0

        return jsonify({
            'daily_usage': daily_usage,
            'hourly_usage': hourly_usage,
            'projections': {
                'avg_daily_tokens': round(avg_daily_tokens, 0),
                'avg_daily_cost': round(avg_daily_cost, 4),
                'avg_daily_messages': round(avg_daily_messages, 1),
                'projected_monthly_tokens': round(avg_daily_tokens * 30, 0),
                'projected_monthly_cost': round(avg_daily_cost * 30, 2),
                'projected_monthly_messages': round(avg_daily_messages * 30, 0)
            }
        })

    except Exception as e:
        print(f"[ERROR] Token usage: {e}")
        return jsonify({'error': str(e)}), 500


@app.route('/api/admin/check')
def check_admin():
    """Report whether the current session belongs to a logged-in admin."""
    return jsonify({
        'is_admin': session.get('admin_logged_in', False),
        'username': session.get('admin_username', None)
    })


# ============================================
# MAIN
# ============================================
if __name__ == '__main__':
    port = int(os.environ.get('PORT', 7860))  # HF uses port 7860

    print("\n" + "=" * 60)
    print("RNGPIT AI ASSISTANT - GROQ API VERSION")
    print("=" * 60)
    print(f"Data: {TEXT_FILE_PATH}")
    print(f"Available Groq Models: {list(GROQ_MODEL_REGISTRY.keys())}")
    print(f"Default Model: {DEFAULT_GROQ_MODEL}")
    print(f"Supabase: {'✓' if supabase else '✗'}")
    print(f"Groq API Key: {'✓ Configured' if GROQ_API_KEY else '✗ Not set (use GROQ_API_KEY env var or admin panel)'}")
    print("=" * 60)

    initialize_rag()

    # The banner uses the real port; it previously advertised port 5000
    # while the server was bound to PORT (default 7860).
    print("\n🚀 Server starting...")
    print(f"📱 Chatbot: http://localhost:{port}")
    print(f"🔐 Admin: http://localhost:{port}/admin/login")
    print("\n⚔ Groq API Features:")
    print(" ✓ Ultra-fast inference (Groq LPU)")
    print(" ✓ Multiple models available:")
    for model_id, config in GROQ_MODEL_REGISTRY.items():
        print(f" - {model_id}: {config['description']}")
    print(" ✓ Free tier with rate limits")
    print(" ✓ Same RAG pipeline as original")
    print(" ✓ Same admin panel and analytics")
    print(" ✓ No local GPU required!")
    print("=" * 60 + "\n")

    app.run(debug=False, port=port, host='0.0.0.0', threaded=True)