"""Demo script: simulate exhaustion of one model and observe the failover path.

Run directly as a script. It issues two small real requests through the
shared ``llm_client`` and marks the target model as cooling down in between.
"""

import asyncio
import sys
import os
import time

# Add parent directory to path so the ``app`` package resolves when this file
# is executed directly as a script.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from app.core.llm_client import llm_client, ModelRole, GroqClient  # noqa: E402,F401


async def simulate_failover():
    """Walk through three scenarios against the shared ``llm_client``.

    1. Send a normal request for the target model.
    2. Write a cooldown deadline 600 seconds in the future for that model into
       ``llm_client.primary.model_cooldowns``, imitating a caught daily-limit
       429.
    3. Request the same model again and report how long the call took.

    Returns early (with an error message) when the primary client is not a
    ``GroqClient``. The cooldown entry is left in place after the function
    returns.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("🧪 SIMULATING GPT-OSS EXHAUSTION & FAILOVER")
    print(banner)

    await llm_client.initialize()

    if not isinstance(llm_client.primary, GroqClient):
        print("Error: Primary client is not GroqClient. System configured for fallbacks incorrectly?")
        return

    target_model = "openai/gpt-oss-20b"

    print("\n[SCENARIO 1]: Standard Request (Normal Operation)")
    print(f"Requesting: {target_model}")
    # A small real call is made here; the prompt is kept short to limit token
    # usage. The response itself is not inspected.
    await llm_client.generate("Hello", model=target_model)
    print(f"Result acquired using {target_model}")

    print("\n[SCENARIO 2]: GPT-OSS-20B EXHAUSTED (Simulating Daily Limit)")
    # MANUALLY POISON THE COOLDOWN (simulating a real 429 daily limit catch).
    # The deadline is computed with time.time(), exactly as before.
    # NOTE(review): presumably the client compares against time.time() too - confirm.
    print(f"!!! System detects Daily Quota reached for {target_model} !!!")
    llm_client.primary.model_cooldowns[target_model] = time.time() + 600

    print("\n[SCENARIO 3]: Proactive Redirection Check")
    print(f"Now requesting {target_model} again...")

    # This should trigger PROACTIVE REDIRECT in the logs.
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    await llm_client.generate("Second request", model=target_model)
    elapsed = time.perf_counter() - started

    print("\n[ANALYSIS]:")
    print(f"Operation took {elapsed:.2f}s")
    print("Check your terminal logs above – you should see [RELIABILITY] PROACTIVE REDIRECT")
    print("The system instantly moved to the 'Workhorse' model (Maverick) without even attempting the exhausted one.")


if __name__ == "__main__":
    asyncio.run(simulate_failover())