import asyncio
import sys
import os
import time

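# Add the parent of this script's directory to sys.path so the `app` package
# imported below can be found (assumes `app/` lives in that directory).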
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from app.core.llm_client import llm_client, ModelRole, GroqClient

async def simulate_failover() -> None:
    """Run a console demo of model failover on the shared ``llm_client``.

    The demo proceeds in order, reporting each step with ``print``:

    1. Initialize ``llm_client``; if its ``primary`` is not a ``GroqClient``,
       print an error and return early.
    2. Issue one ``generate`` call for the target model.
    3. Write a cooldown deadline 600 seconds in the future into
       ``llm_client.primary.model_cooldowns`` for the target model.
    4. Issue a second ``generate`` call for the same model and print how
       long that call took.

    The function does not itself check which model served either request;
    the operator is expected to inspect the client's log output. The
    cooldown entry written in step 3 is not removed afterwards.

    Returns:
        None. All results are reported on stdout.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("🧪 SIMULATING GPT-OSS EXHAUSTION & FAILOVER")
    print(banner)

    await llm_client.initialize()

    # The demo pokes at GroqClient-specific state (model_cooldowns), so it
    # cannot proceed with any other primary client.
    if not isinstance(llm_client.primary, GroqClient):
        print("Error: Primary client is not GroqClient. System configured for fallbacks incorrectly?")
        return

    target_model = "openai/gpt-oss-20b"
    cooldown_seconds = 600

    print("\n[SCENARIO 1]: Standard Request (Normal Operation)")
    print(f"Requesting: {target_model}")
    # A deliberately tiny prompt: presumably this reaches the real backend,
    # so keep token usage minimal. The response itself is not inspected.
    await llm_client.generate("Hello", model=target_model)
    # NOTE(review): this message is informational only; nothing here
    # confirms which model actually produced the response.
    print(f"Result acquired using {target_model}")

    print("\n[SCENARIO 2]: GPT-OSS-20B EXHAUSTED (Simulating Daily Limit)")
    print(f"!!! System detects Daily Quota reached for {target_model} !!!")
    # Manually poison the cooldown table to imitate a caught 429 daily limit.
    # The deadline stays on wall-clock time.time() because it is stored for
    # the client to read — presumably compared against time.time(); confirm.
    llm_client.primary.model_cooldowns[target_model] = time.time() + cooldown_seconds

    print("\n[SCENARIO 3]: Proactive Redirection Check")
    print(f"Now requesting {target_model} again...")

    # Measure the interval with perf_counter: unlike time.time(), it is
    # monotonic and unaffected by system clock adjustments.
    started = time.perf_counter()
    await llm_client.generate("Second request", model=target_model)
    elapsed = time.perf_counter() - started

    print("\n[ANALYSIS]:")
    print(f"Operation took {elapsed:.2f}s")
    print("Check your terminal logs above – you should see [RELIABILITY] PROACTIVE REDIRECT")
    print("The system instantly moved to the 'Workhorse' model (Maverick) without even attempting the exhausted one.")

# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    demo_coroutine = simulate_failover()
    asyncio.run(demo_coroutine)