import asyncio
import unittest
from unittest.mock import MagicMock, patch

from app.core.llm_client import LLMClient, ModelRole
from app.config import settings


class TestModelSwitching(unittest.TestCase):
    """Check which model ``LLMClient._switchboard`` selects for the Groq provider.

    Every test requests ``ModelRole.SMART_REASONING`` and inspects the
    ``(model, reason)`` pair that the switchboard hands back.
    """

    def setUp(self):
        """Create a fresh client for each test and pin its provider to "groq"."""
        groq_client = LLMClient()
        groq_client.provider_name = "groq"
        self.client = groq_client

    def _route(self, **switchboard_kwargs):
        """Request a SMART_REASONING model, echo the decision, and return it.

        Keyword arguments are forwarded to ``_switchboard`` untouched, so a
        test that supplies no ``prompt_text`` still exercises the callee's
        own default for that parameter.
        """
        model, reason = self.client._switchboard(
            ModelRole.SMART_REASONING, **switchboard_kwargs
        )
        print(f"Model: {model}, Reason: {reason}")
        return model, reason

    def test_manual_override(self):
        """A patched ``settings.GROQ_SMART_MODEL`` is returned as the model."""
        print("\n--- Testing Manual Override ---")
        forced_model = "test-override-model"

        # The setting is only replaced for the duration of the ``with`` block.
        with patch.object(settings, "GROQ_SMART_MODEL", forced_model):
            chosen, why = self._route()
            self.assertEqual(chosen, forced_model)
            self.assertIn("Manual override", why)
            print("SUCCESS: Manual override verified.")

    def test_throughput_escalation(self):
        """A large prompt escalates to the higher-throughput workhorse model."""
        print("\n--- Testing Throughput Escalation ---")
        # qwen/qwen3-32b has a TPM of 6000, so 60% of it is 3600 tokens.
        # 15000 chars / 3.5 chars-per-token is ~4285 tokens, above that mark.
        oversized_prompt = "A" * 15000

        chosen, why = self._route(prompt_text=oversized_prompt)

        # Workhorse expected when Groq SMART_REASONING escalates.
        workhorse_model = "meta-llama/llama-4-maverick-17b-128e-instruct"
        self.assertEqual(chosen, workhorse_model)
        self.assertIn("Proactive Throughput Switch", why)
        print("SUCCESS: Throughput escalation verified.")

    def test_context_window_safety(self):
        """A prompt near the context limit is routed to the wide-context model."""
        print("\n--- Testing Context Window Safety ---")
        # qwen/qwen3-32b has a 131,072-token window; 80% of it is ~104k tokens.
        # 400,000 chars / 3.5 chars-per-token is ~114k tokens, above that mark.
        enormous_prompt = "A" * 400000

        chosen, why = self._route(prompt_text=enormous_prompt)

        # Wide-context model expected for Groq.
        wide_model = "moonshotai/kimi-k2-instruct-0905"
        self.assertEqual(chosen, wide_model)
        self.assertIn("Adaptive Context Switch", why)
        print("SUCCESS: Context window safety switch verified.")

    def test_default_routing(self):
        """A small prompt keeps the registry default for SMART_REASONING."""
        print("\n--- Testing Default Routing ---")

        chosen, why = self._route(prompt_text="Small prompt")

        # The registry default for Groq SMART_REASONING should be qwen/qwen3-32b.
        self.assertEqual(chosen, "qwen/qwen3-32b")
        self.assertIn("Registry-optimized", why)
        print("SUCCESS: Default routing verified.")


if __name__ == "__main__":
    unittest.main()