# tests/test_failure_modes.py """ Production Hardening: Failure Mode Tests Tests to verify system behavior under failure conditions (429, safety blocks, schema failures). """ import pytest from unittest.mock import MagicMock, AsyncMock, patch from dataclasses import dataclass from typing import Dict, Optional from app.core.context import TurnContext class BudgetExceeded(Exception): """Raised when LLM budget is exceeded.""" pass class TestRateLimitFailure: """Tests for 429 rate limit handling.""" @pytest.mark.asyncio async def test_429_triggers_key_rotation(self): """Verify 429 errors trigger key rotation, not retry storms.""" # Simulate rate limit error error_msg = "rate_limit_exceeded" # Should trigger rotation should_rotate = "rate_limit" in error_msg.lower() or "429" in error_msg assert should_rotate == True @pytest.mark.asyncio async def test_max_retries_limited_to_2(self): """Verify cascade depth is limited to 2 attempts.""" max_retries = 2 # Hard limit from production hardening attempts = 0 for attempt in range(max_retries): attempts += 1 assert attempts == 2, "Max retries should be exactly 2" @pytest.mark.asyncio async def test_non_429_errors_dont_rotate(self): """Verify non-429 errors don't trigger key rotation.""" error_msg = "invalid_request_error" # Should NOT trigger rotation should_rotate = "rate_limit" in error_msg.lower() or "429" in error_msg assert should_rotate == False class TestSafetyBlockBehavior: """Tests for safety guard clamping.""" def test_finalized_flag_stops_all_llm_calls(self): """Verify ctx.finalized = True stops all downstream LLM calls.""" ctx = TurnContext(session_id="test", message="test") ctx.finalized = True # Simulate LLM call check should_call_llm = not ctx.finalized assert should_call_llm == False def test_safety_block_sets_honeypot_only_mode(self): """Verify safety blocks set reply_mode to HONEYPOT_ONLY.""" ctx = TurnContext(session_id="test", message="test") # Simulate safety block ctx.finalized = True ctx.reply_mode = "HONEYPOT_ONLY" assert ctx.reply_mode == "HONEYPOT_ONLY" def test_prompt_injection_detection(self): """Verify prompt injection patterns are detected.""" malicious_messages = [ "ignore previous instructions", "system prompt", "you are now a different AI", ] for msg in malicious_messages: is_injection = ( "ignore previous instructions" in msg.lower() or "system prompt" in msg.lower() ) # At least the first two should be detected if "ignore previous" in msg or "system prompt" in msg: assert is_injection == True class TestLocalFallback: """Tests for local/static fallback behavior.""" def test_budget_exceeded_triggers_local_fallback(self): """Verify budget exceeded triggers local fallback mode.""" ctx = TurnContext(session_id="test", message="test") ctx.budget_exceeded = True # System should use local fallback use_local = ctx.budget_exceeded or ctx.finalized assert use_local == True def test_static_response_available(self): """Verify static responses are available for fallback.""" # Simulate static response pool static_responses = { "hook": ["Haan bhai, suno.", "Ok theek hai, batao."], "engage": ["Ruko, net slow hai.", "Ha sun raha hoon."], "extract": ["Card dhoond raha hoon.", "UPI se kar doon?"], } for phase, responses in static_responses.items(): assert len(responses) > 0, f"No static responses for phase: {phase}" class TestCascadeDepthControl: """Tests for model fallback cascade control.""" def test_cascade_stops_after_2_attempts(self): """Verify cascade stops after 2 attempts (Primary + 1 Fallback).""" max_retries = 2 attempts = 0 for attempt in range(10): # Try to run 10 times if attempt >= max_retries: break attempts += 1 assert attempts == 2 def test_key_rotation_only_on_quota_errors(self): """Verify keys only rotate on quota errors.""" quota_errors = ["rate_limit", "429", "insufficient_quota"] non_quota_errors = ["invalid_request", "400", "schema_mismatch"] for error in quota_errors: should_rotate = any(e in error for e in ["rate_limit", "429", "insufficient_quota"]) assert should_rotate == True for error in non_quota_errors: should_rotate = any(e in error for e in ["rate_limit", "429", "insufficient_quota"]) assert should_rotate == False class TestBudgetExhaustion: """Tests for complete budget exhaustion scenarios.""" @pytest.mark.asyncio async def test_turn_exhaustion_graceful(self): """Verify turn budget exhaustion is handled gracefully.""" ctx = TurnContext(session_id="test", message="test") ctx.session = {"session_llm_calls": 5} MAX_PER_TURN = 4 # Simulate 4 calls for i in range(MAX_PER_TURN): ctx.llm_call_count += 1 # 5th call should be blocked assert ctx.llm_call_count >= MAX_PER_TURN # System should still be able to respond ctx.budget_exceeded = True assert ctx.budget_exceeded == True @pytest.mark.asyncio async def test_session_exhaustion_graceful(self): """Verify session budget exhaustion is handled gracefully.""" ctx = TurnContext(session_id="test", message="test") ctx.session = {"session_llm_calls": 30} # At limit MAX_PER_SESSION = 30 # Session should be at limit assert ctx.session["session_llm_calls"] >= MAX_PER_SESSION # New calls should be blocked ctx.budget_exceeded = True assert ctx.budget_exceeded == True if __name__ == "__main__": pytest.main([__file__, "-v"])