"""Render-level checks for the ``chat_template.jinja`` chat template.

The script loads the template from the directory one level above the one
containing this file, renders it for a series of message lists, and checks
the output for substrings that must (or must not) appear.  It prints a
per-test verdict plus a summary and exits with status 0 when every test
passed, 1 otherwise.
"""

import os
import sys
import json
import traceback

try:
    from jinja2 import Environment, FileSystemLoader, StrictUndefined
except ImportError:
    print("Error: jinja2 is required to run tests. Please install it using 'pip install jinja2'")
    sys.exit(1)

TEMPLATE_FILE = 'chat_template.jinja'
# Parent of the directory that holds this script.
TEMPLATE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# StrictUndefined makes any reference to a missing variable raise instead of
# silently rendering as an empty string.
env = Environment(
    loader=FileSystemLoader(TEMPLATE_DIR),
    undefined=StrictUndefined,
    keep_trailing_newline=True,
    lstrip_blocks=True,
    trim_blocks=True,
)


def raise_exception(msg):
    """Raise a plain ``Exception`` carrying *msg*.

    Registered as a template global so the template can abort rendering.
    """
    raise Exception(msg)


env.globals['raise_exception'] = raise_exception

try:
    template = env.get_template(TEMPLATE_FILE)
except Exception as e:
    print(f"Error loading template: {e}")
    sys.exit(1)


def run_test(
    name,
    messages,
    tools=None,
    kwargs=None,
    expected_in=None,
    expected_not_in=None,
    expect_error=False,
):
    """Render the template once and report whether the expectations hold.

    Args:
        name: Label printed in the test banner.
        messages: Value passed to the template as ``messages``.
        tools: Optional value passed as ``tools``; omitted from the render
            context when ``None``.
        kwargs: Optional mapping of extra render variables; applied last, so
            it can override the defaults set here.
        expected_in: Substrings that must all occur in the rendered output.
        expected_not_in: Substrings that must not occur in the rendered output.
        expect_error: When true, the test passes only if rendering raises.

    Returns:
        ``True`` if the test passed, ``False`` otherwise.
    """
    extra_vars = {} if kwargs is None else kwargs
    print(f"\n--- Running Test: {name} ---")
    try:
        context = {'messages': messages, 'add_generation_prompt': True}
        if tools is not None:
            context['tools'] = tools
        context.update(extra_vars)
        rendered = template.render(**context)

        if expect_error:
            print("❌ FAILED: Expected an exception but got none.")
            return False

        ok = True

        # Every required substring is checked so that all misses are reported.
        for needle in expected_in or ():
            if needle not in rendered:
                print(f"❌ FAILED: Missing expected string:\n'''{needle}'''")
                print(f"Rendered:\n{rendered}")
                ok = False

        # Likewise, every forbidden substring is checked and reported.
        for forbidden in expected_not_in or ():
            if forbidden in rendered:
                print(f"❌ FAILED: Found string that should NOT be present:\n'''{forbidden}'''")
                print(f"Rendered:\n{rendered}")
                ok = False

        if not ok:
            return False
        print("✅ PASSED")
        return True
    except Exception as e:
        if not expect_error:
            print(f"❌ FAILED with exception:\n{traceback.format_exc()}")
            return False
        print(f"✅ PASSED (Caught expected error: {e})")
        return True


# Tests
tests_passed = 0
tests_total = 0


def execute_test(*args, **kwargs):
    """Run one test through ``run_test`` and update the module-level tallies."""
    global tests_passed, tests_total
    tests_total += 1
    outcome = run_test(*args, **kwargs)
    if outcome:
        tests_passed += 1


# 1. auto_disable_thinking_with_tools (default logic)
execute_test(
    "auto_disable_thinking_with_tools (enabled via kwarg)",
    messages=[
        {"role": "user", "content": "Hello!"},
    ],
    tools=[
        {"name": "test_tool"},
    ],
    kwargs={"auto_disable_thinking_with_tools": True},
    expected_in=["\n\n"],  # Should be stripped
)

execute_test(
    "auto_disable_thinking_with_tools (disabled via kwarg -> allows thinking)",
    messages=[
        {"role": "user", "content": "Hello!"},
    ],
    tools=[
        {"name": "test_tool"},
    ],
    kwargs={"auto_disable_thinking_with_tools": False},
    expected_in=["\n"],
    expected_not_in=["\n\n"],
)

# 2. An inline <|think_on|> tag overrides auto_disable_thinking_with_tools
execute_test(
    "inline <|think_on|> overrides auto_disable",
    messages=[
        {"role": "user", "content": "Hello! <|think_on|>"},
    ],
    tools=[
        {"name": "test_tool"},
    ],
    kwargs={"auto_disable_thinking_with_tools": True},
    expected_in=["\n"],
    expected_not_in=["\n\n", "<|think_on|>"],  # Tag must be stripped
)

# 3. Payload truncation
execute_test(
    "max_tool_arg_chars truncation",
    messages=[
        {"role": "user", "content": "Call tool"},
        {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {"function": {"name": "test", "arguments": {"param": "1234567890"}}},
            ],
        },
    ],
    kwargs={"max_tool_arg_chars": 5},
    expected_in=["12345\n[TRUNCATED"],
)

execute_test(
    "max_tool_response_chars truncation",
    messages=[
        {"role": "user", "content": "Do it"},
        {"role": "assistant", "content": "calling", "tool_calls": [{"name": "test"}]},
        {"role": "tool", "content": "1234567890"},
    ],
    kwargs={"max_tool_response_chars": 5},
    expected_in=["\n12345\n[TRUNCATED"],
)

# 4. System prompt appearing mid-conversation
execute_test(
    "mid-conversation system prompt",
    messages=[
        {"role": "user", "content": "Hello"},
        {"role": "system", "content": "Reminder: Be polite"},
    ],
    expected_in=["<|im_start|>system\nReminder: Be polite<|im_end|>"],
)

# 5. Delimiter between parallel tool calls
execute_test(
    "parallel tools delimiter",
    messages=[
        {"role": "user", "content": "x"},
        {
            "role": "assistant",
            "content": "",
            "tool_calls": [{"name": "t1"}, {"name": "t2"}],
        },
    ],
    expected_in=["\n\n"],
)

# 6. Deep agent fallback (conversation without a user message)
execute_test(
    "deep agent fallback (no user message)",
    messages=[
        {"role": "system", "content": "Sys"},
        {"role": "tool", "content": "test"},
    ],
    expected_in=["<|im_start|>system\nSys", "<|im_start|>user\n"],
)

# 7. Error escalation
execute_test(
    "error escalation warnings",
    messages=[
        {"role": "user", "content": "Do it"},
        {"role": "tool", "content": "error: something failed"},
        {"role": "assistant", "content": "calling"},
        {"role": "tool", "content": "error: failed again"},
    ],
    expected_in=["⚠️ SYSTEM WARNING: 2 consecutive tool errors", "\n\n"],
)

# Summary; the exit status reflects the overall result.
print("\n=============================")
print(f"Test Summary: {tests_passed} / {tests_total} passed.")
if tests_passed != tests_total:
    print("Some tests failed.")
    sys.exit(1)
print("All tests passed successfully! 🎉")
sys.exit(0)