#!/bin/bash
# Comprehensive tool call test for Qwen3.6-35B-A3B
# Usage: ./test_tool_calls.sh [port]
# Default port: 8085
#
# Sends chat-completion requests to http://localhost:<port>/v1/chat/completions
# and checks the tool-calling fields (tool_calls, finish_reason, content) of
# each response.
#
# Requires: curl, python3
# Exit status: 0 when every check passed; 1 when a required program is
#              missing, the server health probe fails, or any check failed.

set -u

PORT="${1:-8085}"
BASE="http://localhost:$PORT/v1/chat/completions"
PASS=0
FAIL=0

# Tool list used by Test 6 (a single "terminal" tool taking a command string).
TOOLS_T='[{"type":"function","function":{"name":"terminal","description":"Execute a bash command","parameters":{"type":"object","properties":{"command":{"type":"string"}},"required":["command"]}}}]'

# Counter helpers. Plain assignment is used instead of ((VAR++)) because the
# latter returns a non-zero status when the previous value was 0.
record_pass() { PASS=$((PASS + 1)); }
record_fail() { FAIL=$((FAIL + 1)); }

#######################################
# Report one check and update the PASS/FAIL counters.
# Arguments:
#   $1 - human-readable check name
#   $2 - grep basic-regex pattern the actual value must match
#   $3 - actual value
# Outputs: one result line on stdout
#######################################
check() {
  local name="$1" expected="$2" actual="$3"
  # -e keeps a pattern that starts with '-' from being parsed as an option.
  if printf '%s\n' "$actual" | grep -q -e "$expected"; then
    echo "  ✅ $name"
    record_pass
  else
    echo "  ❌ $name (expected '$expected', got: $actual)"
    record_fail
  fi
}

#######################################
# POST a JSON body to the chat-completions endpoint.
# Arguments:
#   $1 - request body (JSON text)
#   $2 - curl --max-time in seconds (default 60)
# Outputs: raw response body on stdout
#######################################
post_chat() {
  local body="$1" timeout="${2:-60}"
  curl -s --max-time "$timeout" "$BASE" \
    -H "Content-Type: application/json" \
    -d "$body"
}

#######################################
# Pull one value out of a chat-completion response.
# Arguments:
#   $1 - response JSON text
#   $2 - field: tool_name | has_calls | call_count | content |
#               finish_reason | arguments
#   $3 - for "arguments": text printed when there is no tool call
# Outputs: the value on stdout; nothing at all if the response is not the
#          expected JSON shape (python errors are deliberately silenced so
#          the caller's check fails on the empty value instead).
#######################################
extract() {
  local response="$1" field="$2" fallback="${3:-}"
  printf '%s' "$response" | python3 -c '
import json
import sys

field, fallback = sys.argv[1], sys.argv[2]
choice = json.load(sys.stdin)["choices"][0]
message = choice["message"]
# "or []" also covers a tool_calls key that is present but null.
calls = message.get("tool_calls") or []

if field == "tool_name":
    print(calls[0]["function"]["name"] if calls else "none")
elif field == "has_calls":
    print("has_calls" if calls else "no_calls")
elif field == "call_count":
    print(len(calls))
elif field == "content":
    print(message.get("content") or "")
elif field == "finish_reason":
    print(choice["finish_reason"])
elif field == "arguments":
    if calls:
        args = calls[0]["function"]["arguments"]
        print(args if isinstance(args, str) else json.dumps(args))
    else:
        print(fallback)
else:
    sys.exit("unknown field: " + field)
' "$field" "$fallback" 2>/dev/null
}

# Encode $1 as a JSON string literal (quotes and escapes included).
json_string() {
  python3 -c 'import json, sys; print(json.dumps(sys.argv[1]))' "$1"
}

# Build the deterministic request body shared by Tests 1-5.
#   $1 - JSON array of messages, $2 - JSON array of tools
chat_body() {
  printf '{"messages":%s,"tools":%s,"max_tokens":256,"temperature":0}' "$1" "$2"
}

# Emit the get_weather tool definition with description $1.
weather_tool() {
  printf '{"type":"function","function":{"name":"get_weather","description":"%s","parameters":{"type":"object","properties":{"location":{"type":"string"}},"required":["location"]}}}' "$1"
}

# Without these every check would fail with an empty "got:" value.
for tool in curl python3; do
  command -v "$tool" >/dev/null 2>&1 || { echo "❌ Required program not found: $tool"; exit 1; }
done

echo "=== Tool Call Tests ==="
echo "Server: http://localhost:$PORT"
curl -s --max-time 10 "http://localhost:$PORT/health" | grep -q ok || { echo "❌ Server not running"; exit 1; }
echo ""

# Test 1: Basic tool call
echo "Test 1: Basic tool call trigger"
R=$(post_chat "$(chat_body \
  '[{"role": "user", "content": "What is the weather in London?"}]' \
  "[$(weather_tool 'Get weather')]")")
TCALLS=$(extract "$R" tool_name)
FINISH=$(extract "$R" finish_reason)
check "triggers tool call" "get_weather" "$TCALLS"
check "finish_reason=tool_calls" "tool_calls" "$FINISH"

# Test 2: Tool response → final answer (no loop)
echo ""
echo "Test 2: Tool response produces final answer (no loop)"
R=$(post_chat "$(chat_body \
  '[
    {"role": "user", "content": "What is the weather in London?"},
    {"role": "assistant", "content": "", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "get_weather", "arguments": "{\"location\":\"London\"}"}}]},
    {"role": "tool", "tool_call_id": "call_1", "content": "{\"temperature\": 15, \"condition\": \"cloudy\"}"}
  ]' \
  "[$(weather_tool 'Get weather')]")")
CONTENT=$(extract "$R" content)
TCALLS=$(extract "$R" has_calls)
FINISH=$(extract "$R" finish_reason)
check "has content" "London\|15\|cloudy" "$CONTENT"
check "no further tool calls" "no_calls" "$TCALLS"
check "finish_reason=stop" "stop" "$FINISH"

# Test 3: Correct tool selection
echo ""
echo "Test 3: Selects correct tool from multiple"
R=$(post_chat "$(chat_body \
  '[{"role": "user", "content": "Search for latest AI news"}]' \
  "[$(weather_tool 'Get weather for a location'),"'{"type": "function", "function": {"name": "search_web", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}}]')")
TCALLS=$(extract "$R" tool_name)
check "picks search_web not get_weather" "search_web" "$TCALLS"

# Test 4: No tool call when not needed
echo ""
echo "Test 4: No tool call for simple question"
R=$(post_chat "$(chat_body \
  '[{"role": "user", "content": "What is 2+2?"}]' \
  "[$(weather_tool 'Get weather')]")")
CONTENT=$(extract "$R" content)
TCALLS=$(extract "$R" has_calls)
check "answers directly with 4" "4" "$CONTENT"
check "no tool call" "no_calls" "$TCALLS"

# Test 5: Multi-step tool use
echo ""
echo "Test 5: Multi-step (2 cities)"
R=$(post_chat "$(chat_body \
  '[{"role": "user", "content": "Compare weather in London and Paris"}]' \
  "[$(weather_tool 'Get weather for a location')]")")
TCALLS=$(extract "$R" call_count)
# Anchored so that counts such as 10 or 21 do not pass.
check "calls tool (1 or 2 calls)" "^[12]$" "$TCALLS"

# Test 6: Nested quote escaping (stress test)
# Round 1 asks for a command with nested quotes; round 2 replays the model's
# own call followed by a failing tool result and checks that the model does
# not repeat the identical command.
echo ""
echo "Test 6: Nested bash quote escaping (3 rounds)"
# Single-quoted so the JSON backslash escapes reach the server untouched.
ESCAPE_USER_MSG='{"role":"user","content":"Run: bash ~/script/proxy.sh \"web read --url \\\"https://example.com/\\\"\""}'
ESCAPE_TOOL_MSG='{"role":"tool","tool_call_id":"c1","content":"{\"output\":\"bash: unexpected EOF\\nSTATUS:FAILURE\",\"exit_code\":1}"}'
SAMPLING='"max_tokens":512,"temperature":1.0,"top_p":0.95,"top_k":20,"presence_penalty":1.5'

printf -v BODY1 '{"messages":[%s],"tools":%s,%s}' \
  "$ESCAPE_USER_MSG" "$TOOLS_T" "$SAMPLING"
R1=$(post_chat "$BODY1" 120)
CMD1=$(extract "$R1" arguments no_call)
CMD2=""

if [[ -z "$CMD1" || "$CMD1" == "no_call" ]]; then
  # Nothing to replay: round 2 would be built from a non-existent call.
  echo "  ❌ round 1 produced no tool call (cannot test retry)"
  record_fail
else
  # Sent as a JSON-encoded string, the same form Test 2 uses for "arguments".
  ARGS_JSON=$(json_string "$CMD1")
  printf -v BODY2 '{"messages":[%s,{"role":"assistant","content":"","tool_calls":[{"id":"c1","type":"function","function":{"name":"terminal","arguments":%s}}]},%s],"tools":%s,%s}' \
    "$ESCAPE_USER_MSG" "$ARGS_JSON" "$ESCAPE_TOOL_MSG" "$TOOLS_T" "$SAMPLING"
  R2=$(post_chat "$BODY2" 120)
  CMD2=$(extract "$R2" arguments gave_up)

  if [[ -z "$CMD2" ]]; then
    # An error response must not be counted as "commands differ".
    echo "  ❌ round 2 response could not be parsed"
    record_fail
  elif [[ "$CMD1" == "$CMD2" ]]; then
    echo "  ⚠️ identical commands (potential loop)"
    record_fail
  else
    echo "  ✅ commands differ across retries (no loop)"
    record_pass
  fi
fi
echo "  R1: $CMD1"
echo "  R2: $CMD2"

# Summary (printed once, after all tests have run)
echo ""
echo "=== Results: $PASS passed, $FAIL failed ==="

# Propagate failures so callers and CI can rely on the exit status.
if ((FAIL > 0)); then
  exit 1
fi
exit 0