#!/usr/bin/env bash
# Benchmark latency on a few representative Hebrew queries.
#
# Usage: bash scripts/bench_latency.sh
#
# Environment:
#   PORT                    port of the local server (default: 8000)
#   TAU_RAG_SEED_ADMIN_KEY  API key sent as X-API-Key to the admin endpoint
#   TAU_RAG_API_KEY         fallback API key when the seed key is unset
#
# Requires curl and python3 on PATH.
#
# Exit status: 1 if python3 is missing, the server does not answer /health,
# or no usable profile endpoint can be selected; otherwise 0 (per-query
# failures are reported inline and do not abort the run).

# Deliberately no -e / pipefail: a failed curl must be reported for that
# query (as an empty response) instead of aborting the whole benchmark.
set -u

PORT="${PORT:-8000}"
HOST="http://localhost:${PORT}"

# Read API key from env (server seeds it from TAU_RAG_SEED_ADMIN_KEY)
API_KEY="${TAU_RAG_SEED_ADMIN_KEY:-${TAU_RAG_API_KEY:-}}"

# python3 is used both to URL-encode the queries and to format the report.
if ! command -v python3 > /dev/null 2>&1; then
  echo "✗ python3 not found on PATH (needed for URL-encoding and report formatting)"
  exit 1
fi

# First, sanity-check the server is up
if ! curl -s --max-time 3 "${HOST}/health" > /dev/null 2>&1; then
  echo "✗ Server not responding at ${HOST}"
  echo " Run 'tail -30 /tmp/tau-rag.log' to see what's happening."
  exit 1
fi
echo "✓ Server alive at ${HOST}"

# Auto-detect which endpoint variant is registered. Public was added in
# the latest patch; admin needs auth.
PUBLIC_PATH="/v1/latency/profile"
ADMIN_PATH="/v1/admin/latency/profile"
ENDPOINT=""

# Probe the public route; it counts as present only if the reply contains a
# "stages" key. The admin route is not probed: it is chosen whenever the
# public probe fails and an API key is available.
test_resp=$(curl -s --max-time 3 "${HOST}${PUBLIC_PATH}?query=test&n=1" 2>/dev/null)
if [[ -n "$test_resp" ]] && grep -q '"stages"' <<<"$test_resp"; then
  ENDPOINT="$PUBLIC_PATH"
  echo " using public endpoint: $ENDPOINT (no auth)"
elif [[ -n "$API_KEY" ]]; then
  ENDPOINT="$ADMIN_PATH"
  echo " using admin endpoint: $ENDPOINT (with X-API-Key)"
else
  echo "✗ Neither $PUBLIC_PATH nor $ADMIN_PATH is reachable."
  echo " Restart the server (it needs the new public route), OR set:"
  echo " export TAU_RAG_SEED_ADMIN_KEY="
  exit 1
fi
echo

QUERIES=(
  "תום לב במשא ומתן"
  "סעיף 39"
  "אשם תורם"
  "הסכם ממון"
  "פיצויי פיטורים"
)

# Report formatter: reads ONE profile response (JSON) from stdin and prints
# the stages sorted by descending p50 with a bar (one '#' per 5 ms, capped
# at 40), followed by the bottleneck line when the response names one.
#
# The program is held in a variable and run with `python3 -c` so that stdin
# stays free for the response. Feeding the program itself through stdin
# (`python3 - <<HEREDOC`) would make the here-document replace the pipe, and
# the formatter would then always see an empty response.
# `read -d ''` returns non-zero at end of input, hence the `|| true`.
IFS= read -r -d '' FORMATTER_PY <<'PY_END' || true
import json, sys

raw = sys.stdin.read()
if not raw.strip():
    print(" (empty response — endpoint not responding)")
    sys.exit(0)
try:
    r = json.loads(raw)
except json.JSONDecodeError:
    print(" (non-JSON response)")
    sys.exit(0)
err = r.get("error")
if err:
    print(" ERROR:", err)
    sys.exit(0)
stages = r.get("stages") or {}
items = sorted(stages.items(), key=lambda x: -x[1].get("p50", 0))
for name, s in items:
    p50 = s.get("p50", 0)
    bar = "#" * min(40, int(p50 / 5))
    print(" %-22s p50=%6.1fms %s" % (name, p50, bar))
bs = r.get("bottleneck_stage")
bp = r.get("bottleneck_p50_ms")
if bs:
    print(" -> bottleneck: %s (%sms)" % (bs, bp))
PY_END

# Format the profile response given on stdin.
render_profile() {
  # Force UTF-8 I/O so the non-ASCII report text prints under a C locale.
  PYTHONIOENCODING=utf-8 python3 -c "$FORMATTER_PY"
}

# Percent-encode $1 for use as a URL query value.
urlencode() {
  python3 -c "import urllib.parse, sys; print(urllib.parse.quote(sys.argv[1]))" "$1"
}

# curl options shared by every query; the auth header is only needed for the
# admin endpoint, so decide once instead of on every iteration.
# NOTE: the key is passed on curl's command line and is therefore visible in
# the process list while a request is running.
curl_args=(-s --max-time 30)
if [[ "$ENDPOINT" == "$ADMIN_PATH" && -n "$API_KEY" ]]; then
  curl_args+=(-H "X-API-Key: ${API_KEY}")
fi

for q in "${QUERIES[@]}"; do
  echo "=== Query: $q ==="
  encoded=$(urlencode "$q")
  resp=$(curl "${curl_args[@]}" "${HOST}${ENDPOINT}?query=${encoded}&n=3")
  printf '%s' "$resp" | render_profile
  echo
done