#!/usr/bin/env bash
# Full restart + latency benchmark — single command, parses results.
# Usage: bash scripts/full_latency_bench.sh
# Do not abort the script when an individual command fails.
set +o errexit

# PORT may be supplied through the environment; 8000 is the fallback.
: "${PORT:=8000}"
HOST="http://localhost:$PORT"
LOG='/tmp/tau-rag.log'
# color CODE TEXT
# Write TEXT wrapped in the ANSI escape sequence selected by CODE, followed
# by the reset sequence. No trailing newline is emitted.
color() {
  local sgr_code="$1"
  local text="$2"
  printf '\033[%sm%s\033[0m' "$sgr_code" "$text"
}
# Opening banner: one blank line, then the title framed by two rules.
printf '%s\n' \
  "" \
  "============================================================" \
  " tau-rag full latency benchmark" \
  "============================================================"
# 1. Kill stale server processes
echo
echo "[1/5] Killing stale server processes..."
# SIGKILL every process whose command line matches either pattern. pkill's
# stderr is discarded and its exit status is deliberately not checked.
pkill -9 -f "fastapi_app" 2>/dev/null
pkill -9 -f "tau_rag.api" 2>/dev/null
sleep 2 # brief pause before probing the port
# Probe the port the benchmark will actually talk to (PORT, default 8000)
# rather than a hard-coded 8000, so a custom PORT is checked correctly.
if lsof -i:"${PORT}" >/dev/null 2>&1; then
  echo " $(color 31 "✗ port ${PORT} still in use after pkill")"
  # Show which process is holding the port before giving up.
  lsof -i:"${PORT}"
  exit 1
fi
echo " $(color 32 "✓ port ${PORT} free")"
# 2. Start server
printf '%s\n' "" "[2/5] Starting server (background)..."
# Run the make target in the background under nohup, sending both stdout
# and stderr to the log file, and remember the background job's PID.
nohup make run-local >"$LOG" 2>&1 &
SERVER_PID="$!"
printf '%s\n' " PID: ${SERVER_PID}"
# 3. Wait for ready
echo
echo "[3/5] Waiting for startup (corpus has ~50k docs, takes 2-3 min)..."
READY=0
# Poll the health endpoint every 5 s, for at most 60 attempts (~5 minutes).
for ((i = 1; i <= 60; i++)); do
  sleep 5
  # --fail makes curl exit non-zero on HTTP status >= 400, so an error
  # response is not mistaken for a healthy server.
  if curl -sf --max-time 2 "${HOST}/health" >/dev/null 2>&1; then
    echo
    echo " $(color 32 '✓ ready') after $((i * 5))s"
    READY=1
    break
  fi
  printf '.' # progress dot for each unsuccessful attempt
done
if ((READY == 0)); then
  echo
  echo " $(color 31 '✗ server did not come up in 5 minutes')"
  echo " --- last 30 log lines ---"
  tail -n 30 "$LOG"
  exit 1
fi
# 4. Run benchmark queries
#######################################
# Send one query to the per-retriever latency endpoint and print a summary.
# Globals:   HOST (read); color (called for the error markers)
# Arguments: $1 - label for the run; accepted so call sites stay
#                 self-describing, but not used in the request or output
#            $2 - query text; URL-encoded before it is placed in the URL
# Outputs:   the total latency, then one line per retriever sorted slowest
#            first, each with a bar of one '#' per 10 ms (capped at 40).
#            Missing, null, non-numeric or non-finite latencies print as 0.0.
# Returns:   1 if the query could not be encoded or the response was empty;
#            otherwise the exit status of the summarising python3 process
#            (0 for a summary, an error payload, or an unparsable body).
#######################################
run_one() {
  local query="$2"
  local encoded
  if ! encoded=$(python3 -c 'import urllib.parse, sys; print(urllib.parse.quote(sys.argv[1]))' "$query"); then
    echo " $(color 31 '✗ could not URL-encode query (python3 failed)')"
    return 1
  fi
  local resp
  resp=$(curl -s --max-time 90 "${HOST}/v1/latency/per-retriever?query=${encoded}")
  if [[ -z "$resp" ]]; then
    echo " $(color 31 '✗ empty response')"
    return 1
  fi
  # The Python program is single-quoted for the shell, so it must not contain
  # any single-quote characters; its string literals use double quotes.
  printf '%s' "$resp" | python3 -c '
import json, math, sys


def as_ms(raw):
    """Return raw as a finite float; anything else counts as 0.0."""
    if isinstance(raw, bool) or not isinstance(raw, (int, float)):
        return 0.0
    try:
        val = float(raw)
    except OverflowError:
        return 0.0
    return val if math.isfinite(val) else 0.0


try:
    r = json.load(sys.stdin)
except Exception:
    print(" (non-JSON response)"); sys.exit(0)
if not isinstance(r, dict):
    print(" (unexpected JSON: not an object)"); sys.exit(0)
if "error" in r:
    print(" ERROR:", r["error"]); sys.exit(0)
total = as_ms(r.get("total_ms", 0))
per_r = r.get("per_retriever_ms")
if not isinstance(per_r, dict):
    per_r = {}
print(f" total: {total:>8.1f} ms")
rows = [(str(name), as_ms(ms)) for name, ms in per_r.items()]
for name, ms in sorted(rows, key=lambda row: -row[1]):
    bar = "#" * min(40, int(ms / 10))
    print(f" {name:10s} {ms:>8.1f} ms {bar}")
'
}
# bench_query LABEL QUERY NOTE
# Print a blank line and a header naming QUERY and NOTE, then run the
# benchmark for that query via run_one.
bench_query() {
  local case_label="$1"
  local case_query="$2"
  local case_note="$3"
  echo
  echo "--- Query: '${case_query}' (${case_note}) ---"
  run_one "$case_label" "$case_query"
}

echo
echo "[4/5] Running benchmark queries..."

# The same English query three times: one cold run, then two warm runs.
bench_query "test_cold" "test" "English, run 1: cold"
bench_query "test_warm" "test" "English, run 2: warm"
bench_query "test_warm2" "test" "English, run 3: warm"

# The same Hebrew query three times: one cold run, then two warm runs.
bench_query "heb_cold" "תום לב" "Hebrew, run 1: cold"
bench_query "heb_warm" "תום לב" "Hebrew, run 2: warm"
bench_query "heb_warm2" "תום לב" "Hebrew, run 3: warm"

# Two further Hebrew queries, each run once.
bench_query "heb_other" "הסכם ממון" "Hebrew different"
bench_query "heb_other2" "אשם תורם" "Hebrew different"
# 5. Done
# Closing summary: the recorded PID and base URL, how to stop the server,
# and where its log is, framed by two rules.
printf '%s\n' \
  "" \
  "============================================================" \
  " Done. Server PID ${SERVER_PID} still running on ${HOST}" \
  " Stop with: pkill -9 -f fastapi_app" \
  " Logs: tail -f ${LOG}" \
  "============================================================"