#!/usr/bin/env bash
# Full restart + latency benchmark — single command, parses results.
# Usage: bash scripts/full_latency_bench.sh
#
# Environment:
#   PORT  port that is checked for staleness, polled for readiness and
#         benchmarked (default: 8000).
#         NOTE(review): this script does not pass PORT to `make run-local`
#         explicitly; confirm the make target honours the same variable.

set +e  # don't abort on individual command failures

PORT="${PORT:-8000}"
HOST="http://localhost:${PORT}"
LOG="/tmp/tau-rag.log"

# Print $2 wrapped in the ANSI SGR escape for colour code $1
# (this script uses 31 and 32). No trailing newline is emitted.
color() { printf "\033[%sm%s\033[0m" "$1" "$2"; }

echo
echo "============================================================"
echo " tau-rag full latency benchmark"
echo "============================================================"

# 1. Kill stale server processes
echo
echo "[1/5] Killing stale server processes..."
# pkill exits non-zero when nothing matches; that is expected here, so both
# the status and stderr are intentionally ignored.
pkill -9 -f "fastapi_app" 2>/dev/null
pkill -9 -f "tau_rag.api" 2>/dev/null
sleep 2
# Check the port that will actually be used (PORT), not a fixed 8000.
if lsof -i:"${PORT}" >/dev/null 2>&1; then
  echo " $(color 31 "✗ port ${PORT} still in use after pkill")"
  lsof -i:"${PORT}"
  exit 1
fi
echo " $(color 32 "✓ port ${PORT} free")"

# 2. Start server
echo
echo "[2/5] Starting server (background)..."
nohup make run-local > "$LOG" 2>&1 &
SERVER_PID=$!
echo " PID: $SERVER_PID"

# 3. Wait for ready
echo
echo "[3/5] Waiting for startup (corpus has ~50k docs, takes 2-3 min)..."
READY=0
# 60 polls, 5 s apart => 5 minute ceiling (matches the failure message below).
for ((i = 1; i <= 60; i++)); do
  sleep 5
  # Any HTTP response from /health within 2 s counts as "ready"; only a
  # transport-level failure (refused, timeout) makes curl return non-zero.
  if curl -s --max-time 2 "${HOST}/health" >/dev/null 2>&1; then
    echo
    echo " $(color 32 '✓ ready') after $((i * 5))s"
    READY=1
    break
  fi
  printf "."
done
if ((READY == 0)); then
  echo
  echo " $(color 31 '✗ server did not come up in 5 minutes')"
  echo " --- last 30 log lines ---"
  tail -30 "$LOG"
  exit 1
fi

# 4. Run benchmark queries

#######################################
# Run one query against the per-retriever latency endpoint and print a
# summary: the total, then each retriever sorted slowest-first with a
# '#' bar (one '#' per 10 ms, capped at 40).
# Globals:   HOST (read)
# Arguments: $1 - label for the run (accepted for call-site readability;
#                 not used in the output)
#            $2 - query text; URL-encoded before being sent
# Outputs:   summary lines, or an error note, on stdout
# Returns:   0 after reporting an empty response; otherwise the exit
#            status of the Python formatter
#######################################
run_one() {
  local query="$2"
  local encoded
  # Percent-encode via Python so non-ASCII (Hebrew) queries are URL-safe.
  encoded=$(python3 -c "import urllib.parse,sys; print(urllib.parse.quote(sys.argv[1]))" "$query")
  local resp
  resp=$(curl -s --max-time 90 "${HOST}/v1/latency/per-retriever?query=${encoded}")
  if [[ -z "$resp" ]]; then
    echo " $(color 31 '✗ empty response')"
    return
  fi
  # The response is piped on stdin, so the formatter is passed with -c
  # rather than a here-doc (which would take over stdin).
  printf '%s\n' "$resp" | python3 -c "
import json, sys

try:
    r = json.load(sys.stdin)
except Exception:
    print(' (non-JSON response)')
    sys.exit(0)

if 'error' in r:
    print(' ERROR:', r['error'])
    sys.exit(0)

total = r.get('total_ms', 0)
per_r = r.get('per_retriever_ms') or {}
print(f' total: {total:>8.1f} ms')
for name, ms in sorted(per_r.items(), key=lambda x: -x[1]):
    bar = '#' * min(40, int(ms/10))
    print(f' {name:10s} {ms:>8.1f} ms {bar}')
"
}

echo
echo "[4/5] Running benchmark queries..."

echo
echo "--- Query: 'test' (English, run 1: cold) ---"
run_one "test_cold" "test"

echo
echo "--- Query: 'test' (English, run 2: warm) ---"
run_one "test_warm" "test"

echo
echo "--- Query: 'test' (English, run 3: warm) ---"
run_one "test_warm2" "test"

echo
echo "--- Query: 'תום לב' (Hebrew, run 1: cold) ---"
run_one "heb_cold" "תום לב"

echo
echo "--- Query: 'תום לב' (Hebrew, run 2: warm) ---"
run_one "heb_warm" "תום לב"

echo
echo "--- Query: 'תום לב' (Hebrew, run 3: warm) ---"
run_one "heb_warm2" "תום לב"

echo
echo "--- Query: 'הסכם ממון' (Hebrew different) ---"
run_one "heb_other" "הסכם ממון"

echo
echo "--- Query: 'אשם תורם' (Hebrew different) ---"
run_one "heb_other2" "אשם תורם"

# 5. Done
echo
echo "============================================================"
echo " Done. Server PID $SERVER_PID still running on $HOST"
echo " Stop with: pkill -9 -f fastapi_app"
echo " Logs: tail -f $LOG"
echo "============================================================"