#!/usr/bin/env bash
#
# tau-rag full latency benchmark.
#
# Restarts the local server, waits for its /health endpoint to answer, then
# sends a fixed set of queries to /v1/latency/per-retriever and prints the
# timings.
#
# Environment:
#   PORT  TCP port the server listens on (default: 8000)

# errexit stays off on purpose: several steps are best-effort (for example,
# pkill exits non-zero when no process matched) and must not abort the run.
set +e

PORT="${PORT:-8000}"
HOST="http://localhost:${PORT}"
LOG="/tmp/tau-rag.log"

#######################################
# Wrap text in an ANSI SGR colour sequence and reset attributes afterwards.
# Arguments: $1 - SGR code (e.g. 31 for red, 32 for green)
#            $2 - text to colourise
# Outputs:   the escaped text on stdout, without a trailing newline
#######################################
color() {
  local sgr_code="$1"
  local text="$2"
  # Both values go through %s so a '%' in the text is never read as a format.
  printf '\033[%sm%s\033[0m' "$sgr_code" "$text"
}

echo
echo "============================================================"
echo " tau-rag full latency benchmark"
echo "============================================================"

# --- [1/5] Make sure no earlier server instance is holding the port ---------
echo
echo "[1/5] Killing stale server processes..."
# Best-effort: pkill exits non-zero when nothing matched, which is fine here.
pkill -9 -f "fastapi_app" 2>/dev/null
pkill -9 -f "tau_rag.api" 2>/dev/null
sleep 2
# Check the configured port rather than a literal 8000, so the check agrees
# with the PORT override used to build HOST.
if lsof -i:"${PORT:-8000}" >/dev/null 2>&1; then
  echo " $(color 31 "✗ port ${PORT:-8000} still in use after pkill")"
  lsof -i:"${PORT:-8000}"
  exit 1
fi
echo " $(color 32 "✓ port ${PORT:-8000} free")"

# --- [2/5] Launch the server detached, logging to $LOG -----------------------
echo
echo "[2/5] Starting server (background)..."
nohup make run-local > "$LOG" 2>&1 &
SERVER_PID=$!
echo " PID: $SERVER_PID"

# --- [3/5] Poll /health every 5 s, at most 60 times (5 minutes) -------------
echo
echo "[3/5] Waiting for startup (corpus has ~50k docs, takes 2-3 min)..."
READY=0
for ((i = 1; i <= 60; i++)); do
  sleep 5
  # curl -s without -f exits 0 on any HTTP response, so "ready" here means
  # the server answered at all within 2 s.
  if curl -s --max-time 2 "${HOST}/health" >/dev/null 2>&1; then
    echo
    echo " $(color 32 '✓ ready') after $((i * 5))s"
    READY=1
    break
  fi
  printf "."
done

if ((READY == 0)); then
  echo
  echo " $(color 31 '✗ server did not come up in 5 minutes')"
  echo " --- last 30 log lines ---"
  tail -n 30 "$LOG"
  exit 1
fi

#######################################
# Query the per-retriever latency endpoint once and print a timing summary.
# Globals:   HOST (read) - base URL of the server under test
# Arguments: $1 - label for the run; accepted so call sites are
#                 self-describing, not used in the output
#            $2 - raw query text; percent-encoded before it is sent
# Outputs:   total latency plus one bar line per retriever (slowest first)
#            on stdout, or a one-line diagnostic when something went wrong
# Returns:   0 when a summary was printed, non-zero otherwise
#######################################
run_one() {
  local query="$2"

  local encoded
  if ! encoded=$(python3 -c "import urllib.parse,sys; print(urllib.parse.quote(sys.argv[1]))" "$query"); then
    echo " $(color 31 '✗ could not URL-encode query')"
    return 1
  fi

  local resp
  resp=$(curl -s --max-time 90 "${HOST}/v1/latency/per-retriever?query=${encoded}")
  if [[ -z "$resp" ]]; then
    echo " $(color 31 '✗ empty response')"
    return 1
  fi

  # The program text is passed with -c because stdin carries the JSON payload.
  printf '%s\n' "$resp" | python3 -c "
import json, math, sys


def num(value):
    # Missing, non-numeric or non-finite timings count as 0 so that a single
    # bad field cannot abort the whole report.
    try:
        result = float(value)
    except (TypeError, ValueError):
        return 0.0
    return result if math.isfinite(result) else 0.0


try:
    r = json.load(sys.stdin)
except Exception:
    print(' (non-JSON response)'); sys.exit(1)
if not isinstance(r, dict):
    print(' (unexpected response shape)'); sys.exit(1)
if 'error' in r:
    print(' ERROR:', r['error']); sys.exit(1)

total = num(r.get('total_ms', 0))
per_r = r.get('per_retriever_ms') or {}
if not isinstance(per_r, dict):
    per_r = {}

print(f' total: {total:>8.1f} ms')
rows = [(name, num(ms)) for name, ms in per_r.items()]
for name, ms in sorted(rows, key=lambda x: -x[1]):
    # One '#' per 10 ms, capped at 40 characters.
    bar = '#' * min(40, int(ms / 10))
    print(f' {name:10s} {ms:>8.1f} ms {bar}')
"
}

#######################################
# Print a section header for one benchmark run, then execute it.
# Arguments: $1 - run label, passed through to run_one
#            $2 - query text
#            $3 - description shown in parentheses in the header
#######################################
bench_query() {
  echo
  echo "--- Query: '$2' ($3) ---"
  run_one "$1" "$2"
}

# --- [4/5] Each query is repeated so cold and warm timings can be compared --
echo
echo "[4/5] Running benchmark queries..."
bench_query "test_cold" "test" "English, run 1: cold"
bench_query "test_warm" "test" "English, run 2: warm"
bench_query "test_warm2" "test" "English, run 3: warm"
bench_query "heb_cold" "תום לב" "Hebrew, run 1: cold"
bench_query "heb_warm" "תום לב" "Hebrew, run 2: warm"
bench_query "heb_warm2" "תום לב" "Hebrew, run 3: warm"
bench_query "heb_other" "הסכם ממון" "Hebrew different"
bench_query "heb_other2" "אשם תורם" "Hebrew different"

# The server is deliberately left running; tell the user how to stop it.
echo
echo "============================================================"
echo " Done. Server PID $SERVER_PID still running on $HOST"
echo " Stop with: pkill -9 -f fastapi_app"
echo " Logs: tail -f $LOG"
echo "============================================================"