File size: 3,398 Bytes
3be54c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env bash
# Full restart + latency benchmark in one command; results are parsed and
# pretty-printed.
# Usage:  bash scripts/full_latency_bench.sh

# Do not abort the script when an individual command fails.
set +o errexit

# PORT may be supplied through the environment; unset or empty means 8000.
: "${PORT:=8000}"
HOST="http://localhost:$PORT"   # base URL used for every HTTP request
LOG=/tmp/tau-rag.log            # receives the server's stdout and stderr

# color CODE TEXT
# Print TEXT wrapped in the ANSI escape sequence ESC[<CODE>m ... ESC[0m.
# No trailing newline is written.
color() {
  local sgr_code="$1" text="$2"
  printf '\033[%sm%s\033[0m' "$sgr_code" "$text"
}

echo
echo "============================================================"
echo " tau-rag full latency benchmark"
echo "============================================================"

# 1. Kill stale server processes
# Every process whose command line matches one of the two patterns below is
# sent SIGKILL. pkill's stderr is discarded and its exit status ignored, so
# "nothing matched" is not treated as an error.
echo
echo "[1/5] Killing stale server processes..."
pkill -9 -f "fastapi_app" 2>/dev/null
pkill -9 -f "tau_rag.api" 2>/dev/null
sleep 2   # brief pause before probing the port

# Probe the port this run actually targets. PORT is configurable (HOST is
# built from it), so checking a literal 8000 would inspect the wrong port
# whenever PORT is overridden.
if lsof -i:"${PORT}" >/dev/null 2>&1; then
  echo "  $(color 31 "✗ port ${PORT} still in use after pkill")"
  lsof -i:"${PORT}"   # show which process is holding the port
  exit 1
fi
echo "  $(color 32 "✓ port ${PORT} free")"

# 2. Start server
# Run `make run-local` under nohup as a background job, with stdout and
# stderr redirected to $LOG, and remember the job's PID in SERVER_PID.
printf '\n%s\n' "[2/5] Starting server (background)..."
nohup make run-local >"$LOG" 2>&1 &
SERVER_PID=$!
printf '  PID: %s\n' "$SERVER_PID"

# 3. Wait for ready
# Poll ${HOST}/health every 5 seconds, for at most 60 attempts (~5 minutes).
# curl runs with -f so that an HTTP 4xx/5xx answer makes it exit non-zero;
# without it any HTTP response at all, including an error page, would be
# reported as "ready".
echo
echo "[3/5] Waiting for startup (corpus has ~50k docs, takes 2-3 min)..."
READY=0
for ((i = 1; i <= 60; i++)); do
  sleep 5
  if curl -sf --max-time 2 "${HOST}/health" >/dev/null 2>&1; then
    echo
    echo "  $(color 32 '✓ ready') after $((i * 5))s"
    READY=1
    break
  fi
  printf "."   # one progress dot per unsuccessful probe
done

# Give up with the tail of the server log when no probe ever succeeded.
if (( READY == 0 )); then
  echo
  echo "  $(color 31 '✗ server did not come up in 5 minutes')"
  echo "  --- last 30 log lines ---"
  tail -n 30 "$LOG"
  exit 1
fi

# 4. Run benchmark queries
#######################################
# Query the per-retriever latency endpoint once and print a timing table.
# Globals:   HOST (read) - base URL of the server
# Arguments: $1 - label of the run; accepted so existing call sites keep
#                 working, but not used in the output
#            $2 - raw query text; it is URL-encoded here before being sent
# Outputs:   the total latency, then one line per retriever (slowest first)
#            with a bar of one '#' per 10 ms, capped at 40 characters
# Returns:   0 when a table was printed; non-zero when the response was
#            empty, not JSON, not a JSON object, or carried an 'error' key
#######################################
run_one() {
  local query="$2"
  local encoded
  encoded=$(python3 -c "import urllib.parse,sys; print(urllib.parse.quote(sys.argv[1]))" "$query")
  local resp
  resp=$(curl -s --max-time 90 "${HOST}/v1/latency/per-retriever?query=${encoded}")
  if [ -z "$resp" ]; then
    echo "  $(color 31 '✗ empty response')"
    return 1
  fi
  # printf (not echo) so the body reaches the parser verbatim.
  printf '%s\n' "$resp" | python3 -c "
import json, sys

def ms_value(v):
    # null or non-numeric timings are displayed as 0 instead of raising
    if isinstance(v, bool) or not isinstance(v, (int, float)):
        return 0.0
    return float(v)

try:
    r = json.load(sys.stdin)
except Exception:
    print('  (non-JSON response)'); sys.exit(1)
if not isinstance(r, dict):
    print('  (unexpected JSON response)'); sys.exit(1)
if 'error' in r:
    print('  ERROR:', r['error']); sys.exit(1)
total = ms_value(r.get('total_ms', 0))
per_r = r.get('per_retriever_ms') or {}
if not isinstance(per_r, dict):
    per_r = {}
timings = sorted(((k, ms_value(v)) for k, v in per_r.items()), key=lambda kv: -kv[1])
print(f'  total: {total:>8.1f} ms')
for name, ms in timings:
    bar = '#' * min(40, int(ms/10))
    print(f'    {name:10s} {ms:>8.1f} ms  {bar}')
"
}

# Print a blank line and a header naming the query, then benchmark it.
#   $1 - label handed on to run_one
#   $2 - query text
#   $3 - note shown in parentheses after the query
bench_case() {
  local case_label="$1" case_query="$2" case_note="$3"
  printf '\n%s\n' "--- Query: '${case_query}' (${case_note}) ---"
  run_one "$case_label" "$case_query"
}

printf '\n%s\n' "[4/5] Running benchmark queries..."
# Each query is issued several times in a row; the notes mark the first run
# as "cold" and the repeats as "warm".
bench_case "test_cold" "test" "English, run 1: cold"
bench_case "test_warm" "test" "English, run 2: warm"
bench_case "test_warm2" "test" "English, run 3: warm"
bench_case "heb_cold" "תום לב" "Hebrew, run 1: cold"
bench_case "heb_warm" "תום לב" "Hebrew, run 2: warm"
bench_case "heb_warm2" "תום לב" "Hebrew, run 3: warm"
bench_case "heb_other" "הסכם ממון" "Hebrew different"
bench_case "heb_other2" "אשם תורם" "Hebrew different"

# 5. Done
# The server is left running; tell the user how to stop it and follow its log.
rule="============================================================"
printf '\n%s\n' "$rule"
printf '%s\n' \
  " Done. Server PID $SERVER_PID still running on $HOST" \
  " Stop with: pkill -9 -f fastapi_app" \
  " Logs:      tail -f $LOG" \
  "$rule"