michal-giza commited on
Commit
70eab61
·
verified ·
1 Parent(s): 9cedd4f

Upload 11 files

Browse files
main.py CHANGED
@@ -61,7 +61,19 @@ logger = logging.getLogger(__name__)
61
  # ---------------------------------------------------------------------------
62
  # Configuration
63
  # ---------------------------------------------------------------------------
64
- MODEL_ID = "MelodyMachine/Deepfake-audio-detection-V2"
 
 
 
 
 
 
 
 
 
 
 
 
65
  API_KEY = os.getenv("DETECTOR_API_KEY", "your-fallback-test-key")
66
  MAX_FILE_SIZE = 5 * 1024 * 1024 # 5 MB
67
  MIN_AUDIO_DURATION = 1.0 # seconds
@@ -359,8 +371,9 @@ def run_detection_pipeline(audio_data: np.ndarray, sr: int) -> AnalysisResult:
359
  model_score = 0.0
360
  if classifier:
361
  results = classifier(audio_data)
 
362
  for res in results:
363
- if res["label"].lower() in ["fake", "ai", "synthetic"]:
364
  model_score = res["score"]
365
  break
366
 
@@ -508,8 +521,9 @@ async def _stream_url_analysis(url: str, request_id: str):
508
  model_score = 0.0
509
  if classifier:
510
  results = await asyncio.to_thread(classifier, audio_data)
 
511
  for res in results:
512
- if res["label"].lower() in ["fake", "ai", "synthetic"]:
513
  model_score = res["score"]
514
  break
515
 
@@ -556,8 +570,9 @@ async def _stream_file_analysis(file_path: str, request_id: str):
556
  model_score = 0.0
557
  if classifier:
558
  results = await asyncio.to_thread(classifier, audio_data)
 
559
  for res in results:
560
- if res["label"].lower() in ["fake", "ai", "synthetic"]:
561
  model_score = res["score"]
562
  break
563
 
@@ -825,6 +840,7 @@ async def on_startup():
825
 
826
  logger.info("=== CheckAI Backend Starting ===")
827
  logger.info(f"Model: {MODEL_ID}")
 
828
  logger.info(f"Global concurrency: {MAX_GLOBAL_CONCURRENCY}")
829
  logger.info(f"Daily limit per IP: {DAILY_LIMIT}")
830
  logger.info(f"Allowed origins: {ALLOWED_ORIGINS}")
 
61
  # ---------------------------------------------------------------------------
62
  # Configuration
63
  # ---------------------------------------------------------------------------
64
+ # MODEL_ID is env-configurable so we can A/B test candidate detectors without
65
+ # redeploying. Verified so far:
66
+ # - "MelodyMachine/Deepfake-audio-detection-V2" → BROKEN (constant ~1.0 on
67
+ # both real music and AI music; do not use)
68
+ # - "mo-thecreator/Deepfake-audio-detection" → to evaluate (speech-trained)
69
+ MODEL_ID = os.getenv("MODEL_ID", "MelodyMachine/Deepfake-audio-detection-V2")
70
+
71
+ # Which pipeline labels count as "this is AI"? Comma-separated, case-insensitive.
72
+ # Some HF models use LABEL_0 / LABEL_1 instead of semantic names — check the
73
+ # model's config.json and set this accordingly.
74
+ _ai_labels_raw = os.getenv("MODEL_AI_LABELS", "fake,ai,synthetic,spoof,label_1")
75
+ AI_LABELS = {s.strip().lower() for s in _ai_labels_raw.split(",") if s.strip()}
76
+
77
  API_KEY = os.getenv("DETECTOR_API_KEY", "your-fallback-test-key")
78
  MAX_FILE_SIZE = 5 * 1024 * 1024 # 5 MB
79
  MIN_AUDIO_DURATION = 1.0 # seconds
 
371
  model_score = 0.0
372
  if classifier:
373
  results = classifier(audio_data)
374
+ logger.info(f"[model] raw output: {results}")
375
  for res in results:
376
+ if res["label"].lower() in AI_LABELS:
377
  model_score = res["score"]
378
  break
379
 
 
521
  model_score = 0.0
522
  if classifier:
523
  results = await asyncio.to_thread(classifier, audio_data)
524
+ logger.info(f"[model] raw output: {results}")
525
  for res in results:
526
+ if res["label"].lower() in AI_LABELS:
527
  model_score = res["score"]
528
  break
529
 
 
570
  model_score = 0.0
571
  if classifier:
572
  results = await asyncio.to_thread(classifier, audio_data)
573
+ logger.info(f"[model] raw output: {results}")
574
  for res in results:
575
+ if res["label"].lower() in AI_LABELS:
576
  model_score = res["score"]
577
  break
578
 
 
840
 
841
  logger.info("=== CheckAI Backend Starting ===")
842
  logger.info(f"Model: {MODEL_ID}")
843
+ logger.info(f"AI labels: {sorted(AI_LABELS)}")
844
  logger.info(f"Global concurrency: {MAX_GLOBAL_CONCURRENCY}")
845
  logger.info(f"Daily limit per IP: {DAILY_LIMIT}")
846
  logger.info(f"Allowed origins: {ALLOWED_ORIGINS}")
tests/benchmark.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Model benchmark harness.
4
+
5
+ Runs every audio file in `backend/tests/fixtures/{ai,human}/` against the
6
+ deployed backend's `/analyze/upload` endpoint and reports:
7
+
8
+ * Confusion matrix (TP / FP / TN / FN)
9
+ * Accuracy, precision, recall, F1
10
+ * Per-clip table: expected vs. observed + raw scores
11
+ * Score distribution histogram (text bar chart)
12
+ * CSV export for spreadsheet analysis
13
+
14
+ Usage:
15
+ export DETECTOR_API_URL='https://michal-giza-audio-detector-backend.hf.space'
16
+ export DETECTOR_API_KEY='...'
17
+
18
+ # 1. Drop AI clips into backend/tests/fixtures/ai/*.{mp3,wav,m4a}
19
+ # 2. Drop HUMAN clips into backend/tests/fixtures/human/*.{mp3,wav,m4a}
20
+ # 3. Run:
21
+ python3 benchmark.py # verbose
22
+ python3 benchmark.py --csv results.csv # also write CSV
23
+ python3 benchmark.py --threshold 0.65 # explore other decision thresholds
24
+
25
+ Exit code 0 on benchmark completion (regardless of model quality).
26
+ Exit 2 if no fixtures are present.
27
+ """
28
+
29
+ import argparse
30
+ import csv
31
+ import os
32
+ import sys
33
+ import time
34
+ from pathlib import Path
35
+ from typing import Iterator
36
+
37
+ import requests
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # Config
41
+ # ---------------------------------------------------------------------------
42
# ANSI escape sequences used to colour terminal output.
RESET = "\033[0m"
BOLD = "\033[1m"
RED = "\033[91m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
CYAN = "\033[96m"

# Backend location and credentials are read from the environment; any
# trailing slash on the URL is dropped so paths can be appended directly.
API_KEY = os.getenv("DETECTOR_API_KEY", "")
BASE_URL = os.getenv("DETECTOR_API_URL", "").rstrip("/")

# Fixture clips live in a "fixtures" directory next to this script.
FIXTURES_DIR = Path(__file__).parent / "fixtures"

# Upload content type for each supported audio extension.
MIME_FOR_EXT = dict(
    [
        (".mp3", "audio/mpeg"),
        (".wav", "audio/wav"),
        (".m4a", "audio/mp4"),
        (".aac", "audio/aac"),
        (".flac", "audio/flac"),
        (".ogg", "audio/ogg"),
    ]
)

# Extensions treated as audio fixtures: exactly the ones with a known MIME type.
AUDIO_EXTS = set(MIME_FOR_EXT)
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Model-under-test wrapper
67
+ # ---------------------------------------------------------------------------
68
def analyze_clip(path: Path, timeout: int = 120) -> dict:
    """Upload a single clip to ``/analyze/upload`` and return the decoded JSON.

    The content type is looked up from the file extension and falls back to
    ``application/octet-stream`` for unknown extensions.

    Raises:
        RuntimeError: if the backend answers with any status other than 200;
            the message carries the status and the first 200 characters of
            the response body.
    """
    content_type = MIME_FOR_EXT.get(path.suffix.lower(), "application/octet-stream")
    endpoint = f"{BASE_URL}/analyze/upload"
    auth_headers = {"X-Api-Key": API_KEY}

    # Keep the file open only for the duration of the upload.
    with path.open("rb") as handle:
        response = requests.post(
            endpoint,
            headers=auth_headers,
            files={"file": (path.name, handle, content_type)},
            timeout=timeout,
        )

    if response.status_code == 200:
        return response.json()
    raise RuntimeError(
        f"HTTP {response.status_code}: {response.text[:200]}"
    )
83
+
84
+
85
def iter_fixtures() -> Iterator[tuple[Path, bool]]:
    """Yield ``(clip_path, expected_ai)`` for each audio file under fixtures/.

    The ``ai`` folder is walked first, then ``human``; inside each folder the
    entries are visited in sorted order. A missing folder is skipped, as is
    anything that is not a regular file with a known audio extension.
    """
    labelled_folders = {"ai": True, "human": False}
    for folder_name, is_ai in labelled_folders.items():
        folder = FIXTURES_DIR / folder_name
        if folder.exists():
            for entry in sorted(folder.iterdir()):
                if entry.is_file() and entry.suffix.lower() in AUDIO_EXTS:
                    yield entry, is_ai
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Metrics
98
+ # ---------------------------------------------------------------------------
99
def compute_metrics(rows: list[dict], threshold: float) -> dict:
    """Compute the confusion matrix and derived rates at a decision threshold.

    A row is predicted "AI" when its ``confidence`` is >= ``threshold``.
    Rows whose confidence is ``None`` or NaN carry no prediction and are
    skipped; counting them would silently book them as HUMAN predictions,
    because every comparison against NaN is false.

    Args:
        rows: dicts with a numeric ``confidence`` and a boolean ``expected_ai``.
        threshold: decision threshold applied to ``confidence``.

    Returns:
        A dict with the counts ``tp``, ``fp``, ``tn``, ``fn`` and ``total``
        (scored rows only), plus ``accuracy``, ``precision``, ``recall`` and
        ``f1``. Any rate whose denominator is zero is reported as 0.0.
    """
    import math  # function-scope import keeps this block self-contained

    tp = fp = tn = fn = 0
    for row in rows:
        score = row["confidence"]
        if score is None or math.isnan(score):
            continue  # unscored clip: no prediction to evaluate
        predicted_ai = score >= threshold
        actual_ai = row["expected_ai"]
        if predicted_ai and actual_ai:
            tp += 1
        elif predicted_ai and not actual_ai:
            fp += 1
        elif not predicted_ai and not actual_ai:
            tn += 1
        else:
            fn += 1

    total = tp + fp + tn + fn
    accuracy = (tp + tn) / total if total else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = (
        2 * precision * recall / (precision + recall)
        if (precision + recall)
        else 0.0
    )
    return {
        "tp": tp, "fp": fp, "tn": tn, "fn": fn,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "total": total,
    }
131
+
132
+
133
def text_histogram(values: list[float], width: int = 40, buckets: int = 20) -> str:
    """Render a tiny text histogram of scores expected to lie in [0, 1].

    Args:
        values: scores to bucket. ``None``, NaN and infinite entries are
            ignored, since ``int()`` cannot convert them to a bucket index.
        width: bar length, in characters, of the fullest bucket.
        buckets: number of equal-width buckets spanning [0, 1].

    Returns:
        One line per bucket joined with newlines, or ``"(no data)"`` when no
        usable value is left.
    """
    import math  # function-scope import keeps this block self-contained

    usable = [v for v in values if v is not None and math.isfinite(v)]
    if not usable:
        return "(no data)"

    counts = [0] * buckets
    for v in usable:
        # Clamp at both ends: 1.0 (or more) goes to the last bucket, and a
        # negative score goes to the first one instead of wrapping around
        # through Python's negative indexing.
        idx = max(0, min(int(v * buckets), buckets - 1))
        counts[idx] += 1

    peak = max(counts) or 1
    lines = []
    for i, c in enumerate(counts):
        lo = i / buckets
        hi = (i + 1) / buckets
        bar = "█" * int(c / peak * width)
        lines.append(f" [{lo:.2f}-{hi:.2f}) {c:3d} {bar}")
    return "\n".join(lines)
149
+
150
+
151
+ # ---------------------------------------------------------------------------
152
+ # Main
153
+ # ---------------------------------------------------------------------------
154
def main() -> int:
    """Benchmark every fixture clip against the backend and print a report.

    Each clip yielded by ``iter_fixtures()`` is posted via ``analyze_clip()``.
    The report contains a per-clip table, a confusion matrix with derived
    rates at ``--threshold``, wav2vec2 score histograms for the AI and HUMAN
    clips, an optional threshold sweep (``--sweep``) and an optional CSV
    export (``--csv``).

    Returns:
        0 when at least one clip was scored, 1 when every request failed,
        2 when the environment variables or the fixtures are missing.
    """
    import math  # function-scope import: only used to screen out NaN scores

    parser = argparse.ArgumentParser()
    parser.add_argument("--threshold", type=float, default=0.5,
                        help="Decision threshold on `confidence` (default 0.5)")
    parser.add_argument("--csv", type=Path, default=None,
                        help="Optional CSV export path")
    parser.add_argument("--sweep", action="store_true",
                        help="Also show metrics at 9 thresholds 0.1..0.9")
    args = parser.parse_args()

    if not BASE_URL or not API_KEY:
        print("DETECTOR_API_URL and DETECTOR_API_KEY must be set.", file=sys.stderr)
        return 2

    fixtures = list(iter_fixtures())
    if not fixtures:
        print(f"{YELLOW}No fixtures found in {FIXTURES_DIR}/.{RESET}", file=sys.stderr)
        print(" Expected layout:", file=sys.stderr)
        print(f" {FIXTURES_DIR}/ai/*.mp3", file=sys.stderr)
        print(f" {FIXTURES_DIR}/human/*.mp3", file=sys.stderr)
        return 2

    n_ai = sum(1 for _, is_ai in fixtures if is_ai)
    n_human = len(fixtures) - n_ai
    print(f"{BOLD}Benchmark — {BASE_URL}{RESET}")
    print(f" fixtures: {len(fixtures)} ({n_ai} AI, {n_human} human)")
    print(f" threshold: {args.threshold}")
    print()

    # --- Run ---
    rows: list[dict] = []
    print(f"{BOLD}{'path':<45} {'expect':<7} {'conf':<6} {'wav2vec':<7} {'fp':<6} {'verdict':<7}{RESET}")
    print("-" * 86)
    for path, expected_ai in fixtures:
        rel = path.relative_to(FIXTURES_DIR)
        # Deliberately broad: one failing clip (network error, bad payload)
        # is recorded as an error row and must not abort the whole run.
        try:
            start = time.time()
            body = analyze_clip(path)
            elapsed = time.time() - start
            conf = body["confidence"]
            # `or {}` also covers a JSON null for "details".
            details = body.get("details") or {}
            wav2vec = details.get("wav2vec2_score", float("nan"))
            fp_score = details.get("fingerprint_score", float("nan"))
            predicted = conf >= args.threshold
            correct = predicted == expected_ai
            verdict = "AI" if predicted else "HUMAN"
            color = GREEN if correct else RED
            exp_label = "AI" if expected_ai else "HUMAN"
            print(
                f"{color}{str(rel):<45} {exp_label:<7} {conf:<6.3f} "
                f"{wav2vec:<7.3f} {fp_score:<6.3f} {verdict:<7}{RESET} "
                f"({elapsed:.1f}s)"
            )
            rows.append({
                "path": str(rel),
                "expected_ai": expected_ai,
                "confidence": conf,
                "wav2vec2_score": wav2vec,
                "fingerprint_score": fp_score,
                "elapsed_seconds": elapsed,
            })
        except Exception as e:
            print(f"{RED}{str(rel):<45} ERROR: {e}{RESET}")
            rows.append({
                "path": str(rel),
                "expected_ai": expected_ai,
                "confidence": float("nan"),
                "wav2vec2_score": float("nan"),
                "fingerprint_score": float("nan"),
                "elapsed_seconds": 0.0,
                "error": str(e),
            })

    # --- Metrics ---
    clean = [r for r in rows if "error" not in r]
    if not clean:
        print(f"\n{RED}No successful runs.{RESET}")
        return 1

    metrics = compute_metrics(clean, args.threshold)
    print()
    print(f"{BOLD}Confusion matrix @ threshold={args.threshold}{RESET}")
    print(f" predicted AI predicted HUMAN")
    print(f" actual AI {metrics['tp']:>4d} {metrics['fn']:>4d}")
    print(f" actual HUMAN {metrics['fp']:>4d} {metrics['tn']:>4d}")
    print()
    print(f" accuracy {metrics['accuracy']:.3f}")
    print(f" precision {metrics['precision']:.3f} (of predicted-AI, how many were AI)")
    print(f" recall {metrics['recall']:.3f} (of actual-AI, how many we caught)")
    print(f" f1 {metrics['f1']:.3f}")

    # --- Score distributions (this is what reveals whether the model discriminates) ---
    # A clip whose response lacks `wav2vec2_score` carries NaN. NaN cannot be
    # bucketed and poisons the means below, so only finite scores are kept.
    ai_scores = [
        r["wav2vec2_score"] for r in clean
        if r["expected_ai"] and math.isfinite(r["wav2vec2_score"])
    ]
    human_scores = [
        r["wav2vec2_score"] for r in clean
        if not r["expected_ai"] and math.isfinite(r["wav2vec2_score"])
    ]
    unscored = len(clean) - len(ai_scores) - len(human_scores)
    print()
    print(f"{BOLD}wav2vec2 score distribution — AI clips (n={len(ai_scores)}){RESET}")
    print(text_histogram(ai_scores))
    print()
    print(f"{BOLD}wav2vec2 score distribution — HUMAN clips (n={len(human_scores)}){RESET}")
    print(text_histogram(human_scores))
    print()
    if unscored:
        print(f" {YELLOW}{unscored} clip(s) had no wav2vec2 score and were left out.{RESET}")

    # Quick sanity read — means overlap = model doesn't discriminate.
    if ai_scores and human_scores:
        mean_ai = sum(ai_scores) / len(ai_scores)
        mean_human = sum(human_scores) / len(human_scores)
        separation = abs(mean_ai - mean_human)
        print(f" mean(AI wav2vec2) = {mean_ai:.3f}")
        print(f" mean(HUMAN wav2vec2) = {mean_human:.3f}")
        print(f" separation = {separation:.3f}")
        if separation < 0.1:
            print(f" {RED}→ model does not discriminate — replace it.{RESET}")
        elif separation < 0.3:
            print(f" {YELLOW}→ weak discrimination — consider alternatives.{RESET}")
        else:
            print(f" {GREEN}→ meaningful discrimination.{RESET}")

    # --- Threshold sweep ---
    if args.sweep:
        print()
        print(f"{BOLD}Threshold sweep{RESET}")
        print(f" {'t':<6} {'accuracy':<10} {'precision':<11} {'recall':<8} {'f1':<6}")
        for t in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
            m = compute_metrics(clean, t)
            print(f" {t:<6.2f} {m['accuracy']:<10.3f} "
                  f"{m['precision']:<11.3f} {m['recall']:<8.3f} {m['f1']:<6.3f}")

    # --- CSV export ---
    if args.csv:
        # Every row is exported, failures included; the "error" column is
        # blank for clips that were scored. Explicit UTF-8 keeps fixture
        # names with non-ASCII characters portable across platforms.
        with args.csv.open("w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(
                f,
                fieldnames=[
                    "path", "expected_ai", "confidence",
                    "wav2vec2_score", "fingerprint_score",
                    "elapsed_seconds", "error",
                ],
            )
            writer.writeheader()
            for r in rows:
                writer.writerow({k: r.get(k, "") for k in writer.fieldnames})
        print()
        print(f"CSV written to {args.csv}")

    return 0
299
+
300
+
301
+ if __name__ == "__main__":
302
+ sys.exit(main())
tests/fixtures/.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Keep audio clips in the label folders (ai/, human/) out of the repository.
# Patterns containing a slash are resolved relative to the directory holding
# this .gitignore (tests/fixtures/), so they must not repeat "fixtures/".
*/*.mp3
*/*.wav
*/*.m4a
*/*.aac
*/*.flac
*/*.ogg
tests/fixtures/.gitkeep ADDED
File without changes
tests/fixtures/README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Benchmark fixtures
2
+
3
+ Drop audio clips here to run `benchmark.py` against the deployed backend.
4
+ The folder structure **is** the ground-truth label:
5
+
6
+ ```
7
+ fixtures/
8
+ ├── ai/ ← AI-generated clips (expected: is_ai=true)
9
+ └── human/ ← real human-performed / human-produced clips (expected: is_ai=false)
10
+ ```
11
+
12
+ Supported: `.mp3 .wav .m4a .aac .flac .ogg`. Clips are gitignored — never
13
+ commit copyrighted audio or paid-generator outputs.
14
+
15
+ ## How many clips?
16
+
17
+ | Use case | Per folder | Total |
18
+ |---|---|---|
19
+ | Quick sanity check | 5 | 10 |
20
+ | Meaningful comparison between models | 20–30 | 40–60 |
21
+ | Publishable numbers | 100+ | 200+ |
22
+
23
+ For the **model replacement decision**, 20–30 per folder (40–60 total) is
24
+ enough to distinguish a broken model from a working one and to choose
25
+ among 2–3 candidates.
26
+
27
+ ## Collection tips
28
+
29
+ ### AI clips
30
+ - **Suno**: 5–10 across genres (rock, pop, hip-hop, classical, EDM)
31
+ - **Udio**: 5–10 different prompts
32
+ - **ElevenLabs**: 3–5 AI vocals / music
33
+ - **Soundraw / AIVA / Boomy / Mubert**: 1–2 each for coverage
34
+ - **Style variety matters more than quantity** — if all your AI clips are
35
+ Suno pop songs, you're only measuring Suno-pop detection.
36
+
37
+ ### Human clips
38
+ - **Varied decades**: 1970s → 2020s
39
+ - **Varied production quality**: studio albums, live recordings,
40
+ lo-fi / demos, acoustic, heavy production
41
+ - **Varied sources**:
42
+ - Your own Apple Music library export
43
+ - Free-to-use samples from `freemusicarchive.org` or `ccmixter.org`
44
+ - 30s previews from iTunes (use `fetch_apple_previews.py --json` to
45
+ grab URLs, then `curl` + `ffmpeg -t 20` to make fixtures)
46
+ - **Avoid**: recent 2024+ chart hits (might be AI-assisted); solo
47
+ synthesized instruments (too easy); meme songs (too out-of-dist)
48
+
49
+ ## Duration
50
+
51
+ Backend rejects clips > 30s. Trim with:
52
+ ```bash
53
+ ffmpeg -i input.mp3 -t 20 -c copy output.mp3
54
+ ```
55
+
56
+ Or batch:
57
+ ```bash
58
+ for f in *.mp3; do ffmpeg -i "$f" -t 20 -c copy "trimmed_$f"; done
59
+ ```
60
+
61
+ ## Running the benchmark
62
+
63
+ ```bash
64
+ cd backend/tests
65
+ export DETECTOR_API_URL='https://michal-giza-audio-detector-backend.hf.space'
66
+ export DETECTOR_API_KEY='...'
67
+
68
+ python3 benchmark.py # basic run
69
+ python3 benchmark.py --sweep # try 9 thresholds
70
+ python3 benchmark.py --csv results.csv # also export CSV
71
+ ```
72
+
73
+ ## What to look at
74
+
75
+ The **score distribution** section is more important than the accuracy
76
+ number. If the wav2vec2 score histograms for AI and HUMAN clips
77
+ **overlap completely**, the model isn't discriminating — it's just
78
+ returning the same value for everything. No threshold will save it.
79
+
80
+ If they separate cleanly (AI scores cluster high, HUMAN scores cluster
81
+ low or vice-versa), the model is working and threshold tuning could
82
+ produce a usable classifier.
83
+
84
+ ## Quota awareness
85
+
86
+ Backend enforces **50 requests/IP/day**. A 40-clip benchmark run uses
87
+ 40 of those. If you hit the quota you'll see `429` responses — rerun
88
+ tomorrow, or temporarily increase `DAILY_LIMIT` in `main.py` for eval.