#!/usr/bin/env python3
"""
End-to-end smoke test for the CheckAI backend.

Exercises every public endpoint plus security/abuse guardrails. Designed to be
run against a deployed HF Space (or a local uvicorn instance).

Required environment variables:
    DETECTOR_API_URL — e.g. https://michal-giza-audio-detector-backend.hf.space
    DETECTOR_API_KEY — the X-Api-Key the backend expects

Usage:
    export DETECTOR_API_URL=https://michal-giza-audio-detector-backend.hf.space
    export DETECTOR_API_KEY='...'
    python3 smoke_test.py                    # run all tests
    python3 smoke_test.py --only health      # subset
    python3 smoke_test.py --only health,url,upload,stream,security

Exit code is 0 on all-pass, 1 on any failure, and 2 on a configuration
problem (missing environment variable, no valid suite selected, or the
iTunes preview lookup failing).
"""

import argparse
import io
import json
import os
import sys
import time
import urllib.parse
import urllib.request
import wave
from dataclasses import dataclass
from typing import Any, Callable

import requests  # bundled with the backend's requirements

# ---------------------------------------------------------------------------
# Colored output
# ---------------------------------------------------------------------------
GREEN = "\033[92m"
RED = "\033[91m"
YELLOW = "\033[93m"
CYAN = "\033[96m"
BOLD = "\033[1m"
RESET = "\033[0m"


def ok(msg: str) -> None:
    """Print a green PASS line."""
    print(f" {GREEN}PASS{RESET} {msg}")


def fail(msg: str) -> None:
    """Print a red FAIL line."""
    print(f" {RED}FAIL{RESET} {msg}")


def info(msg: str) -> None:
    """Print a cyan informational line."""
    print(f" {CYAN}info{RESET} {msg}")


def warn(msg: str) -> None:
    """Print a yellow warning line."""
    print(f" {YELLOW}warn{RESET} {msg}")


def section(title: str) -> None:
    """Print a bold section banner preceded by a blank line."""
    print(f"\n{BOLD}=== {title} ==={RESET}")


# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
BASE_URL = os.environ.get("DETECTOR_API_URL", "").rstrip("/")
API_KEY = os.environ.get("DETECTOR_API_KEY", "")

if not BASE_URL:
    print("ERROR: DETECTOR_API_URL not set", file=sys.stderr)
    sys.exit(2)
if not API_KEY:
    print("ERROR: DETECTOR_API_KEY not set", file=sys.stderr)
    sys.exit(2)

HEADERS_OK = {"X-Api-Key": API_KEY, "Content-Type": "application/json"}
HEADERS_BAD = {"X-Api-Key": "wrong-key-abc123", "Content-Type": "application/json"}

# Suite names accepted by --only, in the order they are executed.
SUITES = ("health", "url", "upload", "stream", "security", "ratelimit")


def _fmt_score(value: Any) -> str:
    """Format a numeric score to three decimals, or ``n/a`` if it is absent.

    Guards the log lines against a missing/None score: applying ``:.3f``
    directly to ``None`` raises TypeError, which would turn an otherwise
    successful response into a reported failure.
    """
    if isinstance(value, (int, float)):
        return f"{value:.3f}"
    return "n/a"


def _mask_key(key: str) -> str:
    """Return a log-safe preview of *key*.

    Only keys long enough that the preview reveals a small fraction show
    their first four and last two characters; shorter keys are fully masked
    (a ``key[:4]...key[-2:]`` preview of a key of six or fewer characters
    would print the whole secret).
    """
    if len(key) <= 12:
        return "*" * len(key)
    return f"{key[:4]}...{key[-2:]}"


# ---------------------------------------------------------------------------
# iTunes preview fetch (reuse logic to get a fresh URL each run)
# ---------------------------------------------------------------------------
def get_apple_preview(query: str = "bohemian rhapsody queen") -> dict:
    """Look up the first iTunes song match for *query*.

    Returns:
        A dict with the keys ``track``, ``artist`` and ``preview_url``.

    Raises:
        IndexError: the search returned no results.
        KeyError: the response or the first result lacks an expected field.
        OSError: the HTTP request failed (``urllib.error.URLError`` is a
            subclass).
    """
    params = urllib.parse.urlencode(
        {"term": query, "media": "music", "limit": 1, "entity": "song"}
    )
    url = f"https://itunes.apple.com/search?{params}"
    with urllib.request.urlopen(url, timeout=10) as resp:
        data = json.loads(resp.read().decode())

    results = data["results"]
    if not results:
        # Same exception type the bare results[0] would raise, but with a
        # message that tells the operator what actually went wrong.
        raise IndexError(f"iTunes search returned no results for {query!r}")

    first = results[0]
    return {
        "track": first["trackName"],
        "artist": first["artistName"],
        "preview_url": first["previewUrl"],
    }


# ---------------------------------------------------------------------------
# Test suites
# ---------------------------------------------------------------------------
class Results:
    """Running tally of passed and failed checks."""

    def __init__(self) -> None:
        self.passed = 0
        self.failed = 0

    def record(self, success: bool) -> None:
        """Count one check as passed (*success* truthy) or failed."""
        if success:
            self.passed += 1
        else:
            self.failed += 1


def test_health(r: Results) -> None:
    """Check ``GET /health`` (status + model_loaded) and ``GET /queue/status``.

    Records two checks for /health (or a single failure if the request or
    JSON decoding raises) and one check for /queue/status.
    """
    section("Health & model load")

    # Broad except is deliberate throughout this script: any error while
    # probing an endpoint is a smoke-test failure, not a crash.
    try:
        resp = requests.get(f"{BASE_URL}/health", timeout=15)
        body = resp.json()
        if resp.status_code == 200 and body.get("status") == "online":
            ok(f"GET /health → 200 (uptime {body.get('uptime_seconds')}s)")
            r.record(True)
        else:
            fail(f"GET /health → {resp.status_code} body={body}")
            r.record(False)

        if body.get("model_loaded") is True:
            ok("Wav2Vec2 model reports loaded")
            r.record(True)
        else:
            fail("model_loaded is False — model failed to load on the Space")
            r.record(False)
    except Exception as e:
        fail(f"/health raised: {e}")
        r.record(False)

    try:
        resp = requests.get(f"{BASE_URL}/queue/status", timeout=10)
        body = resp.json()
        if resp.status_code == 200 and "max_concurrency" in body:
            # .get() so a missing active_requests field does not turn a
            # passing check into an exception.
            ok(
                f"GET /queue/status → active={body.get('active_requests')} "
                f"max={body['max_concurrency']}"
            )
            r.record(True)
        else:
            fail(f"GET /queue/status → {resp.status_code} body={body}")
            r.record(False)
    except Exception as e:
        fail(f"/queue/status raised: {e}")
        r.record(False)


def test_url_analysis(r: Results, preview: dict) -> None:
    """POST the preview URL to ``/analyze`` and validate the response shape.

    Records exactly one check. A 200 response containing ``is_ai``,
    ``confidence`` and ``details`` passes; an AI verdict on the (presumed
    human) track only produces a warning.
    """
    section(f"URL analysis — {preview['artist']} · {preview['track']}")
    info(f"Preview: {preview['preview_url'][:80]}...")
    try:
        start = time.monotonic()
        resp = requests.post(
            f"{BASE_URL}/analyze",
            headers=HEADERS_OK,
            json={"preview_url": preview["preview_url"]},
            timeout=60,
        )
        elapsed = time.monotonic() - start

        if resp.status_code != 200:
            fail(f"/analyze → {resp.status_code} body={resp.text[:200]}")
            r.record(False)
            return

        body = resp.json()
        required = {"is_ai", "confidence", "details"}
        if not required.issubset(body.keys()):
            fail(f"response missing keys — got {list(body.keys())}")
            r.record(False)
            return

        ok(f"/analyze → 200 in {elapsed:.1f}s")
        # `details` may be null or lack individual scores; never let the
        # purely informational lines fail the check.
        details = body["details"] if isinstance(body["details"], dict) else {}
        info(
            f" verdict: {'AI' if body['is_ai'] else 'HUMAN'} "
            f"confidence: {_fmt_score(body['confidence'])}"
        )
        info(
            f" wav2vec2: {_fmt_score(details.get('wav2vec2_score'))} "
            f"fingerprint: {_fmt_score(details.get('fingerprint_score'))}"
        )

        # Sanity check — well-known human track should NOT read AI.
        if body["is_ai"]:
            warn("expected HUMAN for a well-known released track — review model thresholds")
        else:
            ok("correctly classified as HUMAN")
        r.record(True)
    except Exception as e:
        fail(f"/analyze raised: {e}")
        r.record(False)


def _silent_wav(seconds: int = 3, sample_rate: int = 16000) -> bytes:
    """Build an in-memory mono 16-bit PCM WAV file containing only silence."""
    buf = io.BytesIO()
    with wave.open(buf, "wb") as writer:
        writer.setnchannels(1)
        writer.setsampwidth(2)  # 16-bit samples
        writer.setframerate(sample_rate)
        writer.writeframes(b"\x00\x00" * (sample_rate * seconds))
    return buf.getvalue()


def test_upload_analysis(r: Results) -> None:
    """POST a synthetic silent WAV to ``/analyze/upload``.

    Records exactly one check.

    NOTE(review): the source of this function was truncated after the start
    of its hand-written WAV header. The request and the pass criterion below
    are a reconstruction: the multipart field name ``file`` matches the
    upload request in test_security, and only HTTP 200 is treated as a pass.
    Confirm the intended acceptance criteria against the backend.
    """
    section("Upload analysis — synthetic silent buffer")
    # Generate a 3-second silent WAV in memory (avoids needing a real recording file).
    # This mainly exercises the upload path + duration validation, not the model.
    try:
        wav_bytes = _silent_wav(seconds=3, sample_rate=16000)

        start = time.monotonic()
        resp = requests.post(
            f"{BASE_URL}/analyze/upload",
            headers={"X-Api-Key": API_KEY},
            files={"file": ("silence.wav", wav_bytes, "audio/wav")},
            timeout=60,
        )
        elapsed = time.monotonic() - start

        if resp.status_code != 200:
            fail(f"/analyze/upload → {resp.status_code} body={resp.text[:200]}")
            r.record(False)
            return

        body = resp.json()
        ok(f"/analyze/upload → 200 in {elapsed:.1f}s")
        if isinstance(body, dict):
            info(
                f" verdict: {'AI' if body.get('is_ai') else 'HUMAN'} "
                f"confidence: {_fmt_score(body.get('confidence'))}"
            )
        r.record(True)
    except Exception as e:
        fail(f"/analyze/upload raised: {e}")
        r.record(False)


def test_stream_analysis(r: Results, preview: dict) -> None:
    """POST the preview URL to ``/analyze/stream`` and validate the SSE flow.

    Records exactly one check. Passes only when ``processing``, ``result``
    and ``complete`` events were all observed; an ``error`` event fails
    immediately.
    """
    section(f"SSE streaming — {preview['artist']} · {preview['track']}")
    try:
        start = time.monotonic()
        # Context manager so the streamed connection is released on every
        # exit path (early return, error event, exception).
        with requests.post(
            f"{BASE_URL}/analyze/stream",
            headers=HEADERS_OK,
            json={"preview_url": preview["preview_url"]},
            stream=True,
            timeout=90,
        ) as resp:
            if resp.status_code != 200:
                fail(f"stream → {resp.status_code} body={resp.text[:200]}")
                r.record(False)
                return

            # Server-sent events are UTF-8; without this, requests falls
            # back to ISO-8859-1 for text/* responses with no charset.
            resp.encoding = "utf-8"

            events_seen: list[str] = []
            got_result = False
            got_complete = False
            current_event: str | None = None

            # Continue reading until we see `event: complete` rather than
            # breaking on `result`. This is the build-12 contract — every
            # successful stream MUST emit `complete` after the terminal
            # `result`/`error` so clients (the Flutter SSE parser) know
            # the stream is logically over without waiting for the socket
            # close. Apple's reviewer hit "stuck on finalizing probability
            # indefinitely" on build 13 because the previous server never
            # sent this terminator.
            for raw in resp.iter_lines(decode_unicode=True):
                if not raw:
                    continue
                line = raw.strip()
                if not line:
                    continue

                if line.startswith("event:"):
                    current_event = line.split(":", 1)[1].strip()
                    events_seen.append(current_event)
                    info(f" event: {current_event}")
                    if current_event == "complete":
                        got_complete = True
                        break
                elif line.startswith("data:"):
                    payload = line.split(":", 1)[1].strip()
                    try:
                        j = json.loads(payload)
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(j, dict):
                        # Valid JSON but not an object: nothing to inspect.
                        continue

                    if current_event == "processing":
                        info(f" stage: {j.get('stage')}")
                    elif current_event == "queued":
                        info(f" position: {j.get('position')}")
                    elif current_event == "result":
                        info(
                            f" verdict: {'AI' if j.get('is_ai') else 'HUMAN'} "
                            f"conf={_fmt_score(j.get('confidence'))}"
                        )
                        got_result = True
                        # Don't break — keep reading until `complete`.
                    elif current_event == "error":
                        fail(f" stream error: {j.get('message')}")
                        r.record(False)
                        return

        elapsed = time.monotonic() - start
        expected_stages = {"processing", "result", "complete"}
        if got_result and got_complete and expected_stages.issubset(set(events_seen)):
            ok(f"stream completed in {elapsed:.1f}s events={events_seen}")
            r.record(True)
        elif got_result and not got_complete:
            fail(
                f"stream missing `event: complete` terminator (build-12 "
                f"contract violation) — events={events_seen}"
            )
            r.record(False)
        else:
            fail(f"stream incomplete events={events_seen}")
            r.record(False)
    except Exception as e:
        fail(f"stream raised: {e}")
        r.record(False)


@dataclass(frozen=True)
class _GuardrailCase:
    """One negative request and the status code the backend must answer with."""

    pass_msg: str  # printed verbatim when the expected status is returned
    fail_label: str  # prefix of the "→ <code> (expected <n>)" failure line
    raise_label: str  # prefix of the "raised: <exc>" failure line
    expected: int  # required HTTP status code
    path: str  # endpoint path appended to BASE_URL
    request_kwargs: dict[str, Any]  # extra keyword arguments for requests.post


def _run_guardrail(r: Results, case: _GuardrailCase) -> None:
    """Send one guardrail request and record whether the status matched."""
    try:
        resp = requests.post(f"{BASE_URL}{case.path}", timeout=10, **case.request_kwargs)
        if resp.status_code == case.expected:
            ok(case.pass_msg)
            r.record(True)
        else:
            fail(f"{case.fail_label} → {resp.status_code} (expected {case.expected})")
            r.record(False)
    except Exception as e:
        fail(f"{case.raise_label} raised: {e}")
        r.record(False)


def test_security(r: Results) -> None:
    """Verify the backend rejects unauthenticated and malicious requests.

    Records one check per guardrail: missing key, wrong key, non-allowlisted
    domain, plain-http URL, loopback-IP URL, and a non-audio upload.
    """
    section("Security guardrails")

    itunes_https = {"preview_url": "https://audio-ssl.itunes.apple.com/foo.m4a"}
    cases = (
        # Missing API key → 403
        _GuardrailCase(
            "missing X-Api-Key → 403", "missing key", "missing-key test", 403, "/analyze",
            {"headers": {"Content-Type": "application/json"}, "json": itunes_https},
        ),
        # Wrong API key → 403
        _GuardrailCase(
            "invalid X-Api-Key → 403", "bad key", "bad-key test", 403, "/analyze",
            {"headers": HEADERS_BAD, "json": itunes_https},
        ),
        # Non-allowlisted domain → 400
        _GuardrailCase(
            "non-allowlisted domain → 400", "bad domain", "domain test", 400, "/analyze",
            {"headers": HEADERS_OK, "json": {"preview_url": "https://evil.example.com/file.mp3"}},
        ),
        # http:// (not https) → 400
        _GuardrailCase(
            "http:// (non-https) → 400", "http scheme", "scheme test", 400, "/analyze",
            {
                "headers": HEADERS_OK,
                "json": {"preview_url": "http://audio-ssl.itunes.apple.com/foo.m4a"},
            },
        ),
        # SSRF attempt — private IP → 400
        _GuardrailCase(
            "SSRF private IP (127.0.0.1) → 400", "SSRF", "SSRF test", 400, "/analyze",
            {"headers": HEADERS_OK, "json": {"preview_url": "https://127.0.0.1/x.m4a"}},
        ),
        # Wrong content type on upload → 400
        _GuardrailCase(
            "bad content-type → 400", "bad content-type", "content-type test", 400,
            "/analyze/upload",
            {
                "headers": {"X-Api-Key": API_KEY},
                "files": {"file": ("fake.exe", b"MZ\x90\x00", "application/x-dosexec")},
            },
        ),
    )
    for case in cases:
        _run_guardrail(r, case)


def test_rate_limit(r: Results) -> None:
    """Fire a burst of requests and look for at least one HTTP 429.

    Records exactly one check, which always passes: the absence of a 429 is
    reported as a warning only (see the comment in the else branch).
    """
    section("Rate limit — per-minute (5/min)")
    info("firing 7 rapid requests; expecting ≥1 429 from slowapi")
    codes: list[int] = []
    for _ in range(7):
        try:
            resp = requests.post(
                f"{BASE_URL}/analyze",
                headers=HEADERS_OK,
                json={"preview_url": "https://evil.example.com/x.m4a"},  # rejected before model
                timeout=10,
            )
            codes.append(resp.status_code)
        except Exception:
            # -1 marks a request that never produced a status code.
            codes.append(-1)

    info(f"status codes: {codes}")
    if 429 in codes:
        ok(f"rate limit kicked in ({codes.count(429)}× 429)")
        r.record(True)
    else:
        # Not fatal — /analyze rejects bad domain at 400 BEFORE rate limiter on
        # some configurations. Warn rather than fail.
        warn("no 429 seen — rate limiter may only apply after URL validation")
        r.record(True)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> int:
    """Parse arguments, run the selected suites, and print a summary.

    Returns:
        0 when every recorded check passed, 1 when any failed, 2 when the
        iTunes preview needed by the url/stream suites could not be fetched.
        Selecting no valid suite exits with status 2 via argparse.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--only",
        default="health,url,upload,stream,security,ratelimit",
        help="Comma-separated subset: health,url,upload,stream,security,ratelimit",
    )
    parser.add_argument(
        "--query",
        default="bohemian rhapsody queen",
        help="iTunes search term for the URL/stream tests",
    )
    args = parser.parse_args()

    requested = {s.strip() for s in args.only.split(",") if s.strip()}
    unknown = requested - set(SUITES)
    selected = requested & set(SUITES)
    if unknown:
        warn(f"ignoring unknown suite name(s): {', '.join(sorted(unknown))}")
    if not selected:
        # Without this, a typo such as `--only helth` runs nothing, prints
        # "0/0 checks passed" and exits 0 — a false green.
        parser.error(f"--only selected no valid suite; choose from: {','.join(SUITES)}")

    print(f"{BOLD}CheckAI backend smoke test{RESET}")
    print(f"Target: {BASE_URL}")
    print(f"API key: {_mask_key(API_KEY)} (len={len(API_KEY)})")

    # Fetch one preview URL up front for URL + stream tests
    preview = None
    if {"url", "stream"} & selected:
        try:
            preview = get_apple_preview(args.query)
            info(f"Preview chosen: {preview['artist']} — {preview['track']}")
        except Exception as e:
            print(f"{RED}Failed to fetch iTunes preview: {e}{RESET}")
            return 2

    r = Results()
    runners: dict[str, Callable[[], None]] = {
        "health": lambda: test_health(r),
        "url": lambda: test_url_analysis(r, preview),
        "upload": lambda: test_upload_analysis(r),
        "stream": lambda: test_stream_analysis(r, preview),
        "security": lambda: test_security(r),
        "ratelimit": lambda: test_rate_limit(r),
    }
    for name in SUITES:
        if name in selected:
            runners[name]()

    # Summary
    print()
    total = r.passed + r.failed
    color = GREEN if r.failed == 0 else RED
    print(f"{BOLD}{color}{r.passed}/{total} checks passed{RESET}")
    return 0 if r.failed == 0 else 1


if __name__ == "__main__":
    sys.exit(main())