audio-detector-backend / tests /fetch_apple_previews.py
michal-giza's picture
Upload 7 files
5b33997 verified
#!/usr/bin/env python3
"""
Fetch real Apple Music preview URLs via the iTunes Search API.

iTunes Search is a free, keyless public API. Results normally include a 30s
`previewUrl` pointing at `audio-ssl.itunes.apple.com` — which is exactly
one of the domains our backend allowlists for /analyze. Results without a
`previewUrl` are skipped.

Usage:
    python3 fetch_apple_previews.py                     # default query set
    python3 fetch_apple_previews.py "blinding lights"   # single search
    python3 fetch_apple_previews.py --json              # machine-readable
    python3 fetch_apple_previews.py --limit 3 "queen"   # --limit sets results per query

Output: prints track metadata + preview URL, ready to feed to smoke_test.py.
"""
import argparse
import json
import sys
import urllib.parse
import urllib.request
# Endpoint of the iTunes Search API; query parameters are appended to it.
ITUNES_SEARCH = "https://itunes.apple.com/search"

# Search terms used when no query is given on the command line.
# Each one names a well-known recording performed by humans, across several
# genres, so the results can serve as "not AI" negative controls for the
# detector.
DEFAULT_QUERIES = [
    "bohemian rhapsody queen",
    "blinding lights the weeknd",
    "viva la vida coldplay",
    "billie jean michael jackson",
    "shape of you ed sheeran",
    "hotel california eagles",
]
def fetch_preview(query: str, limit: int = 1) -> list[dict]:
    """Query the iTunes Search API and collect the results that have a preview.

    Args:
        query: Search text, sent as the ``term`` request parameter.
        limit: Value sent as the ``limit`` request parameter.

    Returns:
        One dict per returned result that carries a non-empty ``previewUrl``,
        with the keys ``track``, ``artist``, ``album``, ``preview_url``,
        ``track_id`` and ``genre``. Metadata fields missing from a result are
        ``None``. Results without a preview URL are left out.

    Raises:
        Nothing is caught here: errors from ``urllib.request.urlopen``
        (including the 10-second timeout) and from decoding the response
        body propagate to the caller.
    """
    query_string = urllib.parse.urlencode(
        {"term": query, "media": "music", "limit": limit, "entity": "song"}
    )
    with urllib.request.urlopen(
        f"{ITUNES_SEARCH}?{query_string}", timeout=10
    ) as response:
        payload = json.loads(response.read().decode())

    # Keep only entries that can actually be played back; key order matches
    # the order in which the fields are later printed / serialized.
    return [
        {
            "track": item.get("trackName"),
            "artist": item.get("artistName"),
            "album": item.get("collectionName"),
            "preview_url": item["previewUrl"],
            "track_id": item.get("trackId"),
            "genre": item.get("primaryGenreName"),
        }
        for item in payload.get("results", [])
        if item.get("previewUrl")
    ]
def main() -> int:
    """Command-line entry point: look up previews and print them.

    Uses the positional ``query`` argument when one is given, otherwise every
    entry of ``DEFAULT_QUERIES``. A query whose lookup raises is reported on
    stderr and skipped, so the remaining queries still run.

    Returns:
        Process exit status. With ``--json`` the collected results are dumped
        as a JSON array (possibly empty) and the status is always 0. In text
        mode the status is 1 when nothing was found and 0 otherwise.
    """
    cli = argparse.ArgumentParser(description="Fetch Apple Music preview URLs.")
    cli.add_argument("query", nargs="?", help="Search term (default: a curated set)")
    cli.add_argument("--json", action="store_true", help="Output as JSON")
    cli.add_argument("--limit", type=int, default=1, help="Results per query")
    opts = cli.parse_args()

    search_terms = [opts.query] if opts.query else DEFAULT_QUERIES

    collected: list[dict] = []
    for term in search_terms:
        try:
            collected.extend(fetch_preview(term, limit=opts.limit))
        except Exception as exc:
            # Best effort: one failing lookup must not abort the whole run.
            print(f"[warn] failed to fetch '{term}': {exc}", file=sys.stderr)

    if opts.json:
        print(json.dumps(collected, indent=2))
        return 0

    if not collected:
        print("No results.", file=sys.stderr)
        return 1

    for number, hit in enumerate(collected, 1):
        # The trailing empty string yields the blank line between records.
        print(
            f"--- #{number} ---",
            f"Track: {hit['track']}",
            f"Artist: {hit['artist']}",
            f"Album: {hit['album']}",
            f"Genre: {hit['genre']}",
            f"Preview: {hit['preview_url']}",
            "",
            sep="\n",
        )
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())