File size: 3,265 Bytes
5b33997
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
"""
Fetch real Apple Music preview URLs via the iTunes Search API.

iTunes Search is a free, keyless public API. Every result includes a 30s
`previewUrl` pointing at `audio-ssl.itunes.apple.com` — which is exactly
one of the domains our backend allowlists for /analyze.

Usage:
    python3 fetch_apple_previews.py                       # default query set
    python3 fetch_apple_previews.py "blinding lights"     # single search
    python3 fetch_apple_previews.py --json                # machine-readable

Output: prints track metadata + preview URL, ready to feed to smoke_test.py.
"""

import argparse
import json
import sys
import urllib.parse
import urllib.request

# Base endpoint of the iTunes Search API; fetch_preview() appends the
# URL-encoded query string to it.
ITUNES_SEARCH = "https://itunes.apple.com/search"

# A curated set of well-known HUMAN-performed tracks spanning genres.
# These are guaranteed "not AI" — useful as negative controls for the detector.
# main() searches for every entry here when no query is given on the command
# line. Each entry is a free-text "<title> <artist>" search term.
DEFAULT_QUERIES = [
    "bohemian rhapsody queen",
    "blinding lights the weeknd",
    "viva la vida coldplay",
    "billie jean michael jackson",
    "shape of you ed sheeran",
    "hotel california eagles",
]


def fetch_preview(query: str, limit: int = 1) -> list[dict]:
    """Query the iTunes Search API and collect songs that expose a preview.

    Args:
        query: Free-text search term, sent as the ``term`` parameter.
        limit: Value sent as the ``limit`` parameter of the request.

    Returns:
        One dict per returned result that has a non-empty ``previewUrl``,
        with the keys ``track``, ``artist``, ``album``, ``preview_url``,
        ``track_id`` and ``genre``. Metadata fields missing from a result
        are ``None``. Results without a preview URL are dropped.

    Raises:
        Nothing is caught here: errors from ``urllib.request.urlopen``
        (including the 10 second timeout) and from JSON decoding propagate
        to the caller.
    """
    query_string = urllib.parse.urlencode(
        {"term": query, "media": "music", "limit": limit, "entity": "song"}
    )
    endpoint = f"{ITUNES_SEARCH}?{query_string}"

    with urllib.request.urlopen(endpoint, timeout=10) as resp:
        raw_body = resp.read()
    payload = json.loads(raw_body.decode())

    # Keep only entries that actually carry a preview; key order is preserved
    # because it determines the field order of the --json output.
    return [
        {
            "track": item.get("trackName"),
            "artist": item.get("artistName"),
            "album": item.get("collectionName"),
            "preview_url": item["previewUrl"],
            "track_id": item.get("trackId"),
            "genre": item.get("primaryGenreName"),
        }
        for item in payload.get("results", [])
        if item.get("previewUrl")
    ]


def main() -> int:
    """Run the command-line interface and return the process exit status.

    Parses an optional positional search term plus ``--json`` and ``--limit``,
    fetches previews for either that term or every entry in
    ``DEFAULT_QUERIES``, and prints the collected results to stdout.

    A query that fails is reported on stderr and skipped; the remaining
    queries still run.

    Returns:
        ``0`` after printing results (in JSON mode this includes an empty
        list), or ``1`` in text mode when nothing was collected.
    """
    cli = argparse.ArgumentParser(description="Fetch Apple Music preview URLs.")
    cli.add_argument("query", nargs="?", help="Search term (default: a curated set)")
    cli.add_argument("--json", action="store_true", help="Output as JSON")
    cli.add_argument("--limit", type=int, default=1, help="Results per query")
    args = cli.parse_args()

    search_terms = [args.query] if args.query else DEFAULT_QUERIES

    collected: list[dict] = []
    for term in search_terms:
        try:
            collected += fetch_preview(term, limit=args.limit)
        except Exception as exc:
            # Best effort: one failing lookup must not abort the whole run.
            print(f"[warn] failed to fetch '{term}': {exc}", file=sys.stderr)

    if args.json:
        print(json.dumps(collected, indent=2))
        return 0

    if not collected:
        print("No results.", file=sys.stderr)
        return 1

    # Label (with its alignment padding) paired with the result key it shows.
    report_rows = (
        ("Track:   ", "track"),
        ("Artist:  ", "artist"),
        ("Album:   ", "album"),
        ("Genre:   ", "genre"),
        ("Preview: ", "preview_url"),
    )
    for position, hit in enumerate(collected, start=1):
        print(f"--- #{position} ---")
        for label, key in report_rows:
            print(f"{label}{hit[key]}")
        print()

    return 0


# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())