f3c91fc385
- Devon: more conversational when addressed directly (500 tokens, 3-5 sentences) - Devon: monitor prompt rewritten to encourage more contributions - Devon: polling interval 15s (was 30s), removed 2-message minimum - Whisper: no fuzzy name matching for 3-char names, require first letter match - fetch_music.py: post-fetch vocal detection filter using musicinfo tags - scan_music_vocals.py: new script to scan existing library for vocal tracks Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
262 lines
9.1 KiB
Python
262 lines
9.1 KiB
Python
"""Fetch instrumental background music from Jamendo for the radio show.
|
|
|
|
Pixabay has no public music API — this uses Jamendo's free API instead.
|
|
All tracks are Creative Commons licensed. Attribution is saved to music/CREDITS.txt.
|
|
|
|
Setup: Get a free client_id at https://devportal.jamendo.com
|
|
Add JAMENDO_CLIENT_ID=your_id to .env
|
|
|
|
Usage:
|
|
python fetch_music.py # download 20 tracks across all genres
|
|
python fetch_music.py --genre jazz # download jazz only
|
|
python fetch_music.py --count 50 # download 50 tracks
|
|
python fetch_music.py --list # just list available tracks, don't download
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from dotenv import load_dotenv
|
|
|
|
# Load settings from a local .env file into the process environment;
# get_client_id() later reads JAMENDO_CLIENT_ID from there.
load_dotenv()

# Downloads and the attribution file live in a "music" folder next to this script.
MUSIC_DIR = Path(__file__).parent / "music"
CREDITS_FILE = MUSIC_DIR / "CREDITS.txt"

# Root of the Jamendo v3.0 REST API; endpoint paths are appended to it.
API_BASE = "https://api.jamendo.com/v3.0"

# Genres good for a late-night radio show
GENRES = ["jazz", "lofi", "blues", "ambient", "acoustic", "funk", "chill"]

# Map search tags to labels that _detect_genre() in main.py can match
# jazz, blues, funk, lo-fi are already in GENRE_KEYWORDS
# ambient, acoustic, chill would need to be added for auto-detection
GENRE_LABELS = {
    "jazz": "Jazz",
    "lofi": "Lo-Fi",
    "blues": "Blues",
    "ambient": "Ambient",
    "acoustic": "Acoustic",
    "funk": "Funk",
    "chill": "Chill",
}
|
|
|
|
|
|
def get_client_id():
    """Return the Jamendo client id taken from the JAMENDO_CLIENT_ID env var.

    Surrounding whitespace is stripped so a sloppy ``.env`` entry does not end
    up in API requests. If the variable is unset or blank, a hint is printed
    to stderr and the process exits with status 1.
    """
    key = (os.getenv("JAMENDO_CLIENT_ID") or "").strip()
    if not key:
        # Diagnostics go to stderr so they do not mix with normal output.
        print("Error: JAMENDO_CLIENT_ID not found in .env", file=sys.stderr)
        print("Get one free at https://devportal.jamendo.com", file=sys.stderr)
        sys.exit(1)
    return key
|
|
|
|
|
|
def sanitize_filename(name: str) -> str:
    """Return *name* with characters that are unsafe in file names removed.

    Every occurrence of ``<``, ``>``, ``:``, ``"``, ``/``, backslash, ``|``,
    ``?`` and ``*`` is dropped; leading and trailing whitespace is then
    trimmed from what remains.
    """
    forbidden = '<>:"/\\|?*'
    kept = "".join(ch for ch in name if ch not in forbidden)
    return kept.strip()
|
|
|
|
|
|
def _has_vocals(track: dict) -> bool:
    """Check musicinfo for vocal indicators — catches tracks Jamendo mis-tagged as instrumental.

    Three signals are inspected, in order:

    1. ``musicinfo["vocalinstrumental"]`` equal to ``"vocal"`` (case-insensitive).
    2. Any tag under ``musicinfo["tags"]`` that names a vocal style. The tags
       may be a dict of lists (e.g. genres / instruments / vartags) or a flat list.
    3. The track name containing "vocal", "(voice", or a "feat." / "ft." credit.

    Missing, null or unexpectedly typed fields are treated as "no signal"
    rather than raising.

    Args:
        track: One track record as returned by the search API.

    Returns:
        True if any vocal indicator is found, otherwise False.
    """
    mi = track.get("musicinfo")
    if not isinstance(mi, dict):
        # Covers both an absent key and an explicit JSON null.
        mi = {}

    # Check the vocalinstrumental field in musicinfo (separate from the API filter)
    vi = mi.get("vocalinstrumental")
    if isinstance(vi, str) and vi.lower() == "vocal":
        return True

    # Check tags for vocal/singing indicators.
    # tags can be {"genres": [...], "instruments": [...], "vartags": [...]}
    tags = mi.get("tags")
    if isinstance(tags, dict):
        tag_groups = [group for group in tags.values() if isinstance(group, list)]
    elif isinstance(tags, list):
        tag_groups = [tags]
    else:
        tag_groups = []
    # Non-string entries are ignored instead of crashing on .lower().
    all_tags = {
        tag.lower()
        for group in tag_groups
        for tag in group
        if isinstance(tag, str)
    }
    vocal_tags = {"vocals", "vocal", "singing", "singer", "voice", "lyrics",
                  "rap", "hiphop", "hip-hop", "spoken", "spoken word"}
    if vocal_tags & all_tags:
        return True

    # Check track name for vocal giveaways
    name = track.get("name")
    name_lower = name.lower() if isinstance(name, str) else ""
    if "vocal" in name_lower or "(voice" in name_lower:
        return True
    # The word boundary keeps "feat."/"ft." from matching inside ordinary
    # words such as "Defeat." or "Adrift.".
    return re.search(r"\b(?:feat|ft)\.", name_lower) is not None
|
|
|
|
|
|
def search_tracks(client: httpx.Client, client_id: str, genre: str, limit: int = 20) -> list[dict]:
    """Search Jamendo for instrumental tracks matching *genre*.

    The request asks for instrumental tracks only, but each result is also
    run through ``_has_vocals`` because that filter is not fully reliable.
    To leave room for rejections, up to three times *limit* results
    (capped at 200) are requested.

    Args:
        client: HTTP client used for the request.
        client_id: Jamendo API client id.
        genre: Tag passed to the API as ``fuzzytags``.
        limit: Maximum number of tracks to return.

    Returns:
        At most *limit* track dicts, in the order returned by the API. An
        empty list if *limit* is not positive or the API reports a
        non-success status.

    Raises:
        httpx.HTTPStatusError: If the HTTP response has an error status
            (via ``raise_for_status``).
    """
    if limit <= 0:
        return []

    # Request more than needed so we can filter out vocal false positives
    fetch_limit = min(limit * 3, 200)
    params = {
        "client_id": client_id,
        "format": "json",
        "limit": fetch_limit,
        "vocalinstrumental": "instrumental",
        "fuzzytags": genre,
        "durationbetween": "60_300",
        "include": "musicinfo+licenses",
        "order": "popularity_total",
    }

    resp = client.get(f"{API_BASE}/tracks/", params=params)
    resp.raise_for_status()
    data = resp.json()

    # Tolerate a payload without the expected "headers" envelope.
    headers = data.get("headers") or {}
    if headers.get("status") != "success":
        print(f" API error: {headers.get('error_message', 'unknown')}")
        return []

    # Post-filter: reject tracks with vocal indicators despite the API filter
    filtered = []
    skipped = 0  # counts only tracks actually rejected, not ones never examined
    for t in data.get("results", []):
        if len(filtered) >= limit:
            break
        if _has_vocals(t):
            print(f" SKIP (vocals detected): {t.get('artist_name', '?')} - {t.get('name', '?')}")
            skipped += 1
            continue
        filtered.append(t)

    if skipped:
        print(f" (filtered out {skipped} tracks with vocal indicators)")
    return filtered
|
|
|
|
|
|
def make_filename(track: dict, genre_tag: str) -> str:
    """Build the ``.mp3`` file name for *track*.

    The name has the form ``"Artist - Title.mp3"``. A ``" [Label]"`` suffix
    is inserted before the extension when neither *genre_tag* nor its display
    label already appears in the artist/title text, so the genre can still be
    recognised from the file name.

    Args:
        track: Track record; ``artist_name`` and ``name`` are used.
        genre_tag: Search tag the track was found under (a key of
            ``GENRE_LABELS``; unknown tags are title-cased for the label).

    Returns:
        The file name (no directory component).
    """
    # Fall back when the field is missing, null, or empty after sanitising
    # (e.g. a title consisting solely of forbidden characters).
    artist = sanitize_filename(track.get("artist_name") or "") or "Unknown"
    title = sanitize_filename(track.get("name") or "") or "Untitled"
    label = GENRE_LABELS.get(genre_tag, genre_tag.title())

    # Include genre tag if not already detectable from artist/title
    haystack = f"{artist} {title}".lower()
    if genre_tag in haystack or label.lower() in haystack:
        return f"{artist} - {title}.mp3"
    return f"{artist} - {title} [{label}].mp3"
|
|
|
|
|
|
def download_track(client: httpx.Client, track: dict, filepath: Path, index: int, total: int) -> bool:
    """Download one track's audio to *filepath*, printing progress.

    Args:
        client: HTTP client used for the download.
        track: Track record; reads ``audiodownload``, ``audiodownload_allowed``,
            ``name`` and ``duration``.
        filepath: Destination file.
        index: 1-based position of this track, for the progress prefix.
        total: Total number of tracks, for the progress prefix.

    Returns:
        True if the file was written. False if the track was skipped because
        it has no download URL or downloading is not allowed.

    Raises:
        httpx.HTTPError: If the request fails or returns an error status.
        OSError: If the file cannot be written.
    """
    # .get() keeps a record without a name from turning a SKIP into a KeyError.
    name = track.get("name", "?")

    url = track.get("audiodownload")
    if not url:
        print(f" [{index}/{total}] SKIP (no download URL): {name}")
        return False

    if not track.get("audiodownload_allowed", True):
        print(f" [{index}/{total}] SKIP (download not allowed): {name}")
        return False

    print(f" [{index}/{total}] Downloading: {filepath.name}...", end=" ", flush=True)
    resp = client.get(url, follow_redirects=True)
    resp.raise_for_status()

    # Write to a sibling temp file and rename, so an interrupted write never
    # leaves a truncated file under the final name.
    tmp_path = filepath.with_name(filepath.name + ".part")
    try:
        tmp_path.write_bytes(resp.content)
        tmp_path.replace(filepath)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        tmp_path.unlink(missing_ok=True)

    size_mb = len(resp.content) / (1024 * 1024)
    # A null or missing duration is shown as 0:00 instead of raising after
    # the file has already been saved.
    dur = int(track.get("duration") or 0)
    print(f"{size_mb:.1f} MB, {dur // 60}:{dur % 60:02d}")
    return True
|
|
|
|
|
|
def save_credit(track: dict, filename: str) -> None:
    """Append an attribution line for *track* to the credits file.

    Each line has the form ``File | Artist - Title | License | URL``. A
    two-line header is written first when the credits file is missing or
    empty. Nothing is written if *filename* is already credited.

    Args:
        track: Track record; reads ``artist_name``, ``name``,
            ``license_ccurl`` and ``shareurl``.
        filename: File name the track was saved under.
    """
    artist = track.get("artist_name", "Unknown")
    title = track.get("name", "Untitled")
    license_url = track.get("license_ccurl", "")
    share_url = track.get("shareurl", "")

    line = f"{filename} | {artist} - {title} | {license_url} | {share_url}\n"

    # Explicit UTF-8: artist and title text is arbitrary Unicode, and the
    # platform default encoding may not be able to represent it.
    # errors="replace" keeps a file written under another encoding readable.
    try:
        existing = CREDITS_FILE.read_text(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        existing = ""

    # Compare against the first column only, so a name that merely occurs
    # inside another entry does not count as already credited.
    credited = {
        row.split(" | ", 1)[0]
        for row in existing.splitlines()
        if row and not row.startswith("#")
    }
    if filename in credited:
        return

    with open(CREDITS_FILE, "a", encoding="utf-8") as f:
        if not existing:
            f.write("# Music Credits (Jamendo - Creative Commons)\n")
            f.write("# File | Artist - Title | License | URL\n\n")
        f.write(line)
|
|
|
|
|
|
def main():
    """Command-line entry point: search Jamendo, then list or download tracks.

    Flow:
        1. Parse ``--genre``, ``--count`` and ``--list``.
        2. Split the requested count across the selected genres and search
           each one, de-duplicating tracks by id across genres.
        3. With ``--list``, print a table and stop. Otherwise download each
           track not already on disk and record its attribution.

    Exits through argparse (status 2) if ``--count`` is not positive.
    """
    parser = argparse.ArgumentParser(description="Download instrumental music from Jamendo")
    parser.add_argument("--genre", choices=GENRES, help="Download only this genre")
    parser.add_argument("--count", type=int, default=20, help="Total tracks to download (default: 20)")
    parser.add_argument("--list", action="store_true", help="List available tracks without downloading")
    args = parser.parse_args()

    if args.count < 1:
        parser.error("--count must be at least 1")

    client_id = get_client_id()
    MUSIC_DIR.mkdir(exist_ok=True)

    genres = [args.genre] if args.genre else GENRES
    # divmod keeps the total at exactly --count: when count is smaller than
    # the number of genres, per_genre is 0 and only the first `remainder`
    # genres are searched, one track each.
    per_genre, remainder = divmod(args.count, len(genres))

    all_tracks = []
    seen_ids = set()

    with httpx.Client(timeout=30) as api_client:
        for i, genre in enumerate(genres):
            limit = per_genre + (1 if i < remainder else 0)
            if limit <= 0:
                continue
            print(f"Searching {genre}...", end=" ", flush=True)
            try:
                tracks = search_tracks(api_client, client_id, genre, limit)
            except (httpx.HTTPError, ValueError) as e:
                # One failed search (network error, bad status, unparsable
                # body) should not abort the remaining genres.
                print(f"ERROR: {e}")
                continue
            # Deduplicate across genres
            added = 0
            for t in tracks:
                if t["id"] not in seen_ids and added < limit:
                    t["_genre_tag"] = genre
                    all_tracks.append(t)
                    seen_ids.add(t["id"])
                    added += 1
            print(f"{added} tracks")

    if not all_tracks:
        print("No tracks found.")
        return

    total = len(all_tracks)

    if args.list:
        print(f"\n{'#':<4} {'Genre':<10} {'Artist':<25} {'Title':<40} {'Duration':<8}")
        print("-" * 90)
        for i, t in enumerate(all_tracks, 1):
            seconds = int(t.get("duration") or 0)
            dur = f"{seconds // 60}:{seconds % 60:02d}"
            # Truncate to one character less than the column width.
            artist = (t.get("artist_name") or "Unknown")[:24]
            title = (t.get("name") or "Untitled")[:39]
            label = GENRE_LABELS.get(t["_genre_tag"], t["_genre_tag"])
            print(f"{i:<4} {label:<10} {artist:<25} {title:<40} {dur:<8}")
        print(f"\n{total} tracks available")
        return

    # Download phase
    downloaded = 0
    skipped_exists = 0
    skipped_error = 0

    with httpx.Client(timeout=120, follow_redirects=True) as dl_client:
        for i, track in enumerate(all_tracks, 1):
            filename = make_filename(track, track["_genre_tag"])
            filepath = MUSIC_DIR / filename

            if filepath.exists():
                print(f" [{i}/{total}] EXISTS: {filename}")
                skipped_exists += 1
                continue

            try:
                if download_track(dl_client, track, filepath, i, total):
                    save_credit(track, filename)
                    downloaded += 1
                else:
                    skipped_error += 1
            except Exception as e:
                # Per-track boundary: report the failure and keep going.
                print(f" [{i}/{total}] ERROR: {e}")
                # Clean up partial download
                if filepath.exists():
                    filepath.unlink()
                skipped_error += 1

    print(f"\nDone: {downloaded} downloaded, {skipped_exists} existed, {skipped_error} skipped")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|