Devon personality + Whisper name fix + music vocal filtering

- Devon: more conversational when addressed directly (500 tokens, 3-5 sentences)
- Devon: monitor prompt rewritten to encourage more contributions
- Devon: polling interval 15s (was 30s), removed 2-message minimum
- Whisper: no fuzzy name matching for 3-char names, require first letter match
- fetch_music.py: post-fetch vocal detection filter using musicinfo tags
- scan_music_vocals.py: new script to scan existing library for vocal tracks

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 23:59:03 -06:00
parent c69c2ad532
commit f3c91fc385
4 changed files with 213 additions and 45 deletions
@@ -58,11 +58,41 @@ def sanitize_filename(name: str) -> str:
    return re.sub(r'[<>:"/\\|?*]', '', name).strip()


+def _has_vocals(track: dict) -> bool:
+    """Return True if the track's musicinfo field, musicinfo tags, or name hints at vocals — catches tracks Jamendo mis-tagged as instrumental."""
+    mi = track.get("musicinfo", {})  # NOTE(review): a present-but-null "musicinfo" would make mi None and break mi.get below — confirm the API never sends null
+    # Signal 1: musicinfo's own vocalinstrumental value (separate from the API filter), compared case-insensitively to "vocal"
+    vi = mi.get("vocalinstrumental")
+    if vi and vi.lower() == "vocal":
+        return True
+    # Signal 2: any musicinfo tag that exactly equals one of the vocal keywords listed below
+    tags = mi.get("tags", {})
+    # tags may be a dict of lists, e.g. {"genres": [...], "instruments": [...], "vartags": [...]}, or a flat list; any other shape yields no tags
+    all_tags = []
+    if isinstance(tags, dict):
+        for v in tags.values():
+            if isinstance(v, list):
+                all_tags.extend(t.lower() for t in v)  # NOTE(review): assumes every tag is a str — verify against real responses
+    elif isinstance(tags, list):
+        all_tags = [t.lower() for t in tags]
+    vocal_tags = {"vocals", "vocal", "singing", "singer", "voice", "lyrics",
+                  "rap", "hiphop", "hip-hop", "spoken", "spoken word"}
+    if vocal_tags & set(all_tags):
+        return True
+    # Signal 3: substring match on the lower-cased track name (plain `in` test, not word-boundary aware)
+    name_lower = track.get("name", "").lower()
+    if any(w in name_lower for w in ["feat.", "ft.", "vocal", "remix vocal", "(voice"]):  # NOTE(review): "ft." also matches names like "Drift."; "remix vocal" is already covered by "vocal"
+        return True
+    return False
+
+
 def search_tracks(client: httpx.Client, client_id: str, genre: str, limit: int = 20) -> list[dict]:
+    # Request more than needed so we can filter out vocal false positives
+    fetch_limit = min(limit * 3, 200)
    params = {
        "client_id": client_id,
        "format": "json",
-        "limit": min(limit, 200),
+        "limit": fetch_limit,
        "vocalinstrumental": "instrumental",
        "fuzzytags": genre,
        "durationbetween": "60_300",
@@ -78,7 +108,21 @@ def search_tracks(client: httpx.Client, client_id: str, genre: str, limit: int =
        print(f"  API error: {data['headers'].get('error_message', 'unknown')}")
        return []

-    return data.get("results", [])
+    results = data.get("results", [])
+    # Post-filter: reject tracks with vocal indicators despite the API filter
+    filtered = []
+    for t in results:
+        if _has_vocals(t):
+            print(f"    SKIP (vocals detected): {t.get('artist_name', '?')} - {t.get('name', '?')}")
+            continue
+        filtered.append(t)
+        if len(filtered) >= limit:
+            break
+
+    skipped = len(results) - len(filtered)
+    if skipped:
+        print(f"  (filtered out {skipped} tracks with vocal indicators)")
+    return filtered


 def make_filename(track: dict, genre_tag: str) -> str: