Devon personality + Whisper name fix + music vocal filtering
- Devon: more conversational when addressed directly (500 tokens, 3-5 sentences)
- Devon: monitor prompt rewritten to encourage more contributions
- Devon: polling interval 15s (was 30s), removed 2-message minimum
- Whisper: no fuzzy name matching for 3-char names, require first letter match
- fetch_music.py: post-fetch vocal detection filter using musicinfo tags
- scan_music_vocals.py: new script to scan existing library for vocal tracks

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,122 @@
|
||||
"""Scan music directory for tracks that contain vocals/lyrics.
|
||||
|
||||
Uses Whisper to transcribe a sample from each track — if it picks up
|
||||
actual words, the track likely has vocals.
|
||||
|
||||
Usage:
|
||||
python scan_music_vocals.py # scan and report
|
||||
python scan_music_vocals.py --delete # scan and delete vocal tracks
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
# Folder that gets scanned for audio files; it sits next to this script.
MUSIC_DIR = Path(__file__).parent.joinpath("music")

# Identifier of the Whisper model loaded for transcription.
WHISPER_MODEL = "distil-large-v3"

# Stock phrases Whisper tends to invent when it is fed silence or purely
# instrumental audio. A transcript consisting of one of these is ignored.
HALLUCINATION_PHRASES = {
    "you",
    "bye",
    "okay",
    "the end",
    "thank you",
    "subscribe",
    "please subscribe",
    "like and subscribe",
    "thanks for watching",
    "thanks for listening",
    "thank you for watching",
}
|
||||
|
||||
|
||||
def _normalize_phrase(text: str) -> str:
    """Return *text* lower-cased, with punctuation removed and whitespace collapsed."""
    cleaned = "".join(
        ch if ch.isalnum() or ch.isspace() else " " for ch in text.lower()
    )
    return " ".join(cleaned.split())


def scan_track(model: WhisperModel, filepath: Path) -> tuple[bool, str]:
    """Check a single track for vocals.

    Loads the file as 16 kHz mono audio, transcribes up to 30 seconds taken
    from the middle of the track, and treats three or more transcribed words
    as evidence of vocals.

    Args:
        model: Whisper model used for transcription.
        filepath: Audio file to inspect.

    Returns:
        ``(has_vocals, transcription)``. When the file cannot be loaded or is
        shorter than 10 seconds, ``has_vocals`` is False and the second item
        is a bracketed note (``"[load error: ...]"`` / ``"[too short]"``).
        When only hallucinated phrases (or nothing meaningful) were
        transcribed, the result is ``(False, "")``.
    """
    try:
        audio, sr = librosa.load(str(filepath), sr=16000, mono=True)
    except Exception as e:
        # Best effort: one undecodable file must not abort a whole library scan.
        return False, f"[load error: {e}]"

    duration = len(audio) / sr
    if duration < 10:
        return False, "[too short]"

    # Sample 30s from the middle (most likely to have vocals)
    mid = len(audio) // 2
    half_window = int(15 * sr)  # 15s each side
    start = max(0, mid - half_window)
    end = min(len(audio), mid + half_window)
    sample = audio[start:end]

    segments, _info = model.transcribe(
        sample,
        beam_size=3,
        language="en",
        vad_filter=True,
        vad_parameters=dict(min_speech_duration_ms=500),
    )
    segment_texts = [s.text.strip() for s in segments]

    # Filter out Whisper hallucinations segment by segment. Transcripts may
    # carry capitalisation and punctuation ("Thank you."), so compare the
    # normalised form; otherwise a punctuated stock phrase slips through and
    # "Thanks for watching!" alone would count as three words of vocals.
    # Segments without any letters or digits (e.g. "♪") carry no words either.
    spoken = []
    for seg in segment_texts:
        normalized = _normalize_phrase(seg)
        if normalized and normalized not in HALLUCINATION_PHRASES:
            spoken.append(seg)
    text = " ".join(spoken)

    # The joined text can still be a single stock phrase split across
    # segments, or too short to mean anything.
    if _normalize_phrase(text) in HALLUCINATION_PHRASES or len(text) < 4:
        return False, ""

    # If Whisper found substantial text, it's likely vocals
    word_count = len(text.split())
    has_vocals = word_count >= 3

    return has_vocals, text
|
||||
|
||||
|
||||
def main() -> None:
    """Scan MUSIC_DIR for tracks with vocals and optionally delete them.

    Command-line flags:
        --delete: remove every track that was flagged as containing vocals.

    Prints one status line per track, then a summary listing the flagged
    tracks together with the start of their transcription.
    """
    parser = argparse.ArgumentParser(description="Scan music for vocal tracks")
    parser.add_argument("--delete", action="store_true", help="Delete tracks with vocals")
    args = parser.parse_args()

    audio_suffixes = {".mp3", ".wav", ".ogg", ".flac"}
    # A missing music directory is handled like an empty one instead of
    # crashing with FileNotFoundError; sub-directories that happen to carry
    # an audio suffix are not tracks and are skipped.
    if MUSIC_DIR.is_dir():
        audio_files = sorted(
            f for f in MUSIC_DIR.iterdir()
            if f.is_file() and f.suffix.lower() in audio_suffixes
        )
    else:
        audio_files = []

    if not audio_files:
        print("No audio files found in music/")
        return

    print(f"Loading Whisper {WHISPER_MODEL}...")
    model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")

    print(f"Scanning {len(audio_files)} tracks for vocals...\n")

    vocal_tracks = []
    for i, f in enumerate(audio_files, 1):
        print(f"[{i}/{len(audio_files)}] {f.name}...", end=" ", flush=True)
        has_vocals, text = scan_track(model, f)
        if has_vocals:
            print(f"VOCALS: {text[:80]}")
            vocal_tracks.append((f, text))
        elif text.startswith("["):
            # scan_track reports unreadable or too-short files as a bracketed
            # note; show it rather than claiming the track is fine.
            print(f"SKIPPED {text}")
        else:
            print("OK")

    print(f"\n{'='*60}")
    print(f"Results: {len(vocal_tracks)} tracks with vocals out of {len(audio_files)}\n")

    if not vocal_tracks:
        print("All tracks appear to be instrumental!")
        return

    for f, text in vocal_tracks:
        print(f"  {f.name}")
        print(f"    Lyrics: {text[:120]}")
        print()

    if args.delete:
        print(f"Deleting {len(vocal_tracks)} vocal tracks...")
        for f, _ in vocal_tracks:
            try:
                f.unlink()
            except OSError as e:
                # Keep going: one locked or vanished file should not stop
                # the remaining deletions.
                print(f"  Failed to delete {f.name}: {e}")
            else:
                print(f"  Deleted: {f.name}")
        print("Done.")
    else:
        print("Run with --delete to remove these tracks.")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user