Add idents playback section — loads from idents/ folder, plays on ads channel

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 22:24:40 -07:00
parent b1bd4ed365
commit bbcf767a8f
7 changed files with 237 additions and 17 deletions

View File

@@ -44,6 +44,7 @@ class Settings(BaseSettings):
sounds_dir: Path = base_dir / "sounds"
music_dir: Path = base_dir / "music"
ads_dir: Path = base_dir / "ads"
idents_dir: Path = base_dir / "idents"
sessions_dir: Path = base_dir / "sessions"
class Config:

View File

@@ -820,6 +820,20 @@ BEFORE_CALLING = [
"Was just staring at a text they haven't replied to yet.",
"Was cleaning their gun at the kitchen table, it's a ritual that helps them think.",
"Was parked at the gas station, not ready to go home.",
"Was at the laundromat waiting on a load and heard the show through someone's phone.",
"Was closing up the shop, everyone else went home an hour ago.",
"Was in the bathtub, phone on the edge of the sink, show on speaker.",
"Was on a break at work, sitting in the break room alone.",
"Was at Waffle House at the counter by themselves, couldn't sleep.",
"Was reorganizing the junk drawer, which is what they do when they can't settle.",
"Was at the bar, last one there, bartender's wiping down.",
"Was folding laundry on the couch, show was on the radio in the kitchen.",
"Was laying in a hammock out back, couldn't go inside.",
"Was at a truck stop diner, cup of coffee, staring out the window.",
"Was up late painting — walls, not art — and had the radio on for company.",
"Was at their desk, supposedly working, but mostly just staring at the screen.",
"Was sitting in the waiting room at the ER with someone, long night.",
"Was at the 24-hour gym, basically empty, radio on over the speakers.",
]
# Specific memories or stories they can reference
@@ -1939,7 +1953,13 @@ PHONE_SITUATION = [
"Using the wifi calling, regular signal is garbage out here",
"Stepped outside to call — didn't want to wake anyone up",
"In the truck at the gas station — only place with good signal",
"In the truck at the gas station — only place with good signal",
"Borrowing my kid's phone, mine's cracked to hell",
"Calling from the back room at work, keeping their voice down",
"On a landline — yeah, they still have one",
"Using earbuds so nobody in the house hears",
"On speakerphone in the kitchen, everyone else is asleep",
"Calling from the motel room, walls are thin so they're whispering",
]
BACKGROUND_MUSIC = [
@@ -2356,6 +2376,8 @@ TIME: {time_ctx} {season_ctx}
Write 3-5 sentences describing this person — who they are, what's going on in their life, why they're calling tonight. The reason for calling is THE MOST IMPORTANT THING. This person called a radio show because something specific happened or is happening — they have a story to tell, a situation to unpack, or a question they need to talk through. Make it concrete and vivid. Don't be vague ("feeling off," "going through a lot") — give them a specific incident or situation driving the call. Make it feel like a real person, not a character sheet. Vary the structure. Don't use labels or categories — weave details into a natural description.
IMPORTANT: Vary where they're calling from and what they were doing. NOT everyone is sitting in their truck or on the porch. People call from kitchens, break rooms, laundromats, diners, motel rooms, the bathtub, the gym, their desk at work, a bar, a hospital waiting room, a hammock, walking down the road. Mix it up.
Output ONLY the character description, nothing else."""
try:
@@ -2641,6 +2663,8 @@ Southwest voice — "over in," "the other day," "down the road" — but don't fo
Don't repeat yourself. Don't summarize what you already said. Don't circle back if the host moved on. Keep it moving.
EVERY SENTENCE MUST BE COMPLETE. Never leave a thought hanging or trail off mid-sentence. If you start a sentence, finish it. No sentence fragments, no missing words, no dangling clauses. Say what you mean in clear, complete sentences.
NEVER mention minors in sexual context. Output spoken words only — no actions, no gestures, no stage directions."""
@@ -3899,16 +3923,18 @@ import re
def _pick_response_budget() -> tuple[int, int]:
"""Pick a random max_tokens and sentence cap for response variety.
Returns (max_tokens, max_sentences).
Keeps responses conversational but gives room for real answers."""
Keeps responses conversational but gives room for real answers.
Token budget is intentionally generous to avoid mid-sentence cutoffs —
the sentence cap controls actual length."""
roll = random.random()
if roll < 0.15:
return 200, 3 # 15% — quick reaction
return 450, 3 # 15% — quick reaction
elif roll < 0.45:
return 350, 4 # 30% — normal conversation
return 500, 4 # 30% — normal conversation
elif roll < 0.75:
return 450, 5 # 30% — room to breathe
return 600, 5 # 30% — room to breathe
else:
return 550, 6 # 25% — telling a story or riffing
return 700, 6 # 25% — telling a story or riffing
def _trim_to_sentences(text: str, max_sentences: int) -> str:
@@ -3952,8 +3978,8 @@ def clean_for_tts(text: str) -> str:
text = re.sub(r'\s*\[[^\]]*\]\s*', ' ', text)
# Remove content in angle brackets: <laughs>, <sigh>, etc.
text = re.sub(r'\s*<[^>]*>\s*', ' ', text)
# Remove "He/She sighs" style stage directions (full phrase)
text = re.sub(r'\b(He|She|I|They)\s+(sighs?|laughs?|pauses?|smiles?|chuckles?|grins?|nods?|shrugs?|frowns?)[^.]*\.\s*', '', text, flags=re.IGNORECASE)
# Remove "He/She sighs" style stage directions — only short ones (under ~40 chars) to avoid eating real dialog
text = re.sub(r'\b(He|She|I|They)\s+(sighs?|laughs?|pauses?|smiles?|chuckles?|grins?|nods?|shrugs?|frowns?)\s*(heavily|softly|deeply|quietly|loudly|nervously|sadly|a little|for a moment)?[.,]?\s*', '', text, flags=re.IGNORECASE)
# Remove standalone stage direction words only if they look like directions (with adverbs)
text = re.sub(r'\b(sighs?|laughs?|pauses?|chuckles?)\s+(heavily|softly|deeply|quietly|loudly|nervously|sadly)\b[.,]?\s*', '', text, flags=re.IGNORECASE)
# Remove quotes around the response if LLM wrapped it
@@ -4336,6 +4362,48 @@ async def stop_ad():
return {"status": "stopped"}
# --- Idents Endpoints ---
IDENT_DISPLAY_NAMES = {}
@app.get("/api/idents")
async def get_idents():
    """List available ident tracks in randomized order."""
    files = []
    if settings.idents_dir.exists():
        # Collect every supported audio format from the idents folder.
        for pattern in ('*.wav', '*.mp3', '*.flac'):
            files.extend(settings.idents_dir.glob(pattern))
    ident_list = [
        {
            "name": IDENT_DISPLAY_NAMES.get(f.stem, f.stem),
            "file": f.name,
            "path": str(f),
        }
        for f in files
    ]
    # Shuffle so repeated plays don't always start from the same track.
    random.shuffle(ident_list)
    return {"idents": ident_list}
@app.post("/api/idents/play")
async def play_ident(request: MusicRequest):
    """Play an ident once on the ad channel (ch 11).

    Fades out any running music first so the ident doesn't fight it.
    The track name comes from the client, so the resolved path is
    validated to stay inside idents_dir — the original joined
    request.track straight onto the directory, allowing path traversal
    (e.g. "../../some/other/file").
    """
    idents_root = settings.idents_dir.resolve()
    ident_path = (settings.idents_dir / request.track).resolve()
    # Reject anything that escapes the idents directory, then check existence.
    # A plain 404 avoids leaking whether the traversal target exists.
    if idents_root not in ident_path.parents or not ident_path.exists():
        raise HTTPException(404, "Ident not found")
    if audio_service._music_playing:
        audio_service.stop_music(fade_duration=1.0)
        await asyncio.sleep(1.1)  # let the fade finish before the ident starts
    audio_service.play_ident(str(ident_path))
    return {"status": "playing", "track": request.track}
@app.post("/api/idents/stop")
async def stop_ident():
    """Halt any ident currently playing on the ad channel."""
    audio_service.stop_ident()
    return {"status": "stopped"}
# --- LLM Settings Endpoints ---
@app.get("/api/settings")

View File

@@ -39,6 +39,13 @@ class AudioService:
self._ad_position: int = 0
self._ad_playing: bool = False
# Ident playback state
self._ident_stream: Optional[sd.OutputStream] = None
self._ident_data: Optional[np.ndarray] = None
self._ident_resampled: Optional[np.ndarray] = None
self._ident_position: int = 0
self._ident_playing: bool = False
# Recording state
self._recording = False
self._record_thread: Optional[threading.Thread] = None
@@ -933,6 +940,7 @@ class AudioService:
return
self.stop_ad()
self.stop_ident()
try:
audio, sr = librosa.load(str(path), sr=self.output_sample_rate, mono=True)
@@ -1005,6 +1013,88 @@ class AudioService:
self._ad_stream = None
self._ad_position = 0
def play_ident(self, file_path: str):
    """Load and play an ident file once (no loop) on the ad channel.

    Stops any ident or ad already playing (they share the ad channel),
    loads the file mono at the service sample rate, resamples to the
    device rate when they differ, and streams it on the configured
    channel. Playback ends on its own when the audio runs out;
    stop_ident() tears the stream down.

    Args:
        file_path: Path to a .wav/.mp3/.flac ident file.
    """
    import librosa
    path = Path(file_path)
    if not path.exists():
        print(f"Ident file not found: {file_path}")
        return

    # Idents share the ad channel, so both must be silenced first.
    self.stop_ident()
    self.stop_ad()

    try:
        audio, sr = librosa.load(str(path), sr=self.output_sample_rate, mono=True)
        self._ident_data = audio.astype(np.float32)
    except Exception as e:
        print(f"Failed to load ident: {e}")
        return

    self._ident_playing = True
    self._ident_position = 0

    if self.output_device is None:
        num_channels = 2
        device = None
        device_sr = self.output_sample_rate
        channel_idx = 0
    else:
        device_info = sd.query_devices(self.output_device)
        num_channels = device_info['max_output_channels']
        device_sr = int(device_info['default_samplerate'])
        device = self.output_device
        # Clamp to the device's channel count (ad_channel is 1-based).
        channel_idx = min(self.ad_channel, num_channels) - 1

    if self.output_sample_rate != device_sr:
        self._ident_resampled = librosa.resample(
            self._ident_data, orig_sr=self.output_sample_rate, target_sr=device_sr
        ).astype(np.float32)
    else:
        self._ident_resampled = self._ident_data

    def callback(outdata, frames, time_info, status):
        outdata[:] = 0
        if not self._ident_playing or self._ident_resampled is None:
            return
        remaining = len(self._ident_resampled) - self._ident_position
        if remaining >= frames:
            chunk = self._ident_resampled[self._ident_position:self._ident_position + frames]
            outdata[:, channel_idx] = chunk
            if self.stem_recorder:
                self.stem_recorder.write_sporadic("idents", chunk.copy(), device_sr)
            self._ident_position += frames
        else:
            if remaining > 0:
                # Final partial buffer: play it AND record it. The previous
                # version skipped the stem write here and never advanced the
                # position, truncating the tail of the recorded "idents" stem.
                tail = self._ident_resampled[self._ident_position:]
                outdata[:remaining, channel_idx] = tail
                if self.stem_recorder:
                    self.stem_recorder.write_sporadic("idents", tail.copy(), device_sr)
                self._ident_position += remaining
            self._ident_playing = False

    try:
        self._ident_stream = sd.OutputStream(
            device=device,
            channels=num_channels,
            samplerate=device_sr,
            dtype=np.float32,
            callback=callback,
            blocksize=2048
        )
        self._ident_stream.start()
        print(f"Ident playback started on ch {self.ad_channel} @ {device_sr}Hz")
    except Exception as e:
        print(f"Ident playback error: {e}")
        self._ident_playing = False
def stop_ident(self):
    """Halt ident playback and tear down its output stream."""
    self._ident_playing = False
    stream = self._ident_stream
    if stream:
        stream.stop()
        stream.close()
        self._ident_stream = None
    self._ident_position = 0
def set_music_volume(self, volume: float):
"""Set music volume (0.0 to 1.0)"""
self._music_volume = max(0.0, min(1.0, volume))

View File

@@ -7,7 +7,7 @@ import soundfile as sf
from pathlib import Path
from collections import deque
STEM_NAMES = ["host", "caller", "music", "sfx", "ads"]
STEM_NAMES = ["host", "caller", "music", "sfx", "ads", "idents"]
class StemRecorder:

View File

@@ -112,6 +112,16 @@
</div>
</section>
<!-- Idents -->
<section class="music-section">
<h2>Idents</h2>
<select id="ident-select"></select>
<div class="music-controls">
<button id="ident-play-btn">Play Ident</button>
<button id="ident-stop-btn">Stop</button>
</div>
</section>
<!-- Sound Effects -->
<section class="sounds-section">
<h2>Sounds</h2>
@@ -240,6 +250,6 @@
</div>
</div>
<script src="/js/app.js?v=17"></script>
<script src="/js/app.js?v=18"></script>
</body>
</html>

View File

@@ -57,6 +57,7 @@ document.addEventListener('DOMContentLoaded', async () => {
await loadCallers();
await loadMusic();
await loadAds();
await loadIdents();
await loadSounds();
await loadSettings();
initEventListeners();
@@ -189,6 +190,10 @@ function initEventListeners() {
document.getElementById('ad-play-btn')?.addEventListener('click', playAd);
document.getElementById('ad-stop-btn')?.addEventListener('click', stopAd);
// Idents
document.getElementById('ident-play-btn')?.addEventListener('click', playIdent);
document.getElementById('ident-stop-btn')?.addEventListener('click', stopIdent);
// Settings
document.getElementById('settings-btn')?.addEventListener('click', async () => {
document.getElementById('settings-modal')?.classList.remove('hidden');
@@ -772,6 +777,52 @@ async function stopAd() {
await fetch('/api/ads/stop', { method: 'POST' });
}
async function loadIdents() {
    // Fetch the ident list and rebuild the dropdown, keeping the user's
    // current selection when that track still exists.
    try {
        const res = await fetch('/api/idents');
        const data = await res.json();
        const idents = data.idents || [];
        const select = document.getElementById('ident-select');
        if (!select) return;
        const previousValue = select.value;
        select.innerHTML = '';
        for (const ident of idents) {
            const option = document.createElement('option');
            option.value = ident.file;
            option.textContent = ident.name;
            select.appendChild(option);
        }
        const stillExists = [...select.options].some(o => o.value === previousValue);
        if (previousValue && stillExists) {
            select.value = previousValue;
        }
        console.log('Loaded', idents.length, 'idents');
    } catch (err) {
        console.error('loadIdents error:', err);
    }
}
async function playIdent() {
    // Refresh the list first so a stale dropdown can't request a deleted file.
    await loadIdents();
    const track = document.getElementById('ident-select')?.value;
    if (!track) return;
    await fetch('/api/idents/play', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ track, action: 'play' })
    });
}
async function stopIdent() {
    // Fire-and-forget; the server handles stream teardown.
    await fetch('/api/idents/stop', { method: 'POST' });
}
// --- Sound Effects (Server-Side) ---
async function loadSounds() {

View File

@@ -3,7 +3,7 @@
Usage: python postprod.py recordings/2026-02-07_213000/ -o episode.mp3
Processes 5 aligned WAV stems (host, caller, music, sfx, ads) into a
Processes 6 aligned WAV stems (host, caller, music, sfx, ads, idents) into a
broadcast-ready MP3 with gap removal, voice compression, music ducking,
and loudness normalization.
"""
@@ -17,7 +17,7 @@ from pathlib import Path
import numpy as np
import soundfile as sf
STEM_NAMES = ["host", "caller", "music", "sfx", "ads"]
STEM_NAMES = ["host", "caller", "music", "sfx", "ads", "idents"]
def load_stems(stems_dir: Path) -> tuple[dict[str, np.ndarray], int]:
@@ -69,7 +69,7 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int,
# Detect gaps in everything except music (which always plays).
# This catches TTS latency gaps while protecting ad breaks and SFX transitions.
content = stems["host"] + stems["caller"] + stems["sfx"] + stems["ads"]
content = stems["host"] + stems["caller"] + stems["sfx"] + stems["ads"] + stems["idents"]
rms = compute_rms(content, window_samples)
# Threshold: percentile-based to sit above the mic noise floor
@@ -386,7 +386,7 @@ def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int,
def match_voice_levels(stems: dict[str, np.ndarray], target_rms: float = 0.1) -> dict[str, np.ndarray]:
"""Normalize host, caller, and ads stems to the same RMS level."""
for name in ["host", "caller", "ads"]:
for name in ["host", "caller", "ads", "idents"]:
audio = stems[name]
# Only measure non-silent portions
active = audio[np.abs(audio) > 0.001]
@@ -408,7 +408,7 @@ def mix_stems(stems: dict[str, np.ndarray],
levels: dict[str, float] | None = None,
stereo_imaging: bool = True) -> np.ndarray:
if levels is None:
levels = {"host": 0, "caller": 0, "music": -6, "sfx": -10, "ads": 0}
levels = {"host": 0, "caller": 0, "music": -6, "sfx": -10, "ads": 0, "idents": 0}
gains = {name: 10 ** (db / 20) for name, db in levels.items()}
@@ -417,7 +417,7 @@ def mix_stems(stems: dict[str, np.ndarray],
if stereo_imaging:
# Pan positions: -1.0 = full left, 0.0 = center, 1.0 = full right
# Using constant-power panning law
pans = {"host": 0.0, "caller": 0.15, "music": 0.0, "sfx": 0.0, "ads": 0.0}
pans = {"host": 0.0, "caller": 0.15, "music": 0.0, "sfx": 0.0, "ads": 0.0, "idents": 0.0}
# Music gets stereo width via slight L/R decorrelation
music_width = 0.3
@@ -774,7 +774,7 @@ def main():
print(f"\n[3/{total_steps}] Limiting ads + SFX...")
with tempfile.TemporaryDirectory() as tmp:
tmp_dir = Path(tmp)
for name in ["ads", "sfx"]:
for name in ["ads", "sfx", "idents"]:
if np.any(stems[name] != 0):
stems[name] = limit_stem(stems[name], sr, tmp_dir, name)
@@ -834,7 +834,7 @@ def main():
dialog = stems["host"] + stems["caller"]
if np.any(dialog != 0) and np.any(stems["music"] != 0):
stems["music"] = apply_ducking(stems["music"], dialog, sr, duck_db=args.duck_amount,
mute_signal=stems["ads"])
mute_signal=stems["ads"] + stems["idents"])
print(" Applied")
else:
print(" No dialog or music to duck")