From bbcf767a8f5664089a969a46c0c9216132904c7e Mon Sep 17 00:00:00 2001 From: tcpsyn Date: Mon, 23 Feb 2026 22:24:40 -0700 Subject: [PATCH] =?UTF-8?q?Add=20idents=20playback=20section=20=E2=80=94?= =?UTF-8?q?=20loads=20from=20idents/=20folder,=20plays=20on=20ads=20channe?= =?UTF-8?q?l?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- backend/config.py | 1 + backend/main.py | 82 +++++++++++++++++++++++++--- backend/services/audio.py | 90 +++++++++++++++++++++++++++++++ backend/services/stem_recorder.py | 2 +- frontend/index.html | 12 ++++- frontend/js/app.js | 51 ++++++++++++++++++ postprod.py | 16 +++--- 7 files changed, 237 insertions(+), 17 deletions(-) diff --git a/backend/config.py b/backend/config.py index 7df8d4a..98315a0 100644 --- a/backend/config.py +++ b/backend/config.py @@ -44,6 +44,7 @@ class Settings(BaseSettings): sounds_dir: Path = base_dir / "sounds" music_dir: Path = base_dir / "music" ads_dir: Path = base_dir / "ads" + idents_dir: Path = base_dir / "idents" sessions_dir: Path = base_dir / "sessions" class Config: diff --git a/backend/main.py b/backend/main.py index 6c88179..61088e0 100644 --- a/backend/main.py +++ b/backend/main.py @@ -820,6 +820,20 @@ BEFORE_CALLING = [ "Was just staring at a text they haven't replied to yet.", "Was cleaning their gun at the kitchen table, it's a ritual that helps them think.", "Was parked at the gas station, not ready to go home.", + "Was at the laundromat waiting on a load and heard the show through someone's phone.", + "Was closing up the shop, everyone else went home an hour ago.", + "Was in the bathtub, phone on the edge of the sink, show on speaker.", + "Was on a break at work, sitting in the break room alone.", + "Was at Waffle House at the counter by themselves, couldn't sleep.", + "Was reorganizing the junk drawer, which is what they do when they can't settle.", + "Was at the bar, last one there, bartender's wiping down.", + "Was folding laundry on the couch, show was on the radio in the kitchen.", + "Was laying in a hammock out back, couldn't go inside.", + "Was at a truck stop diner, cup of coffee, staring out the window.", + "Was up late painting — walls, not art — and had the radio on for company.", + "Was at their desk, supposedly working, but mostly just staring at the screen.", + "Was sitting in the waiting room at the ER with someone, long night.", + "Was at the 24-hour gym, basically empty, radio on over the speakers.", ] # Specific memories or stories they can reference @@ -1939,7 +1953,13 @@ PHONE_SITUATION = [ "Using the wifi calling, regular signal is garbage out here", "Stepped outside to call — didn't want to wake anyone up", "In the truck at the gas station — only place with good signal", + "In the truck at the gas station — only place with good signal", "Borrowing my kid's phone, mine's cracked to hell", + "Calling from the back room at work, keeping their voice down", + "On a landline — yeah, they still have one", + "Using earbuds so nobody in the house hears", + "On speakerphone in the kitchen, everyone else is asleep", + "Calling from the motel room, walls are thin so they're whispering", ] BACKGROUND_MUSIC = [ @@ -2356,6 +2376,8 @@ TIME: {time_ctx} {season_ctx} Write 3-5 sentences describing this person — who they are, what's going on in their life, why they're calling tonight. The reason for calling is THE MOST IMPORTANT THING. This person called a radio show because something specific happened or is happening — they have a story to tell, a situation to unpack, or a question they need to talk through. Make it concrete and vivid. Don't be vague ("feeling off," "going through a lot") — give them a specific incident or situation driving the call. Make it feel like a real person, not a character sheet. Vary the structure. Don't use labels or categories — weave details into a natural description. +IMPORTANT: Vary where they're calling from and what they were doing. NOT everyone is sitting in their truck or on the porch. People call from kitchens, break rooms, laundromats, diners, motel rooms, the bathtub, the gym, their desk at work, a bar, a hospital waiting room, a hammock, walking down the road. Mix it up. + Output ONLY the character description, nothing else.""" try: @@ -2641,6 +2663,8 @@ Southwest voice — "over in," "the other day," "down the road" — but don't fo Don't repeat yourself. Don't summarize what you already said. Don't circle back if the host moved on. Keep it moving. +EVERY SENTENCE MUST BE COMPLETE. Never leave a thought hanging or trail off mid-sentence. If you start a sentence, finish it. No sentence fragments, no missing words, no dangling clauses. Say what you mean in clear, complete sentences. + NEVER mention minors in sexual context. Output spoken words only — no actions, no gestures, no stage directions.""" @@ -3899,16 +3923,18 @@ import re def _pick_response_budget() -> tuple[int, int]: """Pick a random max_tokens and sentence cap for response variety. Returns (max_tokens, max_sentences). - Keeps responses conversational but gives room for real answers.""" + Keeps responses conversational but gives room for real answers. + Token budget is intentionally generous to avoid mid-sentence cutoffs — + the sentence cap controls actual length.""" roll = random.random() if roll < 0.15: - return 200, 3 # 15% — quick reaction + return 450, 3 # 15% — quick reaction elif roll < 0.45: - return 350, 4 # 30% — normal conversation + return 500, 4 # 30% — normal conversation elif roll < 0.75: - return 450, 5 # 30% — room to breathe + return 600, 5 # 30% — room to breathe else: - return 550, 6 # 25% — telling a story or riffing + return 700, 6 # 25% — telling a story or riffing def _trim_to_sentences(text: str, max_sentences: int) -> str: @@ -3952,8 +3978,8 @@ def clean_for_tts(text: str) -> str: text = re.sub(r'\s*\[[^\]]*\]\s*', ' ', text) # Remove content in angle brackets: , , etc. text = re.sub(r'\s*<[^>]*>\s*', ' ', text) - # Remove "He/She sighs" style stage directions (full phrase) - text = re.sub(r'\b(He|She|I|They)\s+(sighs?|laughs?|pauses?|smiles?|chuckles?|grins?|nods?|shrugs?|frowns?)[^.]*\.\s*', '', text, flags=re.IGNORECASE) + # Remove "He/She sighs" style stage directions — only short ones (under ~40 chars) to avoid eating real dialog + text = re.sub(r'\b(He|She|I|They)\s+(sighs?|laughs?|pauses?|smiles?|chuckles?|grins?|nods?|shrugs?|frowns?)\s*(heavily|softly|deeply|quietly|loudly|nervously|sadly|a little|for a moment)?[.,]?\s*', '', text, flags=re.IGNORECASE) # Remove standalone stage direction words only if they look like directions (with adverbs) text = re.sub(r'\b(sighs?|laughs?|pauses?|chuckles?)\s+(heavily|softly|deeply|quietly|loudly|nervously|sadly)\b[.,]?\s*', '', text, flags=re.IGNORECASE) # Remove quotes around the response if LLM wrapped it @@ -4336,6 +4362,48 @@ async def stop_ad(): return {"status": "stopped"} +# --- Idents Endpoints --- + +IDENT_DISPLAY_NAMES = {} + + +@app.get("/api/idents") +async def get_idents(): + """Get available ident tracks, shuffled""" + ident_list = [] + if settings.idents_dir.exists(): + for ext in ['*.wav', '*.mp3', '*.flac']: + for f in settings.idents_dir.glob(ext): + ident_list.append({ + "name": IDENT_DISPLAY_NAMES.get(f.stem, f.stem), + "file": f.name, + "path": str(f) + }) + random.shuffle(ident_list) + return {"idents": ident_list} + + +@app.post("/api/idents/play") +async def play_ident(request: MusicRequest): + """Play an ident once on the ad channel (ch 11)""" + ident_path = settings.idents_dir / request.track + if not ident_path.exists(): + raise HTTPException(404, "Ident not found") + + if audio_service._music_playing: + audio_service.stop_music(fade_duration=1.0) + await asyncio.sleep(1.1) + audio_service.play_ident(str(ident_path)) + return {"status": "playing", "track": request.track} + + +@app.post("/api/idents/stop") +async def stop_ident(): + """Stop ident playback""" + audio_service.stop_ident() + return {"status": "stopped"} + + # --- LLM Settings Endpoints --- @app.get("/api/settings") diff --git a/backend/services/audio.py b/backend/services/audio.py index b4d6735..d3afeda 100644 --- a/backend/services/audio.py +++ b/backend/services/audio.py @@ -39,6 +39,13 @@ class AudioService: self._ad_position: int = 0 self._ad_playing: bool = False + # Ident playback state + self._ident_stream: Optional[sd.OutputStream] = None + self._ident_data: Optional[np.ndarray] = None + self._ident_resampled: Optional[np.ndarray] = None + self._ident_position: int = 0 + self._ident_playing: bool = False + # Recording state self._recording = False self._record_thread: Optional[threading.Thread] = None @@ -933,6 +940,7 @@ class AudioService: return self.stop_ad() + self.stop_ident() try: audio, sr = librosa.load(str(path), sr=self.output_sample_rate, mono=True) @@ -1005,6 +1013,88 @@ class AudioService: self._ad_stream = None self._ad_position = 0 + def play_ident(self, file_path: str): + """Load and play an ident file once (no loop) on the ad channel""" + import librosa + + path = Path(file_path) + if not path.exists(): + print(f"Ident file not found: {file_path}") + return + + self.stop_ident() + self.stop_ad() + + try: + audio, sr = librosa.load(str(path), sr=self.output_sample_rate, mono=True) + self._ident_data = audio.astype(np.float32) + except Exception as e: + print(f"Failed to load ident: {e}") + return + + self._ident_playing = True + self._ident_position = 0 + + if self.output_device is None: + num_channels = 2 + device = None + device_sr = self.output_sample_rate + channel_idx = 0 + else: + device_info = sd.query_devices(self.output_device) + num_channels = device_info['max_output_channels'] + device_sr = int(device_info['default_samplerate']) + device = self.output_device + channel_idx = min(self.ad_channel, num_channels) - 1 + + if self.output_sample_rate != device_sr: + self._ident_resampled = librosa.resample( + self._ident_data, orig_sr=self.output_sample_rate, target_sr=device_sr + ).astype(np.float32) + else: + self._ident_resampled = self._ident_data + + def callback(outdata, frames, time_info, status): + outdata[:] = 0 + if not self._ident_playing or self._ident_resampled is None: + return + + remaining = len(self._ident_resampled) - self._ident_position + if remaining >= frames: + chunk = self._ident_resampled[self._ident_position:self._ident_position + frames] + outdata[:, channel_idx] = chunk + if self.stem_recorder: + self.stem_recorder.write_sporadic("idents", chunk.copy(), device_sr) + self._ident_position += frames + else: + if remaining > 0: + outdata[:remaining, channel_idx] = self._ident_resampled[self._ident_position:] + self._ident_playing = False + + try: + self._ident_stream = sd.OutputStream( + device=device, + channels=num_channels, + samplerate=device_sr, + dtype=np.float32, + callback=callback, + blocksize=2048 + ) + self._ident_stream.start() + print(f"Ident playback started on ch {self.ad_channel} @ {device_sr}Hz") + except Exception as e: + print(f"Ident playback error: {e}") + self._ident_playing = False + + def stop_ident(self): + """Stop ident playback""" + self._ident_playing = False + if self._ident_stream: + self._ident_stream.stop() + self._ident_stream.close() + self._ident_stream = None + self._ident_position = 0 + def set_music_volume(self, volume: float): """Set music volume (0.0 to 1.0)""" self._music_volume = max(0.0, min(1.0, volume)) diff --git a/backend/services/stem_recorder.py b/backend/services/stem_recorder.py index 8704e20..9a4beb9 100644 --- a/backend/services/stem_recorder.py +++ b/backend/services/stem_recorder.py @@ -7,7 +7,7 @@ import soundfile as sf from pathlib import Path from collections import deque -STEM_NAMES = ["host", "caller", "music", "sfx", "ads"] +STEM_NAMES = ["host", "caller", "music", "sfx", "ads", "idents"] class StemRecorder: diff --git a/frontend/index.html b/frontend/index.html index a9a6e76..f125b9d 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -112,6 +112,16 @@ + +
+

Idents

+ +
+ + +
+
+

Sounds

@@ -240,6 +250,6 @@ - + diff --git a/frontend/js/app.js b/frontend/js/app.js index 4bdb89a..94914e1 100644 --- a/frontend/js/app.js +++ b/frontend/js/app.js @@ -57,6 +57,7 @@ document.addEventListener('DOMContentLoaded', async () => { await loadCallers(); await loadMusic(); await loadAds(); + await loadIdents(); await loadSounds(); await loadSettings(); initEventListeners(); @@ -189,6 +190,10 @@ function initEventListeners() { document.getElementById('ad-play-btn')?.addEventListener('click', playAd); document.getElementById('ad-stop-btn')?.addEventListener('click', stopAd); + // Idents + document.getElementById('ident-play-btn')?.addEventListener('click', playIdent); + document.getElementById('ident-stop-btn')?.addEventListener('click', stopIdent); + // Settings document.getElementById('settings-btn')?.addEventListener('click', async () => { document.getElementById('settings-modal')?.classList.remove('hidden'); @@ -772,6 +777,52 @@ async function stopAd() { await fetch('/api/ads/stop', { method: 'POST' }); } +async function loadIdents() { + try { + const res = await fetch('/api/idents'); + const data = await res.json(); + const idents = data.idents || []; + + const select = document.getElementById('ident-select'); + if (!select) return; + + const previousValue = select.value; + select.innerHTML = ''; + + idents.forEach(ident => { + const option = document.createElement('option'); + option.value = ident.file; + option.textContent = ident.name; + select.appendChild(option); + }); + + if (previousValue && [...select.options].some(o => o.value === previousValue)) { + select.value = previousValue; + } + + console.log('Loaded', idents.length, 'idents'); + } catch (err) { + console.error('loadIdents error:', err); + } +} + +async function playIdent() { + await loadIdents(); + const select = document.getElementById('ident-select'); + const track = select?.value; + if (!track) return; + + await fetch('/api/idents/play', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ track, action: 'play' }) + }); +} + +async function stopIdent() { + await fetch('/api/idents/stop', { method: 'POST' }); +} + // --- Sound Effects (Server-Side) --- async function loadSounds() { diff --git a/postprod.py b/postprod.py index 76c7913..598dc24 100644 --- a/postprod.py +++ b/postprod.py @@ -3,7 +3,7 @@ Usage: python postprod.py recordings/2026-02-07_213000/ -o episode.mp3 -Processes 5 aligned WAV stems (host, caller, music, sfx, ads) into a +Processes 6 aligned WAV stems (host, caller, music, sfx, ads, idents) into a broadcast-ready MP3 with gap removal, voice compression, music ducking, and loudness normalization. """ @@ -17,7 +17,7 @@ from pathlib import Path import numpy as np import soundfile as sf -STEM_NAMES = ["host", "caller", "music", "sfx", "ads"] +STEM_NAMES = ["host", "caller", "music", "sfx", "ads", "idents"] def load_stems(stems_dir: Path) -> tuple[dict[str, np.ndarray], int]: @@ -69,7 +69,7 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int, # Detect gaps in everything except music (which always plays). # This catches TTS latency gaps while protecting ad breaks and SFX transitions. - content = stems["host"] + stems["caller"] + stems["sfx"] + stems["ads"] + content = stems["host"] + stems["caller"] + stems["sfx"] + stems["ads"] + stems["idents"] rms = compute_rms(content, window_samples) # Threshold: percentile-based to sit above the mic noise floor @@ -386,7 +386,7 @@ def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int, def match_voice_levels(stems: dict[str, np.ndarray], target_rms: float = 0.1) -> dict[str, np.ndarray]: """Normalize host, caller, and ads stems to the same RMS level.""" - for name in ["host", "caller", "ads"]: + for name in ["host", "caller", "ads", "idents"]: audio = stems[name] # Only measure non-silent portions active = audio[np.abs(audio) > 0.001] @@ -408,7 +408,7 @@ def mix_stems(stems: dict[str, np.ndarray], levels: dict[str, float] | None = None, stereo_imaging: bool = True) -> np.ndarray: if levels is None: - levels = {"host": 0, "caller": 0, "music": -6, "sfx": -10, "ads": 0} + levels = {"host": 0, "caller": 0, "music": -6, "sfx": -10, "ads": 0, "idents": 0} gains = {name: 10 ** (db / 20) for name, db in levels.items()} @@ -417,7 +417,7 @@ def mix_stems(stems: dict[str, np.ndarray], if stereo_imaging: # Pan positions: -1.0 = full left, 0.0 = center, 1.0 = full right # Using constant-power panning law - pans = {"host": 0.0, "caller": 0.15, "music": 0.0, "sfx": 0.0, "ads": 0.0} + pans = {"host": 0.0, "caller": 0.15, "music": 0.0, "sfx": 0.0, "ads": 0.0, "idents": 0.0} # Music gets stereo width via slight L/R decorrelation music_width = 0.3 @@ -774,7 +774,7 @@ def main(): print(f"\n[3/{total_steps}] Limiting ads + SFX...") with tempfile.TemporaryDirectory() as tmp: tmp_dir = Path(tmp) - for name in ["ads", "sfx"]: + for name in ["ads", "sfx", "idents"]: if np.any(stems[name] != 0): stems[name] = limit_stem(stems[name], sr, tmp_dir, name) @@ -834,7 +834,7 @@ def main(): dialog = stems["host"] + stems["caller"] if np.any(dialog != 0) and np.any(stems["music"] != 0): stems["music"] = apply_ducking(stems["music"], dialog, sr, duck_db=args.duck_amount, - mute_signal=stems["ads"]) + mute_signal=stems["ads"] + stems["idents"]) print(" Applied") else: print(" No dialog or music to duck")