Add idents playback section — loads from idents/ folder, plays on ads channel

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 22:24:40 -07:00
parent b1bd4ed365
commit bbcf767a8f
7 changed files with 237 additions and 17 deletions

View File

@@ -44,6 +44,7 @@ class Settings(BaseSettings):
sounds_dir: Path = base_dir / "sounds"
music_dir: Path = base_dir / "music"
ads_dir: Path = base_dir / "ads"
idents_dir: Path = base_dir / "idents"
sessions_dir: Path = base_dir / "sessions"
class Config:

View File

@@ -820,6 +820,20 @@ BEFORE_CALLING = [
"Was just staring at a text they haven't replied to yet.",
"Was cleaning their gun at the kitchen table, it's a ritual that helps them think.",
"Was parked at the gas station, not ready to go home.",
"Was at the laundromat waiting on a load and heard the show through someone's phone.",
"Was closing up the shop, everyone else went home an hour ago.",
"Was in the bathtub, phone on the edge of the sink, show on speaker.",
"Was on a break at work, sitting in the break room alone.",
"Was at Waffle House at the counter by themselves, couldn't sleep.",
"Was reorganizing the junk drawer, which is what they do when they can't settle.",
"Was at the bar, last one there, bartender's wiping down.",
"Was folding laundry on the couch, show was on the radio in the kitchen.",
"Was laying in a hammock out back, couldn't go inside.",
"Was at a truck stop diner, cup of coffee, staring out the window.",
"Was up late painting — walls, not art — and had the radio on for company.",
"Was at their desk, supposedly working, but mostly just staring at the screen.",
"Was sitting in the waiting room at the ER with someone, long night.",
"Was at the 24-hour gym, basically empty, radio on over the speakers.",
]
# Specific memories or stories they can reference
@@ -1939,7 +1953,13 @@ PHONE_SITUATION = [
"Using the wifi calling, regular signal is garbage out here",
"Stepped outside to call — didn't want to wake anyone up",
"In the truck at the gas station — only place with good signal",
"In the truck at the gas station — only place with good signal",
"Borrowing my kid's phone, mine's cracked to hell",
"Calling from the back room at work, keeping their voice down",
"On a landline — yeah, they still have one",
"Using earbuds so nobody in the house hears",
"On speakerphone in the kitchen, everyone else is asleep",
"Calling from the motel room, walls are thin so they're whispering",
]
BACKGROUND_MUSIC = [
@@ -2356,6 +2376,8 @@ TIME: {time_ctx} {season_ctx}
Write 3-5 sentences describing this person — who they are, what's going on in their life, why they're calling tonight. The reason for calling is THE MOST IMPORTANT THING. This person called a radio show because something specific happened or is happening — they have a story to tell, a situation to unpack, or a question they need to talk through. Make it concrete and vivid. Don't be vague ("feeling off," "going through a lot") — give them a specific incident or situation driving the call. Make it feel like a real person, not a character sheet. Vary the structure. Don't use labels or categories — weave details into a natural description.
IMPORTANT: Vary where they're calling from and what they were doing. NOT everyone is sitting in their truck or on the porch. People call from kitchens, break rooms, laundromats, diners, motel rooms, the bathtub, the gym, their desk at work, a bar, a hospital waiting room, a hammock, walking down the road. Mix it up.
Output ONLY the character description, nothing else."""
try:
@@ -2641,6 +2663,8 @@ Southwest voice — "over in," "the other day," "down the road" — but don't fo
Don't repeat yourself. Don't summarize what you already said. Don't circle back if the host moved on. Keep it moving.
EVERY SENTENCE MUST BE COMPLETE. Never leave a thought hanging or trail off mid-sentence. If you start a sentence, finish it. No sentence fragments, no missing words, no dangling clauses. Say what you mean in clear, complete sentences.
NEVER mention minors in sexual context. Output spoken words only — no actions, no gestures, no stage directions."""
@@ -3899,16 +3923,18 @@ import re
def _pick_response_budget() -> tuple[int, int]:
"""Pick a random max_tokens and sentence cap for response variety.
Returns (max_tokens, max_sentences).
Keeps responses conversational but gives room for real answers."""
Keeps responses conversational but gives room for real answers.
Token budget is intentionally generous to avoid mid-sentence cutoffs —
the sentence cap controls actual length."""
roll = random.random()
if roll < 0.15:
return 200, 3 # 15% — quick reaction
return 450, 3 # 15% — quick reaction
elif roll < 0.45:
return 350, 4 # 30% — normal conversation
return 500, 4 # 30% — normal conversation
elif roll < 0.75:
return 450, 5 # 30% — room to breathe
return 600, 5 # 30% — room to breathe
else:
return 550, 6 # 25% — telling a story or riffing
return 700, 6 # 25% — telling a story or riffing
def _trim_to_sentences(text: str, max_sentences: int) -> str:
@@ -3952,8 +3978,8 @@ def clean_for_tts(text: str) -> str:
text = re.sub(r'\s*\[[^\]]*\]\s*', ' ', text)
# Remove content in angle brackets: <laughs>, <sigh>, etc.
text = re.sub(r'\s*<[^>]*>\s*', ' ', text)
# Remove "He/She sighs" style stage directions (full phrase)
text = re.sub(r'\b(He|She|I|They)\s+(sighs?|laughs?|pauses?|smiles?|chuckles?|grins?|nods?|shrugs?|frowns?)[^.]*\.\s*', '', text, flags=re.IGNORECASE)
# Remove "He/She sighs" style stage directions — only short ones (under ~40 chars) to avoid eating real dialog
text = re.sub(r'\b(He|She|I|They)\s+(sighs?|laughs?|pauses?|smiles?|chuckles?|grins?|nods?|shrugs?|frowns?)\s*(heavily|softly|deeply|quietly|loudly|nervously|sadly|a little|for a moment)?[.,]?\s*', '', text, flags=re.IGNORECASE)
# Remove standalone stage direction words only if they look like directions (with adverbs)
text = re.sub(r'\b(sighs?|laughs?|pauses?|chuckles?)\s+(heavily|softly|deeply|quietly|loudly|nervously|sadly)\b[.,]?\s*', '', text, flags=re.IGNORECASE)
# Remove quotes around the response if LLM wrapped it
@@ -4336,6 +4362,48 @@ async def stop_ad():
return {"status": "stopped"}
# --- Idents Endpoints ---
IDENT_DISPLAY_NAMES = {}
@app.get("/api/idents")
async def get_idents():
    """List available ident tracks in randomized order."""
    files = []
    if settings.idents_dir.exists():
        # Collect every supported audio format from the idents folder.
        for pattern in ('*.wav', '*.mp3', '*.flac'):
            files.extend(settings.idents_dir.glob(pattern))
    ident_list = [
        {
            "name": IDENT_DISPLAY_NAMES.get(f.stem, f.stem),
            "file": f.name,
            "path": str(f),
        }
        for f in files
    ]
    # Shuffle so repeated plays don't always start from the same track.
    random.shuffle(ident_list)
    return {"idents": ident_list}
@app.post("/api/idents/play")
async def play_ident(request: MusicRequest):
    """Play an ident once on the ad channel (ch 11).

    Fades out any running music first so the ident doesn't fight it.
    The track name comes from the client, so the resolved path is
    validated to stay inside idents_dir — the original joined
    request.track straight onto the directory, allowing path traversal
    (e.g. "../../some/other/file").
    """
    idents_root = settings.idents_dir.resolve()
    ident_path = (settings.idents_dir / request.track).resolve()
    # Reject anything that escapes the idents directory, then check existence.
    # A plain 404 avoids leaking whether the traversal target exists.
    if idents_root not in ident_path.parents or not ident_path.exists():
        raise HTTPException(404, "Ident not found")
    if audio_service._music_playing:
        audio_service.stop_music(fade_duration=1.0)
        await asyncio.sleep(1.1)  # let the fade finish before the ident starts
    audio_service.play_ident(str(ident_path))
    return {"status": "playing", "track": request.track}
@app.post("/api/idents/stop")
async def stop_ident():
    """Halt any ident currently playing on the ad channel."""
    audio_service.stop_ident()
    return {"status": "stopped"}
# --- LLM Settings Endpoints ---
@app.get("/api/settings")

View File

@@ -39,6 +39,13 @@ class AudioService:
self._ad_position: int = 0
self._ad_playing: bool = False
# Ident playback state
self._ident_stream: Optional[sd.OutputStream] = None
self._ident_data: Optional[np.ndarray] = None
self._ident_resampled: Optional[np.ndarray] = None
self._ident_position: int = 0
self._ident_playing: bool = False
# Recording state
self._recording = False
self._record_thread: Optional[threading.Thread] = None
@@ -933,6 +940,7 @@ class AudioService:
return
self.stop_ad()
self.stop_ident()
try:
audio, sr = librosa.load(str(path), sr=self.output_sample_rate, mono=True)
@@ -1005,6 +1013,88 @@ class AudioService:
self._ad_stream = None
self._ad_position = 0
def play_ident(self, file_path: str):
    """Load and play an ident file once (no loop) on the ad channel.

    Stops any ident or ad already playing (they share the ad channel),
    loads the file mono at the service sample rate, resamples to the
    device rate when they differ, and streams it on the configured
    channel. Playback ends on its own when the audio runs out;
    stop_ident() tears the stream down.

    Args:
        file_path: Path to a .wav/.mp3/.flac ident file.
    """
    import librosa
    path = Path(file_path)
    if not path.exists():
        print(f"Ident file not found: {file_path}")
        return

    # Idents share the ad channel, so both must be silenced first.
    self.stop_ident()
    self.stop_ad()

    try:
        audio, sr = librosa.load(str(path), sr=self.output_sample_rate, mono=True)
        self._ident_data = audio.astype(np.float32)
    except Exception as e:
        print(f"Failed to load ident: {e}")
        return

    self._ident_playing = True
    self._ident_position = 0

    if self.output_device is None:
        num_channels = 2
        device = None
        device_sr = self.output_sample_rate
        channel_idx = 0
    else:
        device_info = sd.query_devices(self.output_device)
        num_channels = device_info['max_output_channels']
        device_sr = int(device_info['default_samplerate'])
        device = self.output_device
        # Clamp to the device's channel count (ad_channel is 1-based).
        channel_idx = min(self.ad_channel, num_channels) - 1

    if self.output_sample_rate != device_sr:
        self._ident_resampled = librosa.resample(
            self._ident_data, orig_sr=self.output_sample_rate, target_sr=device_sr
        ).astype(np.float32)
    else:
        self._ident_resampled = self._ident_data

    def callback(outdata, frames, time_info, status):
        outdata[:] = 0
        if not self._ident_playing or self._ident_resampled is None:
            return
        remaining = len(self._ident_resampled) - self._ident_position
        if remaining >= frames:
            chunk = self._ident_resampled[self._ident_position:self._ident_position + frames]
            outdata[:, channel_idx] = chunk
            if self.stem_recorder:
                self.stem_recorder.write_sporadic("idents", chunk.copy(), device_sr)
            self._ident_position += frames
        else:
            if remaining > 0:
                # Final partial buffer: play it AND record it. The previous
                # version skipped the stem write here and never advanced the
                # position, truncating the tail of the recorded "idents" stem.
                tail = self._ident_resampled[self._ident_position:]
                outdata[:remaining, channel_idx] = tail
                if self.stem_recorder:
                    self.stem_recorder.write_sporadic("idents", tail.copy(), device_sr)
                self._ident_position += remaining
            self._ident_playing = False

    try:
        self._ident_stream = sd.OutputStream(
            device=device,
            channels=num_channels,
            samplerate=device_sr,
            dtype=np.float32,
            callback=callback,
            blocksize=2048
        )
        self._ident_stream.start()
        print(f"Ident playback started on ch {self.ad_channel} @ {device_sr}Hz")
    except Exception as e:
        print(f"Ident playback error: {e}")
        self._ident_playing = False
def stop_ident(self):
    """Halt ident playback and tear down its output stream."""
    self._ident_playing = False
    stream = self._ident_stream
    if stream:
        stream.stop()
        stream.close()
        self._ident_stream = None
    self._ident_position = 0
def set_music_volume(self, volume: float):
"""Set music volume (0.0 to 1.0)"""
self._music_volume = max(0.0, min(1.0, volume))

View File

@@ -7,7 +7,7 @@ import soundfile as sf
from pathlib import Path
from collections import deque
STEM_NAMES = ["host", "caller", "music", "sfx", "ads"]
STEM_NAMES = ["host", "caller", "music", "sfx", "ads", "idents"]
class StemRecorder:

View File

@@ -112,6 +112,16 @@
</div>
</section>
<!-- Idents -->
<section class="music-section">
<h2>Idents</h2>
<select id="ident-select"></select>
<div class="music-controls">
<button id="ident-play-btn">Play Ident</button>
<button id="ident-stop-btn">Stop</button>
</div>
</section>
<!-- Sound Effects -->
<section class="sounds-section">
<h2>Sounds</h2>
@@ -240,6 +250,6 @@
</div>
</div>
<script src="/js/app.js?v=17"></script>
<script src="/js/app.js?v=18"></script>
</body>
</html>

View File

@@ -57,6 +57,7 @@ document.addEventListener('DOMContentLoaded', async () => {
await loadCallers();
await loadMusic();
await loadAds();
await loadIdents();
await loadSounds();
await loadSettings();
initEventListeners();
@@ -189,6 +190,10 @@ function initEventListeners() {
document.getElementById('ad-play-btn')?.addEventListener('click', playAd);
document.getElementById('ad-stop-btn')?.addEventListener('click', stopAd);
// Idents
document.getElementById('ident-play-btn')?.addEventListener('click', playIdent);
document.getElementById('ident-stop-btn')?.addEventListener('click', stopIdent);
// Settings
document.getElementById('settings-btn')?.addEventListener('click', async () => {
document.getElementById('settings-modal')?.classList.remove('hidden');
@@ -772,6 +777,52 @@ async function stopAd() {
await fetch('/api/ads/stop', { method: 'POST' });
}
async function loadIdents() {
    // Fetch the ident list and rebuild the dropdown, keeping the user's
    // current selection when that track still exists.
    try {
        const res = await fetch('/api/idents');
        const data = await res.json();
        const idents = data.idents || [];
        const select = document.getElementById('ident-select');
        if (!select) return;
        const previousValue = select.value;
        select.innerHTML = '';
        for (const ident of idents) {
            const option = document.createElement('option');
            option.value = ident.file;
            option.textContent = ident.name;
            select.appendChild(option);
        }
        const stillExists = [...select.options].some(o => o.value === previousValue);
        if (previousValue && stillExists) {
            select.value = previousValue;
        }
        console.log('Loaded', idents.length, 'idents');
    } catch (err) {
        console.error('loadIdents error:', err);
    }
}
async function playIdent() {
    // Refresh the list first so a stale dropdown can't request a deleted file.
    await loadIdents();
    const track = document.getElementById('ident-select')?.value;
    if (!track) return;
    await fetch('/api/idents/play', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ track, action: 'play' })
    });
}
async function stopIdent() {
    // Fire-and-forget; the server handles stream teardown.
    await fetch('/api/idents/stop', { method: 'POST' });
}
// --- Sound Effects (Server-Side) ---
async function loadSounds() {

View File

@@ -3,7 +3,7 @@
Usage: python postprod.py recordings/2026-02-07_213000/ -o episode.mp3
Processes 5 aligned WAV stems (host, caller, music, sfx, ads) into a
Processes 6 aligned WAV stems (host, caller, music, sfx, ads, idents) into a
broadcast-ready MP3 with gap removal, voice compression, music ducking,
and loudness normalization.
"""
@@ -17,7 +17,7 @@ from pathlib import Path
import numpy as np
import soundfile as sf
STEM_NAMES = ["host", "caller", "music", "sfx", "ads"]
STEM_NAMES = ["host", "caller", "music", "sfx", "ads", "idents"]
def load_stems(stems_dir: Path) -> tuple[dict[str, np.ndarray], int]:
@@ -69,7 +69,7 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int,
# Detect gaps in everything except music (which always plays).
# This catches TTS latency gaps while protecting ad breaks and SFX transitions.
content = stems["host"] + stems["caller"] + stems["sfx"] + stems["ads"]
content = stems["host"] + stems["caller"] + stems["sfx"] + stems["ads"] + stems["idents"]
rms = compute_rms(content, window_samples)
# Threshold: percentile-based to sit above the mic noise floor
@@ -386,7 +386,7 @@ def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int,
def match_voice_levels(stems: dict[str, np.ndarray], target_rms: float = 0.1) -> dict[str, np.ndarray]:
"""Normalize host, caller, and ads stems to the same RMS level."""
for name in ["host", "caller", "ads"]:
for name in ["host", "caller", "ads", "idents"]:
audio = stems[name]
# Only measure non-silent portions
active = audio[np.abs(audio) > 0.001]
@@ -408,7 +408,7 @@ def mix_stems(stems: dict[str, np.ndarray],
levels: dict[str, float] | None = None,
stereo_imaging: bool = True) -> np.ndarray:
if levels is None:
levels = {"host": 0, "caller": 0, "music": -6, "sfx": -10, "ads": 0}
levels = {"host": 0, "caller": 0, "music": -6, "sfx": -10, "ads": 0, "idents": 0}
gains = {name: 10 ** (db / 20) for name, db in levels.items()}
@@ -417,7 +417,7 @@ def mix_stems(stems: dict[str, np.ndarray],
if stereo_imaging:
# Pan positions: -1.0 = full left, 0.0 = center, 1.0 = full right
# Using constant-power panning law
pans = {"host": 0.0, "caller": 0.15, "music": 0.0, "sfx": 0.0, "ads": 0.0}
pans = {"host": 0.0, "caller": 0.15, "music": 0.0, "sfx": 0.0, "ads": 0.0, "idents": 0.0}
# Music gets stereo width via slight L/R decorrelation
music_width = 0.3
@@ -774,7 +774,7 @@ def main():
print(f"\n[3/{total_steps}] Limiting ads + SFX...")
with tempfile.TemporaryDirectory() as tmp:
tmp_dir = Path(tmp)
for name in ["ads", "sfx"]:
for name in ["ads", "sfx", "idents"]:
if np.any(stems[name] != 0):
stems[name] = limit_stem(stems[name], sr, tmp_dir, name)
@@ -834,7 +834,7 @@ def main():
dialog = stems["host"] + stems["caller"]
if np.any(dialog != 0) and np.any(stems["music"] != 0):
stems["music"] = apply_ducking(stems["music"], dialog, sr, duck_db=args.duck_amount,
mute_signal=stems["ads"])
mute_signal=stems["ads"] + stems["idents"])
print(" Applied")
else:
print(" No dialog or music to duck")