Postprod improvements: denoise, phone EQ, ad muting, ducking, voice mappings

- Add host mic noise reduction (afftdn + anlmdn)
- Add phone EQ bandpass on caller stem
- Mute music during ads with 2s lookahead/tail
- Increase ducking release to 3s to reduce pumping
- Add Inworld voice mappings for all regular callers
- Recording toggle endpoint, stem sync fixes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 03:59:08 -07:00
parent 75f15ba2d2
commit 95c2d06435
6 changed files with 216 additions and 96 deletions
@@ -2377,8 +2377,8 @@ async def set_on_air(state: dict):
                def _run_postprod():
                    try:
                        result = subprocess.run(
-                            [python, "postprod.py", str(stems_dir), "-o", str(output_file)],
-                            capture_output=True, text=True, timeout=300,
+                            [python, "postprod.py", str(stems_dir), "-o", "episode.mp3"],
+                            capture_output=True, text=True, timeout=600,
                        )
                        if result.returncode == 0:
                            add_log(f"Post-production complete -> {output_file}")
@@ -3927,44 +3927,37 @@ async def server_status():

 # --- Stem Recording ---

-@app.post("/api/recording/start")
-async def start_stem_recording():
-    if audio_service.stem_recorder is not None:
-        raise HTTPException(400, "Recording already in progress")
-    from datetime import datetime
-    dir_name = datetime.now().strftime("%Y-%m-%d_%H%M%S")
-    recordings_dir = Path("recordings") / dir_name
-    import sounddevice as sd
-    device_info = sd.query_devices(audio_service.output_device) if audio_service.output_device is not None else None
-    sr = int(device_info["default_samplerate"]) if device_info else 48000
-    recorder = StemRecorder(recordings_dir, sample_rate=sr)
-    recorder.start()
-    audio_service.stem_recorder = recorder
-    audio_service.start_stem_mic()
-    add_log(f"Stem recording started -> {recordings_dir}")
-    # Auto go on-air
+@app.post("/api/recording/toggle")
+async def toggle_stem_recording():
+    """Toggle recording on/off. Also toggles on-air state."""
    global _show_on_air
-    if not _show_on_air:
-        _show_on_air = True
-        _start_host_audio_sender()
-        audio_service.start_host_stream(_host_audio_sync_callback)
-        threading.Thread(target=_update_on_air_cdn, args=(True,), daemon=True).start()
-        add_log("Show auto-set to ON AIR")
-    return {"status": "recording", "dir": str(recordings_dir), "on_air": _show_on_air}
-
-
-@app.post("/api/recording/stop")
-async def stop_stem_recording():
    if audio_service.stem_recorder is None:
-        raise HTTPException(400, "No recording in progress")
+        # START recording
+        from datetime import datetime
+        dir_name = datetime.now().strftime("%Y-%m-%d_%H%M%S")
+        recordings_dir = Path("recordings") / dir_name
+        import sounddevice as sd
+        device_info = sd.query_devices(audio_service.output_device) if audio_service.output_device is not None else None
+        sr = int(device_info["default_samplerate"]) if device_info else 48000
+        recorder = StemRecorder(recordings_dir, sample_rate=sr)
+        recorder.start()
+        audio_service.stem_recorder = recorder
+        audio_service.start_stem_mic()
+        add_log(f"Stem recording started -> {recordings_dir}")
+        if not _show_on_air:
+            _show_on_air = True
+            _start_host_audio_sender()
+            audio_service.start_host_stream(_host_audio_sync_callback)
+            threading.Thread(target=_update_on_air_cdn, args=(True,), daemon=True).start()
+            add_log("Show auto-set to ON AIR")
+        return {"on_air": _show_on_air, "recording": True}
+    # STOP recording
    audio_service.stop_stem_mic()
    stems_dir = audio_service.stem_recorder.output_dir
    paths = audio_service.stem_recorder.stop()
    audio_service.stem_recorder = None
    add_log(f"Stem recording stopped. Running post-production...")

-    # Auto go off-air
-    global _show_on_air
    if _show_on_air:
        _show_on_air = False
        audio_service.stop_host_stream()
@@ -3978,8 +3971,8 @@ async def stop_stem_recording():
    def _run_postprod():
        try:
            result = subprocess.run(
-                [python, "postprod.py", str(stems_dir), "-o", str(output_file)],
-                capture_output=True, text=True, timeout=300,
+                [python, "postprod.py", str(stems_dir), "-o", "episode.mp3"],
+                capture_output=True, text=True, timeout=600,
            )
            if result.returncode == 0:
                add_log(f"Post-production complete -> {output_file}")
@@ -3989,7 +3982,7 @@ async def stop_stem_recording():
            add_log(f"Post-production error: {e}")

    threading.Thread(target=_run_postprod, daemon=True).start()
-    return {"status": "stopped", "stems": paths, "processing": str(output_file), "on_air": _show_on_air}
+    return {"on_air": _show_on_air, "recording": False}


@app.post("/api/recording/process")
@@ -361,10 +361,6 @@ class AudioService:
            # Apply fade to prevent clicks
            audio = self._apply_fade(audio, device_sr)

-            # Stem recording: caller TTS
-            if self.stem_recorder:
-                self.stem_recorder.write_sporadic("caller", audio.copy(), device_sr)
-
            # Create multi-channel output with audio only on target channel
            multi_ch = np.zeros((len(audio), num_channels), dtype=np.float32)
            multi_ch[:, channel_idx] = audio
@@ -384,6 +380,9 @@ class AudioService:
                while pos < len(multi_ch) and not self._caller_stop_event.is_set():
                    end = min(pos + chunk_size, len(multi_ch))
                    stream.write(multi_ch[pos:end])
+                    # Record each chunk as it plays so hangups cut the stem too
+                    if self.stem_recorder:
+                        self.stem_recorder.write_sporadic("caller", audio[pos:end].copy(), device_sr)
                    pos = end

            if self._caller_stop_event.is_set():
@@ -752,7 +751,7 @@ class AudioService:
                mono_out = (old_samples * fade_out + new_samples * fade_in) * self._music_volume
                outdata[:, channel_idx] = mono_out
                if self.stem_recorder:
-                    self.stem_recorder.write("music", mono_out.copy(), device_sr)
+                    self.stem_recorder.write_sporadic("music", mono_out.copy(), device_sr)
                self._crossfade_progress = end_progress

                if self._crossfade_progress >= 1.0:
@@ -763,7 +762,7 @@ class AudioService:
                mono_out = new_samples * self._music_volume
                outdata[:, channel_idx] = mono_out
                if self.stem_recorder:
-                    self.stem_recorder.write("music", mono_out.copy(), device_sr)
+                    self.stem_recorder.write_sporadic("music", mono_out.copy(), device_sr)

        try:
            self._music_stream = sd.OutputStream(
@@ -873,7 +872,7 @@ class AudioService:
                chunk = self._ad_resampled[self._ad_position:self._ad_position + frames]
                outdata[:, channel_idx] = chunk
                if self.stem_recorder:
-                    self.stem_recorder.write("ads", chunk.copy(), device_sr)
+                    self.stem_recorder.write_sporadic("ads", chunk.copy(), device_sr)
                self._ad_position += frames
            else:
                if remaining > 0:
@@ -86,18 +86,28 @@ DEFAULT_VITS_SPEAKER = "p225"
 # Dennis, Dominus, Edward, Elizabeth, Hades, Hana, Julia, Luna, Mark, Olivia,
 # Pixie, Priya, Ronald, Sarah, Shaun, Theodore, Timothy, Wendy
 INWORLD_VOICES = {
-    # Male voices - each caller gets a unique voice matching their personality
+    # Original voice IDs
    "VR6AewLTigWG4xSOukaG": "Edward",    # Tony - fast-talking, emphatic, streetwise
    "TxGEqnHWrfWFTfGW9XjX": "Shaun",     # Rick - friendly, dynamic, conversational
    "pNInz6obpgDQGcFmaJgB": "Alex",      # Dennis - energetic, expressive, mildly nasal
    "ODq5zmih8GrVes37Dizd": "Craig",     # Earl - older British, refined, articulate
-    "IKne3meq5aSn9XLyUdCD": "Timothy",   # Marcus - lively, upbeat American
-    # Female voices - each caller gets a unique voice matching their personality
+    "IKne3meq5aSn9XLyUdCD": "Timothy",   # Marcus/Jerome - lively, upbeat American
    "jBpfuIE2acCO8z3wKNLl": "Hana",      # Jasmine - bright, expressive young female
    "EXAVITQu4vr4xnSDxMaL": "Ashley",    # Megan - warm, natural female
    "21m00Tcm4TlvDq8ikWAM": "Wendy",     # Tanya - posh, middle-aged British
    "XB0fDUnXU5powFXDhCwa": "Sarah",     # Carla - fast-talking, questioning tone
-    "pFZP5JQG7iQjIQuC4Bku": "Deborah",   # Brenda - gentle, elegant
+    "pFZP5JQG7iQjIQuC4Bku": "Deborah",   # Brenda (original) - gentle, elegant
+    # Regular caller voice IDs (backfilled)
+    "onwK4e9ZLuTAKqWW03F9": "Ronald",    # Bobby - repo man
+    "FGY2WhTYpPnrIDTdsKH5": "Julia",     # Carla (regular) - Jersey mom
+    "CwhRBWXzGAHq8TQ4Fs17": "Mark",      # Leon - male caller
+    "SOYHLrjzK2X1ezoPC6cr": "Carter",    # Carl - male caller
+    "N2lVS1w4EtoT3dr4eOWO": "Clive",     # Reggie - male caller
+    "hpp4J3VqNfWAUOO0d1Us": "Olivia",    # Brenda (regular) - ambulance driver
+    "nPczCjzI2devNBz1zQrb": "Theodore",  # Keith - male caller
+    "JBFqnCBsd6RMkjVDRZzb": "Blake",     # Andre - male caller
+    "TX3LPaxmHKxFdv7VOQHJ": "Dennis",    # Rick (regular) - male caller
+    "cgSgspJ2msm6clMCkdW9": "Priya",     # Megan (regular) - female caller
 }
 DEFAULT_INWORLD_VOICE = "Dennis"