Postprod improvements: denoise, phone EQ, ad muting, ducking, voice mappings
- Add host mic noise reduction (afftdn + anlmdn)
- Add phone EQ bandpass on caller stem
- Mute music during ads with 2s lookahead/tail
- Increase ducking release to 3s to reduce pumping
- Add Inworld voice mappings for all regular callers
- Recording toggle endpoint, stem sync fixes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2377,8 +2377,8 @@ async def set_on_air(state: dict):
|
||||
def _run_postprod():
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[python, "postprod.py", str(stems_dir), "-o", str(output_file)],
|
||||
capture_output=True, text=True, timeout=300,
|
||||
[python, "postprod.py", str(stems_dir), "-o", "episode.mp3"],
|
||||
capture_output=True, text=True, timeout=600,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
add_log(f"Post-production complete -> {output_file}")
|
||||
@@ -3927,44 +3927,37 @@ async def server_status():
|
||||
|
||||
# --- Stem Recording ---
|
||||
|
||||
@app.post("/api/recording/start")
|
||||
async def start_stem_recording():
|
||||
if audio_service.stem_recorder is not None:
|
||||
raise HTTPException(400, "Recording already in progress")
|
||||
from datetime import datetime
|
||||
dir_name = datetime.now().strftime("%Y-%m-%d_%H%M%S")
|
||||
recordings_dir = Path("recordings") / dir_name
|
||||
import sounddevice as sd
|
||||
device_info = sd.query_devices(audio_service.output_device) if audio_service.output_device is not None else None
|
||||
sr = int(device_info["default_samplerate"]) if device_info else 48000
|
||||
recorder = StemRecorder(recordings_dir, sample_rate=sr)
|
||||
recorder.start()
|
||||
audio_service.stem_recorder = recorder
|
||||
audio_service.start_stem_mic()
|
||||
add_log(f"Stem recording started -> {recordings_dir}")
|
||||
# Auto go on-air
|
||||
@app.post("/api/recording/toggle")
|
||||
async def toggle_stem_recording():
|
||||
"""Toggle recording on/off. Also toggles on-air state."""
|
||||
global _show_on_air
|
||||
if not _show_on_air:
|
||||
_show_on_air = True
|
||||
_start_host_audio_sender()
|
||||
audio_service.start_host_stream(_host_audio_sync_callback)
|
||||
threading.Thread(target=_update_on_air_cdn, args=(True,), daemon=True).start()
|
||||
add_log("Show auto-set to ON AIR")
|
||||
return {"status": "recording", "dir": str(recordings_dir), "on_air": _show_on_air}
|
||||
|
||||
|
||||
@app.post("/api/recording/stop")
|
||||
async def stop_stem_recording():
|
||||
if audio_service.stem_recorder is None:
|
||||
raise HTTPException(400, "No recording in progress")
|
||||
# START recording
|
||||
from datetime import datetime
|
||||
dir_name = datetime.now().strftime("%Y-%m-%d_%H%M%S")
|
||||
recordings_dir = Path("recordings") / dir_name
|
||||
import sounddevice as sd
|
||||
device_info = sd.query_devices(audio_service.output_device) if audio_service.output_device is not None else None
|
||||
sr = int(device_info["default_samplerate"]) if device_info else 48000
|
||||
recorder = StemRecorder(recordings_dir, sample_rate=sr)
|
||||
recorder.start()
|
||||
audio_service.stem_recorder = recorder
|
||||
audio_service.start_stem_mic()
|
||||
add_log(f"Stem recording started -> {recordings_dir}")
|
||||
if not _show_on_air:
|
||||
_show_on_air = True
|
||||
_start_host_audio_sender()
|
||||
audio_service.start_host_stream(_host_audio_sync_callback)
|
||||
threading.Thread(target=_update_on_air_cdn, args=(True,), daemon=True).start()
|
||||
add_log("Show auto-set to ON AIR")
|
||||
return {"on_air": _show_on_air, "recording": True}
|
||||
# STOP recording
|
||||
audio_service.stop_stem_mic()
|
||||
stems_dir = audio_service.stem_recorder.output_dir
|
||||
paths = audio_service.stem_recorder.stop()
|
||||
audio_service.stem_recorder = None
|
||||
add_log(f"Stem recording stopped. Running post-production...")
|
||||
|
||||
# Auto go off-air
|
||||
global _show_on_air
|
||||
if _show_on_air:
|
||||
_show_on_air = False
|
||||
audio_service.stop_host_stream()
|
||||
@@ -3978,8 +3971,8 @@ async def stop_stem_recording():
|
||||
def _run_postprod():
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[python, "postprod.py", str(stems_dir), "-o", str(output_file)],
|
||||
capture_output=True, text=True, timeout=300,
|
||||
[python, "postprod.py", str(stems_dir), "-o", "episode.mp3"],
|
||||
capture_output=True, text=True, timeout=600,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
add_log(f"Post-production complete -> {output_file}")
|
||||
@@ -3989,7 +3982,7 @@ async def stop_stem_recording():
|
||||
add_log(f"Post-production error: {e}")
|
||||
|
||||
threading.Thread(target=_run_postprod, daemon=True).start()
|
||||
return {"status": "stopped", "stems": paths, "processing": str(output_file), "on_air": _show_on_air}
|
||||
return {"on_air": _show_on_air, "recording": False}
|
||||
|
||||
|
||||
@app.post("/api/recording/process")
|
||||
|
||||
@@ -361,10 +361,6 @@ class AudioService:
|
||||
# Apply fade to prevent clicks
|
||||
audio = self._apply_fade(audio, device_sr)
|
||||
|
||||
# Stem recording: caller TTS
|
||||
if self.stem_recorder:
|
||||
self.stem_recorder.write_sporadic("caller", audio.copy(), device_sr)
|
||||
|
||||
# Create multi-channel output with audio only on target channel
|
||||
multi_ch = np.zeros((len(audio), num_channels), dtype=np.float32)
|
||||
multi_ch[:, channel_idx] = audio
|
||||
@@ -384,6 +380,9 @@ class AudioService:
|
||||
while pos < len(multi_ch) and not self._caller_stop_event.is_set():
|
||||
end = min(pos + chunk_size, len(multi_ch))
|
||||
stream.write(multi_ch[pos:end])
|
||||
# Record each chunk as it plays so hangups cut the stem too
|
||||
if self.stem_recorder:
|
||||
self.stem_recorder.write_sporadic("caller", audio[pos:end].copy(), device_sr)
|
||||
pos = end
|
||||
|
||||
if self._caller_stop_event.is_set():
|
||||
@@ -752,7 +751,7 @@ class AudioService:
|
||||
mono_out = (old_samples * fade_out + new_samples * fade_in) * self._music_volume
|
||||
outdata[:, channel_idx] = mono_out
|
||||
if self.stem_recorder:
|
||||
self.stem_recorder.write("music", mono_out.copy(), device_sr)
|
||||
self.stem_recorder.write_sporadic("music", mono_out.copy(), device_sr)
|
||||
self._crossfade_progress = end_progress
|
||||
|
||||
if self._crossfade_progress >= 1.0:
|
||||
@@ -763,7 +762,7 @@ class AudioService:
|
||||
mono_out = new_samples * self._music_volume
|
||||
outdata[:, channel_idx] = mono_out
|
||||
if self.stem_recorder:
|
||||
self.stem_recorder.write("music", mono_out.copy(), device_sr)
|
||||
self.stem_recorder.write_sporadic("music", mono_out.copy(), device_sr)
|
||||
|
||||
try:
|
||||
self._music_stream = sd.OutputStream(
|
||||
@@ -873,7 +872,7 @@ class AudioService:
|
||||
chunk = self._ad_resampled[self._ad_position:self._ad_position + frames]
|
||||
outdata[:, channel_idx] = chunk
|
||||
if self.stem_recorder:
|
||||
self.stem_recorder.write("ads", chunk.copy(), device_sr)
|
||||
self.stem_recorder.write_sporadic("ads", chunk.copy(), device_sr)
|
||||
self._ad_position += frames
|
||||
else:
|
||||
if remaining > 0:
|
||||
|
||||
@@ -86,18 +86,28 @@ DEFAULT_VITS_SPEAKER = "p225"
|
||||
# Dennis, Dominus, Edward, Elizabeth, Hades, Hana, Julia, Luna, Mark, Olivia,
|
||||
# Pixie, Priya, Ronald, Sarah, Shaun, Theodore, Timothy, Wendy
|
||||
# Maps a caller's voice ID (key) to the Inworld voice name used for that
# caller (value). Each key appears exactly once: a repeated key in a dict
# literal is silently collapsed to the last value, which hides mistakes.
# NOTE(review): the keys look like voice IDs carried over from a previous
# TTS provider -- confirm against the caller definitions.
INWORLD_VOICES = {
    # Original voice IDs
    "VR6AewLTigWG4xSOukaG": "Edward",    # Tony - fast-talking, emphatic, streetwise
    "TxGEqnHWrfWFTfGW9XjX": "Shaun",     # Rick - friendly, dynamic, conversational
    "pNInz6obpgDQGcFmaJgB": "Alex",      # Dennis - energetic, expressive, mildly nasal
    "ODq5zmih8GrVes37Dizd": "Craig",     # Earl - older British, refined, articulate
    "IKne3meq5aSn9XLyUdCD": "Timothy",   # Marcus/Jerome - lively, upbeat American
    "jBpfuIE2acCO8z3wKNLl": "Hana",      # Jasmine - bright, expressive young female
    "EXAVITQu4vr4xnSDxMaL": "Ashley",    # Megan - warm, natural female
    "21m00Tcm4TlvDq8ikWAM": "Wendy",     # Tanya - posh, middle-aged British
    "XB0fDUnXU5powFXDhCwa": "Sarah",     # Carla - fast-talking, questioning tone
    "pFZP5JQG7iQjIQuC4Bku": "Deborah",   # Brenda (original) - gentle, elegant
    # Regular caller voice IDs (backfilled)
    "onwK4e9ZLuTAKqWW03F9": "Ronald",    # Bobby - repo man
    "FGY2WhTYpPnrIDTdsKH5": "Julia",     # Carla (regular) - Jersey mom
    "CwhRBWXzGAHq8TQ4Fs17": "Mark",      # Leon - male caller
    "SOYHLrjzK2X1ezoPC6cr": "Carter",    # Carl - male caller
    "N2lVS1w4EtoT3dr4eOWO": "Clive",     # Reggie - male caller
    "hpp4J3VqNfWAUOO0d1Us": "Olivia",    # Brenda (regular) - ambulance driver
    "nPczCjzI2devNBz1zQrb": "Theodore",  # Keith - male caller
    "JBFqnCBsd6RMkjVDRZzb": "Blake",     # Andre - male caller
    "TX3LPaxmHKxFdv7VOQHJ": "Dennis",    # Rick (regular) - male caller
    "cgSgspJ2msm6clMCkdW9": "Priya",     # Megan (regular) - female caller
}
|
||||
DEFAULT_INWORLD_VOICE = "Dennis"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user