diff --git a/backend/main.py b/backend/main.py index 3a1bfb7..6488ec5 100644 --- a/backend/main.py +++ b/backend/main.py @@ -2377,8 +2377,8 @@ async def set_on_air(state: dict): def _run_postprod(): try: result = subprocess.run( - [python, "postprod.py", str(stems_dir), "-o", str(output_file)], - capture_output=True, text=True, timeout=300, + [python, "postprod.py", str(stems_dir), "-o", "episode.mp3"], + capture_output=True, text=True, timeout=600, ) if result.returncode == 0: add_log(f"Post-production complete -> {output_file}") @@ -3927,44 +3927,37 @@ async def server_status(): # --- Stem Recording --- -@app.post("/api/recording/start") -async def start_stem_recording(): - if audio_service.stem_recorder is not None: - raise HTTPException(400, "Recording already in progress") - from datetime import datetime - dir_name = datetime.now().strftime("%Y-%m-%d_%H%M%S") - recordings_dir = Path("recordings") / dir_name - import sounddevice as sd - device_info = sd.query_devices(audio_service.output_device) if audio_service.output_device is not None else None - sr = int(device_info["default_samplerate"]) if device_info else 48000 - recorder = StemRecorder(recordings_dir, sample_rate=sr) - recorder.start() - audio_service.stem_recorder = recorder - audio_service.start_stem_mic() - add_log(f"Stem recording started -> {recordings_dir}") - # Auto go on-air +@app.post("/api/recording/toggle") +async def toggle_stem_recording(): + """Toggle recording on/off. Also toggles on-air state.""" global _show_on_air - if not _show_on_air: - _show_on_air = True - _start_host_audio_sender() - audio_service.start_host_stream(_host_audio_sync_callback) - threading.Thread(target=_update_on_air_cdn, args=(True,), daemon=True).start() - add_log("Show auto-set to ON AIR") - return {"status": "recording", "dir": str(recordings_dir), "on_air": _show_on_air} - - -@app.post("/api/recording/stop") -async def stop_stem_recording(): if audio_service.stem_recorder is None: - raise HTTPException(400, "No recording in progress") + # START recording + from datetime import datetime + dir_name = datetime.now().strftime("%Y-%m-%d_%H%M%S") + recordings_dir = Path("recordings") / dir_name + import sounddevice as sd + device_info = sd.query_devices(audio_service.output_device) if audio_service.output_device is not None else None + sr = int(device_info["default_samplerate"]) if device_info else 48000 + recorder = StemRecorder(recordings_dir, sample_rate=sr) + recorder.start() + audio_service.stem_recorder = recorder + audio_service.start_stem_mic() + add_log(f"Stem recording started -> {recordings_dir}") + if not _show_on_air: + _show_on_air = True + _start_host_audio_sender() + audio_service.start_host_stream(_host_audio_sync_callback) + threading.Thread(target=_update_on_air_cdn, args=(True,), daemon=True).start() + add_log("Show auto-set to ON AIR") + return {"on_air": _show_on_air, "recording": True} + # STOP recording audio_service.stop_stem_mic() stems_dir = audio_service.stem_recorder.output_dir paths = audio_service.stem_recorder.stop() audio_service.stem_recorder = None add_log(f"Stem recording stopped. Running post-production...") - # Auto go off-air - global _show_on_air if _show_on_air: _show_on_air = False audio_service.stop_host_stream() @@ -3978,8 +3971,8 @@ async def stop_stem_recording(): def _run_postprod(): try: result = subprocess.run( - [python, "postprod.py", str(stems_dir), "-o", str(output_file)], - capture_output=True, text=True, timeout=300, + [python, "postprod.py", str(stems_dir), "-o", "episode.mp3"], + capture_output=True, text=True, timeout=600, ) if result.returncode == 0: add_log(f"Post-production complete -> {output_file}") @@ -3989,7 +3982,7 @@ async def stop_stem_recording(): add_log(f"Post-production error: {e}") threading.Thread(target=_run_postprod, daemon=True).start() - return {"status": "stopped", "stems": paths, "processing": str(output_file), "on_air": _show_on_air} + return {"on_air": _show_on_air, "recording": False} @app.post("/api/recording/process") diff --git a/backend/services/audio.py b/backend/services/audio.py index 2329824..6cb3316 100644 --- a/backend/services/audio.py +++ b/backend/services/audio.py @@ -361,10 +361,6 @@ class AudioService: # Apply fade to prevent clicks audio = self._apply_fade(audio, device_sr) - # Stem recording: caller TTS - if self.stem_recorder: - self.stem_recorder.write_sporadic("caller", audio.copy(), device_sr) - # Create multi-channel output with audio only on target channel multi_ch = np.zeros((len(audio), num_channels), dtype=np.float32) multi_ch[:, channel_idx] = audio @@ -384,6 +380,9 @@ class AudioService: while pos < len(multi_ch) and not self._caller_stop_event.is_set(): end = min(pos + chunk_size, len(multi_ch)) stream.write(multi_ch[pos:end]) + # Record each chunk as it plays so hangups cut the stem too + if self.stem_recorder: + self.stem_recorder.write_sporadic("caller", audio[pos:end].copy(), device_sr) pos = end if self._caller_stop_event.is_set(): @@ -752,7 +751,7 @@ class AudioService: mono_out = (old_samples * fade_out + new_samples * fade_in) * self._music_volume outdata[:, channel_idx] = mono_out if self.stem_recorder: - self.stem_recorder.write("music", mono_out.copy(), device_sr) + self.stem_recorder.write_sporadic("music", mono_out.copy(), device_sr) self._crossfade_progress = end_progress if self._crossfade_progress >= 1.0: @@ -763,7 +762,7 @@ class AudioService: mono_out = new_samples * self._music_volume outdata[:, channel_idx] = mono_out if self.stem_recorder: - self.stem_recorder.write("music", mono_out.copy(), device_sr) + self.stem_recorder.write_sporadic("music", mono_out.copy(), device_sr) try: self._music_stream = sd.OutputStream( @@ -873,7 +872,7 @@ class AudioService: chunk = self._ad_resampled[self._ad_position:self._ad_position + frames] outdata[:, channel_idx] = chunk if self.stem_recorder: - self.stem_recorder.write("ads", chunk.copy(), device_sr) + self.stem_recorder.write_sporadic("ads", chunk.copy(), device_sr) self._ad_position += frames else: if remaining > 0: diff --git a/backend/services/tts.py b/backend/services/tts.py index 1b684ce..a400309 100644 --- a/backend/services/tts.py +++ b/backend/services/tts.py @@ -86,18 +86,28 @@ DEFAULT_VITS_SPEAKER = "p225" # Dennis, Dominus, Edward, Elizabeth, Hades, Hana, Julia, Luna, Mark, Olivia, # Pixie, Priya, Ronald, Sarah, Shaun, Theodore, Timothy, Wendy INWORLD_VOICES = { - # Male voices - each caller gets a unique voice matching their personality + # Original voice IDs "VR6AewLTigWG4xSOukaG": "Edward", # Tony - fast-talking, emphatic, streetwise "TxGEqnHWrfWFTfGW9XjX": "Shaun", # Rick - friendly, dynamic, conversational "pNInz6obpgDQGcFmaJgB": "Alex", # Dennis - energetic, expressive, mildly nasal "ODq5zmih8GrVes37Dizd": "Craig", # Earl - older British, refined, articulate - "IKne3meq5aSn9XLyUdCD": "Timothy", # Marcus - lively, upbeat American - # Female voices - each caller gets a unique voice matching their personality + "IKne3meq5aSn9XLyUdCD": "Timothy", # Marcus/Jerome - lively, upbeat American "jBpfuIE2acCO8z3wKNLl": "Hana", # Jasmine - bright, expressive young female "EXAVITQu4vr4xnSDxMaL": "Ashley", # Megan - warm, natural female "21m00Tcm4TlvDq8ikWAM": "Wendy", # Tanya - posh, middle-aged British "XB0fDUnXU5powFXDhCwa": "Sarah", # Carla - fast-talking, questioning tone - "pFZP5JQG7iQjIQuC4Bku": "Deborah", # Brenda - gentle, elegant + "pFZP5JQG7iQjIQuC4Bku": "Deborah", # Brenda (original) - gentle, elegant + # Regular caller voice IDs (backfilled) + "onwK4e9ZLuTAKqWW03F9": "Ronald", # Bobby - repo man + "FGY2WhTYpPnrIDTdsKH5": "Julia", # Carla (regular) - Jersey mom + "CwhRBWXzGAHq8TQ4Fs17": "Mark", # Leon - male caller + "SOYHLrjzK2X1ezoPC6cr": "Carter", # Carl - male caller + "N2lVS1w4EtoT3dr4eOWO": "Clive", # Reggie - male caller + "hpp4J3VqNfWAUOO0d1Us": "Olivia", # Brenda (regular) - ambulance driver + "nPczCjzI2devNBz1zQrb": "Theodore", # Keith - male caller + "JBFqnCBsd6RMkjVDRZzb": "Blake", # Andre - male caller + "TX3LPaxmHKxFdv7VOQHJ": "Dennis", # Rick (regular) - male caller + "cgSgspJ2msm6clMCkdW9": "Priya", # Megan (regular) - female caller } DEFAULT_INWORLD_VOICE = "Dennis" diff --git a/data/regulars.json b/data/regulars.json index c5e90a5..e9af700 100644 --- a/data/regulars.json +++ b/data/regulars.json @@ -60,9 +60,13 @@ { "summary": "Jerome, a police officer in Texas, called from a DQ parking lot worried about AI writing police reports after his son sent him an article suggesting it might replace him. Through the conversation, he moved from fear about accountability and accuracy in criminal cases to acknowledging that AI handling routine paperwork (like cattle complaints) could free him up to do more meaningful police work in his understaffed county, though he remains uncertain about where this technology will lead.", "timestamp": 1770692087.560522 + }, + { + "summary": "The caller described a turbulent couple of weeks, mentioning an issue with AI writing police reports, which he suggested was just the beginning of a larger problem. He seemed concerned about the developments and wanted to discuss the topic further with the host.", + "timestamp": 1770892192.893108 } ], - "last_call": 1770692087.560523, + "last_call": 1770892192.89311, "created_at": 1770692087.560523, "voice": "IKne3meq5aSn9XLyUdCD" }, diff --git a/frontend/js/app.js b/frontend/js/app.js index f530f94..9e43d49 100644 --- a/frontend/js/app.js +++ b/frontend/js/app.js @@ -101,17 +101,10 @@ function initEventListeners() { if (recBtn) { recBtn.addEventListener('click', async () => { try { - if (!stemRecording) { - const res = await safeFetch('/api/recording/start', { method: 'POST' }); - updateRecBtn(true); - if (onAirBtn) updateOnAirBtn(onAirBtn, res.on_air); - log('Recording started + ON AIR: ' + res.dir); - } else { - const res = await safeFetch('/api/recording/stop', { method: 'POST' }); - updateRecBtn(false); - if (onAirBtn) updateOnAirBtn(onAirBtn, res.on_air); - log('Recording stopped + OFF AIR'); - } + const res = await safeFetch('/api/recording/toggle', { method: 'POST' }); + updateRecBtn(res.recording); + if (onAirBtn) updateOnAirBtn(onAirBtn, res.on_air); + log(res.recording ? 'Recording started + ON AIR' : 'Recording stopped + OFF AIR'); } catch (err) { log('Recording error: ' + err.message); } diff --git a/postprod.py b/postprod.py index 2175edf..3d4f659 100644 --- a/postprod.py +++ b/postprod.py @@ -61,23 +61,30 @@ def compute_rms(audio: np.ndarray, window_samples: int) -> np.ndarray: def remove_gaps(stems: dict[str, np.ndarray], sr: int, - threshold_s: float = 1.5, crossfade_ms: float = 30) -> dict[str, np.ndarray]: + threshold_s: float = 2.0, max_gap_s: float = 8.0, + crossfade_ms: float = 30, pad_s: float = 0.5) -> dict[str, np.ndarray]: window_ms = 50 window_samples = int(sr * window_ms / 1000) crossfade_samples = int(sr * crossfade_ms / 1000) - dialog = stems["host"] + stems["caller"] - rms = compute_rms(dialog, window_samples) + # Detect gaps in everything except music (which always plays). + # This catches TTS latency gaps while protecting ad breaks and SFX transitions. + content = stems["host"] + stems["caller"] + stems["sfx"] + stems["ads"] + rms = compute_rms(content, window_samples) - # Threshold: -60dB or adaptive based on mean RMS - mean_rms = np.mean(rms[rms > 0]) if np.any(rms > 0) else 1e-4 - silence_thresh = min(mean_rms * 0.05, 0.001) + # Threshold: percentile-based to sit above the mic noise floor + nonzero_rms = rms[rms > 0] + if len(nonzero_rms) == 0: + print(" No audio detected") + return stems + noise_floor = np.percentile(nonzero_rms, 20) + silence_thresh = noise_floor * 3 - # Find silent regions is_silent = rms < silence_thresh min_silent_windows = int(threshold_s / (window_ms / 1000)) + max_silent_windows = int(max_gap_s / (window_ms / 1000)) - # Build list of regions to cut (in samples) + # Only cut gaps between 1.5-8s — targets TTS latency, not long breaks cuts = [] i = 0 while i < len(is_silent): @@ -86,10 +93,11 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int, while i < len(is_silent) and is_silent[i]: i += 1 length = i - start - if length >= min_silent_windows: - # Keep a small buffer at edges - cut_start = (start + 1) * window_samples - cut_end = (i - 1) * window_samples + if min_silent_windows <= length <= max_silent_windows: + # Leave pad_s of silence so the edit sounds natural + pad_samples = int(pad_s * sr) + cut_start = (start + 1) * window_samples + pad_samples + cut_end = (i - 1) * window_samples - pad_samples if cut_end > cut_start + crossfade_samples * 2: cuts.append((cut_start, cut_end)) else: @@ -102,18 +110,18 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int, total_cut = sum(end - start for start, end in cuts) / sr print(f" Removing {len(cuts)} gaps ({total_cut:.1f}s total)") - # Apply cuts to dialog stems (host, caller, sfx, ads) — not music - cut_stems = ["host", "caller", "sfx", "ads"] + # Cut dialog/sfx/ads at gap points. Leave music uncut — just trim to fit. result = {} - for name in cut_stems: + for name in STEM_NAMES: + if name == "music": + continue # handled below audio = stems[name] pieces = [] prev_end = 0 for cut_start, cut_end in cuts: if prev_end < cut_start: piece = audio[prev_end:cut_start].copy() - # Apply crossfade at join point if pieces and len(piece) > crossfade_samples: fade_in = np.linspace(0, 1, crossfade_samples, dtype=np.float32) piece[:crossfade_samples] *= fade_in @@ -135,18 +143,49 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int, result[name] = np.concatenate(pieces) if pieces else np.array([], dtype=np.float32) - # Trim music to match new duration, with fade-out at end + # Music: leave uncut, just trim to match new duration with fade-out new_len = len(result["host"]) - music = stems["music"][:new_len].copy() if len(stems["music"]) >= new_len else np.pad(stems["music"], (0, max(0, new_len - len(stems["music"])))) - fade_samples = int(sr * 2) # 2s fade out + music = stems["music"] + if len(music) >= new_len: + music = music[:new_len].copy() + else: + music = np.pad(music, (0, new_len - len(music))) + fade_samples = int(sr * 3) if len(music) > fade_samples: - fade_out = np.linspace(1, 0, fade_samples, dtype=np.float32) - music[-fade_samples:] *= fade_out + music[-fade_samples:] *= np.linspace(1, 0, fade_samples, dtype=np.float32) result["music"] = music return result +def denoise(audio: np.ndarray, sr: int, tmp_dir: Path) -> np.ndarray: + """High-quality noise reduction using ffmpeg afftdn (adaptive Wiener filter).""" + in_path = tmp_dir / "host_pre_denoise.wav" + out_path = tmp_dir / "host_post_denoise.wav" + sf.write(str(in_path), audio, sr) + + # afftdn: adaptive FFT denoiser with Wiener filter + # nt=w - Wiener filter (best quality) + # om=o - output cleaned signal + # nr=10 - noise reduction in dB (10 = moderate, preserves voice naturalness) + # nf=-30 - noise floor estimate in dB + # anlmdn: non-local means denoiser for residual broadband noise + # s=4 - patch size + # p=0.002 - strength (gentle to avoid artifacts) + af = ( + "afftdn=nt=w:om=o:nr=12:nf=-30," + "anlmdn=s=4:p=0.002" + ) + cmd = ["ffmpeg", "-y", "-i", str(in_path), "-af", af, str(out_path)] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f" WARNING: denoise failed: {result.stderr[:200]}") + return audio + + denoised, _ = sf.read(str(out_path), dtype="float32") + return denoised + + def compress_voice(audio: np.ndarray, sr: int, tmp_dir: Path, stem_name: str) -> np.ndarray: in_path = tmp_dir / f"{stem_name}_pre_comp.wav" @@ -156,7 +195,7 @@ def compress_voice(audio: np.ndarray, sr: int, tmp_dir: Path, cmd = [ "ffmpeg", "-y", "-i", str(in_path), - "-af", "acompressor=threshold=-24dB:ratio=3:attack=5:release=100:makeup=6dB", + "-af", "acompressor=threshold=-24dB:ratio=2.5:attack=10:release=800:makeup=6dB", str(out_path), ] result = subprocess.run(cmd, capture_output=True, text=True) @@ -168,9 +207,32 @@ def compress_voice(audio: np.ndarray, sr: int, tmp_dir: Path, return compressed +def phone_eq(audio: np.ndarray, sr: int, tmp_dir: Path) -> np.ndarray: + """Apply telephone EQ to make caller sound like a phone call.""" + in_path = tmp_dir / "caller_pre_phone.wav" + out_path = tmp_dir / "caller_post_phone.wav" + sf.write(str(in_path), audio, sr) + + # Bandpass 300-3400Hz (telephone bandwidth) + slight mid boost for presence + af = ( + "highpass=f=300:poles=2," + "lowpass=f=3400:poles=2," + "equalizer=f=1000:t=q:w=0.8:g=4" + ) + cmd = ["ffmpeg", "-y", "-i", str(in_path), "-af", af, str(out_path)] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f" WARNING: phone EQ failed: {result.stderr[:200]}") + return audio + + filtered, _ = sf.read(str(out_path), dtype="float32") + return filtered + + def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int, - duck_db: float = -12, attack_ms: float = 200, - release_ms: float = 500) -> np.ndarray: + duck_db: float = -20, attack_ms: float = 200, + release_ms: float = 3000, + mute_signal: np.ndarray | None = None) -> np.ndarray: window_ms = 50 window_samples = int(sr * window_ms / 1000) rms = compute_rms(dialog, window_samples) @@ -184,6 +246,22 @@ def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int, is_speech = rms > speech_thresh target_gain = np.where(is_speech, duck_gain, 1.0).astype(np.float32) + # Mute music completely during ads with lookahead and tail + if mute_signal is not None: + mute_rms = compute_rms(mute_signal, window_samples) + mute_thresh = np.mean(mute_rms[mute_rms > 0]) * 0.1 if np.any(mute_rms > 0) else 1e-4 + is_ads = mute_rms > mute_thresh + # Expand ad regions: 2s before (fade out music before ad) and 2s after (don't resume immediately) + lookahead_windows = int(2000 / window_ms) + tail_windows = int(2000 / window_ms) + expanded_ads = is_ads.copy() + for i in range(len(is_ads)): + if is_ads[i]: + start = max(0, i - lookahead_windows) + end = min(len(expanded_ads), i + tail_windows + 1) + expanded_ads[start:end] = True + target_gain[expanded_ads] = 0.0 + # Smooth the envelope attack_windows = max(1, int(attack_ms / window_ms)) release_windows = max(1, int(release_ms / window_ms)) @@ -206,10 +284,30 @@ def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int, return music * gain_samples +def match_voice_levels(stems: dict[str, np.ndarray], target_rms: float = 0.1) -> dict[str, np.ndarray]: + """Normalize host, caller, and ads stems to the same RMS level.""" + for name in ["host", "caller", "ads"]: + audio = stems[name] + # Only measure non-silent portions + active = audio[np.abs(audio) > 0.001] + if len(active) == 0: + continue + current_rms = np.sqrt(np.mean(active ** 2)) + if current_rms < 1e-6: + continue + gain = target_rms / current_rms + # Clamp gain to avoid extreme boosts on very quiet stems + gain = min(gain, 10.0) + stems[name] = np.clip(audio * gain, -1.0, 1.0).astype(np.float32) + db_change = 20 * np.log10(gain) if gain > 0 else 0 + print(f" {name}: RMS {current_rms:.4f} -> {target_rms:.4f} ({db_change:+.1f}dB)") + return stems + + def mix_stems(stems: dict[str, np.ndarray], levels: dict[str, float] | None = None) -> np.ndarray: if levels is None: - levels = {"host": 0, "caller": 0, "music": -6, "sfx": -3, "ads": 0} + levels = {"host": 0, "caller": 0, "music": -6, "sfx": -6, "ads": 0} gains = {name: 10 ** (db / 20) for name, db in levels.items()} @@ -282,8 +380,8 @@ def main(): parser = argparse.ArgumentParser(description="Post-production for AI podcast stems") parser.add_argument("stems_dir", type=Path, help="Directory containing stem WAV files") parser.add_argument("-o", "--output", type=str, default="episode.mp3", help="Output filename") - parser.add_argument("--gap-threshold", type=float, default=1.5, help="Min silence to cut (seconds)") - parser.add_argument("--duck-amount", type=float, default=-12, help="Music duck in dB") + parser.add_argument("--gap-threshold", type=float, default=2.0, help="Min silence to cut (seconds)") + parser.add_argument("--duck-amount", type=float, default=-20, help="Music duck in dB") parser.add_argument("--target-lufs", type=float, default=-16, help="Target loudness (LUFS)") parser.add_argument("--bitrate", type=str, default="128k", help="MP3 bitrate") parser.add_argument("--no-gap-removal", action="store_true", help="Skip gap removal") @@ -313,18 +411,27 @@ def main(): return # Step 1: Load - print("\n[1/6] Loading stems...") + print("\n[1/9] Loading stems...") stems, sr = load_stems(stems_dir) # Step 2: Gap removal - print("\n[2/6] Gap removal...") + print("\n[2/9] Gap removal...") if not args.no_gap_removal: stems = remove_gaps(stems, sr, threshold_s=args.gap_threshold) else: print(" Skipped") - # Step 3: Voice compression - print("\n[3/6] Voice compression...") + # Step 3: Host mic noise reduction + print("\n[3/9] Host mic noise reduction...") + if np.any(stems["host"] != 0): + with tempfile.TemporaryDirectory() as tmp: + stems["host"] = denoise(stems["host"], sr, Path(tmp)) + print(" Applied") + else: + print(" No host audio") + + # Step 4: Voice compression + print("\n[4/9] Voice compression...") if not args.no_compression: with tempfile.TemporaryDirectory() as tmp: tmp_dir = Path(tmp) @@ -335,25 +442,39 @@ def main(): else: print(" Skipped") - # Step 4: Music ducking - print("\n[4/6] Music ducking...") + # Step 5: Phone EQ on caller + print("\n[5/9] Phone EQ on caller...") + if np.any(stems["caller"] != 0): + with tempfile.TemporaryDirectory() as tmp: + stems["caller"] = phone_eq(stems["caller"], sr, Path(tmp)) + print(" Applied") + else: + print(" No caller audio") + + # Step 6: Match voice levels + print("\n[6/9] Matching voice levels...") + stems = match_voice_levels(stems) + + # Step 7: Music ducking + print("\n[7/9] Music ducking...") if not args.no_ducking: dialog = stems["host"] + stems["caller"] if np.any(dialog != 0) and np.any(stems["music"] != 0): - stems["music"] = apply_ducking(stems["music"], dialog, sr, duck_db=args.duck_amount) + stems["music"] = apply_ducking(stems["music"], dialog, sr, duck_db=args.duck_amount, + mute_signal=stems["ads"]) print(" Applied") else: print(" No dialog or music to duck") else: print(" Skipped") - # Step 5: Mix - print("\n[5/6] Mixing...") + # Step 8: Mix + print("\n[8/9] Mixing...") stereo = mix_stems(stems) print(f" Mixed to stereo: {len(stereo)} samples ({len(stereo)/sr:.1f}s)") - # Step 6: Normalize + export - print("\n[6/6] Loudness normalization + export...") + # Step 9: Normalize + export + print("\n[9/9] Loudness normalization + export...") with tempfile.TemporaryDirectory() as tmp: normalize_and_export(stereo, sr, output_path, target_lufs=args.target_lufs,