Compare commits
3 Commits
75f15ba2d2
...
9fd977ad9f
| Author | SHA1 | Date | |
|---|---|---|---|
| 9fd977ad9f | |||
| cb5665bca8 | |||
| 95c2d06435 |
@@ -46,12 +46,26 @@ MALE_NAMES = [
|
|||||||
"Tony", "Rick", "Dennis", "Earl", "Marcus", "Keith", "Darnell", "Wayne",
|
"Tony", "Rick", "Dennis", "Earl", "Marcus", "Keith", "Darnell", "Wayne",
|
||||||
"Greg", "Andre", "Ray", "Jerome", "Hector", "Travis", "Vince", "Leon",
|
"Greg", "Andre", "Ray", "Jerome", "Hector", "Travis", "Vince", "Leon",
|
||||||
"Dale", "Frank", "Terrence", "Bobby", "Cliff", "Nate", "Reggie", "Carl",
|
"Dale", "Frank", "Terrence", "Bobby", "Cliff", "Nate", "Reggie", "Carl",
|
||||||
|
"Donnie", "Mitch", "Lamar", "Tyrone", "Russell", "Cedric", "Marvin", "Curtis",
|
||||||
|
"Rodney", "Clarence", "Floyd", "Otis", "Chester", "Leroy", "Melvin", "Vernon",
|
||||||
|
"Dwight", "Benny", "Elvin", "Alonzo", "Dexter", "Roland", "Wendell", "Clyde",
|
||||||
|
"Luther", "Virgil", "Ernie", "Lenny", "Sal", "Gus", "Moe", "Archie",
|
||||||
|
"Duke", "Sonny", "Red", "Butch", "Skeeter", "T-Bone", "Slim", "Big Mike",
|
||||||
|
"Chip", "Ricky", "Darryl", "Pete", "Artie", "Stu", "Phil", "Murray",
|
||||||
|
"Norm", "Woody", "Rocco", "Paulie", "Vinnie", "Frankie", "Mikey", "Joey",
|
||||||
]
|
]
|
||||||
|
|
||||||
FEMALE_NAMES = [
|
FEMALE_NAMES = [
|
||||||
"Jasmine", "Megan", "Tanya", "Carla", "Brenda", "Sheila", "Denise", "Tamika",
|
"Jasmine", "Megan", "Tanya", "Carla", "Brenda", "Sheila", "Denise", "Tamika",
|
||||||
"Lorraine", "Crystal", "Angie", "Renee", "Monique", "Gina", "Patrice", "Deb",
|
"Lorraine", "Crystal", "Angie", "Renee", "Monique", "Gina", "Patrice", "Deb",
|
||||||
"Shonda", "Marlene", "Yolanda", "Stacy", "Jackie", "Carmen", "Rita", "Val",
|
"Shonda", "Marlene", "Yolanda", "Stacy", "Jackie", "Carmen", "Rita", "Val",
|
||||||
|
"Diane", "Connie", "Wanda", "Doris", "Maxine", "Gladys", "Pearl", "Lucille",
|
||||||
|
"Rochelle", "Bernadette", "Thelma", "Dolores", "Naomi", "Bonnie", "Francine", "Irene",
|
||||||
|
"Estelle", "Charlene", "Yvonne", "Roberta", "Darlene", "Adrienne", "Vivian", "Rosalie",
|
||||||
|
"Pam", "Barb", "Cheryl", "Jolene", "Mavis", "Faye", "Luann", "Peggy",
|
||||||
|
"Dot", "Bev", "Tina", "Lori", "Sandy", "Debbie", "Terri", "Cindy",
|
||||||
|
"Tonya", "Keisha", "Latoya", "Shaniqua", "Aaliyah", "Ebony", "Lakisha", "Shanice",
|
||||||
|
"Nikki", "Candy", "Misty", "Brandy", "Tiffany", "Amber", "Heather", "Jen",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Voice pools per TTS provider
|
# Voice pools per TTS provider
|
||||||
@@ -121,8 +135,20 @@ def _randomize_callers():
|
|||||||
Overrides 2-3 slots with returning regulars when available."""
|
Overrides 2-3 slots with returning regulars when available."""
|
||||||
num_m = sum(1 for c in CALLER_BASES.values() if c["gender"] == "male")
|
num_m = sum(1 for c in CALLER_BASES.values() if c["gender"] == "male")
|
||||||
num_f = sum(1 for c in CALLER_BASES.values() if c["gender"] == "female")
|
num_f = sum(1 for c in CALLER_BASES.values() if c["gender"] == "female")
|
||||||
males = random.sample(MALE_NAMES, num_m)
|
|
||||||
females = random.sample(FEMALE_NAMES, num_f)
|
# Get returning callers first so we can exclude their names from random pool
|
||||||
|
returning = []
|
||||||
|
try:
|
||||||
|
returning = regular_caller_service.get_returning_callers(random.randint(2, 3))
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[Regulars] Failed to get returning callers: {e}")
|
||||||
|
|
||||||
|
returning_names = {r["name"] for r in returning}
|
||||||
|
avail_males = [n for n in MALE_NAMES if n not in returning_names]
|
||||||
|
avail_females = [n for n in FEMALE_NAMES if n not in returning_names]
|
||||||
|
|
||||||
|
males = random.sample(avail_males, num_m)
|
||||||
|
females = random.sample(avail_females, num_f)
|
||||||
male_pool, female_pool = _get_voice_pools()
|
male_pool, female_pool = _get_voice_pools()
|
||||||
m_voices = random.sample(male_pool, min(num_m, len(male_pool)))
|
m_voices = random.sample(male_pool, min(num_m, len(male_pool)))
|
||||||
f_voices = random.sample(female_pool, min(num_f, len(female_pool)))
|
f_voices = random.sample(female_pool, min(num_f, len(female_pool)))
|
||||||
@@ -141,7 +167,6 @@ def _randomize_callers():
|
|||||||
|
|
||||||
# Override 2-3 random slots with returning callers
|
# Override 2-3 random slots with returning callers
|
||||||
try:
|
try:
|
||||||
returning = regular_caller_service.get_returning_callers(random.randint(2, 3))
|
|
||||||
if returning:
|
if returning:
|
||||||
keys_by_gender = {"male": [], "female": []}
|
keys_by_gender = {"male": [], "female": []}
|
||||||
for k, v in CALLER_BASES.items():
|
for k, v in CALLER_BASES.items():
|
||||||
@@ -2377,8 +2402,8 @@ async def set_on_air(state: dict):
|
|||||||
def _run_postprod():
|
def _run_postprod():
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
[python, "postprod.py", str(stems_dir), "-o", str(output_file)],
|
[python, "postprod.py", str(stems_dir), "-o", "episode.mp3"],
|
||||||
capture_output=True, text=True, timeout=300,
|
capture_output=True, text=True, timeout=600,
|
||||||
)
|
)
|
||||||
if result.returncode == 0:
|
if result.returncode == 0:
|
||||||
add_log(f"Post-production complete -> {output_file}")
|
add_log(f"Post-production complete -> {output_file}")
|
||||||
@@ -3927,10 +3952,12 @@ async def server_status():
|
|||||||
|
|
||||||
# --- Stem Recording ---
|
# --- Stem Recording ---
|
||||||
|
|
||||||
@app.post("/api/recording/start")
|
@app.post("/api/recording/toggle")
|
||||||
async def start_stem_recording():
|
async def toggle_stem_recording():
|
||||||
if audio_service.stem_recorder is not None:
|
"""Toggle recording on/off. Also toggles on-air state."""
|
||||||
raise HTTPException(400, "Recording already in progress")
|
global _show_on_air
|
||||||
|
if audio_service.stem_recorder is None:
|
||||||
|
# START recording
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
dir_name = datetime.now().strftime("%Y-%m-%d_%H%M%S")
|
dir_name = datetime.now().strftime("%Y-%m-%d_%H%M%S")
|
||||||
recordings_dir = Path("recordings") / dir_name
|
recordings_dir = Path("recordings") / dir_name
|
||||||
@@ -3942,29 +3969,20 @@ async def start_stem_recording():
|
|||||||
audio_service.stem_recorder = recorder
|
audio_service.stem_recorder = recorder
|
||||||
audio_service.start_stem_mic()
|
audio_service.start_stem_mic()
|
||||||
add_log(f"Stem recording started -> {recordings_dir}")
|
add_log(f"Stem recording started -> {recordings_dir}")
|
||||||
# Auto go on-air
|
|
||||||
global _show_on_air
|
|
||||||
if not _show_on_air:
|
if not _show_on_air:
|
||||||
_show_on_air = True
|
_show_on_air = True
|
||||||
_start_host_audio_sender()
|
_start_host_audio_sender()
|
||||||
audio_service.start_host_stream(_host_audio_sync_callback)
|
audio_service.start_host_stream(_host_audio_sync_callback)
|
||||||
threading.Thread(target=_update_on_air_cdn, args=(True,), daemon=True).start()
|
threading.Thread(target=_update_on_air_cdn, args=(True,), daemon=True).start()
|
||||||
add_log("Show auto-set to ON AIR")
|
add_log("Show auto-set to ON AIR")
|
||||||
return {"status": "recording", "dir": str(recordings_dir), "on_air": _show_on_air}
|
return {"on_air": _show_on_air, "recording": True}
|
||||||
|
# STOP recording
|
||||||
|
|
||||||
@app.post("/api/recording/stop")
|
|
||||||
async def stop_stem_recording():
|
|
||||||
if audio_service.stem_recorder is None:
|
|
||||||
raise HTTPException(400, "No recording in progress")
|
|
||||||
audio_service.stop_stem_mic()
|
audio_service.stop_stem_mic()
|
||||||
stems_dir = audio_service.stem_recorder.output_dir
|
stems_dir = audio_service.stem_recorder.output_dir
|
||||||
paths = audio_service.stem_recorder.stop()
|
paths = audio_service.stem_recorder.stop()
|
||||||
audio_service.stem_recorder = None
|
audio_service.stem_recorder = None
|
||||||
add_log(f"Stem recording stopped. Running post-production...")
|
add_log(f"Stem recording stopped. Running post-production...")
|
||||||
|
|
||||||
# Auto go off-air
|
|
||||||
global _show_on_air
|
|
||||||
if _show_on_air:
|
if _show_on_air:
|
||||||
_show_on_air = False
|
_show_on_air = False
|
||||||
audio_service.stop_host_stream()
|
audio_service.stop_host_stream()
|
||||||
@@ -3978,8 +3996,8 @@ async def stop_stem_recording():
|
|||||||
def _run_postprod():
|
def _run_postprod():
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
[python, "postprod.py", str(stems_dir), "-o", str(output_file)],
|
[python, "postprod.py", str(stems_dir), "-o", "episode.mp3"],
|
||||||
capture_output=True, text=True, timeout=300,
|
capture_output=True, text=True, timeout=600,
|
||||||
)
|
)
|
||||||
if result.returncode == 0:
|
if result.returncode == 0:
|
||||||
add_log(f"Post-production complete -> {output_file}")
|
add_log(f"Post-production complete -> {output_file}")
|
||||||
@@ -3989,7 +4007,7 @@ async def stop_stem_recording():
|
|||||||
add_log(f"Post-production error: {e}")
|
add_log(f"Post-production error: {e}")
|
||||||
|
|
||||||
threading.Thread(target=_run_postprod, daemon=True).start()
|
threading.Thread(target=_run_postprod, daemon=True).start()
|
||||||
return {"status": "stopped", "stems": paths, "processing": str(output_file), "on_air": _show_on_air}
|
return {"on_air": _show_on_air, "recording": False}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/recording/process")
|
@app.post("/api/recording/process")
|
||||||
|
|||||||
@@ -361,10 +361,6 @@ class AudioService:
|
|||||||
# Apply fade to prevent clicks
|
# Apply fade to prevent clicks
|
||||||
audio = self._apply_fade(audio, device_sr)
|
audio = self._apply_fade(audio, device_sr)
|
||||||
|
|
||||||
# Stem recording: caller TTS
|
|
||||||
if self.stem_recorder:
|
|
||||||
self.stem_recorder.write_sporadic("caller", audio.copy(), device_sr)
|
|
||||||
|
|
||||||
# Create multi-channel output with audio only on target channel
|
# Create multi-channel output with audio only on target channel
|
||||||
multi_ch = np.zeros((len(audio), num_channels), dtype=np.float32)
|
multi_ch = np.zeros((len(audio), num_channels), dtype=np.float32)
|
||||||
multi_ch[:, channel_idx] = audio
|
multi_ch[:, channel_idx] = audio
|
||||||
@@ -384,6 +380,9 @@ class AudioService:
|
|||||||
while pos < len(multi_ch) and not self._caller_stop_event.is_set():
|
while pos < len(multi_ch) and not self._caller_stop_event.is_set():
|
||||||
end = min(pos + chunk_size, len(multi_ch))
|
end = min(pos + chunk_size, len(multi_ch))
|
||||||
stream.write(multi_ch[pos:end])
|
stream.write(multi_ch[pos:end])
|
||||||
|
# Record each chunk as it plays so hangups cut the stem too
|
||||||
|
if self.stem_recorder:
|
||||||
|
self.stem_recorder.write_sporadic("caller", audio[pos:end].copy(), device_sr)
|
||||||
pos = end
|
pos = end
|
||||||
|
|
||||||
if self._caller_stop_event.is_set():
|
if self._caller_stop_event.is_set():
|
||||||
@@ -752,7 +751,7 @@ class AudioService:
|
|||||||
mono_out = (old_samples * fade_out + new_samples * fade_in) * self._music_volume
|
mono_out = (old_samples * fade_out + new_samples * fade_in) * self._music_volume
|
||||||
outdata[:, channel_idx] = mono_out
|
outdata[:, channel_idx] = mono_out
|
||||||
if self.stem_recorder:
|
if self.stem_recorder:
|
||||||
self.stem_recorder.write("music", mono_out.copy(), device_sr)
|
self.stem_recorder.write_sporadic("music", mono_out.copy(), device_sr)
|
||||||
self._crossfade_progress = end_progress
|
self._crossfade_progress = end_progress
|
||||||
|
|
||||||
if self._crossfade_progress >= 1.0:
|
if self._crossfade_progress >= 1.0:
|
||||||
@@ -763,7 +762,7 @@ class AudioService:
|
|||||||
mono_out = new_samples * self._music_volume
|
mono_out = new_samples * self._music_volume
|
||||||
outdata[:, channel_idx] = mono_out
|
outdata[:, channel_idx] = mono_out
|
||||||
if self.stem_recorder:
|
if self.stem_recorder:
|
||||||
self.stem_recorder.write("music", mono_out.copy(), device_sr)
|
self.stem_recorder.write_sporadic("music", mono_out.copy(), device_sr)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self._music_stream = sd.OutputStream(
|
self._music_stream = sd.OutputStream(
|
||||||
@@ -873,7 +872,7 @@ class AudioService:
|
|||||||
chunk = self._ad_resampled[self._ad_position:self._ad_position + frames]
|
chunk = self._ad_resampled[self._ad_position:self._ad_position + frames]
|
||||||
outdata[:, channel_idx] = chunk
|
outdata[:, channel_idx] = chunk
|
||||||
if self.stem_recorder:
|
if self.stem_recorder:
|
||||||
self.stem_recorder.write("ads", chunk.copy(), device_sr)
|
self.stem_recorder.write_sporadic("ads", chunk.copy(), device_sr)
|
||||||
self._ad_position += frames
|
self._ad_position += frames
|
||||||
else:
|
else:
|
||||||
if remaining > 0:
|
if remaining > 0:
|
||||||
|
|||||||
@@ -86,18 +86,28 @@ DEFAULT_VITS_SPEAKER = "p225"
|
|||||||
# Dennis, Dominus, Edward, Elizabeth, Hades, Hana, Julia, Luna, Mark, Olivia,
|
# Dennis, Dominus, Edward, Elizabeth, Hades, Hana, Julia, Luna, Mark, Olivia,
|
||||||
# Pixie, Priya, Ronald, Sarah, Shaun, Theodore, Timothy, Wendy
|
# Pixie, Priya, Ronald, Sarah, Shaun, Theodore, Timothy, Wendy
|
||||||
INWORLD_VOICES = {
|
INWORLD_VOICES = {
|
||||||
# Male voices - each caller gets a unique voice matching their personality
|
# Original voice IDs
|
||||||
"VR6AewLTigWG4xSOukaG": "Edward", # Tony - fast-talking, emphatic, streetwise
|
"VR6AewLTigWG4xSOukaG": "Edward", # Tony - fast-talking, emphatic, streetwise
|
||||||
"TxGEqnHWrfWFTfGW9XjX": "Shaun", # Rick - friendly, dynamic, conversational
|
"TxGEqnHWrfWFTfGW9XjX": "Shaun", # Rick - friendly, dynamic, conversational
|
||||||
"pNInz6obpgDQGcFmaJgB": "Alex", # Dennis - energetic, expressive, mildly nasal
|
"pNInz6obpgDQGcFmaJgB": "Alex", # Dennis - energetic, expressive, mildly nasal
|
||||||
"ODq5zmih8GrVes37Dizd": "Craig", # Earl - older British, refined, articulate
|
"ODq5zmih8GrVes37Dizd": "Craig", # Earl - older British, refined, articulate
|
||||||
"IKne3meq5aSn9XLyUdCD": "Timothy", # Marcus - lively, upbeat American
|
"IKne3meq5aSn9XLyUdCD": "Timothy", # Marcus/Jerome - lively, upbeat American
|
||||||
# Female voices - each caller gets a unique voice matching their personality
|
|
||||||
"jBpfuIE2acCO8z3wKNLl": "Hana", # Jasmine - bright, expressive young female
|
"jBpfuIE2acCO8z3wKNLl": "Hana", # Jasmine - bright, expressive young female
|
||||||
"EXAVITQu4vr4xnSDxMaL": "Ashley", # Megan - warm, natural female
|
"EXAVITQu4vr4xnSDxMaL": "Ashley", # Megan - warm, natural female
|
||||||
"21m00Tcm4TlvDq8ikWAM": "Wendy", # Tanya - posh, middle-aged British
|
"21m00Tcm4TlvDq8ikWAM": "Wendy", # Tanya - posh, middle-aged British
|
||||||
"XB0fDUnXU5powFXDhCwa": "Sarah", # Carla - fast-talking, questioning tone
|
"XB0fDUnXU5powFXDhCwa": "Sarah", # Carla - fast-talking, questioning tone
|
||||||
"pFZP5JQG7iQjIQuC4Bku": "Deborah", # Brenda - gentle, elegant
|
"pFZP5JQG7iQjIQuC4Bku": "Deborah", # Brenda (original) - gentle, elegant
|
||||||
|
# Regular caller voice IDs (backfilled)
|
||||||
|
"onwK4e9ZLuTAKqWW03F9": "Ronald", # Bobby - repo man
|
||||||
|
"FGY2WhTYpPnrIDTdsKH5": "Julia", # Carla (regular) - Jersey mom
|
||||||
|
"CwhRBWXzGAHq8TQ4Fs17": "Mark", # Leon - male caller
|
||||||
|
"SOYHLrjzK2X1ezoPC6cr": "Carter", # Carl - male caller
|
||||||
|
"N2lVS1w4EtoT3dr4eOWO": "Clive", # Reggie - male caller
|
||||||
|
"hpp4J3VqNfWAUOO0d1Us": "Olivia", # Brenda (regular) - ambulance driver
|
||||||
|
"nPczCjzI2devNBz1zQrb": "Theodore", # Keith - male caller
|
||||||
|
"JBFqnCBsd6RMkjVDRZzb": "Blake", # Andre - male caller
|
||||||
|
"TX3LPaxmHKxFdv7VOQHJ": "Dennis", # Rick (regular) - male caller
|
||||||
|
"cgSgspJ2msm6clMCkdW9": "Priya", # Megan (regular) - female caller
|
||||||
}
|
}
|
||||||
DEFAULT_INWORLD_VOICE = "Dennis"
|
DEFAULT_INWORLD_VOICE = "Dennis"
|
||||||
|
|
||||||
|
|||||||
@@ -60,9 +60,13 @@
|
|||||||
{
|
{
|
||||||
"summary": "Jerome, a police officer in Texas, called from a DQ parking lot worried about AI writing police reports after his son sent him an article suggesting it might replace him. Through the conversation, he moved from fear about accountability and accuracy in criminal cases to acknowledging that AI handling routine paperwork (like cattle complaints) could free him up to do more meaningful police work in his understaffed county, though he remains uncertain about where this technology will lead.",
|
"summary": "Jerome, a police officer in Texas, called from a DQ parking lot worried about AI writing police reports after his son sent him an article suggesting it might replace him. Through the conversation, he moved from fear about accountability and accuracy in criminal cases to acknowledging that AI handling routine paperwork (like cattle complaints) could free him up to do more meaningful police work in his understaffed county, though he remains uncertain about where this technology will lead.",
|
||||||
"timestamp": 1770692087.560522
|
"timestamp": 1770692087.560522
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"summary": "The caller described a turbulent couple of weeks, mentioning an issue with AI writing police reports, which he suggested was just the beginning of a larger problem. He seemed concerned about the developments and wanted to discuss the topic further with the host.",
|
||||||
|
"timestamp": 1770892192.893108
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"last_call": 1770692087.560523,
|
"last_call": 1770892192.89311,
|
||||||
"created_at": 1770692087.560523,
|
"created_at": 1770692087.560523,
|
||||||
"voice": "IKne3meq5aSn9XLyUdCD"
|
"voice": "IKne3meq5aSn9XLyUdCD"
|
||||||
},
|
},
|
||||||
@@ -242,9 +246,13 @@
|
|||||||
{
|
{
|
||||||
"summary": "Megan, a kindergarten teacher from the bootheel, called in after one of her students asked if stars know we're looking at them, which led her to reflect on how her sister Crystal in Flagstaff has stopped appreciating the night sky despite having access to it. The conversation took an unexpected turn when Luke challenged her to admit a gross habit, and after some prodding, she confessed to picking dry skin off her feet while watching TV and flicking it on the floor.",
|
"summary": "Megan, a kindergarten teacher from the bootheel, called in after one of her students asked if stars know we're looking at them, which led her to reflect on how her sister Crystal in Flagstaff has stopped appreciating the night sky despite having access to it. The conversation took an unexpected turn when Luke challenged her to admit a gross habit, and after some prodding, she confessed to picking dry skin off her feet while watching TV and flicking it on the floor.",
|
||||||
"timestamp": 1770870641.723117
|
"timestamp": 1770870641.723117
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"summary": "Here is a 1-2 sentence summary of the call:\n\nThe caller, Megan, is following up on a previous call about her sister Crystal, who lives in Flagstaff and has lost appreciation for the night sky. Megan seems eager to provide an update on the situation with her sister.",
|
||||||
|
"timestamp": 1770894505.175125
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"last_call": 1770870641.723117,
|
"last_call": 1770894505.175125,
|
||||||
"created_at": 1770870641.723117,
|
"created_at": 1770870641.723117,
|
||||||
"voice": "cgSgspJ2msm6clMCkdW9"
|
"voice": "cgSgspJ2msm6clMCkdW9"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,12 +1,17 @@
|
|||||||
/* AI Radio Show - Clean CSS */
|
/* AI Radio Show - Control Panel */
|
||||||
|
|
||||||
:root {
|
:root {
|
||||||
--bg: #1a1a2e;
|
--bg: #1a1209;
|
||||||
--bg-light: #252547;
|
--bg-light: #2a2015;
|
||||||
--accent: #e94560;
|
--bg-dark: #110c05;
|
||||||
--text: #fff;
|
--accent: #e8791d;
|
||||||
--text-muted: #888;
|
--accent-hover: #f59a4a;
|
||||||
--radius: 8px;
|
--accent-red: #cc2222;
|
||||||
|
--accent-green: #5a8a3c;
|
||||||
|
--text: #f5f0e5;
|
||||||
|
--text-muted: #9a8b78;
|
||||||
|
--radius: 12px;
|
||||||
|
--radius-sm: 8px;
|
||||||
}
|
}
|
||||||
|
|
||||||
* {
|
* {
|
||||||
@@ -16,7 +21,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
body {
|
body {
|
||||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
min-height: 100vh;
|
min-height: 100vh;
|
||||||
@@ -38,6 +43,8 @@ header {
|
|||||||
|
|
||||||
header h1 {
|
header h1 {
|
||||||
font-size: 1.5rem;
|
font-size: 1.5rem;
|
||||||
|
font-weight: 700;
|
||||||
|
color: var(--accent);
|
||||||
}
|
}
|
||||||
|
|
||||||
.header-buttons {
|
.header-buttons {
|
||||||
@@ -48,10 +55,16 @@ header h1 {
|
|||||||
header button {
|
header button {
|
||||||
background: var(--bg-light);
|
background: var(--bg-light);
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
border: none;
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
padding: 8px 16px;
|
padding: 8px 16px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
header button:hover {
|
||||||
|
background: #3a2e1f;
|
||||||
|
border-color: rgba(232, 121, 29, 0.3);
|
||||||
}
|
}
|
||||||
|
|
||||||
.on-air-btn {
|
.on-air-btn {
|
||||||
@@ -62,11 +75,14 @@ header button {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.on-air-btn.off {
|
.on-air-btn.off {
|
||||||
background: #666 !important;
|
background: #4a3d2e !important;
|
||||||
|
border-color: transparent !important;
|
||||||
|
color: var(--text-muted) !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.on-air-btn.on {
|
.on-air-btn.on {
|
||||||
background: #cc2222 !important;
|
background: var(--accent-red) !important;
|
||||||
|
border-color: var(--accent-red) !important;
|
||||||
animation: on-air-pulse 2s ease-in-out infinite;
|
animation: on-air-pulse 2s ease-in-out infinite;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,17 +95,27 @@ header button {
|
|||||||
font-weight: 700;
|
font-weight: 700;
|
||||||
text-transform: uppercase;
|
text-transform: uppercase;
|
||||||
letter-spacing: 0.05em;
|
letter-spacing: 0.05em;
|
||||||
background: #555 !important;
|
background: #4a3d2e !important;
|
||||||
|
color: var(--text-muted) !important;
|
||||||
|
border-color: transparent !important;
|
||||||
transition: background 0.2s;
|
transition: background 0.2s;
|
||||||
}
|
}
|
||||||
|
|
||||||
.rec-btn.recording {
|
.rec-btn.recording {
|
||||||
background: #cc2222 !important;
|
background: var(--accent-red) !important;
|
||||||
|
color: var(--text) !important;
|
||||||
|
border-color: var(--accent-red) !important;
|
||||||
animation: on-air-pulse 2s ease-in-out infinite;
|
animation: on-air-pulse 2s ease-in-out infinite;
|
||||||
}
|
}
|
||||||
|
|
||||||
.new-session-btn {
|
.new-session-btn {
|
||||||
background: var(--accent) !important;
|
background: var(--accent) !important;
|
||||||
|
border-color: var(--accent) !important;
|
||||||
|
color: #fff !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.new-session-btn:hover {
|
||||||
|
background: var(--accent-hover) !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.session-id {
|
.session-id {
|
||||||
@@ -102,7 +128,7 @@ details.caller-background {
|
|||||||
font-size: 0.85rem;
|
font-size: 0.85rem;
|
||||||
color: var(--text-muted);
|
color: var(--text-muted);
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
margin-bottom: 12px;
|
margin-bottom: 12px;
|
||||||
line-height: 1.4;
|
line-height: 1.4;
|
||||||
}
|
}
|
||||||
@@ -142,10 +168,14 @@ section {
|
|||||||
background: var(--bg-light);
|
background: var(--bg-light);
|
||||||
padding: 16px;
|
padding: 16px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius);
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.08);
|
||||||
}
|
}
|
||||||
|
|
||||||
section h2 {
|
section h2 {
|
||||||
font-size: 1rem;
|
font-size: 0.85rem;
|
||||||
|
font-weight: 700;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.05em;
|
||||||
margin-bottom: 12px;
|
margin-bottom: 12px;
|
||||||
color: var(--text-muted);
|
color: var(--text-muted);
|
||||||
}
|
}
|
||||||
@@ -163,7 +193,7 @@ section h2 {
|
|||||||
color: var(--text);
|
color: var(--text);
|
||||||
border: 2px solid transparent;
|
border: 2px solid transparent;
|
||||||
padding: 10px 8px;
|
padding: 10px 8px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-size: 0.85rem;
|
font-size: 0.85rem;
|
||||||
transition: all 0.2s;
|
transition: all 0.2s;
|
||||||
@@ -171,11 +201,13 @@ section h2 {
|
|||||||
|
|
||||||
.caller-btn:hover {
|
.caller-btn:hover {
|
||||||
border-color: var(--accent);
|
border-color: var(--accent);
|
||||||
|
background: #2a1e10;
|
||||||
}
|
}
|
||||||
|
|
||||||
.caller-btn.active {
|
.caller-btn.active {
|
||||||
background: var(--accent);
|
background: var(--accent);
|
||||||
border-color: var(--accent);
|
border-color: var(--accent);
|
||||||
|
color: #fff;
|
||||||
}
|
}
|
||||||
|
|
||||||
.call-status {
|
.call-status {
|
||||||
@@ -187,13 +219,18 @@ section h2 {
|
|||||||
|
|
||||||
.hangup-btn {
|
.hangup-btn {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
background: #c0392b;
|
background: var(--accent-red);
|
||||||
color: white;
|
color: white;
|
||||||
border: none;
|
border: none;
|
||||||
padding: 12px;
|
padding: 12px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
|
transition: background 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hangup-btn:hover {
|
||||||
|
background: #e03030;
|
||||||
}
|
}
|
||||||
|
|
||||||
.hangup-btn:disabled {
|
.hangup-btn:disabled {
|
||||||
@@ -215,25 +252,26 @@ section h2 {
|
|||||||
.chat-log {
|
.chat-log {
|
||||||
height: 300px;
|
height: 300px;
|
||||||
overflow-y: auto;
|
overflow-y: auto;
|
||||||
background: var(--bg);
|
background: var(--bg-dark);
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
padding: 12px;
|
padding: 12px;
|
||||||
margin-bottom: 12px;
|
margin-bottom: 12px;
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.06);
|
||||||
}
|
}
|
||||||
|
|
||||||
.message {
|
.message {
|
||||||
padding: 8px 12px;
|
padding: 8px 12px;
|
||||||
margin-bottom: 8px;
|
margin-bottom: 8px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
line-height: 1.4;
|
line-height: 1.4;
|
||||||
}
|
}
|
||||||
|
|
||||||
.message.host {
|
.message.host {
|
||||||
background: #2c5282;
|
background: #3a2510;
|
||||||
}
|
}
|
||||||
|
|
||||||
.message.caller {
|
.message.caller {
|
||||||
background: #553c9a;
|
background: #2a1a0a;
|
||||||
}
|
}
|
||||||
|
|
||||||
.message strong {
|
.message strong {
|
||||||
@@ -254,7 +292,7 @@ section h2 {
|
|||||||
color: white;
|
color: white;
|
||||||
border: none;
|
border: none;
|
||||||
padding: 16px;
|
padding: 16px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
font-size: 1rem;
|
font-size: 1rem;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
@@ -262,11 +300,11 @@ section h2 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.talk-btn:hover {
|
.talk-btn:hover {
|
||||||
filter: brightness(1.1);
|
background: var(--accent-hover);
|
||||||
}
|
}
|
||||||
|
|
||||||
.talk-btn.recording {
|
.talk-btn.recording {
|
||||||
background: #c0392b;
|
background: var(--accent-red);
|
||||||
animation: pulse 1s infinite;
|
animation: pulse 1s infinite;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -278,10 +316,15 @@ section h2 {
|
|||||||
.type-btn {
|
.type-btn {
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
border: none;
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
padding: 16px 24px;
|
padding: 16px 24px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.type-btn:hover {
|
||||||
|
border-color: var(--accent);
|
||||||
}
|
}
|
||||||
|
|
||||||
.status {
|
.status {
|
||||||
@@ -301,8 +344,8 @@ section h2 {
|
|||||||
padding: 10px;
|
padding: 10px;
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
border: none;
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
margin-bottom: 10px;
|
margin-bottom: 10px;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -315,14 +358,21 @@ section h2 {
|
|||||||
.music-controls button {
|
.music-controls button {
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
border: none;
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
padding: 10px 16px;
|
padding: 10px 16px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.music-controls button:hover {
|
||||||
|
border-color: var(--accent);
|
||||||
|
background: #2a1e10;
|
||||||
}
|
}
|
||||||
|
|
||||||
.music-controls input[type="range"] {
|
.music-controls input[type="range"] {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
|
accent-color: var(--accent);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Soundboard */
|
/* Soundboard */
|
||||||
@@ -335,9 +385,9 @@ section h2 {
|
|||||||
.sound-btn {
|
.sound-btn {
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
border: none;
|
border: 1px solid rgba(232, 121, 29, 0.1);
|
||||||
padding: 12px 8px;
|
padding: 12px 8px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-size: 0.8rem;
|
font-size: 0.8rem;
|
||||||
transition: all 0.1s;
|
transition: all 0.1s;
|
||||||
@@ -345,6 +395,8 @@ section h2 {
|
|||||||
|
|
||||||
.sound-btn:hover {
|
.sound-btn:hover {
|
||||||
background: var(--accent);
|
background: var(--accent);
|
||||||
|
border-color: var(--accent);
|
||||||
|
color: #fff;
|
||||||
}
|
}
|
||||||
|
|
||||||
.sound-btn:active {
|
.sound-btn:active {
|
||||||
@@ -372,17 +424,19 @@ section h2 {
|
|||||||
border-radius: var(--radius);
|
border-radius: var(--radius);
|
||||||
width: 90%;
|
width: 90%;
|
||||||
max-width: 400px;
|
max-width: 400px;
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
}
|
}
|
||||||
|
|
||||||
.modal-content h2 {
|
.modal-content h2 {
|
||||||
margin-bottom: 16px;
|
margin-bottom: 16px;
|
||||||
|
color: var(--accent);
|
||||||
}
|
}
|
||||||
|
|
||||||
.modal-content h3 {
|
.modal-content h3 {
|
||||||
font-size: 0.9rem;
|
font-size: 0.9rem;
|
||||||
color: var(--text-muted);
|
color: var(--text-muted);
|
||||||
margin: 16px 0 8px 0;
|
margin: 16px 0 8px 0;
|
||||||
border-bottom: 1px solid var(--bg);
|
border-bottom: 1px solid rgba(232, 121, 29, 0.1);
|
||||||
padding-bottom: 4px;
|
padding-bottom: 4px;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -436,11 +490,18 @@ section h2 {
|
|||||||
padding: 10px;
|
padding: 10px;
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
border: none;
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
margin-top: 4px;
|
margin-top: 4px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.modal-content select:focus,
|
||||||
|
.modal-content input[type="text"]:focus,
|
||||||
|
.modal-content textarea:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: var(--accent);
|
||||||
|
}
|
||||||
|
|
||||||
.modal-buttons {
|
.modal-buttons {
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 10px;
|
gap: 10px;
|
||||||
@@ -451,9 +512,10 @@ section h2 {
|
|||||||
flex: 1;
|
flex: 1;
|
||||||
padding: 12px;
|
padding: 12px;
|
||||||
border: none;
|
border: none;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
|
transition: all 0.2s;
|
||||||
}
|
}
|
||||||
|
|
||||||
.modal-buttons button:first-child {
|
.modal-buttons button:first-child {
|
||||||
@@ -461,25 +523,32 @@ section h2 {
|
|||||||
color: white;
|
color: white;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.modal-buttons button:first-child:hover {
|
||||||
|
background: var(--accent-hover);
|
||||||
|
}
|
||||||
|
|
||||||
.modal-buttons button:last-child {
|
.modal-buttons button:last-child {
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
}
|
}
|
||||||
|
|
||||||
.refresh-btn {
|
.refresh-btn {
|
||||||
background: var(--bg);
|
background: var(--bg);
|
||||||
color: var(--text-muted);
|
color: var(--text-muted);
|
||||||
border: 1px solid var(--bg-light);
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
padding: 6px 12px;
|
padding: 6px 12px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-size: 0.85rem;
|
font-size: 0.85rem;
|
||||||
margin-top: 8px;
|
margin-top: 8px;
|
||||||
|
transition: all 0.2s;
|
||||||
}
|
}
|
||||||
|
|
||||||
.refresh-btn:hover {
|
.refresh-btn:hover {
|
||||||
background: var(--bg-light);
|
background: var(--bg-light);
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
|
border-color: var(--accent);
|
||||||
}
|
}
|
||||||
|
|
||||||
.refresh-btn:disabled {
|
.refresh-btn:disabled {
|
||||||
@@ -522,28 +591,29 @@ section h2 {
|
|||||||
.server-btn {
|
.server-btn {
|
||||||
border: none;
|
border: none;
|
||||||
padding: 6px 12px;
|
padding: 6px 12px;
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-size: 0.85rem;
|
font-size: 0.85rem;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
|
transition: all 0.2s;
|
||||||
}
|
}
|
||||||
|
|
||||||
.server-btn.restart {
|
.server-btn.restart {
|
||||||
background: #2196F3;
|
background: var(--accent);
|
||||||
color: white;
|
color: white;
|
||||||
}
|
}
|
||||||
|
|
||||||
.server-btn.restart:hover {
|
.server-btn.restart:hover {
|
||||||
background: #1976D2;
|
background: var(--accent-hover);
|
||||||
}
|
}
|
||||||
|
|
||||||
.server-btn.stop {
|
.server-btn.stop {
|
||||||
background: #c0392b;
|
background: var(--accent-red);
|
||||||
color: white;
|
color: white;
|
||||||
}
|
}
|
||||||
|
|
||||||
.server-btn.stop:hover {
|
.server-btn.stop:hover {
|
||||||
background: #a93226;
|
background: #e03030;
|
||||||
}
|
}
|
||||||
|
|
||||||
.auto-scroll-label {
|
.auto-scroll-label {
|
||||||
@@ -555,16 +625,21 @@ section h2 {
|
|||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.auto-scroll-label input[type="checkbox"] {
|
||||||
|
accent-color: var(--accent);
|
||||||
|
}
|
||||||
|
|
||||||
.server-log {
|
.server-log {
|
||||||
height: 200px;
|
height: 200px;
|
||||||
overflow-y: auto;
|
overflow-y: auto;
|
||||||
background: #0d0d1a;
|
background: var(--bg-dark);
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius-sm);
|
||||||
padding: 12px;
|
padding: 12px;
|
||||||
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
|
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
|
||||||
font-size: 0.75rem;
|
font-size: 0.75rem;
|
||||||
line-height: 1.5;
|
line-height: 1.5;
|
||||||
color: #8f8;
|
color: #b8a88a;
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.06);
|
||||||
}
|
}
|
||||||
|
|
||||||
.server-log .log-line {
|
.server-log .log-line {
|
||||||
@@ -573,69 +648,70 @@ section h2 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.server-log .log-line.error {
|
.server-log .log-line.error {
|
||||||
color: #f88;
|
color: #e8604a;
|
||||||
}
|
}
|
||||||
|
|
||||||
.server-log .log-line.warning {
|
.server-log .log-line.warning {
|
||||||
color: #ff8;
|
color: #e8b84a;
|
||||||
}
|
}
|
||||||
|
|
||||||
.server-log .log-line.tts {
|
.server-log .log-line.tts {
|
||||||
color: #8ff;
|
color: var(--accent);
|
||||||
}
|
}
|
||||||
|
|
||||||
.server-log .log-line.chat {
|
.server-log .log-line.chat {
|
||||||
color: #f8f;
|
color: var(--accent-hover);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Call Queue */
|
/* Call Queue */
|
||||||
.queue-section { margin: 1rem 0; }
|
.queue-section { margin: 1rem 0; }
|
||||||
.call-queue { border: 1px solid #333; border-radius: 4px; padding: 0.5rem; max-height: 150px; overflow-y: auto; }
|
.call-queue { border: 1px solid rgba(232, 121, 29, 0.15); border-radius: var(--radius-sm); padding: 0.5rem; max-height: 150px; overflow-y: auto; }
|
||||||
.queue-empty { color: #666; text-align: center; padding: 0.5rem; }
|
.queue-empty { color: var(--text-muted); text-align: center; padding: 0.5rem; }
|
||||||
.queue-item { display: flex; align-items: center; gap: 0.75rem; padding: 0.4rem 0.5rem; border-bottom: 1px solid #222; }
|
.queue-item { display: flex; align-items: center; gap: 0.75rem; padding: 0.4rem 0.5rem; border-bottom: 1px solid rgba(232, 121, 29, 0.08); flex-wrap: wrap; }
|
||||||
.queue-item:last-child { border-bottom: none; }
|
.queue-item:last-child { border-bottom: none; }
|
||||||
.queue-phone { font-family: monospace; color: #4fc3f7; }
|
.queue-phone { font-family: monospace; color: var(--accent); }
|
||||||
.queue-wait { color: #999; font-size: 0.85rem; flex: 1; }
|
.queue-wait { color: var(--text-muted); font-size: 0.85rem; flex: 1; }
|
||||||
.queue-take-btn { background: #2e7d32; color: white; border: none; padding: 0.25rem 0.75rem; border-radius: 3px; cursor: pointer; }
|
.queue-take-btn { background: var(--accent-green); color: white; border: none; padding: 0.25rem 0.75rem; border-radius: var(--radius-sm); cursor: pointer; transition: background 0.2s; }
|
||||||
.queue-take-btn:hover { background: #388e3c; }
|
.queue-take-btn:hover { background: #6a9a4c; }
|
||||||
.queue-drop-btn { background: #c62828; color: white; border: none; padding: 0.25rem 0.5rem; border-radius: 3px; cursor: pointer; }
|
.queue-drop-btn { background: var(--accent-red); color: white; border: none; padding: 0.25rem 0.5rem; border-radius: var(--radius-sm); cursor: pointer; transition: background 0.2s; }
|
||||||
.queue-drop-btn:hover { background: #d32f2f; }
|
.queue-drop-btn:hover { background: #e03030; }
|
||||||
|
|
||||||
/* Active Call Indicator */
|
/* Active Call Indicator */
|
||||||
.active-call { border: 1px solid #444; border-radius: 4px; padding: 0.75rem; margin: 0.5rem 0; background: #1a1a2e; }
|
.active-call { border: 1px solid rgba(232, 121, 29, 0.15); border-radius: var(--radius-sm); padding: 0.75rem; margin: 0.5rem 0; background: var(--bg); }
|
||||||
.caller-info { display: flex; align-items: center; gap: 0.5rem; margin-bottom: 0.5rem; }
|
.caller-info { display: flex; align-items: center; gap: 0.5rem; margin-bottom: 0.5rem; }
|
||||||
.caller-info:last-of-type { margin-bottom: 0; }
|
.caller-info:last-of-type { margin-bottom: 0; }
|
||||||
.caller-type { font-size: 0.7rem; font-weight: bold; padding: 0.15rem 0.4rem; border-radius: 3px; text-transform: uppercase; }
|
.caller-type { font-size: 0.7rem; font-weight: bold; padding: 0.15rem 0.4rem; border-radius: var(--radius-sm); text-transform: uppercase; }
|
||||||
.caller-type.real { background: #c62828; color: white; }
|
.caller-type.real { background: var(--accent-red); color: white; }
|
||||||
.caller-type.ai { background: #1565c0; color: white; }
|
.caller-type.ai { background: var(--accent); color: white; }
|
||||||
.channel-badge { font-size: 0.75rem; color: #999; background: #222; padding: 0.1rem 0.4rem; border-radius: 3px; }
|
.channel-badge { font-size: 0.75rem; color: var(--text-muted); background: var(--bg-light); padding: 0.1rem 0.4rem; border-radius: var(--radius-sm); }
|
||||||
.call-duration { font-family: monospace; color: #4fc3f7; }
|
.call-duration { font-family: monospace; color: var(--accent); }
|
||||||
.ai-controls { display: flex; align-items: center; gap: 0.5rem; margin-left: auto; }
|
.ai-controls { display: flex; align-items: center; gap: 0.5rem; margin-left: auto; }
|
||||||
.mode-toggle { display: flex; border: 1px solid #444; border-radius: 3px; overflow: hidden; }
|
.mode-toggle { display: flex; border: 1px solid rgba(232, 121, 29, 0.2); border-radius: var(--radius-sm); overflow: hidden; }
|
||||||
.mode-btn { background: #222; color: #999; border: none; padding: 0.2rem 0.5rem; font-size: 0.75rem; cursor: pointer; }
|
.mode-btn { background: var(--bg-light); color: var(--text-muted); border: none; padding: 0.2rem 0.5rem; font-size: 0.75rem; cursor: pointer; transition: all 0.2s; }
|
||||||
.mode-btn.active { background: #1565c0; color: white; }
|
.mode-btn.active { background: var(--accent); color: white; }
|
||||||
.respond-btn { background: #2e7d32; color: white; border: none; padding: 0.25rem 0.75rem; border-radius: 3px; font-size: 0.8rem; cursor: pointer; }
|
.respond-btn { background: var(--accent-green); color: white; border: none; padding: 0.25rem 0.75rem; border-radius: var(--radius-sm); font-size: 0.8rem; cursor: pointer; transition: background 0.2s; }
|
||||||
|
.respond-btn:hover { background: #6a9a4c; }
|
||||||
.hangup-btn.small { font-size: 0.75rem; padding: 0.2rem 0.5rem; }
|
.hangup-btn.small { font-size: 0.75rem; padding: 0.2rem 0.5rem; }
|
||||||
.auto-followup-label { display: flex; align-items: center; gap: 0.4rem; font-size: 0.8rem; color: #999; margin-top: 0.5rem; }
|
.auto-followup-label { display: flex; align-items: center; gap: 0.4rem; font-size: 0.8rem; color: var(--text-muted); margin-top: 0.5rem; }
|
||||||
|
|
||||||
/* Returning Caller */
|
/* Returning Caller */
|
||||||
.caller-btn.returning {
|
.caller-btn.returning {
|
||||||
border-color: #f9a825;
|
border-color: var(--accent);
|
||||||
color: #f9a825;
|
color: var(--accent);
|
||||||
}
|
}
|
||||||
|
|
||||||
.caller-btn.returning:hover {
|
.caller-btn.returning:hover {
|
||||||
border-color: #fdd835;
|
border-color: var(--accent-hover);
|
||||||
|
color: var(--accent-hover);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Screening Badges */
|
/* Screening Badges */
|
||||||
.screening-badge { font-size: 0.7rem; padding: 0.1rem 0.4rem; border-radius: 3px; font-weight: bold; }
|
.screening-badge { font-size: 0.7rem; padding: 0.1rem 0.4rem; border-radius: var(--radius-sm); font-weight: bold; }
|
||||||
.screening-badge.screening { background: #e65100; color: white; animation: pulse 1.5s infinite; }
|
.screening-badge.screening { background: var(--accent); color: white; animation: pulse 1.5s infinite; }
|
||||||
.screening-badge.screened { background: #2e7d32; color: white; }
|
.screening-badge.screened { background: var(--accent-green); color: white; }
|
||||||
.screening-summary { font-size: 0.8rem; color: #aaa; font-style: italic; flex-basis: 100%; margin-top: 0.2rem; }
|
.screening-summary { font-size: 0.8rem; color: var(--text-muted); font-style: italic; flex-basis: 100%; margin-top: 0.2rem; }
|
||||||
.queue-item { flex-wrap: wrap; }
|
|
||||||
|
|
||||||
/* Three-Party Chat */
|
/* Three-Party Chat */
|
||||||
.message.real-caller { border-left: 3px solid #c62828; padding-left: 0.5rem; }
|
.message.real-caller { border-left: 3px solid var(--accent-red); padding-left: 0.5rem; }
|
||||||
.message.ai-caller { border-left: 3px solid #1565c0; padding-left: 0.5rem; }
|
.message.ai-caller { border-left: 3px solid var(--accent); padding-left: 0.5rem; }
|
||||||
.message.host { border-left: 3px solid #2e7d32; padding-left: 0.5rem; }
|
.message.host { border-left: 3px solid var(--accent-green); padding-left: 0.5rem; }
|
||||||
|
|||||||
@@ -101,17 +101,10 @@ function initEventListeners() {
|
|||||||
if (recBtn) {
|
if (recBtn) {
|
||||||
recBtn.addEventListener('click', async () => {
|
recBtn.addEventListener('click', async () => {
|
||||||
try {
|
try {
|
||||||
if (!stemRecording) {
|
const res = await safeFetch('/api/recording/toggle', { method: 'POST' });
|
||||||
const res = await safeFetch('/api/recording/start', { method: 'POST' });
|
updateRecBtn(res.recording);
|
||||||
updateRecBtn(true);
|
|
||||||
if (onAirBtn) updateOnAirBtn(onAirBtn, res.on_air);
|
if (onAirBtn) updateOnAirBtn(onAirBtn, res.on_air);
|
||||||
log('Recording started + ON AIR: ' + res.dir);
|
log(res.recording ? 'Recording started + ON AIR' : 'Recording stopped + OFF AIR');
|
||||||
} else {
|
|
||||||
const res = await safeFetch('/api/recording/stop', { method: 'POST' });
|
|
||||||
updateRecBtn(false);
|
|
||||||
if (onAirBtn) updateOnAirBtn(onAirBtn, res.on_air);
|
|
||||||
log('Recording stopped + OFF AIR');
|
|
||||||
}
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
log('Recording error: ' + err.message);
|
log('Recording error: ' + err.message);
|
||||||
}
|
}
|
||||||
|
|||||||
665
postprod.py
665
postprod.py
@@ -61,23 +61,30 @@ def compute_rms(audio: np.ndarray, window_samples: int) -> np.ndarray:
|
|||||||
|
|
||||||
|
|
||||||
def remove_gaps(stems: dict[str, np.ndarray], sr: int,
|
def remove_gaps(stems: dict[str, np.ndarray], sr: int,
|
||||||
threshold_s: float = 1.5, crossfade_ms: float = 30) -> dict[str, np.ndarray]:
|
threshold_s: float = 2.0, max_gap_s: float = 8.0,
|
||||||
|
crossfade_ms: float = 30, pad_s: float = 0.5) -> dict[str, np.ndarray]:
|
||||||
window_ms = 50
|
window_ms = 50
|
||||||
window_samples = int(sr * window_ms / 1000)
|
window_samples = int(sr * window_ms / 1000)
|
||||||
crossfade_samples = int(sr * crossfade_ms / 1000)
|
crossfade_samples = int(sr * crossfade_ms / 1000)
|
||||||
|
|
||||||
dialog = stems["host"] + stems["caller"]
|
# Detect gaps in everything except music (which always plays).
|
||||||
rms = compute_rms(dialog, window_samples)
|
# This catches TTS latency gaps while protecting ad breaks and SFX transitions.
|
||||||
|
content = stems["host"] + stems["caller"] + stems["sfx"] + stems["ads"]
|
||||||
|
rms = compute_rms(content, window_samples)
|
||||||
|
|
||||||
# Threshold: -60dB or adaptive based on mean RMS
|
# Threshold: percentile-based to sit above the mic noise floor
|
||||||
mean_rms = np.mean(rms[rms > 0]) if np.any(rms > 0) else 1e-4
|
nonzero_rms = rms[rms > 0]
|
||||||
silence_thresh = min(mean_rms * 0.05, 0.001)
|
if len(nonzero_rms) == 0:
|
||||||
|
print(" No audio detected")
|
||||||
|
return stems
|
||||||
|
noise_floor = np.percentile(nonzero_rms, 20)
|
||||||
|
silence_thresh = noise_floor * 3
|
||||||
|
|
||||||
# Find silent regions
|
|
||||||
is_silent = rms < silence_thresh
|
is_silent = rms < silence_thresh
|
||||||
min_silent_windows = int(threshold_s / (window_ms / 1000))
|
min_silent_windows = int(threshold_s / (window_ms / 1000))
|
||||||
|
max_silent_windows = int(max_gap_s / (window_ms / 1000))
|
||||||
|
|
||||||
# Build list of regions to cut (in samples)
|
# Only cut gaps between threshold-8s — targets TTS latency, not long breaks
|
||||||
cuts = []
|
cuts = []
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(is_silent):
|
while i < len(is_silent):
|
||||||
@@ -86,10 +93,11 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int,
|
|||||||
while i < len(is_silent) and is_silent[i]:
|
while i < len(is_silent) and is_silent[i]:
|
||||||
i += 1
|
i += 1
|
||||||
length = i - start
|
length = i - start
|
||||||
if length >= min_silent_windows:
|
if min_silent_windows <= length <= max_silent_windows:
|
||||||
# Keep a small buffer at edges
|
# Leave pad_s of silence so the edit sounds natural
|
||||||
cut_start = (start + 1) * window_samples
|
pad_samples = int(pad_s * sr)
|
||||||
cut_end = (i - 1) * window_samples
|
cut_start = (start + 1) * window_samples + pad_samples
|
||||||
|
cut_end = (i - 1) * window_samples - pad_samples
|
||||||
if cut_end > cut_start + crossfade_samples * 2:
|
if cut_end > cut_start + crossfade_samples * 2:
|
||||||
cuts.append((cut_start, cut_end))
|
cuts.append((cut_start, cut_end))
|
||||||
else:
|
else:
|
||||||
@@ -102,18 +110,18 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int,
|
|||||||
total_cut = sum(end - start for start, end in cuts) / sr
|
total_cut = sum(end - start for start, end in cuts) / sr
|
||||||
print(f" Removing {len(cuts)} gaps ({total_cut:.1f}s total)")
|
print(f" Removing {len(cuts)} gaps ({total_cut:.1f}s total)")
|
||||||
|
|
||||||
# Apply cuts to dialog stems (host, caller, sfx, ads) — not music
|
# Cut dialog/sfx/ads at gap points. Leave music uncut — just trim to fit.
|
||||||
cut_stems = ["host", "caller", "sfx", "ads"]
|
|
||||||
result = {}
|
result = {}
|
||||||
|
|
||||||
for name in cut_stems:
|
for name in STEM_NAMES:
|
||||||
|
if name == "music":
|
||||||
|
continue # handled below
|
||||||
audio = stems[name]
|
audio = stems[name]
|
||||||
pieces = []
|
pieces = []
|
||||||
prev_end = 0
|
prev_end = 0
|
||||||
for cut_start, cut_end in cuts:
|
for cut_start, cut_end in cuts:
|
||||||
if prev_end < cut_start:
|
if prev_end < cut_start:
|
||||||
piece = audio[prev_end:cut_start].copy()
|
piece = audio[prev_end:cut_start].copy()
|
||||||
# Apply crossfade at join point
|
|
||||||
if pieces and len(piece) > crossfade_samples:
|
if pieces and len(piece) > crossfade_samples:
|
||||||
fade_in = np.linspace(0, 1, crossfade_samples, dtype=np.float32)
|
fade_in = np.linspace(0, 1, crossfade_samples, dtype=np.float32)
|
||||||
piece[:crossfade_samples] *= fade_in
|
piece[:crossfade_samples] *= fade_in
|
||||||
@@ -135,18 +143,143 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int,
|
|||||||
|
|
||||||
result[name] = np.concatenate(pieces) if pieces else np.array([], dtype=np.float32)
|
result[name] = np.concatenate(pieces) if pieces else np.array([], dtype=np.float32)
|
||||||
|
|
||||||
# Trim music to match new duration, with fade-out at end
|
# Music: leave uncut, just trim to match new duration with fade-out
|
||||||
new_len = len(result["host"])
|
new_len = len(result["host"])
|
||||||
music = stems["music"][:new_len].copy() if len(stems["music"]) >= new_len else np.pad(stems["music"], (0, max(0, new_len - len(stems["music"]))))
|
music = stems["music"]
|
||||||
fade_samples = int(sr * 2) # 2s fade out
|
if len(music) >= new_len:
|
||||||
|
music = music[:new_len].copy()
|
||||||
|
else:
|
||||||
|
music = np.pad(music, (0, new_len - len(music)))
|
||||||
|
fade_samples = int(sr * 3)
|
||||||
if len(music) > fade_samples:
|
if len(music) > fade_samples:
|
||||||
fade_out = np.linspace(1, 0, fade_samples, dtype=np.float32)
|
music[-fade_samples:] *= np.linspace(1, 0, fade_samples, dtype=np.float32)
|
||||||
music[-fade_samples:] *= fade_out
|
|
||||||
result["music"] = music
|
result["music"] = music
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def denoise(audio: np.ndarray, sr: int, tmp_dir: Path) -> np.ndarray:
|
||||||
|
"""HPF to cut rumble below 80Hz (plosives, HVAC, handling noise)."""
|
||||||
|
in_path = tmp_dir / "host_pre_denoise.wav"
|
||||||
|
out_path = tmp_dir / "host_post_denoise.wav"
|
||||||
|
sf.write(str(in_path), audio, sr)
|
||||||
|
|
||||||
|
af = "highpass=f=80:poles=2"
|
||||||
|
cmd = ["ffmpeg", "-y", "-i", str(in_path), "-af", af, str(out_path)]
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
|
if result.returncode != 0:
|
||||||
|
print(f" WARNING: denoise failed: {result.stderr[:200]}")
|
||||||
|
return audio
|
||||||
|
|
||||||
|
denoised, _ = sf.read(str(out_path), dtype="float32")
|
||||||
|
return denoised
|
||||||
|
|
||||||
|
|
||||||
|
def deess(audio: np.ndarray, sr: int, tmp_dir: Path) -> np.ndarray:
|
||||||
|
"""Reduce sibilance (harsh s/sh/ch sounds) in voice audio."""
|
||||||
|
in_path = tmp_dir / "host_pre_deess.wav"
|
||||||
|
out_path = tmp_dir / "host_post_deess.wav"
|
||||||
|
sf.write(str(in_path), audio, sr)
|
||||||
|
|
||||||
|
# Gentle high-shelf reduction at 5kHz (-4dB) to tame sibilance
|
||||||
|
# Single-pass, no phase issues unlike split-band approaches
|
||||||
|
af = "equalizer=f=5500:t=h:w=2000:g=-4"
|
||||||
|
cmd = ["ffmpeg", "-y", "-i", str(in_path), "-af", af, str(out_path)]
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
|
if result.returncode != 0:
|
||||||
|
print(f" WARNING: de-essing failed: {result.stderr[:200]}")
|
||||||
|
return audio
|
||||||
|
|
||||||
|
deessed, _ = sf.read(str(out_path), dtype="float32")
|
||||||
|
return deessed
|
||||||
|
|
||||||
|
|
||||||
|
def reduce_breaths(audio: np.ndarray, sr: int, reduction_db: float = -12) -> np.ndarray:
|
||||||
|
"""Reduce loud breaths between speech phrases."""
|
||||||
|
window_ms = 30
|
||||||
|
window_samples = int(sr * window_ms / 1000)
|
||||||
|
rms = compute_rms(audio, window_samples)
|
||||||
|
|
||||||
|
if not np.any(rms > 0):
|
||||||
|
return audio
|
||||||
|
|
||||||
|
# Speech threshold: breaths are quieter than speech but louder than silence
|
||||||
|
nonzero = rms[rms > 0]
|
||||||
|
speech_level = np.percentile(nonzero, 70)
|
||||||
|
silence_level = np.percentile(nonzero, 10)
|
||||||
|
breath_upper = speech_level * 0.3 # below 30% of speech level
|
||||||
|
breath_lower = silence_level * 2 # above 2x silence
|
||||||
|
|
||||||
|
if breath_upper <= breath_lower:
|
||||||
|
return audio
|
||||||
|
|
||||||
|
# Detect breath-length bursts (0.15-0.8s) in the breath amplitude range
|
||||||
|
min_windows = int(150 / window_ms)
|
||||||
|
max_windows = int(800 / window_ms)
|
||||||
|
|
||||||
|
breath_gain = 10 ** (reduction_db / 20)
|
||||||
|
gain_envelope = np.ones(len(rms), dtype=np.float32)
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
breath_count = 0
|
||||||
|
while i < len(rms):
|
||||||
|
if breath_lower < rms[i] < breath_upper:
|
||||||
|
start = i
|
||||||
|
while i < len(rms) and breath_lower < rms[i] < breath_upper:
|
||||||
|
i += 1
|
||||||
|
length = i - start
|
||||||
|
if min_windows <= length <= max_windows:
|
||||||
|
gain_envelope[start:i] = breath_gain
|
||||||
|
breath_count += 1
|
||||||
|
else:
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
if breath_count == 0:
|
||||||
|
return audio
|
||||||
|
|
||||||
|
print(f" Reduced {breath_count} breaths by {reduction_db}dB")
|
||||||
|
|
||||||
|
# Smooth transitions (10ms ramp)
|
||||||
|
ramp = max(1, int(10 / window_ms))
|
||||||
|
smoothed = gain_envelope.copy()
|
||||||
|
for i in range(1, len(smoothed)):
|
||||||
|
if smoothed[i] < smoothed[i - 1]:
|
||||||
|
smoothed[i] = smoothed[i - 1] + (smoothed[i] - smoothed[i - 1]) / ramp
|
||||||
|
elif smoothed[i] > smoothed[i - 1]:
|
||||||
|
smoothed[i] = smoothed[i - 1] + (smoothed[i] - smoothed[i - 1]) / ramp
|
||||||
|
|
||||||
|
# Expand to sample level
|
||||||
|
gain_samples = np.repeat(smoothed, window_samples)[:len(audio)]
|
||||||
|
if len(gain_samples) < len(audio):
|
||||||
|
gain_samples = np.pad(gain_samples, (0, len(audio) - len(gain_samples)), constant_values=1.0)
|
||||||
|
|
||||||
|
return (audio * gain_samples).astype(np.float32)
|
||||||
|
|
||||||
|
|
||||||
|
def limit_stem(audio: np.ndarray, sr: int, tmp_dir: Path,
|
||||||
|
stem_name: str) -> np.ndarray:
|
||||||
|
"""Hard-limit a stem to -1dB true peak to prevent clipping."""
|
||||||
|
peak = np.max(np.abs(audio))
|
||||||
|
if peak <= 0.89: # already below -1dB
|
||||||
|
return audio
|
||||||
|
in_path = tmp_dir / f"{stem_name}_pre_limit.wav"
|
||||||
|
out_path = tmp_dir / f"{stem_name}_post_limit.wav"
|
||||||
|
sf.write(str(in_path), audio, sr)
|
||||||
|
cmd = [
|
||||||
|
"ffmpeg", "-y", "-i", str(in_path),
|
||||||
|
"-af", "alimiter=limit=-1dB:level=false:attack=0.1:release=50",
|
||||||
|
str(out_path),
|
||||||
|
]
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
|
if result.returncode != 0:
|
||||||
|
print(f" WARNING: limiting failed for {stem_name}: {result.stderr[:200]}")
|
||||||
|
return audio
|
||||||
|
limited, _ = sf.read(str(out_path), dtype="float32")
|
||||||
|
peak_db = 20 * np.log10(peak)
|
||||||
|
print(f" {stem_name}: peak was {peak_db:+.1f}dB, limited to -1dB")
|
||||||
|
return limited
|
||||||
|
|
||||||
|
|
||||||
def compress_voice(audio: np.ndarray, sr: int, tmp_dir: Path,
|
def compress_voice(audio: np.ndarray, sr: int, tmp_dir: Path,
|
||||||
stem_name: str) -> np.ndarray:
|
stem_name: str) -> np.ndarray:
|
||||||
in_path = tmp_dir / f"{stem_name}_pre_comp.wav"
|
in_path = tmp_dir / f"{stem_name}_pre_comp.wav"
|
||||||
@@ -154,9 +287,15 @@ def compress_voice(audio: np.ndarray, sr: int, tmp_dir: Path,
|
|||||||
|
|
||||||
sf.write(str(in_path), audio, sr)
|
sf.write(str(in_path), audio, sr)
|
||||||
|
|
||||||
|
if stem_name == "host":
|
||||||
|
# Spoken word compression: lower threshold, higher ratio, more makeup
|
||||||
|
af = "acompressor=threshold=-28dB:ratio=4:attack=5:release=600:makeup=8dB"
|
||||||
|
else:
|
||||||
|
af = "acompressor=threshold=-24dB:ratio=2.5:attack=10:release=800:makeup=6dB"
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
"ffmpeg", "-y", "-i", str(in_path),
|
"ffmpeg", "-y", "-i", str(in_path),
|
||||||
"-af", "acompressor=threshold=-24dB:ratio=3:attack=5:release=100:makeup=6dB",
|
"-af", af,
|
||||||
str(out_path),
|
str(out_path),
|
||||||
]
|
]
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
@@ -168,9 +307,32 @@ def compress_voice(audio: np.ndarray, sr: int, tmp_dir: Path,
|
|||||||
return compressed
|
return compressed
|
||||||
|
|
||||||
|
|
||||||
|
def phone_eq(audio: np.ndarray, sr: int, tmp_dir: Path) -> np.ndarray:
|
||||||
|
"""Apply telephone EQ to make caller sound like a phone call."""
|
||||||
|
in_path = tmp_dir / "caller_pre_phone.wav"
|
||||||
|
out_path = tmp_dir / "caller_post_phone.wav"
|
||||||
|
sf.write(str(in_path), audio, sr)
|
||||||
|
|
||||||
|
# Bandpass 300-3400Hz (telephone bandwidth) + slight mid boost for presence
|
||||||
|
af = (
|
||||||
|
"highpass=f=300:poles=2,"
|
||||||
|
"lowpass=f=3400:poles=2,"
|
||||||
|
"equalizer=f=1000:t=q:w=0.8:g=4"
|
||||||
|
)
|
||||||
|
cmd = ["ffmpeg", "-y", "-i", str(in_path), "-af", af, str(out_path)]
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
|
if result.returncode != 0:
|
||||||
|
print(f" WARNING: phone EQ failed: {result.stderr[:200]}")
|
||||||
|
return audio
|
||||||
|
|
||||||
|
filtered, _ = sf.read(str(out_path), dtype="float32")
|
||||||
|
return filtered
|
||||||
|
|
||||||
|
|
||||||
def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int,
|
def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int,
|
||||||
duck_db: float = -12, attack_ms: float = 200,
|
duck_db: float = -20, attack_ms: float = 200,
|
||||||
release_ms: float = 500) -> np.ndarray:
|
release_ms: float = 3000,
|
||||||
|
mute_signal: np.ndarray | None = None) -> np.ndarray:
|
||||||
window_ms = 50
|
window_ms = 50
|
||||||
window_samples = int(sr * window_ms / 1000)
|
window_samples = int(sr * window_ms / 1000)
|
||||||
rms = compute_rms(dialog, window_samples)
|
rms = compute_rms(dialog, window_samples)
|
||||||
@@ -184,6 +346,22 @@ def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int,
|
|||||||
is_speech = rms > speech_thresh
|
is_speech = rms > speech_thresh
|
||||||
target_gain = np.where(is_speech, duck_gain, 1.0).astype(np.float32)
|
target_gain = np.where(is_speech, duck_gain, 1.0).astype(np.float32)
|
||||||
|
|
||||||
|
# Mute music completely during ads with lookahead and tail
|
||||||
|
if mute_signal is not None:
|
||||||
|
mute_rms = compute_rms(mute_signal, window_samples)
|
||||||
|
mute_thresh = np.mean(mute_rms[mute_rms > 0]) * 0.1 if np.any(mute_rms > 0) else 1e-4
|
||||||
|
is_ads = mute_rms > mute_thresh
|
||||||
|
# Expand ad regions: 2s before (fade out music before ad) and 2s after (don't resume immediately)
|
||||||
|
lookahead_windows = int(2000 / window_ms)
|
||||||
|
tail_windows = int(2000 / window_ms)
|
||||||
|
expanded_ads = is_ads.copy()
|
||||||
|
for i in range(len(is_ads)):
|
||||||
|
if is_ads[i]:
|
||||||
|
start = max(0, i - lookahead_windows)
|
||||||
|
end = min(len(expanded_ads), i + tail_windows + 1)
|
||||||
|
expanded_ads[start:end] = True
|
||||||
|
target_gain[expanded_ads] = 0.0
|
||||||
|
|
||||||
# Smooth the envelope
|
# Smooth the envelope
|
||||||
attack_windows = max(1, int(attack_ms / window_ms))
|
attack_windows = max(1, int(attack_ms / window_ms))
|
||||||
release_windows = max(1, int(release_ms / window_ms))
|
release_windows = max(1, int(release_ms / window_ms))
|
||||||
@@ -206,32 +384,255 @@ def apply_ducking(music: np.ndarray, dialog: np.ndarray, sr: int,
|
|||||||
return music * gain_samples
|
return music * gain_samples
|
||||||
|
|
||||||
|
|
||||||
|
def match_voice_levels(stems: dict[str, np.ndarray], target_rms: float = 0.1) -> dict[str, np.ndarray]:
|
||||||
|
"""Normalize host, caller, and ads stems to the same RMS level."""
|
||||||
|
for name in ["host", "caller", "ads"]:
|
||||||
|
audio = stems[name]
|
||||||
|
# Only measure non-silent portions
|
||||||
|
active = audio[np.abs(audio) > 0.001]
|
||||||
|
if len(active) == 0:
|
||||||
|
continue
|
||||||
|
current_rms = np.sqrt(np.mean(active ** 2))
|
||||||
|
if current_rms < 1e-6:
|
||||||
|
continue
|
||||||
|
gain = target_rms / current_rms
|
||||||
|
# Clamp gain to avoid extreme boosts on very quiet stems
|
||||||
|
gain = min(gain, 10.0)
|
||||||
|
stems[name] = np.clip(audio * gain, -1.0, 1.0).astype(np.float32)
|
||||||
|
db_change = 20 * np.log10(gain) if gain > 0 else 0
|
||||||
|
print(f" {name}: RMS {current_rms:.4f} -> {target_rms:.4f} ({db_change:+.1f}dB)")
|
||||||
|
return stems
|
||||||
|
|
||||||
|
|
||||||
def mix_stems(stems: dict[str, np.ndarray],
|
def mix_stems(stems: dict[str, np.ndarray],
|
||||||
levels: dict[str, float] | None = None) -> np.ndarray:
|
levels: dict[str, float] | None = None,
|
||||||
|
stereo_imaging: bool = True) -> np.ndarray:
|
||||||
if levels is None:
|
if levels is None:
|
||||||
levels = {"host": 0, "caller": 0, "music": -6, "sfx": -3, "ads": 0}
|
levels = {"host": 0, "caller": 0, "music": -6, "sfx": -10, "ads": 0}
|
||||||
|
|
||||||
gains = {name: 10 ** (db / 20) for name, db in levels.items()}
|
gains = {name: 10 ** (db / 20) for name, db in levels.items()}
|
||||||
|
|
||||||
# Find max length
|
|
||||||
max_len = max(len(s) for s in stems.values())
|
max_len = max(len(s) for s in stems.values())
|
||||||
|
|
||||||
|
if stereo_imaging:
|
||||||
|
# Pan positions: -1.0 = full left, 0.0 = center, 1.0 = full right
|
||||||
|
# Using constant-power panning law
|
||||||
|
pans = {"host": 0.0, "caller": 0.15, "music": 0.0, "sfx": 0.0, "ads": 0.0}
|
||||||
|
# Music gets stereo width via slight L/R decorrelation
|
||||||
|
music_width = 0.3
|
||||||
|
|
||||||
|
left = np.zeros(max_len, dtype=np.float64)
|
||||||
|
right = np.zeros(max_len, dtype=np.float64)
|
||||||
|
|
||||||
|
for name in STEM_NAMES:
|
||||||
|
audio = stems[name]
|
||||||
|
if len(audio) < max_len:
|
||||||
|
audio = np.pad(audio, (0, max_len - len(audio)))
|
||||||
|
signal = audio.astype(np.float64) * gains.get(name, 1.0)
|
||||||
|
|
||||||
|
if name == "music" and music_width > 0:
|
||||||
|
# Widen music: delay right channel by ~0.5ms for Haas effect
|
||||||
|
delay_samples = int(0.0005 * 44100) # ~22 samples at 44.1kHz
|
||||||
|
left += signal * (1 + music_width * 0.5)
|
||||||
|
right_delayed = np.zeros_like(signal)
|
||||||
|
right_delayed[delay_samples:] = signal[:-delay_samples] if delay_samples > 0 else signal
|
||||||
|
right += right_delayed * (1 + music_width * 0.5)
|
||||||
|
else:
|
||||||
|
pan = pans.get(name, 0.0)
|
||||||
|
# Constant-power pan: L = cos(angle), R = sin(angle)
|
||||||
|
angle = (pan + 1) * np.pi / 4 # 0 to pi/2
|
||||||
|
l_gain = np.cos(angle)
|
||||||
|
r_gain = np.sin(angle)
|
||||||
|
left += signal * l_gain
|
||||||
|
right += signal * r_gain
|
||||||
|
|
||||||
|
left = np.clip(left, -1.0, 1.0).astype(np.float32)
|
||||||
|
right = np.clip(right, -1.0, 1.0).astype(np.float32)
|
||||||
|
stereo = np.column_stack([left, right])
|
||||||
|
else:
|
||||||
mix = np.zeros(max_len, dtype=np.float64)
|
mix = np.zeros(max_len, dtype=np.float64)
|
||||||
for name in STEM_NAMES:
|
for name in STEM_NAMES:
|
||||||
audio = stems[name]
|
audio = stems[name]
|
||||||
if len(audio) < max_len:
|
if len(audio) < max_len:
|
||||||
audio = np.pad(audio, (0, max_len - len(audio)))
|
audio = np.pad(audio, (0, max_len - len(audio)))
|
||||||
mix += audio.astype(np.float64) * gains.get(name, 1.0)
|
mix += audio.astype(np.float64) * gains.get(name, 1.0)
|
||||||
|
|
||||||
# Stereo (mono duplicated to both channels)
|
|
||||||
mix = np.clip(mix, -1.0, 1.0).astype(np.float32)
|
mix = np.clip(mix, -1.0, 1.0).astype(np.float32)
|
||||||
stereo = np.column_stack([mix, mix])
|
stereo = np.column_stack([mix, mix])
|
||||||
|
|
||||||
return stereo
|
return stereo
|
||||||
|
|
||||||
|
|
||||||
|
def bus_compress(audio: np.ndarray, sr: int, tmp_dir: Path) -> np.ndarray:
    """Gentle bus compression on the final stereo mix to glue everything together.

    Round-trips the mix through ffmpeg's acompressor via temp WAV files in
    *tmp_dir*. Best-effort: if ffmpeg fails, the input audio is returned
    unchanged rather than aborting the pipeline.
    """
    pre = tmp_dir / "bus_pre.wav"
    post = tmp_dir / "bus_post.wav"
    sf.write(str(pre), audio, sr)

    # Gentle glue compressor: slow attack lets transients through,
    # low ratio just levels out the overall dynamics
    filter_chain = "acompressor=threshold=-20dB:ratio=2:attack=20:release=300:makeup=2dB"
    proc = subprocess.run(
        ["ffmpeg", "-y", "-i", str(pre), "-af", filter_chain, str(post)],
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        print(f" WARNING: bus compression failed: {proc.stderr[:200]}")
        return audio

    compressed, _ = sf.read(str(post), dtype="float32")
    return compressed
|
||||||
|
|
||||||
|
|
||||||
|
def trim_silence(audio: np.ndarray, sr: int, pad_s: float = 0.5,
                 threshold_db: float = -50) -> np.ndarray:
    """Trim leading and trailing silence from stereo audio.

    Detects activity against an amplitude gate derived from *threshold_db*,
    keeps *pad_s* seconds of padding on either side, and returns the
    trimmed slice. If nothing rises above the gate, the input is returned
    untouched.
    """
    gate = 10 ** (threshold_db / 20)
    # Use the louder channel for detection
    if audio.ndim > 1:
        level = np.max(np.abs(audio), axis=1)
    else:
        level = np.abs(audio)

    # Smoothed envelope for more reliable detection
    win = int(sr * 0.05)  # 50ms window
    if len(level) > win:
        envelope = np.convolve(level, np.ones(win) / win, mode='same')
    else:
        envelope = level

    loud = np.where(envelope > gate)[0]
    if len(loud) == 0:
        return audio

    pad = int(pad_s * sr)
    lo = max(0, loud[0] - pad)
    hi = min(len(audio), loud[-1] + pad)

    cut_head = lo / sr
    cut_tail = (len(audio) - hi) / sr
    if cut_head > 0.1 or cut_tail > 0.1:
        print(f" Trimmed {cut_head:.1f}s from start, {cut_tail:.1f}s from end")
    else:
        print(" No significant silence to trim")

    return audio[lo:hi]
|
||||||
|
|
||||||
|
|
||||||
|
def apply_fades(audio: np.ndarray, sr: int,
                fade_in_s: float = 1.5, fade_out_s: float = 3.0) -> np.ndarray:
    """Apply fade in/out to stereo audio using equal-power curve.

    Works on mono (1-D) or multi-channel (2-D, frames x channels) arrays.
    A fade is skipped if its length is zero or would cover the whole clip.
    Returns a copy; the input array is not modified.
    """
    out = audio.copy()
    n = len(out)

    def shaped(start_angle, end_angle, count):
        # Equal-power: sine curve for smooth perceived volume change
        ramp = np.sin(np.linspace(start_angle, end_angle, count)).astype(np.float32)
        return ramp if out.ndim == 1 else ramp[:, np.newaxis]

    # Fade in
    n_in = int(fade_in_s * sr)
    if 0 < n_in < n:
        out[:n_in] *= shaped(0, np.pi / 2, n_in)

    # Fade out
    n_out = int(fade_out_s * sr)
    if 0 < n_out < n:
        out[-n_out:] *= shaped(np.pi / 2, 0, n_out)

    print(f" Fade in: {fade_in_s}s, fade out: {fade_out_s}s")
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def detect_chapters(stems: dict[str, np.ndarray], sr: int) -> list[dict]:
    """Auto-detect chapter boundaries from stem activity.

    Scans the "ads", "caller" and "host" stems in 2-second windows and
    classifies each window by RMS energy (ads win over caller, caller over
    host; silence keeps the current label). Adjacent same-type chapters are
    merged, repeated types are numbered, and chapters shorter than 10s are
    dropped. Returns dicts with "title", "start_ms" and "end_ms" keys.
    """
    window_s = 2  # 2-second analysis windows
    hop = int(sr * window_s)
    total = min(len(s) for s in stems.values()) // hop
    if total == 0:
        return []

    def rms(name, lo, hi):
        return np.sqrt(np.mean(stems[name][lo:hi] ** 2))

    raw = []
    active = None
    active_since = 0

    for idx in range(total):
        lo = idx * hop
        hi = lo + hop

        # Classify this window
        if rms("ads", lo, hi) > 0.005:
            label = "Ad Break"
        elif rms("caller", lo, hi) > 0.005:
            label = "Caller"
        elif rms("host", lo, hi) > 0.005:
            label = "Host"
        else:
            label = active  # keep current during silence

        if label is not None and label != active:
            if active is not None:
                raw.append({
                    "title": active,
                    "start_ms": int(active_since * 1000),
                    "end_ms": int(idx * window_s * 1000),
                })
            active = label
            active_since = idx * window_s

    # Final chapter
    if active is not None:
        raw.append({
            "title": active,
            "start_ms": int(active_since * 1000),
            "end_ms": int(total * window_s * 1000),
        })

    # Merge consecutive chapters of same type
    merged: list[dict] = []
    for ch in raw:
        if merged and merged[-1]["title"] == ch["title"]:
            merged[-1]["end_ms"] = ch["end_ms"]
        else:
            merged.append(ch)

    # Number duplicate types (Caller 1, Caller 2, etc.)
    seen: dict[str, int] = {}
    for ch in merged:
        base = ch["title"]
        seen[base] = seen.get(base, 0) + 1
        if seen[base] > 1 or base in ("Caller", "Ad Break"):
            ch["title"] = f"{base} {seen[base]}"

    # Filter out very short chapters (< 10s)
    return [ch for ch in merged if ch["end_ms"] - ch["start_ms"] >= 10000]
|
||||||
|
|
||||||
|
|
||||||
|
def write_ffmpeg_chapters(chapters: list[dict], output_path: Path):
    """Write an ffmpeg-format metadata file with chapter markers.

    Args:
        chapters: List of dicts with "title", "start_ms" and "end_ms" keys
            (millisecond timestamps, as produced by detect_chapters).
        output_path: Destination for the FFMETADATA1 file, suitable for
            passing to ffmpeg as a second input with ``-map_metadata 1``.
    """

    def esc(value) -> str:
        # The ffmpeg metadata format requires '=', ';', '#', '\' and
        # newlines inside values to be backslash-escaped; without this a
        # title containing any of them corrupts the file.
        text = str(value)
        for special in ("\\", "=", ";", "#", "\n"):
            text = text.replace(special, "\\" + special)
        return text

    lines = [";FFMETADATA1"]
    for ch in chapters:
        lines.append("[CHAPTER]")
        lines.append("TIMEBASE=1/1000")
        lines.append(f"START={ch['start_ms']}")
        lines.append(f"END={ch['end_ms']}")
        lines.append(f"title={esc(ch['title'])}")
    output_path.write_text("\n".join(lines) + "\n")
|
||||||
|
|
||||||
|
|
||||||
def normalize_and_export(audio: np.ndarray, sr: int, output_path: Path,
|
def normalize_and_export(audio: np.ndarray, sr: int, output_path: Path,
|
||||||
target_lufs: float = -16, bitrate: str = "128k",
|
target_lufs: float = -16, bitrate: str = "128k",
|
||||||
tmp_dir: Path = None):
|
tmp_dir: Path = None,
|
||||||
|
metadata: dict | None = None,
|
||||||
|
chapters_file: Path | None = None):
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
|
||||||
tmp_wav = tmp_dir / "pre_loudnorm.wav"
|
tmp_wav = tmp_dir / "pre_loudnorm.wav"
|
||||||
sf.write(str(tmp_wav), audio, sr)
|
sf.write(str(tmp_wav), audio, sr)
|
||||||
|
|
||||||
@@ -244,8 +645,6 @@ def normalize_and_export(audio: np.ndarray, sr: int, output_path: Path,
|
|||||||
result = subprocess.run(measure_cmd, capture_output=True, text=True)
|
result = subprocess.run(measure_cmd, capture_output=True, text=True)
|
||||||
stderr = result.stderr
|
stderr = result.stderr
|
||||||
|
|
||||||
# Parse loudnorm output
|
|
||||||
import json
|
|
||||||
json_start = stderr.rfind("{")
|
json_start = stderr.rfind("{")
|
||||||
json_end = stderr.rfind("}") + 1
|
json_end = stderr.rfind("}") + 1
|
||||||
if json_start >= 0 and json_end > json_start:
|
if json_start >= 0 and json_end > json_start:
|
||||||
@@ -257,7 +656,7 @@ def normalize_and_export(audio: np.ndarray, sr: int, output_path: Path,
|
|||||||
"input_thresh": "-34",
|
"input_thresh": "-34",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Pass 2: apply normalization + limiter + export MP3
|
# Pass 2: normalize + limiter + export MP3
|
||||||
loudnorm_filter = (
|
loudnorm_filter = (
|
||||||
f"loudnorm=I={target_lufs}:TP=-1:LRA=11"
|
f"loudnorm=I={target_lufs}:TP=-1:LRA=11"
|
||||||
f":measured_I={stats['input_i']}"
|
f":measured_I={stats['input_i']}"
|
||||||
@@ -266,29 +665,79 @@ def normalize_and_export(audio: np.ndarray, sr: int, output_path: Path,
|
|||||||
f":measured_thresh={stats['input_thresh']}"
|
f":measured_thresh={stats['input_thresh']}"
|
||||||
f":linear=true"
|
f":linear=true"
|
||||||
)
|
)
|
||||||
export_cmd = [
|
|
||||||
"ffmpeg", "-y", "-i", str(tmp_wav),
|
export_cmd = ["ffmpeg", "-y", "-i", str(tmp_wav)]
|
||||||
|
|
||||||
|
if chapters_file and chapters_file.exists():
|
||||||
|
export_cmd += ["-i", str(chapters_file), "-map_metadata", "1"]
|
||||||
|
|
||||||
|
export_cmd += [
|
||||||
"-af", f"{loudnorm_filter},alimiter=limit=-1dB:level=false",
|
"-af", f"{loudnorm_filter},alimiter=limit=-1dB:level=false",
|
||||||
"-ab", bitrate, "-ar", str(sr),
|
"-ab", bitrate, "-ar", str(sr),
|
||||||
str(output_path),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if metadata:
|
||||||
|
for key, value in metadata.items():
|
||||||
|
if value and not key.startswith("_"):
|
||||||
|
export_cmd += ["-metadata", f"{key}={value}"]
|
||||||
|
|
||||||
|
export_cmd.append(str(output_path))
|
||||||
result = subprocess.run(export_cmd, capture_output=True, text=True)
|
result = subprocess.run(export_cmd, capture_output=True, text=True)
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
print(f" ERROR: export failed: {result.stderr[:300]}")
|
print(f" ERROR: export failed: {result.stderr[:300]}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Embed artwork as a second pass (avoids complex multi-input mapping)
|
||||||
|
artwork = metadata.get("_artwork") if metadata else None
|
||||||
|
if artwork and Path(artwork).exists():
|
||||||
|
tmp_mp3 = tmp_dir / "with_art.mp3"
|
||||||
|
art_cmd = [
|
||||||
|
"ffmpeg", "-y", "-i", str(output_path), "-i", artwork,
|
||||||
|
"-map", "0:a", "-map", "1:0",
|
||||||
|
"-c:a", "copy", "-id3v2_version", "3",
|
||||||
|
"-metadata:s:v", "title=Album cover",
|
||||||
|
"-metadata:s:v", "comment=Cover (front)",
|
||||||
|
"-disposition:v", "attached_pic",
|
||||||
|
str(tmp_mp3),
|
||||||
|
]
|
||||||
|
art_result = subprocess.run(art_cmd, capture_output=True, text=True)
|
||||||
|
if art_result.returncode == 0:
|
||||||
|
shutil.move(str(tmp_mp3), str(output_path))
|
||||||
|
print(f" Embedded artwork: {artwork}")
|
||||||
|
else:
|
||||||
|
print(f" WARNING: artwork embedding failed: {art_result.stderr[:200]}")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="Post-production for AI podcast stems")
|
parser = argparse.ArgumentParser(description="Post-production for AI podcast stems")
|
||||||
parser.add_argument("stems_dir", type=Path, help="Directory containing stem WAV files")
|
parser.add_argument("stems_dir", type=Path, help="Directory containing stem WAV files")
|
||||||
parser.add_argument("-o", "--output", type=str, default="episode.mp3", help="Output filename")
|
parser.add_argument("-o", "--output", type=str, default="episode.mp3", help="Output filename")
|
||||||
parser.add_argument("--gap-threshold", type=float, default=1.5, help="Min silence to cut (seconds)")
|
parser.add_argument("--gap-threshold", type=float, default=2.0, help="Min silence to cut (seconds)")
|
||||||
parser.add_argument("--duck-amount", type=float, default=-12, help="Music duck in dB")
|
parser.add_argument("--duck-amount", type=float, default=-20, help="Music duck in dB")
|
||||||
parser.add_argument("--target-lufs", type=float, default=-16, help="Target loudness (LUFS)")
|
parser.add_argument("--target-lufs", type=float, default=-16, help="Target loudness (LUFS)")
|
||||||
parser.add_argument("--bitrate", type=str, default="128k", help="MP3 bitrate")
|
parser.add_argument("--bitrate", type=str, default="128k", help="MP3 bitrate")
|
||||||
|
parser.add_argument("--fade-in", type=float, default=1.5, help="Fade in duration (seconds)")
|
||||||
|
parser.add_argument("--fade-out", type=float, default=3.0, help="Fade out duration (seconds)")
|
||||||
|
|
||||||
|
# Metadata
|
||||||
|
parser.add_argument("--title", type=str, help="Episode title (ID3 tag)")
|
||||||
|
parser.add_argument("--artist", type=str, default="Luke at the Roost", help="Artist name")
|
||||||
|
parser.add_argument("--album", type=str, default="Luke at the Roost", help="Album/show name")
|
||||||
|
parser.add_argument("--episode-num", type=str, help="Episode number (track tag)")
|
||||||
|
parser.add_argument("--artwork", type=str, help="Path to artwork image (embedded in MP3)")
|
||||||
|
|
||||||
|
# Skip flags
|
||||||
parser.add_argument("--no-gap-removal", action="store_true", help="Skip gap removal")
|
parser.add_argument("--no-gap-removal", action="store_true", help="Skip gap removal")
|
||||||
|
parser.add_argument("--no-denoise", action="store_true", help="Skip noise reduction + HPF")
|
||||||
|
parser.add_argument("--no-deess", action="store_true", help="Skip de-essing")
|
||||||
|
parser.add_argument("--no-breath-reduction", action="store_true", help="Skip breath reduction")
|
||||||
parser.add_argument("--no-compression", action="store_true", help="Skip voice compression")
|
parser.add_argument("--no-compression", action="store_true", help="Skip voice compression")
|
||||||
|
parser.add_argument("--no-phone-eq", action="store_true", help="Skip caller phone EQ")
|
||||||
parser.add_argument("--no-ducking", action="store_true", help="Skip music ducking")
|
parser.add_argument("--no-ducking", action="store_true", help="Skip music ducking")
|
||||||
|
parser.add_argument("--no-stereo", action="store_true", help="Skip stereo imaging (mono mix)")
|
||||||
|
parser.add_argument("--no-trim", action="store_true", help="Skip silence trimming")
|
||||||
|
parser.add_argument("--no-fade", action="store_true", help="Skip fade in/out")
|
||||||
|
parser.add_argument("--no-chapters", action="store_true", help="Skip chapter markers")
|
||||||
parser.add_argument("--dry-run", action="store_true", help="Show what would be done")
|
parser.add_argument("--dry-run", action="store_true", help="Show what would be done")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -303,28 +752,59 @@ def main():
|
|||||||
output_path = stems_dir / output_path
|
output_path = stems_dir / output_path
|
||||||
|
|
||||||
print(f"Post-production: {stems_dir} -> {output_path}")
|
print(f"Post-production: {stems_dir} -> {output_path}")
|
||||||
print(f" Gap removal: {'skip' if args.no_gap_removal else f'threshold={args.gap_threshold}s'}")
|
|
||||||
print(f" Compression: {'skip' if args.no_compression else 'on'}")
|
|
||||||
print(f" Ducking: {'skip' if args.no_ducking else f'{args.duck_amount}dB'}")
|
|
||||||
print(f" Loudness: {args.target_lufs} LUFS, bitrate: {args.bitrate}")
|
|
||||||
|
|
||||||
if args.dry_run:
|
if args.dry_run:
|
||||||
print("Dry run — exiting")
|
print("Dry run — exiting")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
total_steps = 15
|
||||||
|
|
||||||
# Step 1: Load
|
# Step 1: Load
|
||||||
print("\n[1/6] Loading stems...")
|
print(f"\n[1/{total_steps}] Loading stems...")
|
||||||
stems, sr = load_stems(stems_dir)
|
stems, sr = load_stems(stems_dir)
|
||||||
|
|
||||||
# Step 2: Gap removal
|
# Step 2: Gap removal
|
||||||
print("\n[2/6] Gap removal...")
|
print(f"\n[2/{total_steps}] Gap removal...")
|
||||||
if not args.no_gap_removal:
|
if not args.no_gap_removal:
|
||||||
stems = remove_gaps(stems, sr, threshold_s=args.gap_threshold)
|
stems = remove_gaps(stems, sr, threshold_s=args.gap_threshold)
|
||||||
else:
|
else:
|
||||||
print(" Skipped")
|
print(" Skipped")
|
||||||
|
|
||||||
# Step 3: Voice compression
|
# Step 3: Limit ads + SFX (prevent clipping)
|
||||||
print("\n[3/6] Voice compression...")
|
print(f"\n[3/{total_steps}] Limiting ads + SFX...")
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
tmp_dir = Path(tmp)
|
||||||
|
for name in ["ads", "sfx"]:
|
||||||
|
if np.any(stems[name] != 0):
|
||||||
|
stems[name] = limit_stem(stems[name], sr, tmp_dir, name)
|
||||||
|
|
||||||
|
# Step 4: Host mic noise reduction + HPF
|
||||||
|
print(f"\n[4/{total_steps}] Host noise reduction + HPF...")
|
||||||
|
if not args.no_denoise and np.any(stems["host"] != 0):
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
stems["host"] = denoise(stems["host"], sr, Path(tmp))
|
||||||
|
print(" Applied")
|
||||||
|
else:
|
||||||
|
print(" Skipped" if args.no_denoise else " No host audio")
|
||||||
|
|
||||||
|
# Step 5: De-essing
|
||||||
|
print(f"\n[5/{total_steps}] De-essing host...")
|
||||||
|
if not args.no_deess and np.any(stems["host"] != 0):
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
stems["host"] = deess(stems["host"], sr, Path(tmp))
|
||||||
|
print(" Applied")
|
||||||
|
else:
|
||||||
|
print(" Skipped" if args.no_deess else " No host audio")
|
||||||
|
|
||||||
|
# Step 6: Breath reduction
|
||||||
|
print(f"\n[6/{total_steps}] Breath reduction...")
|
||||||
|
if not args.no_breath_reduction and np.any(stems["host"] != 0):
|
||||||
|
stems["host"] = reduce_breaths(stems["host"], sr)
|
||||||
|
else:
|
||||||
|
print(" Skipped" if args.no_breath_reduction else " No host audio")
|
||||||
|
|
||||||
|
# Step 7: Voice compression
|
||||||
|
print(f"\n[7/{total_steps}] Voice compression...")
|
||||||
if not args.no_compression:
|
if not args.no_compression:
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
tmp_dir = Path(tmp)
|
tmp_dir = Path(tmp)
|
||||||
@@ -335,30 +815,103 @@ def main():
|
|||||||
else:
|
else:
|
||||||
print(" Skipped")
|
print(" Skipped")
|
||||||
|
|
||||||
# Step 4: Music ducking
|
# Step 8: Phone EQ on caller
|
||||||
print("\n[4/6] Music ducking...")
|
print(f"\n[8/{total_steps}] Phone EQ on caller...")
|
||||||
|
if not args.no_phone_eq and np.any(stems["caller"] != 0):
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
stems["caller"] = phone_eq(stems["caller"], sr, Path(tmp))
|
||||||
|
print(" Applied")
|
||||||
|
else:
|
||||||
|
print(" Skipped" if args.no_phone_eq else " No caller audio")
|
||||||
|
|
||||||
|
# Step 9: Match voice levels
|
||||||
|
print(f"\n[9/{total_steps}] Matching voice levels...")
|
||||||
|
stems = match_voice_levels(stems)
|
||||||
|
|
||||||
|
# Step 10: Music ducking
|
||||||
|
print(f"\n[10/{total_steps}] Music ducking...")
|
||||||
if not args.no_ducking:
|
if not args.no_ducking:
|
||||||
dialog = stems["host"] + stems["caller"]
|
dialog = stems["host"] + stems["caller"]
|
||||||
if np.any(dialog != 0) and np.any(stems["music"] != 0):
|
if np.any(dialog != 0) and np.any(stems["music"] != 0):
|
||||||
stems["music"] = apply_ducking(stems["music"], dialog, sr, duck_db=args.duck_amount)
|
stems["music"] = apply_ducking(stems["music"], dialog, sr, duck_db=args.duck_amount,
|
||||||
|
mute_signal=stems["ads"])
|
||||||
print(" Applied")
|
print(" Applied")
|
||||||
else:
|
else:
|
||||||
print(" No dialog or music to duck")
|
print(" No dialog or music to duck")
|
||||||
else:
|
else:
|
||||||
print(" Skipped")
|
print(" Skipped")
|
||||||
|
|
||||||
# Step 5: Mix
|
# Step 11: Stereo mix
|
||||||
print("\n[5/6] Mixing...")
|
print(f"\n[11/{total_steps}] Mixing...")
|
||||||
stereo = mix_stems(stems)
|
stereo = mix_stems(stems, stereo_imaging=not args.no_stereo)
|
||||||
print(f" Mixed to stereo: {len(stereo)} samples ({len(stereo)/sr:.1f}s)")
|
imaging = "stereo" if not args.no_stereo else "mono"
|
||||||
|
print(f" Mixed to {imaging}: {len(stereo)} samples ({len(stereo)/sr:.1f}s)")
|
||||||
|
|
||||||
# Step 6: Normalize + export
|
# Step 12: Bus compression
|
||||||
print("\n[6/6] Loudness normalization + export...")
|
print(f"\n[12/{total_steps}] Bus compression...")
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
stereo = bus_compress(stereo, sr, Path(tmp))
|
||||||
|
print(" Applied")
|
||||||
|
|
||||||
|
# Step 13: Silence trimming
|
||||||
|
print(f"\n[13/{total_steps}] Trimming silence...")
|
||||||
|
if not args.no_trim:
|
||||||
|
stereo = trim_silence(stereo, sr)
|
||||||
|
else:
|
||||||
|
print(" Skipped")
|
||||||
|
|
||||||
|
# Step 14: Fade in/out
|
||||||
|
print(f"\n[14/{total_steps}] Fades...")
|
||||||
|
if not args.no_fade:
|
||||||
|
stereo = apply_fades(stereo, sr, fade_in_s=args.fade_in, fade_out_s=args.fade_out)
|
||||||
|
else:
|
||||||
|
print(" Skipped")
|
||||||
|
|
||||||
|
# Step 15: Normalize + export with metadata and chapters
|
||||||
|
print(f"\n[15/{total_steps}] Loudness normalization + export...")
|
||||||
|
|
||||||
|
# Build metadata dict
|
||||||
|
meta = {}
|
||||||
|
if args.title:
|
||||||
|
meta["title"] = args.title
|
||||||
|
if args.artist:
|
||||||
|
meta["artist"] = args.artist
|
||||||
|
if args.album:
|
||||||
|
meta["album"] = args.album
|
||||||
|
if args.episode_num:
|
||||||
|
meta["track"] = args.episode_num
|
||||||
|
if args.artwork:
|
||||||
|
meta["_artwork"] = args.artwork
|
||||||
|
|
||||||
|
# Auto-detect chapters
|
||||||
|
chapters = []
|
||||||
|
if not args.no_chapters:
|
||||||
|
chapters = detect_chapters(stems, sr)
|
||||||
|
if chapters:
|
||||||
|
print(f" Detected {len(chapters)} chapters:")
|
||||||
|
for ch in chapters:
|
||||||
|
start_s = ch["start_ms"] / 1000
|
||||||
|
end_s = ch["end_ms"] / 1000
|
||||||
|
print(f" {start_s:6.1f}s - {end_s:6.1f}s {ch['title']}")
|
||||||
|
else:
|
||||||
|
print(" No chapters detected")
|
||||||
|
else:
|
||||||
|
print(" Skipped")
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
tmp_dir = Path(tmp)
|
||||||
|
|
||||||
|
chapters_file = None
|
||||||
|
if chapters:
|
||||||
|
chapters_file = tmp_dir / "chapters.txt"
|
||||||
|
write_ffmpeg_chapters(chapters, chapters_file)
|
||||||
|
|
||||||
normalize_and_export(stereo, sr, output_path,
|
normalize_and_export(stereo, sr, output_path,
|
||||||
target_lufs=args.target_lufs,
|
target_lufs=args.target_lufs,
|
||||||
bitrate=args.bitrate,
|
bitrate=args.bitrate,
|
||||||
tmp_dir=Path(tmp))
|
tmp_dir=tmp_dir,
|
||||||
|
metadata=meta if meta else None,
|
||||||
|
chapters_file=chapters_file)
|
||||||
|
|
||||||
print(f"\nDone! Output: {output_path}")
|
print(f"\nDone! Output: {output_path}")
|
||||||
|
|
||||||
|
|||||||
@@ -130,27 +130,6 @@
|
|||||||
<span>Audio Router</span>
|
<span>Audio Router</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="diagram-row diagram-row-split">
|
|
||||||
<div class="diagram-box">
|
|
||||||
<div class="diagram-icon">
|
|
||||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M9 18V5l12-2v13"/><circle cx="6" cy="18" r="3"/><circle cx="18" cy="16" r="3"/></svg>
|
|
||||||
</div>
|
|
||||||
<span>Music</span>
|
|
||||||
</div>
|
|
||||||
<div class="diagram-box">
|
|
||||||
<div class="diagram-icon">
|
|
||||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14"/></svg>
|
|
||||||
</div>
|
|
||||||
<span>SFX</span>
|
|
||||||
</div>
|
|
||||||
<div class="diagram-box">
|
|
||||||
<div class="diagram-icon">
|
|
||||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><rect x="2" y="7" width="20" height="15" rx="2"/><path d="M16 7V4a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v3"/></svg>
|
|
||||||
</div>
|
|
||||||
<span>Ads</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="diagram-arrow">↓</div>
|
|
||||||
<!-- Row 4: Recording -->
|
<!-- Row 4: Recording -->
|
||||||
<div class="diagram-row">
|
<div class="diagram-row">
|
||||||
<div class="diagram-box">
|
<div class="diagram-box">
|
||||||
@@ -255,10 +234,11 @@
|
|||||||
<div class="hiw-step-content">
|
<div class="hiw-step-content">
|
||||||
<h3>A Person Is Born</h3>
|
<h3>A Person Is Born</h3>
|
||||||
<p>Every caller starts as a blank slate. The system generates a complete identity: name, age, job, hometown, and personality. Each caller gets a unique speaking style — some ramble, some are blunt, some deflect with humor. They have relationships, vehicles, strong food opinions, nostalgic memories, and reasons for being up this late. They know what they were watching on TV, what errand they ran today, and what song was on the radio before they called.</p>
|
<p>Every caller starts as a blank slate. The system generates a complete identity: name, age, job, hometown, and personality. Each caller gets a unique speaking style — some ramble, some are blunt, some deflect with humor. They have relationships, vehicles, strong food opinions, nostalgic memories, and reasons for being up this late. They know what they were watching on TV, what errand they ran today, and what song was on the radio before they called.</p>
|
||||||
|
<p>Some callers become regulars. The system tracks returning callers across episodes — they remember past conversations, reference things they talked about before, and their stories evolve over time. You'll hear Carla update you on her divorce, or Carl check in about his gambling recovery. They're not reset between shows.</p>
|
||||||
<div class="hiw-detail-grid">
|
<div class="hiw-detail-grid">
|
||||||
<div class="hiw-detail">
|
<div class="hiw-detail">
|
||||||
<span class="hiw-detail-label">Unique Names</span>
|
<span class="hiw-detail-label">Unique Names</span>
|
||||||
<span class="hiw-detail-value">48 names</span>
|
<span class="hiw-detail-value">160 names</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="hiw-detail">
|
<div class="hiw-detail">
|
||||||
<span class="hiw-detail-label">Personality Layers</span>
|
<span class="hiw-detail-label">Personality Layers</span>
|
||||||
@@ -269,8 +249,8 @@
|
|||||||
<span class="hiw-detail-value">32</span>
|
<span class="hiw-detail-value">32</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="hiw-detail">
|
<div class="hiw-detail">
|
||||||
<span class="hiw-detail-label">Unique Voices</span>
|
<span class="hiw-detail-label">Returning Regulars</span>
|
||||||
<span class="hiw-detail-value">25</span>
|
<span class="hiw-detail-value">12+ callers</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -381,23 +361,23 @@
|
|||||||
<div class="hiw-step-number">8</div>
|
<div class="hiw-step-number">8</div>
|
||||||
<div class="hiw-step-content">
|
<div class="hiw-step-content">
|
||||||
<h3>Post-Production Pipeline</h3>
|
<h3>Post-Production Pipeline</h3>
|
||||||
<p>Once the show ends, an automated six-stage pipeline processes the raw stems into a broadcast-ready episode. Dead air and long silences are removed with crossfaded cuts. Voice tracks get dynamic range compression. Music automatically ducks under dialog. All five stems are mixed into stereo and loudness-normalized to broadcast standards. The whole process runs without manual intervention.</p>
|
<p>Once the show ends, a 15-step automated pipeline processes the raw stems into a broadcast-ready episode. Ads and sound effects are hard-limited to prevent clipping. The host mic gets a high-pass filter, de-essing, and breath reduction. Voice tracks are compressed — the host gets aggressive spoken-word compression for consistent levels, callers get telephone EQ to sound like real phone calls. All stems are level-matched, music is ducked under dialog and muted during ads, then everything is mixed to stereo with panning and width. A bus compressor glues the final mix together before silence trimming, fades, and EBU R128 loudness normalization.</p>
|
||||||
<div class="hiw-detail-grid">
|
<div class="hiw-detail-grid">
|
||||||
<div class="hiw-detail">
|
<div class="hiw-detail">
|
||||||
<span class="hiw-detail-label">Pipeline Stages</span>
|
<span class="hiw-detail-label">Pipeline Steps</span>
|
||||||
<span class="hiw-detail-value">6 steps</span>
|
<span class="hiw-detail-value">15</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="hiw-detail">
|
<div class="hiw-detail">
|
||||||
<span class="hiw-detail-label">Loudness Target</span>
|
<span class="hiw-detail-label">Loudness Target</span>
|
||||||
<span class="hiw-detail-value">-16 LUFS</span>
|
<span class="hiw-detail-value">-16 LUFS</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="hiw-detail">
|
<div class="hiw-detail">
|
||||||
<span class="hiw-detail-label">Music Ducking</span>
|
<span class="hiw-detail-label">Loudness Range</span>
|
||||||
<span class="hiw-detail-value">Automatic</span>
|
<span class="hiw-detail-value">~5.5 LU</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="hiw-detail">
|
<div class="hiw-detail">
|
||||||
<span class="hiw-detail-label">Output</span>
|
<span class="hiw-detail-label">Output</span>
|
||||||
<span class="hiw-detail-value">Broadcast MP3</span>
|
<span class="hiw-detail-value">Stereo MP3</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -494,7 +474,7 @@
|
|||||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14"/><path d="M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>
|
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14"/><path d="M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>
|
||||||
</div>
|
</div>
|
||||||
<h3>Broadcast-Grade Audio</h3>
|
<h3>Broadcast-Grade Audio</h3>
|
||||||
<p>Every episode goes through a professional post-production pipeline: five isolated stems are individually processed with dynamic compression, automatic music ducking, and EBU R128 loudness normalization before being mixed to stereo and encoded for distribution.</p>
|
<p>Every episode runs through a 15-step post-production pipeline: stem limiting, high-pass filtering, de-essing, breath reduction, spoken-word compression, telephone EQ, level matching, music ducking with ad muting, stereo imaging, bus compression, and EBU R128 loudness normalization.</p>
|
||||||
</div>
|
</div>
|
||||||
<div class="hiw-feature">
|
<div class="hiw-feature">
|
||||||
<div class="hiw-feature-icon">
|
<div class="hiw-feature-icon">
|
||||||
|
|||||||
Reference in New Issue
Block a user