Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5d8ab57e20 | |||
| d33a022676 |
+260
@@ -0,0 +1,260 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Analyze silence gaps in podcast stems to find optimal strip-silence thresholds.
|
||||||
|
|
||||||
|
Usage: python analyze_gaps.py recordings/2026-03-17_235137/
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Length of one analysis block, in seconds (0.1 s = 100 ms).
BLOCK_SEC = 0.1

# Silence level in dB; converted to a linear amplitude just below.
SILENCE_DB = -30

# SILENCE_DB expressed as a linear amplitude (10^(dB/20)); block peaks are
# compared against this value.
THRESHOLD = 10.0 ** (SILENCE_DB / 20.0)

# Minimum run of non-silent audio, in seconds, that ends a silence.
MIN_VOICE_SEC = 0.3
|
||||||
|
|
||||||
|
|
||||||
|
def load_stem(path: Path) -> tuple[np.ndarray, int]:
    """Read one stem from disk and return it as a one-dimensional float32 signal.

    Args:
        path: Location of the audio file; passed straight to ``sf.read``.

    Returns:
        ``(samples, sample_rate)``. When the decoded array has more than one
        dimension only column 0 is kept; the remaining columns are discarded,
        not mixed down.
    """
    samples, sample_rate = sf.read(path, dtype="float32")
    has_extra_columns = samples.ndim > 1
    mono = samples[:, 0] if has_extra_columns else samples
    return mono, sample_rate
|
||||||
|
|
||||||
|
|
||||||
|
def _frame_blocks(audio, sr, block_sec=None):
    """Cut ``audio`` into consecutive, non-overlapping analysis blocks.

    Args:
        audio: One-dimensional sample array.
        sr: Sample rate in Hz.
        block_sec: Block length in seconds. ``None`` (the default) uses the
            module-level ``BLOCK_SEC``.

    Returns:
        A ``(n_blocks, block_samples)`` view of the leading whole blocks of
        ``audio`` (the trailing partial block is dropped), or ``None`` when
        ``audio`` is shorter than a single block.

    Raises:
        ValueError: If ``sr * block_sec`` is less than one sample. Previously
            this surfaced as a bare ZeroDivisionError.
    """
    if block_sec is None:
        block_sec = BLOCK_SEC
    block_samples = int(sr * block_sec)
    if block_samples < 1:
        raise ValueError(
            f"block of {block_sec}s at {sr}Hz is shorter than one sample"
        )
    n_blocks = len(audio) // block_samples
    if n_blocks == 0:
        return None
    return audio[:n_blocks * block_samples].reshape(n_blocks, block_samples)


def compute_rms_blocks(audio: np.ndarray, sr: int, block_sec=None) -> np.ndarray:
    """Return the RMS level of each whole analysis block of ``audio``.

    Args:
        audio: One-dimensional sample array.
        sr: Sample rate in Hz.
        block_sec: Optional block length in seconds; defaults to ``BLOCK_SEC``.

    Returns:
        One RMS value per whole block. When ``audio`` holds no whole block a
        single ``0.0`` is returned so callers always get a non-empty array.

    Raises:
        ValueError: If the block length is shorter than one sample.
    """
    blocks = _frame_blocks(audio, sr, block_sec)
    if blocks is None:
        return np.array([0.0])
    return np.sqrt(np.mean(blocks ** 2, axis=1))


def compute_peak_blocks(audio: np.ndarray, sr: int, block_sec=None) -> np.ndarray:
    """Return the absolute peak of each whole analysis block of ``audio``.

    Args:
        audio: One-dimensional sample array.
        sr: Sample rate in Hz.
        block_sec: Optional block length in seconds; defaults to ``BLOCK_SEC``.

    Returns:
        One peak value per whole block. When ``audio`` holds no whole block a
        single ``0.0`` is returned so callers always get a non-empty array.

    Raises:
        ValueError: If the block length is shorter than one sample.
    """
    blocks = _frame_blocks(audio, sr, block_sec)
    if blocks is None:
        return np.array([0.0])
    return np.max(np.abs(blocks), axis=1)
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_gaps(track_rms, track_peak, n_blocks, min_voice_blocks, min_gap_sec=0.5):
    """Find stretches in which every track is silent.

    A block is silent when the largest per-block peak across all tracks is
    below ``THRESHOLD``. A silence only ends once ``min_voice_blocks``
    consecutive non-silent blocks have been seen, so shorter bursts stay
    inside the gap. Speakers are identified by the highest per-block RMS.

    Args:
        track_rms: Mapping of track name to per-block RMS array.
        track_peak: Mapping of track name to per-block peak array.
        n_blocks: Number of blocks to scan (must not exceed any array length).
        min_voice_blocks: Consecutive non-silent blocks needed to end a gap.
        min_gap_sec: Gaps shorter than this are not reported.

    Returns:
        List of dicts with ``start``/``end``/``dur`` in seconds plus
        ``before``/``after``: a track name, ``"?"`` when unknown, or ``"end"``
        for silence that runs to the end of the scanned range.
    """
    track_names = list(track_rms)
    gaps = []
    in_silence = False
    silence_start = 0
    track_before = None
    last_active = None
    voice_run = 0
    voice_run_track = None

    for i in range(n_blocks):
        # Peak decides silence; RMS decides who is speaking.
        best_peak = max(track_peak[name][i] for name in track_names)
        best_rms = 0
        best_track = None
        for name in track_names:
            r = track_rms[name][i]
            if r > best_rms:
                best_rms = r
                best_track = name

        all_silent = best_peak < THRESHOLD
        if not all_silent:
            last_active = best_track

        if not in_silence:
            if all_silent:
                in_silence = True
                silence_start = i
                track_before = last_active
                voice_run = 0
                voice_run_track = None
            continue

        if all_silent:
            # A burst that was too short to count is absorbed into the gap.
            voice_run = 0
            voice_run_track = None
            continue

        if voice_run == 0:
            voice_run_track = best_track
        voice_run += 1
        if voice_run < min_voice_blocks:
            continue

        voice_start_block = i - (voice_run - 1)
        # Length comes from the block count, not from subtracting two float
        # products, so the cutoff is not exposed to subtraction rounding.
        dur = (voice_start_block - silence_start) * BLOCK_SEC
        if dur >= min_gap_sec:
            gaps.append({
                "start": silence_start * BLOCK_SEC,
                "end": voice_start_block * BLOCK_SEC,
                "dur": dur,
                "before": track_before or "?",
                "after": voice_run_track or "?",
            })
        in_silence = False
        voice_run = 0
        voice_run_track = None

    # Trailing silence
    if in_silence:
        dur = (n_blocks - silence_start) * BLOCK_SEC
        if dur >= min_gap_sec:
            gaps.append({
                "start": silence_start * BLOCK_SEC,
                "end": n_blocks * BLOCK_SEC,
                "dur": dur,
                "before": track_before or "?",
                "after": "end",
            })
    return gaps


def _split_short_long(gap_list, cutoff=5):
    """Return the sorted gap durations split into (< cutoff, >= cutoff)."""
    durs = sorted(g["dur"] for g in gap_list)
    return [d for d in durs if d < cutoff], [d for d in durs if d >= cutoff]


def _print_transition_threshold(label, gap_list):
    """Print a suggested threshold for ``gap_list``, or nothing if none applies."""
    if not gap_list:
        return
    short, long = _split_short_long(gap_list)
    if short and long:
        suggested = (max(short) + min(long)) / 2
        print(f"{label}: {suggested:.1f}s (short: {len(short)} up to {max(short):.1f}s, long: {len(long)} from {min(long):.1f}s)")
    elif long:
        print(f"{label}: {min(long) - 1:.1f}s (all gaps >= {min(long):.1f}s)")


def analyze(stems_dir: Path):
    """Print a silence-gap report for the voice stems found in ``stems_dir``.

    Loads whichever of ``host.wav``, ``devon.wav`` and ``caller.wav`` exist,
    detects gaps where all of them are silent, groups the gaps by the speaker
    before and after, and prints per-group statistics, a histogram and
    suggested strip-silence thresholds.

    Args:
        stems_dir: Directory containing the stem files.

    Returns:
        None. All results are written to stdout.
    """
    stems_dir = Path(stems_dir)
    voice_stems = {}
    stem_rates = {}
    for name in ["host", "devon", "caller"]:
        path = stems_dir / f"{name}.wav"
        if path.exists():
            print(f"Loading {name}...", end=" ", flush=True)
            audio, sr = load_stem(path)
            voice_stems[name] = audio
            stem_rates[name] = sr
            print(f"{len(audio)/sr:.0f}s @ {sr}Hz")

    if not voice_stems:
        print("No voice stems found")
        return

    # Each stem is measured with its own sample rate; previously the rate of
    # whichever stem loaded last was applied to all of them.
    duration = max(len(a) / stem_rates[n] for n, a in voice_stems.items())
    print(f"\nTotal duration: {duration/60:.1f} min")

    # Compute per-track RMS and peak blocks
    track_rms = {}
    track_peak = {}
    for name, audio in voice_stems.items():
        track_rms[name] = compute_rms_blocks(audio, stem_rates[name])
        track_peak[name] = compute_peak_blocks(audio, stem_rates[name])

    n_blocks = min(len(v) for v in track_peak.values())

    # Detect gaps using same logic as Lua script (RMS for speaker ID, peak for silence).
    # The Lua side rounds up; int() here truncated 0.3 / 0.1 (2.9999999999999996) to 2.
    min_voice_blocks = int(np.ceil(MIN_VOICE_SEC / BLOCK_SEC))

    gaps = _detect_gaps(track_rms, track_peak, n_blocks, min_voice_blocks)

    if not gaps:
        print("No gaps detected")
        return

    # Categorize gaps
    categories = {
        "host_self": [],        # Host -> Host
        "host_to_caller": [],   # Host -> Caller (TTS latency)
        "caller_to_host": [],   # Caller -> Host
        "host_to_devon": [],    # Host -> Devon (TTS latency)
        "devon_to_host": [],    # Devon -> Host
        "caller_to_devon": [],  # Caller -> Devon (interjection)
        "devon_to_caller": [],  # Devon -> Caller
        "other": [],
    }

    for g in gaps:
        b, a = g["before"], g["after"]
        key = "host_self" if (b, a) == ("host", "host") else f"{b}_to_{a}"
        # Any pairing without its own bucket (same non-host speaker, "?", "end") is "other".
        categories[key if key in categories else "other"].append(g)

    # Print results
    print(f"\n{'='*70}")
    print(f"GAP ANALYSIS — {len(gaps)} gaps detected")
    print(f"{'='*70}")

    total_silence = sum(g["dur"] for g in gaps)
    print(f"Total silence: {total_silence:.0f}s ({total_silence/60:.1f} min)")
    print(f"Content after removal: ~{(duration - total_silence)/60:.1f} min")

    # Open-ended top bracket so very long gaps are still counted.
    no_limit = float("inf")
    brackets = [(0, 1), (1, 2), (2, 3), (3, 5), (5, 8), (8, 12), (12, 18), (18, 30), (30, 60), (60, no_limit)]

    for cat_name, cat_gaps in sorted(categories.items(), key=lambda x: -len(x[1])):
        if not cat_gaps:
            continue
        durs = sorted([g["dur"] for g in cat_gaps])
        print(f"\n--- {cat_name} ({len(cat_gaps)} gaps) ---")
        print(f" Range: {durs[0]:.1f}s - {durs[-1]:.1f}s")
        print(f" Median: {np.median(durs):.1f}s Mean: {np.mean(durs):.1f}s")
        if len(durs) >= 5:
            print(f" P25: {np.percentile(durs, 25):.1f}s P75: {np.percentile(durs, 75):.1f}s")

        # Histogram
        print(" Distribution:")
        for lo, hi in brackets:
            count = sum(1 for d in durs if lo <= d < hi)
            if count > 0:
                bar = "#" * count
                label = f"{lo}-{hi}s" if hi != no_limit else f"{lo}s+"
                print(f" {label:>8s}: {bar} ({count})")

    # Find natural clusters and suggest thresholds
    print(f"\n{'='*70}")
    print("SUGGESTED THRESHOLDS")
    print(f"{'='*70}")

    # For each Devon-involved category, find the gap between interjection and TTS gaps
    devon_gaps = categories["host_to_devon"] + categories["devon_to_host"] + categories["caller_to_devon"] + categories["devon_to_caller"]
    if devon_gaps:
        # Look for a natural break between short (interjection) and long (TTS) gaps
        short, long = _split_short_long(devon_gaps)
        if short and long:
            suggested = (max(short) + min(long)) / 2
            print(f"Devon threshold: {suggested:.1f}s (short gaps: {len(short)} up to {max(short):.1f}s, long gaps: {len(long)} from {min(long):.1f}s)")
        elif short:
            print(f"Devon threshold: {max(short) + 1:.1f}s (all gaps are short, max {max(short):.1f}s)")
        else:
            print(f"Devon threshold: 3.0s (all gaps are long, min {min(long):.1f}s)")

    _print_transition_threshold(
        "Caller transition threshold",
        categories["host_to_caller"] + categories["caller_to_host"],
    )
    _print_transition_threshold("Same-speaker threshold", categories["host_self"])

    # Apply the thresholds the label below names: 3 s when Devon is on either
    # side of the gap, 6 s otherwise. Previously 3 s was applied to every gap.
    would_cut = [
        g["dur"] for g in gaps
        if g["dur"] >= (3.0 if "devon" in (g["before"], g["after"]) else 6.0)
    ]
    print(f"\nWith current thresholds (Devon=3s, others=6s):")
    print(f" Would cut: ~{len(would_cut)} gaps, ~{sum(would_cut):.0f}s ({sum(would_cut)/60:.1f} min)")
    print(f" Result: ~{(duration - sum(would_cut))/60:.1f} min")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # One positional argument is required: the directory that holds the stems.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python analyze_gaps.py <stems_dir>")
        sys.exit(1)
    analyze(Path(cli_args[0]))
|
||||||
+29
-1
@@ -5314,6 +5314,7 @@ TIME: {time_ctx} {season_ctx}
|
|||||||
{fluency_hint}
|
{fluency_hint}
|
||||||
{f'SOME DETAILS ABOUT THEM: {seed_text}' if seed_text else ''}
|
{f'SOME DETAILS ABOUT THEM: {seed_text}' if seed_text else ''}
|
||||||
{f'CALLER ENERGY: {style_hint}' if style_hint else ''}
|
{f'CALLER ENERGY: {style_hint}' if style_hint else ''}
|
||||||
|
{f"SHOW THEME: Tonight's show theme is '{session.show_theme}'. This caller might have a story or angle related to this theme — or they might not. Not every caller has to be about the theme, but if their reason for calling can naturally connect to it, lean into that connection. The theme should feel like a through-line, not a mandate." if session.show_theme else ''}
|
||||||
|
|
||||||
Respond with a JSON object containing these fields:
|
Respond with a JSON object containing these fields:
|
||||||
|
|
||||||
@@ -6014,6 +6015,10 @@ def get_caller_prompt(caller: dict, show_history: str = "",
|
|||||||
parts.append(research_context)
|
parts.append(research_context)
|
||||||
world_context = "\n".join(parts) + "\n"
|
world_context = "\n".join(parts) + "\n"
|
||||||
|
|
||||||
|
theme_context = ""
|
||||||
|
if session.show_theme:
|
||||||
|
theme_context = f"\nSHOW THEME: Tonight's show theme is \"{session.show_theme}\". You're aware of the theme — the host mentioned it at the top of the show. If your story or situation connects to it, you might bring it up naturally. But don't force it. Not every caller has to be about the theme. If the host steers you toward the theme, go with it.\n"
|
||||||
|
|
||||||
now = datetime.now(_MST)
|
now = datetime.now(_MST)
|
||||||
date_str = now.strftime("%A, %B %d")
|
date_str = now.strftime("%A, %B %d")
|
||||||
|
|
||||||
@@ -6060,7 +6065,7 @@ You are {caller['name']}. You are the CALLER. You are NOT Luke. Luke is the HOST
|
|||||||
|
|
||||||
YOUR BACKGROUND:
|
YOUR BACKGROUND:
|
||||||
{caller['vibe']}
|
{caller['vibe']}
|
||||||
{relationship_context}{history}{world_context}{emotional_read}
|
{relationship_context}{history}{world_context}{theme_context}{emotional_read}
|
||||||
You're a real person calling a late-night radio show. You called because you've got something specific and you want to talk about it.
|
You're a real person calling a late-night radio show. You called because you've got something specific and you want to talk about it.
|
||||||
|
|
||||||
{pacing_block}
|
{pacing_block}
|
||||||
@@ -6215,6 +6220,7 @@ class Session:
|
|||||||
self.caller_queue: list[str] = [] # Sorted presentation order of caller keys
|
self.caller_queue: list[str] = [] # Sorted presentation order of caller keys
|
||||||
self.relationship_context: dict[str, str] = {} # caller_key → relationship prompt injection
|
self.relationship_context: dict[str, str] = {} # caller_key → relationship prompt injection
|
||||||
self.intern_monitoring: bool = True # Devon monitors conversations by default
|
self.intern_monitoring: bool = True # Devon monitors conversations by default
|
||||||
|
self.show_theme: str = "" # Current show theme (e.g. "St. Patrick's Day")
|
||||||
|
|
||||||
def start_call(self, caller_key: str):
|
def start_call(self, caller_key: str):
|
||||||
self.current_caller_key = caller_key
|
self.current_caller_key = caller_key
|
||||||
@@ -8501,6 +8507,9 @@ GENRE_KEYWORDS = {
|
|||||||
"valentine": "Ballad",
|
"valentine": "Ballad",
|
||||||
"romantic": "Ballad",
|
"romantic": "Ballad",
|
||||||
"ballad": "Ballad",
|
"ballad": "Ballad",
|
||||||
|
"irish": "Irish",
|
||||||
|
"ireland": "Irish",
|
||||||
|
"patricks": "Irish",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -8759,6 +8768,25 @@ async def update_settings(data: dict):
|
|||||||
return llm_service.get_settings()
|
return llm_service.get_settings()
|
||||||
|
|
||||||
|
|
||||||
|
# --- Show Theme ---
|
||||||
|
|
||||||
|
@app.get("/api/show-theme")
async def get_show_theme():
    """Return the show theme currently stored on the session as ``{"theme": ...}``."""
    current_theme = session.show_theme
    return {"theme": current_theme}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/show-theme")
async def set_show_theme(data: dict):
    """Set or clear the session's show theme.

    Args:
        data: Request body. ``data["theme"]`` is the new theme; a missing key,
            JSON null, or blank string clears the theme.

    Returns:
        ``{"theme": <stored theme>}`` after normalisation (trimmed, at most
        100 characters).
    """
    raw_theme = data.get("theme")
    # A JSON null or a non-string value used to raise AttributeError on
    # .strip() and surface as a 500; null now clears, other values are coerced.
    text = "" if raw_theme is None else str(raw_theme)
    # Strip again after truncating so the cut cannot leave trailing whitespace.
    theme = text.strip()[:100].rstrip()

    old_theme = session.show_theme
    session.show_theme = theme
    if theme:
        print(f"[Theme] Show theme set: {theme}")
    elif old_theme:
        print(f"[Theme] Show theme cleared (was: {old_theme})")
    return {"theme": session.show_theme}
|
||||||
|
|
||||||
|
|
||||||
# --- Cost Tracking Endpoints ---
|
# --- Cost Tracking Endpoints ---
|
||||||
|
|
||||||
@app.get("/api/costs")
|
@app.get("/api/costs")
|
||||||
|
|||||||
@@ -113,6 +113,69 @@ header button:hover {
|
|||||||
border-color: rgba(232, 121, 29, 0.3);
|
border-color: rgba(232, 121, 29, 0.3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.theme-bar {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 6px;
|
||||||
|
padding: 4px 12px;
|
||||||
|
background: rgba(255, 255, 255, 0.05);
|
||||||
|
border-radius: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-label {
|
||||||
|
font-size: 0.8rem;
|
||||||
|
color: #aaa;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-input {
|
||||||
|
background: rgba(255, 255, 255, 0.08);
|
||||||
|
border: 1px solid rgba(255, 255, 255, 0.15);
|
||||||
|
border-radius: 4px;
|
||||||
|
color: #fff;
|
||||||
|
padding: 4px 8px;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
width: 200px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-input:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: #f5a623;
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-input.active {
|
||||||
|
border-color: #f5a623;
|
||||||
|
background: rgba(245, 166, 35, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-btn {
|
||||||
|
padding: 4px 10px;
|
||||||
|
border-radius: 4px;
|
||||||
|
border: none;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-btn.set {
|
||||||
|
background: #f5a623;
|
||||||
|
color: #000;
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-btn.set:hover {
|
||||||
|
background: #e6991a;
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-btn.clear {
|
||||||
|
background: rgba(255, 255, 255, 0.1);
|
||||||
|
color: #aaa;
|
||||||
|
padding: 4px 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.theme-btn.clear:hover {
|
||||||
|
background: rgba(255, 80, 80, 0.3);
|
||||||
|
color: #ff5050;
|
||||||
|
}
|
||||||
|
|
||||||
.on-air-btn {
|
.on-air-btn {
|
||||||
font-weight: 700;
|
font-weight: 700;
|
||||||
text-transform: uppercase;
|
text-transform: uppercase;
|
||||||
|
|||||||
@@ -17,6 +17,12 @@
|
|||||||
<button id="export-session-btn">Export</button>
|
<button id="export-session-btn">Export</button>
|
||||||
<button id="settings-btn">Settings</button>
|
<button id="settings-btn">Settings</button>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="theme-bar">
|
||||||
|
<label for="show-theme-input" class="theme-label">Theme:</label>
|
||||||
|
<input type="text" id="show-theme-input" class="theme-input" placeholder="e.g. St. Patrick's Day" maxlength="100">
|
||||||
|
<button id="set-theme-btn" class="theme-btn set" title="Set show theme">Set</button>
|
||||||
|
<button id="clear-theme-btn" class="theme-btn clear hidden" title="Clear theme">✕</button>
|
||||||
|
</div>
|
||||||
<div id="show-clock" class="show-clock">
|
<div id="show-clock" class="show-clock">
|
||||||
<span class="clock-time" id="clock-time"></span>
|
<span class="clock-time" id="clock-time"></span>
|
||||||
<span id="show-timers" class="show-timers hidden">
|
<span id="show-timers" class="show-timers hidden">
|
||||||
|
|||||||
@@ -130,6 +130,7 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|||||||
await loadSettings();
|
await loadSettings();
|
||||||
initEventListeners();
|
initEventListeners();
|
||||||
initClock();
|
initClock();
|
||||||
|
loadShowTheme();
|
||||||
loadVoicemails();
|
loadVoicemails();
|
||||||
setInterval(loadVoicemails, 30000);
|
setInterval(loadVoicemails, 30000);
|
||||||
loadEmails();
|
loadEmails();
|
||||||
@@ -345,6 +346,13 @@ function initEventListeners() {
|
|||||||
document.getElementById('devon-play-btn')?.addEventListener('click', playDevonSuggestion);
|
document.getElementById('devon-play-btn')?.addEventListener('click', playDevonSuggestion);
|
||||||
document.getElementById('devon-dismiss-btn')?.addEventListener('click', dismissDevonSuggestion);
|
document.getElementById('devon-dismiss-btn')?.addEventListener('click', dismissDevonSuggestion);
|
||||||
|
|
||||||
|
// Show Theme
|
||||||
|
document.getElementById('set-theme-btn')?.addEventListener('click', setShowTheme);
|
||||||
|
document.getElementById('clear-theme-btn')?.addEventListener('click', clearShowTheme);
|
||||||
|
document.getElementById('show-theme-input')?.addEventListener('keydown', (e) => {
|
||||||
|
if (e.key === 'Enter') setShowTheme();
|
||||||
|
});
|
||||||
|
|
||||||
// Settings
|
// Settings
|
||||||
document.getElementById('settings-btn')?.addEventListener('click', async () => {
|
document.getElementById('settings-btn')?.addEventListener('click', async () => {
|
||||||
document.getElementById('settings-modal')?.classList.remove('hidden');
|
document.getElementById('settings-modal')?.classList.remove('hidden');
|
||||||
@@ -692,6 +700,7 @@ async function newSession() {
|
|||||||
|
|
||||||
// Reload callers to get new session ID
|
// Reload callers to get new session ID
|
||||||
await loadCallers();
|
await loadCallers();
|
||||||
|
await loadShowTheme();
|
||||||
|
|
||||||
log('New session started - all callers have fresh backgrounds');
|
log('New session started - all callers have fresh backgrounds');
|
||||||
}
|
}
|
||||||
@@ -1159,6 +1168,69 @@ async function playSFX(soundFile) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// --- Show Theme ---
|
||||||
|
/**
 * Fetch the current show theme from the server and mirror it in the theme bar:
 * fill the input, mark it active, and show either the Set or the Clear button.
 * Failures are logged and leave the UI untouched.
 */
async function loadShowTheme() {
    try {
        const res = await fetch('/api/show-theme');
        // fetch() resolves on HTTP errors too; without this check an error
        // body with no `theme` field was treated as "no theme" and wiped the input.
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data = await res.json();

        const input = document.getElementById('show-theme-input');
        const setBtn = document.getElementById('set-theme-btn');
        const clearBtn = document.getElementById('clear-theme-btn');
        if (!input || !setBtn || !clearBtn) return;

        const theme = typeof data.theme === 'string' ? data.theme : '';
        const hasTheme = theme !== '';
        input.value = theme;
        input.classList.toggle('active', hasTheme);
        setBtn.classList.toggle('hidden', hasTheme);
        clearBtn.classList.toggle('hidden', !hasTheme);
    } catch (e) {
        console.error('Failed to load show theme:', e);
    }
}
|
||||||
|
|
||||||
|
/**
 * Send the text in the theme input to the server as the new show theme.
 * Does nothing when the input is blank. On success the input is updated to
 * the value the server actually stored and the Clear button replaces Set.
 */
async function setShowTheme() {
    const input = document.getElementById('show-theme-input');
    if (!input) return;
    const theme = input.value.trim();
    if (!theme) return;
    try {
        const res = await fetch('/api/show-theme', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ theme })
        });
        // fetch() resolves on HTTP errors too; do not parse an error page as success.
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data = await res.json();
        if (data.theme) {
            // Show what the server stored, since it trims and truncates the value.
            input.value = data.theme;
            input.classList.add('active');
            document.getElementById('set-theme-btn')?.classList.add('hidden');
            document.getElementById('clear-theme-btn')?.classList.remove('hidden');
        }
    } catch (e) {
        console.error('Failed to set show theme:', e);
    }
}
|
||||||
|
|
||||||
|
/**
 * Clear the show theme on the server and, only if that succeeds, reset the
 * theme bar to its empty state (blank input, Set button visible).
 */
async function clearShowTheme() {
    try {
        const res = await fetch('/api/show-theme', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ theme: '' })
        });
        // fetch() only rejects on network failure; without this check the UI
        // showed "cleared" even when the server had rejected the request.
        if (!res.ok) throw new Error(`HTTP ${res.status}`);

        const input = document.getElementById('show-theme-input');
        if (input) {
            input.value = '';
            input.classList.remove('active');
        }
        document.getElementById('set-theme-btn')?.classList.remove('hidden');
        document.getElementById('clear-theme-btn')?.classList.add('hidden');
    } catch (e) {
        console.error('Failed to clear show theme:', e);
    }
}
|
||||||
|
|
||||||
|
|
||||||
// --- Settings ---
|
// --- Settings ---
|
||||||
async function loadSettings() {
|
async function loadSettings() {
|
||||||
try {
|
try {
|
||||||
|
|||||||
+103
-12
@@ -9,12 +9,15 @@
|
|||||||
---------------------------------------------------------------------------
|
---------------------------------------------------------------------------
|
||||||
local SILENCE_DB = -30 -- dBFS — anything below this is "silence"
|
local SILENCE_DB = -30 -- dBFS — anything below this is "silence"
|
||||||
local MIN_SILENCE_SEC = 6.0 -- same-speaker gaps: only remove silences longer than this
|
local MIN_SILENCE_SEC = 6.0 -- same-speaker gaps: only remove silences longer than this
|
||||||
local MIN_SILENCE_TRANSITION_SEC = 2.5 -- cross-speaker gaps: shorter threshold for speaker transitions
|
local MAX_SILENCE_SEC = 999 -- no practical limit (IDENT/AD regions protect real breaks)
|
||||||
|
local MIN_SILENCE_TRANSITION_SEC = 5.0 -- cross-speaker gaps: threshold for caller TTS latency
|
||||||
|
local MIN_SILENCE_DEVON_SEC = 3.0 -- Devon gaps: interjections are prerendered (~2-3s gaps), conversational TTS is 6s+
|
||||||
|
local DEVON_TRACK = 2 -- 1-indexed: Devon track number
|
||||||
local MIN_VOICE_SEC = 0.3 -- ignore non-silent bursts shorter than this (filters transients)
|
local MIN_VOICE_SEC = 0.3 -- ignore non-silent bursts shorter than this (filters transients)
|
||||||
local KEEP_PAD_SEC = 0.5 -- leave this much silence on each side of a cut
|
local KEEP_PAD_SEC = 0.5 -- leave this much silence on each side of a cut
|
||||||
local BLOCK_SEC = 0.1 -- analysis block size (100ms)
|
local BLOCK_SEC = 0.1 -- analysis block size (100ms)
|
||||||
local SAMPLE_RATE = 48000
|
local SAMPLE_RATE = 48000
|
||||||
local CHECK_TRACKS = {1, 2, 3, 4} -- 1-indexed: Host, Devon, Live Caller, AI Caller
|
local CHECK_TRACKS = {1, 2, 3, 4} -- 1-indexed: Host, Devon, AI Caller, Live Caller
|
||||||
local IDENTS_TRACK = 6 -- 1-indexed: Idents track
|
local IDENTS_TRACK = 6 -- 1-indexed: Idents track
|
||||||
local ADS_TRACK = 7 -- 1-indexed: Ads track
|
local ADS_TRACK = 7 -- 1-indexed: Ads track
|
||||||
local MUSIC_TRACK = 8 -- 1-indexed: Music track
|
local MUSIC_TRACK = 8 -- 1-indexed: Music track
|
||||||
@@ -25,7 +28,6 @@ local YIELD_INTERVAL = 200 -- yield to REAPER every N blocks (~20s of audio)
|
|||||||
local BLOCK_SAMPLES = math.floor(SAMPLE_RATE * BLOCK_SEC)
|
local BLOCK_SAMPLES = math.floor(SAMPLE_RATE * BLOCK_SEC)
|
||||||
local THRESHOLD = 10 ^ (SILENCE_DB / 20)
|
local THRESHOLD = 10 ^ (SILENCE_DB / 20)
|
||||||
local MIN_VOICE_BLOCKS = math.ceil(MIN_VOICE_SEC / BLOCK_SEC)
|
local MIN_VOICE_BLOCKS = math.ceil(MIN_VOICE_SEC / BLOCK_SEC)
|
||||||
|
|
||||||
local function log(msg)
|
local function log(msg)
|
||||||
reaper.ShowConsoleMsg("[PostProd] " .. msg .. "\n")
|
reaper.ShowConsoleMsg("[PostProd] " .. msg .. "\n")
|
||||||
end
|
end
|
||||||
@@ -306,13 +308,17 @@ local function read_block_peak_rms(ta, project_time)
|
|||||||
end
|
end
|
||||||
|
|
||||||
-- find_loudest_track: returns 1-based index of the loudest track at a given time, or 0 if silent
|
-- find_loudest_track: returns 1-based index of the loudest track at a given time, or 0 if silent
|
||||||
|
-- Uses RMS (not peak) for speaker identification — ambient mic noise has high peaks but low RMS
|
||||||
local function find_loudest_track(track_audios, project_time)
|
local function find_loudest_track(track_audios, project_time)
|
||||||
local best_peak = 0
|
local best_peak = 0
|
||||||
|
local best_rms = 0
|
||||||
local best_idx = 0
|
local best_idx = 0
|
||||||
for i, ta in ipairs(track_audios) do
|
for i, ta in ipairs(track_audios) do
|
||||||
local peak, _ = read_block_peak_rms(ta, project_time)
|
local peak, sum_sq = read_block_peak_rms(ta, project_time)
|
||||||
if peak > best_peak then
|
if peak > best_peak then best_peak = peak end
|
||||||
best_peak = peak
|
local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
|
||||||
|
if rms > best_rms then
|
||||||
|
best_rms = rms
|
||||||
best_idx = i
|
best_idx = i
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -340,12 +346,17 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
|
|||||||
|
|
||||||
while t < region.end_pos do
|
while t < region.end_pos do
|
||||||
local best_peak = 0
|
local best_peak = 0
|
||||||
|
local best_rms = 0
|
||||||
local best_sum = 0
|
local best_sum = 0
|
||||||
local best_track = 0
|
local best_track = 0
|
||||||
for i, ta in ipairs(track_audios) do
|
for i, ta in ipairs(track_audios) do
|
||||||
local peak, sum_sq = read_block_peak_rms(ta, t)
|
local peak, sum_sq = read_block_peak_rms(ta, t)
|
||||||
if peak > best_peak then
|
if peak > best_peak then best_peak = peak end
|
||||||
best_peak = peak
|
-- Use RMS for speaker identification (sustained energy, not transient peaks)
|
||||||
|
-- Host mic ambient noise has high peaks but low RMS; TTS speech has high RMS
|
||||||
|
local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
|
||||||
|
if rms > best_rms then
|
||||||
|
best_rms = rms
|
||||||
best_sum = sum_sq
|
best_sum = sum_sq
|
||||||
best_track = i
|
best_track = i
|
||||||
end
|
end
|
||||||
@@ -375,8 +386,11 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
|
|||||||
local dur = voice_start - silence_start
|
local dur = voice_start - silence_start
|
||||||
local track_after = voice_run_track
|
local track_after = voice_run_track
|
||||||
local is_transition = track_before_silence ~= 0 and track_after ~= 0 and track_before_silence ~= track_after
|
local is_transition = track_before_silence ~= 0 and track_after ~= 0 and track_before_silence ~= track_after
|
||||||
local threshold = is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC
|
local devon_involved = track_before_silence == DEVON_TRACK or track_after == DEVON_TRACK
|
||||||
if dur >= threshold then
|
local threshold = devon_involved and MIN_SILENCE_DEVON_SEC
|
||||||
|
or (is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC)
|
||||||
|
|
||||||
|
if dur >= threshold and dur <= MAX_SILENCE_SEC then
|
||||||
table.insert(silences, {
|
table.insert(silences, {
|
||||||
start_pos = silence_start, end_pos = voice_start, duration = dur,
|
start_pos = silence_start, end_pos = voice_start, duration = dur,
|
||||||
is_transition = is_transition,
|
is_transition = is_transition,
|
||||||
@@ -410,7 +424,7 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
|
|||||||
|
|
||||||
if in_silence then
|
if in_silence then
|
||||||
local dur = region.end_pos - silence_start
|
local dur = region.end_pos - silence_start
|
||||||
if dur >= MIN_SILENCE_SEC then
|
if dur >= MIN_SILENCE_SEC and dur <= MAX_SILENCE_SEC then
|
||||||
table.insert(silences, {start_pos = silence_start, end_pos = region.end_pos, duration = dur})
|
table.insert(silences, {start_pos = silence_start, end_pos = region.end_pos, duration = dur})
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -547,6 +561,7 @@ local function phase1_strip_silence(dialog_regions)
|
|||||||
if (t + 1) == MUSIC_TRACK then goto next_track end
|
if (t + 1) == MUSIC_TRACK then goto next_track end
|
||||||
local track = reaper.GetTrack(0, t)
|
local track = reaper.GetTrack(0, t)
|
||||||
|
|
||||||
|
-- Split and delete the silent portion from items that span r.start_pos
|
||||||
local item = find_item_at(track, r.start_pos)
|
local item = find_item_at(track, r.start_pos)
|
||||||
if item then
|
if item then
|
||||||
local right = reaper.SplitMediaItem(item, r.start_pos)
|
local right = reaper.SplitMediaItem(item, r.start_pos)
|
||||||
@@ -556,10 +571,36 @@ local function phase1_strip_silence(dialog_regions)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Handle sparse track items that START within the removal range
|
||||||
|
-- (not found by find_item_at since they don't contain r.start_pos)
|
||||||
|
for j = reaper.CountTrackMediaItems(track) - 1, 0, -1 do
|
||||||
|
local check = reaper.GetTrackMediaItem(track, j)
|
||||||
|
local cpos = reaper.GetMediaItemInfo_Value(check, "D_POSITION")
|
||||||
|
if cpos >= r.start_pos and cpos < r.end_pos then
|
||||||
|
local clen = reaper.GetMediaItemInfo_Value(check, "D_LENGTH")
|
||||||
|
local cend = cpos + clen
|
||||||
|
if cend <= r.end_pos then
|
||||||
|
-- Entirely within removal — delete
|
||||||
|
reaper.DeleteTrackMediaItem(track, check)
|
||||||
|
else
|
||||||
|
-- Starts in removal but extends past — trim start to r.end_pos
|
||||||
|
local trim = r.end_pos - cpos
|
||||||
|
local take = reaper.GetActiveTake(check)
|
||||||
|
if take then
|
||||||
|
local offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
|
||||||
|
reaper.SetMediaItemTakeInfo_Value(take, "D_STARTOFFS", offset + trim)
|
||||||
|
end
|
||||||
|
reaper.SetMediaItemInfo_Value(check, "D_LENGTH", cend - r.end_pos)
|
||||||
|
reaper.SetMediaItemInfo_Value(check, "D_POSITION", r.end_pos)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
-- Shift items AFTER the removal (use r.end_pos, not r.start_pos)
|
||||||
for j = 0, reaper.CountTrackMediaItems(track) - 1 do
|
for j = 0, reaper.CountTrackMediaItems(track) - 1 do
|
||||||
local shift_item = reaper.GetTrackMediaItem(track, j)
|
local shift_item = reaper.GetTrackMediaItem(track, j)
|
||||||
local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
|
local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
|
||||||
if pos >= r.start_pos then
|
if pos >= r.end_pos then
|
||||||
reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
|
reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -766,6 +807,56 @@ local function phase3_trim_music()
|
|||||||
local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
|
local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
|
||||||
if not music_track then return end
|
if not music_track then return end
|
||||||
|
|
||||||
|
-- Ensure music starts before first voice item.
|
||||||
|
-- Silence removal shifts voice/idents/ads but not music. If voice now starts before
|
||||||
|
-- music, nudge all non-music tracks forward so music has a lead-in.
|
||||||
|
local first_voice_start = math.huge
|
||||||
|
for _, tidx in ipairs(CHECK_TRACKS) do
|
||||||
|
local tr = reaper.GetTrack(0, tidx - 1)
|
||||||
|
if tr and reaper.CountTrackMediaItems(tr) > 0 then
|
||||||
|
local item = reaper.GetTrackMediaItem(tr, 0)
|
||||||
|
local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
|
||||||
|
if pos < first_voice_start then first_voice_start = pos end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
local MUSIC_LEAD_SEC = 3.0 -- seconds of music before first voice
|
||||||
|
if first_voice_start < math.huge then
|
||||||
|
local first_music = reaper.GetTrackMediaItem(music_track, 0)
|
||||||
|
if first_music then
|
||||||
|
local music_start = reaper.GetMediaItemInfo_Value(first_music, "D_POSITION")
|
||||||
|
local desired_voice_start = music_start + MUSIC_LEAD_SEC
|
||||||
|
if first_voice_start < desired_voice_start then
|
||||||
|
local nudge = desired_voice_start - first_voice_start
|
||||||
|
-- Shift all non-music tracks forward
|
||||||
|
for t = 0, reaper.CountTracks(0) - 1 do
|
||||||
|
if (t + 1) == MUSIC_TRACK then goto skip_music end
|
||||||
|
local track = reaper.GetTrack(0, t)
|
||||||
|
for i = 0, reaper.CountTrackMediaItems(track) - 1 do
|
||||||
|
local item = reaper.GetTrackMediaItem(track, i)
|
||||||
|
local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
|
||||||
|
reaper.SetMediaItemInfo_Value(item, "D_POSITION", pos + nudge)
|
||||||
|
end
|
||||||
|
::skip_music::
|
||||||
|
end
|
||||||
|
-- Also shift all markers/regions forward
|
||||||
|
local _, num_markers, num_regions = reaper.CountProjectMarkers(0)
|
||||||
|
local total_m = num_markers + num_regions
|
||||||
|
for i = 0, total_m - 1 do
|
||||||
|
local retval, is_region, pos, rgnend, name, idx, color = reaper.EnumProjectMarkers3(0, i)
|
||||||
|
if retval then
|
||||||
|
if is_region then
|
||||||
|
reaper.SetProjectMarker3(0, idx, true, pos + nudge, rgnend + nudge, name, color)
|
||||||
|
else
|
||||||
|
reaper.SetProjectMarker3(0, idx, false, pos + nudge, 0, name, color)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
log("Phase 3: Nudged non-music tracks forward " .. string.format("%.1f", nudge) .. "s for " .. MUSIC_LEAD_SEC .. "s music lead-in")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
local last_end = 0
|
local last_end = 0
|
||||||
for _, tidx in ipairs(CHECK_TRACKS) do
|
for _, tidx in ipairs(CHECK_TRACKS) do
|
||||||
local tr = reaper.GetTrack(0, tidx - 1)
|
local tr = reaper.GetTrack(0, tidx - 1)
|
||||||
|
|||||||
Reference in New Issue
Block a user