2 Commits

Author SHA1 Message Date
luke 5d8ab57e20 Show theme feature, Irish music genre, strip silence overhaul
- Add show theme UI in header bar + backend API (inject into caller prompts)
- Add Irish genre category for music dropdown
- Strip silence: RMS-based speaker detection (fixes Devon not being identified)
- Strip silence: Devon-specific 3s threshold for interjections
- Strip silence: sparse track item handling in shift logic
- Strip silence: music lead-in preservation after silence removal
- Strip silence: no max gap limit (IDENT/AD regions protect breaks)
- Add analyze_gaps.py tool for per-show threshold analysis

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 03:30:15 -06:00
luke d33a022676 Add show theme feature for themed episodes
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:46:48 -06:00
6 changed files with 533 additions and 13 deletions
+260
View File
@@ -0,0 +1,260 @@
#!/usr/bin/env python3
"""Analyze silence gaps in podcast stems to find optimal strip-silence thresholds.
Usage: python analyze_gaps.py recordings/2026-03-17_235137/
"""
import sys
import numpy as np
import soundfile as sf
from pathlib import Path
BLOCK_SEC = 0.1
SILENCE_DB = -30
THRESHOLD = 10 ** (SILENCE_DB / 20)
MIN_VOICE_SEC = 0.3
def load_stem(path: Path) -> tuple[np.ndarray, int]:
    """Read one stem file as float32 and return ``(samples, sample_rate)``.

    Multi-channel files are reduced to their first channel so the rest of the
    analysis always works on a 1-D array.
    """
    samples, rate = sf.read(path, dtype="float32")
    mono = samples[:, 0] if samples.ndim > 1 else samples
    return mono, rate
def compute_rms_blocks(audio: np.ndarray, sr: int) -> np.ndarray:
    """Return the RMS level of each consecutive BLOCK_SEC-long block of ``audio``.

    Samples past the last whole block are ignored. When the audio is shorter
    than one block, a single-element array ``[0.0]`` is returned.
    """
    frame_len = int(sr * BLOCK_SEC)
    frame_count = len(audio) // frame_len
    if not frame_count:
        return np.array([0.0])
    usable = audio[:frame_count * frame_len]
    frames = usable.reshape(frame_count, frame_len)
    mean_square = np.mean(frames ** 2, axis=1)
    return np.sqrt(mean_square)
def compute_peak_blocks(audio: np.ndarray, sr: int) -> np.ndarray:
    """Return the absolute peak of each consecutive BLOCK_SEC-long block of ``audio``.

    Samples past the last whole block are ignored. When the audio is shorter
    than one block, a single-element array ``[0.0]`` is returned.
    """
    frame_len = int(sr * BLOCK_SEC)
    frame_count = len(audio) // frame_len
    if not frame_count:
        return np.array([0.0])
    usable = audio[:frame_count * frame_len]
    magnitudes = np.abs(usable.reshape(frame_count, frame_len))
    return np.max(magnitudes, axis=1)
def analyze(stems_dir: Path):
    """Detect silence gaps across the voice stems and print threshold suggestions.

    Loads ``host.wav``, ``devon.wav`` and ``caller.wav`` from ``stems_dir``
    (whichever exist), splits them into BLOCK_SEC blocks, and walks the blocks
    looking for stretches where every stem's peak is below THRESHOLD. Each gap
    of at least 0.5s is recorded with the speaker before and after it (speaker
    = stem with the highest RMS in the block), grouped by speaker transition,
    and summarised on stdout.

    Args:
        stems_dir: directory containing the per-speaker stem WAV files.

    Returns:
        None. All results are printed.
    """
    # Removal thresholds used by the companion Lua strip-silence script
    # (same-speaker, cross-speaker transition, Devon-involved).
    same_speaker_sec = 6.0
    transition_sec = 5.0
    devon_sec = 3.0

    stems_dir = Path(stems_dir)
    voice_stems = {}
    stem_rates = {}
    for name in ["host", "devon", "caller"]:
        path = stems_dir / f"{name}.wav"
        if path.exists():
            print(f"Loading {name}...", end=" ", flush=True)
            audio, sr = load_stem(path)
            voice_stems[name] = audio
            stem_rates[name] = sr
            print(f"{len(audio)/sr:.0f}s @ {sr}Hz")
    if not voice_stems:
        print("No voice stems found")
        return

    # Use each stem's own sample rate so blocks stay time-aligned even if the
    # stems were not all written at the same rate.
    duration = max(len(a) / stem_rates[n] for n, a in voice_stems.items())
    print(f"\nTotal duration: {duration/60:.1f} min")

    # Compute per-track RMS and peak blocks
    track_rms = {}
    track_peak = {}
    for name, audio in voice_stems.items():
        track_rms[name] = compute_rms_blocks(audio, stem_rates[name])
        track_peak[name] = compute_peak_blocks(audio, stem_rates[name])
    n_blocks = min(len(v) for v in track_peak.values())

    # Detect gaps using same logic as Lua script (RMS for speaker ID, peak for silence).
    # ceil, not int(): 0.3 / 0.1 evaluates to 2.9999999999999996, which int()
    # would truncate to 2 while the Lua script's math.ceil gives 3.
    min_voice_blocks = max(1, int(np.ceil(MIN_VOICE_SEC / BLOCK_SEC)))
    track_names = list(voice_stems.keys())
    gaps = []
    in_silence = False
    silence_start = 0
    track_before = None
    last_active = None
    voice_run = 0
    voice_run_track = None
    for i in range(n_blocks):
        # Peak for silence detection
        best_peak = max(track_peak[name][i] for name in track_names)
        # RMS for speaker identification
        best_rms = 0
        best_track = None
        for name in track_names:
            r = track_rms[name][i]
            if r > best_rms:
                best_rms = r
                best_track = name
        all_silent = best_peak < THRESHOLD
        if not all_silent:
            last_active = best_track
        if in_silence:
            if all_silent:
                # A burst shorter than min_voice_blocks does not end the gap.
                voice_run = 0
                voice_run_track = None
            else:
                if voice_run == 0:
                    voice_run_track = best_track
                voice_run += 1
                if voice_run >= min_voice_blocks:
                    # The gap ends where the sustained voice run began.
                    voice_start_block = i - (voice_run - 1)
                    gap_start = silence_start * BLOCK_SEC
                    gap_end = voice_start_block * BLOCK_SEC
                    dur = gap_end - gap_start
                    if dur >= 0.5:  # log gaps >= 0.5s
                        gaps.append({
                            "start": gap_start,
                            "end": gap_end,
                            "dur": dur,
                            "before": track_before or "?",
                            "after": voice_run_track or "?",
                        })
                    in_silence = False
                    voice_run = 0
                    voice_run_track = None
        else:
            if all_silent:
                in_silence = True
                silence_start = i
                track_before = last_active
                voice_run = 0
                voice_run_track = None

    # Trailing silence
    if in_silence:
        dur = (n_blocks - silence_start) * BLOCK_SEC
        if dur >= 0.5:
            gaps.append({
                "start": silence_start * BLOCK_SEC,
                "end": n_blocks * BLOCK_SEC,
                "dur": dur,
                "before": track_before or "?",
                "after": "end",
            })
    if not gaps:
        print("No gaps detected")
        return

    # Categorize gaps
    categories = {
        "host_self": [],        # Host -> Host
        "host_to_caller": [],   # Host -> Caller (TTS latency)
        "caller_to_host": [],   # Caller -> Host
        "host_to_devon": [],    # Host -> Devon (TTS latency)
        "devon_to_host": [],    # Devon -> Host
        "caller_to_devon": [],  # Caller -> Devon (interjection)
        "devon_to_caller": [],  # Devon -> Caller
        "other": [],
    }
    for g in gaps:
        b, a = g["before"], g["after"]
        key = "host_self" if (b, a) == ("host", "host") else f"{b}_to_{a}"
        # Any pairing without its own bucket (unknown speaker, trailing
        # silence, caller->caller, devon->devon) lands in "other".
        categories.get(key, categories["other"]).append(g)

    # Print results
    print(f"\n{'='*70}")
    print(f"GAP ANALYSIS — {len(gaps)} gaps detected")
    print(f"{'='*70}")
    total_silence = sum(g["dur"] for g in gaps)
    print(f"Total silence: {total_silence:.0f}s ({total_silence/60:.1f} min)")
    print(f"Content after removal: ~{(duration - total_silence)/60:.1f} min")
    for cat_name, cat_gaps in sorted(categories.items(), key=lambda x: -len(x[1])):
        if not cat_gaps:
            continue
        durs = sorted([g["dur"] for g in cat_gaps])
        print(f"\n--- {cat_name} ({len(cat_gaps)} gaps) ---")
        print(f" Range: {durs[0]:.1f}s - {durs[-1]:.1f}s")
        print(f" Median: {np.median(durs):.1f}s Mean: {np.mean(durs):.1f}s")
        if len(durs) >= 5:
            print(f" P25: {np.percentile(durs, 25):.1f}s P75: {np.percentile(durs, 75):.1f}s")
        # Histogram
        brackets = [(0, 1), (1, 2), (2, 3), (3, 5), (5, 8), (8, 12), (12, 18), (18, 30), (30, 60), (60, 999)]
        print(f" Distribution:")
        for lo, hi in brackets:
            count = sum(1 for d in durs if lo <= d < hi)
            if count > 0:
                bar = "#" * count
                label = f"{lo}-{hi}s" if hi < 999 else f"{lo}s+"
                print(f" {label:>8s}: {bar} ({count})")

    # Find natural clusters and suggest thresholds
    print(f"\n{'='*70}")
    print("SUGGESTED THRESHOLDS")
    print(f"{'='*70}")

    # For each Devon-involved category, find the gap between interjection and TTS gaps
    devon_gaps = categories["host_to_devon"] + categories["devon_to_host"] + categories["caller_to_devon"] + categories["devon_to_caller"]
    if devon_gaps:
        devon_durs = sorted([g["dur"] for g in devon_gaps])
        # Look for a natural break between short (interjection) and long (TTS) gaps
        short = [d for d in devon_durs if d < 5]
        long = [d for d in devon_durs if d >= 5]
        if short and long:
            suggested = (max(short) + min(long)) / 2
            print(f"Devon threshold: {suggested:.1f}s (short gaps: {len(short)} up to {max(short):.1f}s, long gaps: {len(long)} from {min(long):.1f}s)")
        elif short:
            print(f"Devon threshold: {max(short) + 1:.1f}s (all gaps are short, max {max(short):.1f}s)")
        else:
            print(f"Devon threshold: 3.0s (all gaps are long, min {min(long):.1f}s)")

    caller_gaps = categories["host_to_caller"] + categories["caller_to_host"]
    if caller_gaps:
        caller_durs = sorted([g["dur"] for g in caller_gaps])
        short = [d for d in caller_durs if d < 5]
        long = [d for d in caller_durs if d >= 5]
        if short and long:
            suggested = (max(short) + min(long)) / 2
            print(f"Caller transition threshold: {suggested:.1f}s (short: {len(short)} up to {max(short):.1f}s, long: {len(long)} from {min(long):.1f}s)")
        elif long:
            print(f"Caller transition threshold: {min(long) - 1:.1f}s (all gaps >= {min(long):.1f}s)")

    host_self = categories["host_self"]
    if host_self:
        host_durs = sorted([g["dur"] for g in host_self])
        short = [d for d in host_durs if d < 5]
        long = [d for d in host_durs if d >= 5]
        if short and long:
            suggested = (max(short) + min(long)) / 2
            print(f"Same-speaker threshold: {suggested:.1f}s (short: {len(short)} up to {max(short):.1f}s, long: {len(long)} from {min(long):.1f}s)")
        elif long:
            print(f"Same-speaker threshold: {min(long) - 1:.1f}s (all gaps >= {min(long):.1f}s)")

    def removal_threshold(gap):
        """Pick the per-gap removal threshold the way the Lua script does."""
        before, after = gap["before"], gap["after"]
        if after == "end":
            # Trailing silence is always judged against the same-speaker threshold.
            return same_speaker_sec
        if "devon" in (before, after):
            return devon_sec
        if before != "?" and after != "?" and before != after:
            return transition_sec
        return same_speaker_sec

    # Apply the threshold that matches each gap instead of a flat 3s cut-off,
    # so the estimate agrees with the thresholds printed below.
    # NOTE(review): any padding the strip-silence script keeps around a cut is
    # not subtracted here, so treat the totals as an upper bound — confirm.
    would_cut = [g["dur"] for g in gaps if g["dur"] >= removal_threshold(g)]
    print(f"\nWith current thresholds (Devon={devon_sec:g}s, transitions={transition_sec:g}s, same-speaker={same_speaker_sec:g}s):")
    print(f" Would cut: ~{len(would_cut)} gaps, ~{sum(would_cut):.0f}s ({sum(would_cut)/60:.1f} min)")
    print(f" Result: ~{(duration - sum(would_cut))/60:.1f} min")
if __name__ == "__main__":
    # Command-line entry point: the only argument is the stems directory.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python analyze_gaps.py <stems_dir>")
        sys.exit(1)
    analyze(Path(cli_args[0]))
+29 -1
View File
@@ -5314,6 +5314,7 @@ TIME: {time_ctx} {season_ctx}
{fluency_hint} {fluency_hint}
{f'SOME DETAILS ABOUT THEM: {seed_text}' if seed_text else ''} {f'SOME DETAILS ABOUT THEM: {seed_text}' if seed_text else ''}
{f'CALLER ENERGY: {style_hint}' if style_hint else ''} {f'CALLER ENERGY: {style_hint}' if style_hint else ''}
{f"SHOW THEME: Tonight's show theme is '{session.show_theme}'. This caller might have a story or angle related to this theme — or they might not. Not every caller has to be about the theme, but if their reason for calling can naturally connect to it, lean into that connection. The theme should feel like a through-line, not a mandate." if session.show_theme else ''}
Respond with a JSON object containing these fields: Respond with a JSON object containing these fields:
@@ -6014,6 +6015,10 @@ def get_caller_prompt(caller: dict, show_history: str = "",
parts.append(research_context) parts.append(research_context)
world_context = "\n".join(parts) + "\n" world_context = "\n".join(parts) + "\n"
theme_context = ""
if session.show_theme:
theme_context = f"\nSHOW THEME: Tonight's show theme is \"{session.show_theme}\". You're aware of the theme — the host mentioned it at the top of the show. If your story or situation connects to it, you might bring it up naturally. But don't force it. Not every caller has to be about the theme. If the host steers you toward the theme, go with it.\n"
now = datetime.now(_MST) now = datetime.now(_MST)
date_str = now.strftime("%A, %B %d") date_str = now.strftime("%A, %B %d")
@@ -6060,7 +6065,7 @@ You are {caller['name']}. You are the CALLER. You are NOT Luke. Luke is the HOST
YOUR BACKGROUND: YOUR BACKGROUND:
{caller['vibe']} {caller['vibe']}
{relationship_context}{history}{world_context}{emotional_read} {relationship_context}{history}{world_context}{theme_context}{emotional_read}
You're a real person calling a late-night radio show. You called because you've got something specific and you want to talk about it. You're a real person calling a late-night radio show. You called because you've got something specific and you want to talk about it.
{pacing_block} {pacing_block}
@@ -6215,6 +6220,7 @@ class Session:
self.caller_queue: list[str] = [] # Sorted presentation order of caller keys self.caller_queue: list[str] = [] # Sorted presentation order of caller keys
self.relationship_context: dict[str, str] = {} # caller_key → relationship prompt injection self.relationship_context: dict[str, str] = {} # caller_key → relationship prompt injection
self.intern_monitoring: bool = True # Devon monitors conversations by default self.intern_monitoring: bool = True # Devon monitors conversations by default
self.show_theme: str = "" # Current show theme (e.g. "St. Patrick's Day")
def start_call(self, caller_key: str): def start_call(self, caller_key: str):
self.current_caller_key = caller_key self.current_caller_key = caller_key
@@ -8501,6 +8507,9 @@ GENRE_KEYWORDS = {
"valentine": "Ballad", "valentine": "Ballad",
"romantic": "Ballad", "romantic": "Ballad",
"ballad": "Ballad", "ballad": "Ballad",
"irish": "Irish",
"ireland": "Irish",
"patricks": "Irish",
} }
@@ -8759,6 +8768,25 @@ async def update_settings(data: dict):
return llm_service.get_settings() return llm_service.get_settings()
# --- Show Theme ---
@app.get("/api/show-theme")
async def get_show_theme():
    """Return the current session's show theme as ``{"theme": <str>}``."""
    current_theme = session.show_theme
    return {"theme": current_theme}
@app.post("/api/show-theme")
async def set_show_theme(data: dict):
    """Set or clear the session's show theme.

    Args:
        data: request JSON; ``data["theme"]`` is the new theme text. A missing,
            empty, ``null`` or non-string value clears the theme.

    Returns:
        ``{"theme": <stored theme>}`` — the value after trimming and
        truncating to 100 characters.
    """
    raw = data.get("theme")
    # The body is untrusted JSON: `null` or a number would make .strip() raise
    # and turn into a 500, so anything that is not a string counts as "clear".
    if not isinstance(raw, str):
        raw = ""
    # rstrip again so truncation cannot leave the stored theme ending in whitespace.
    theme = raw.strip()[:100].rstrip()
    old_theme = session.show_theme
    session.show_theme = theme
    if theme:
        print(f"[Theme] Show theme set: {theme}")
    elif old_theme:
        print(f"[Theme] Show theme cleared (was: {old_theme})")
    return {"theme": session.show_theme}
# --- Cost Tracking Endpoints --- # --- Cost Tracking Endpoints ---
@app.get("/api/costs") @app.get("/api/costs")
+63
View File
@@ -113,6 +113,69 @@ header button:hover {
border-color: rgba(232, 121, 29, 0.3); border-color: rgba(232, 121, 29, 0.3);
} }
/* Show-theme control: a compact row holding the label, text input and buttons. */
.theme-bar {
display: flex;
align-items: center;
gap: 6px;
padding: 4px 12px;
background: rgba(255, 255, 255, 0.05);
border-radius: 6px;
}
/* "Theme:" caption; nowrap keeps it on one line next to the input. */
.theme-label {
font-size: 0.8rem;
color: #aaa;
white-space: nowrap;
}
/* Text field for the theme, fixed at 200px wide. */
.theme-input {
background: rgba(255, 255, 255, 0.08);
border: 1px solid rgba(255, 255, 255, 0.15);
border-radius: 4px;
color: #fff;
padding: 4px 8px;
font-size: 0.85rem;
width: 200px;
}
/* The default outline is removed; the amber border is the focus indicator. */
.theme-input:focus {
outline: none;
border-color: #f5a623;
}
/* Highlighted state; presumably toggled by script while a theme is set. */
.theme-input.active {
border-color: #f5a623;
background: rgba(245, 166, 35, 0.1);
}
/* Shared base for the Set and Clear buttons. */
.theme-btn {
padding: 4px 10px;
border-radius: 4px;
border: none;
cursor: pointer;
font-size: 0.8rem;
}
/* Set button: solid amber, slightly darker on hover. */
.theme-btn.set {
background: #f5a623;
color: #000;
}
.theme-btn.set:hover {
background: #e6991a;
}
/* Clear button: muted by default with tighter horizontal padding, red on hover. */
.theme-btn.clear {
background: rgba(255, 255, 255, 0.1);
color: #aaa;
padding: 4px 6px;
}
.theme-btn.clear:hover {
background: rgba(255, 80, 80, 0.3);
color: #ff5050;
}
.on-air-btn { .on-air-btn {
font-weight: 700; font-weight: 700;
text-transform: uppercase; text-transform: uppercase;
+6
View File
@@ -17,6 +17,12 @@
<button id="export-session-btn">Export</button> <button id="export-session-btn">Export</button>
<button id="settings-btn">Settings</button> <button id="settings-btn">Settings</button>
</div> </div>
<!-- Show theme control: text input (max 100 chars) with a Set button and a Clear button that carries the "hidden" class on load. -->
<div class="theme-bar">
<label for="show-theme-input" class="theme-label">Theme:</label>
<input type="text" id="show-theme-input" class="theme-input" placeholder="e.g. St. Patrick's Day" maxlength="100">
<button id="set-theme-btn" class="theme-btn set" title="Set show theme">Set</button>
<button id="clear-theme-btn" class="theme-btn clear hidden" title="Clear theme">&#x2715;</button>
</div>
<div id="show-clock" class="show-clock"> <div id="show-clock" class="show-clock">
<span class="clock-time" id="clock-time"></span> <span class="clock-time" id="clock-time"></span>
<span id="show-timers" class="show-timers hidden"> <span id="show-timers" class="show-timers hidden">
+72
View File
@@ -130,6 +130,7 @@ document.addEventListener('DOMContentLoaded', async () => {
await loadSettings(); await loadSettings();
initEventListeners(); initEventListeners();
initClock(); initClock();
loadShowTheme();
loadVoicemails(); loadVoicemails();
setInterval(loadVoicemails, 30000); setInterval(loadVoicemails, 30000);
loadEmails(); loadEmails();
@@ -345,6 +346,13 @@ function initEventListeners() {
document.getElementById('devon-play-btn')?.addEventListener('click', playDevonSuggestion); document.getElementById('devon-play-btn')?.addEventListener('click', playDevonSuggestion);
document.getElementById('devon-dismiss-btn')?.addEventListener('click', dismissDevonSuggestion); document.getElementById('devon-dismiss-btn')?.addEventListener('click', dismissDevonSuggestion);
// Show Theme
document.getElementById('set-theme-btn')?.addEventListener('click', setShowTheme);
document.getElementById('clear-theme-btn')?.addEventListener('click', clearShowTheme);
document.getElementById('show-theme-input')?.addEventListener('keydown', (e) => {
if (e.key === 'Enter') setShowTheme();
});
// Settings // Settings
document.getElementById('settings-btn')?.addEventListener('click', async () => { document.getElementById('settings-btn')?.addEventListener('click', async () => {
document.getElementById('settings-modal')?.classList.remove('hidden'); document.getElementById('settings-modal')?.classList.remove('hidden');
@@ -692,6 +700,7 @@ async function newSession() {
// Reload callers to get new session ID // Reload callers to get new session ID
await loadCallers(); await loadCallers();
await loadShowTheme();
log('New session started - all callers have fresh backgrounds'); log('New session started - all callers have fresh backgrounds');
} }
@@ -1159,6 +1168,69 @@ async function playSFX(soundFile) {
} }
// --- Show Theme ---
/**
 * Fetch the current show theme from the backend and sync the header controls:
 * with a theme set, the input is filled and highlighted and only the Clear
 * button is shown; with no theme, the input is emptied and only Set is shown.
 * Failures are logged and leave the controls untouched.
 */
async function loadShowTheme() {
    const input = document.getElementById('show-theme-input');
    const setBtn = document.getElementById('set-theme-btn');
    const clearBtn = document.getElementById('clear-theme-btn');
    // Nothing to sync if the theme bar is not on this page.
    if (!input || !setBtn || !clearBtn) return;
    try {
        const res = await fetch('/api/show-theme');
        // fetch() resolves on HTTP errors too; don't treat an error body as a theme.
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data = await res.json();
        if (data.theme) {
            input.value = data.theme;
            input.classList.add('active');
            setBtn.classList.add('hidden');
            clearBtn.classList.remove('hidden');
        } else {
            input.value = '';
            input.classList.remove('active');
            setBtn.classList.remove('hidden');
            clearBtn.classList.add('hidden');
        }
    } catch (e) {
        console.error('Failed to load show theme:', e);
    }
}
/**
 * Send the text in the theme input to the backend as the new show theme.
 * Blank input is ignored. On success the input shows the value the server
 * actually stored and the controls switch to the "theme active" state.
 * Failures are logged and leave the controls untouched.
 */
async function setShowTheme() {
    const input = document.getElementById('show-theme-input');
    if (!input) return;
    const theme = input.value.trim();
    if (!theme) return;
    try {
        const res = await fetch('/api/show-theme', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ theme })
        });
        // fetch() resolves on HTTP errors too; only mark the theme active on success.
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data = await res.json();
        if (data.theme) {
            // The server trims and truncates; show what it actually stored.
            input.value = data.theme;
            input.classList.add('active');
            document.getElementById('set-theme-btn')?.classList.add('hidden');
            document.getElementById('clear-theme-btn')?.classList.remove('hidden');
        }
    } catch (e) {
        console.error('Failed to set show theme:', e);
    }
}
/**
 * Clear the show theme on the backend, then reset the header controls to the
 * "no theme" state. The controls are only reset once the server has accepted
 * the request; failures are logged and leave the controls untouched.
 */
async function clearShowTheme() {
    try {
        const res = await fetch('/api/show-theme', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ theme: '' })
        });
        // fetch() resolves on HTTP errors too; without this check the UI would
        // show "cleared" while the backend still has the theme set.
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const input = document.getElementById('show-theme-input');
        if (input) {
            input.value = '';
            input.classList.remove('active');
        }
        document.getElementById('set-theme-btn')?.classList.remove('hidden');
        document.getElementById('clear-theme-btn')?.classList.add('hidden');
    } catch (e) {
        console.error('Failed to clear show theme:', e);
    }
}
// --- Settings --- // --- Settings ---
async function loadSettings() { async function loadSettings() {
try { try {
+103 -12
View File
@@ -9,12 +9,15 @@
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
local SILENCE_DB = -30 -- dBFS — anything below this is "silence" local SILENCE_DB = -30 -- dBFS — anything below this is "silence"
local MIN_SILENCE_SEC = 6.0 -- same-speaker gaps: only remove silences longer than this local MIN_SILENCE_SEC = 6.0 -- same-speaker gaps: only remove silences longer than this
local MIN_SILENCE_TRANSITION_SEC = 2.5 -- cross-speaker gaps: shorter threshold for speaker transitions local MAX_SILENCE_SEC = 999 -- no practical limit (IDENT/AD regions protect real breaks)
local MIN_SILENCE_TRANSITION_SEC = 5.0 -- cross-speaker gaps: threshold for caller TTS latency
local MIN_SILENCE_DEVON_SEC = 3.0 -- Devon gaps: interjections are prerendered (~2-3s gaps), conversational TTS is 6s+
local DEVON_TRACK = 2 -- 1-indexed: Devon track number
local MIN_VOICE_SEC = 0.3 -- ignore non-silent bursts shorter than this (filters transients) local MIN_VOICE_SEC = 0.3 -- ignore non-silent bursts shorter than this (filters transients)
local KEEP_PAD_SEC = 0.5 -- leave this much silence on each side of a cut local KEEP_PAD_SEC = 0.5 -- leave this much silence on each side of a cut
local BLOCK_SEC = 0.1 -- analysis block size (100ms) local BLOCK_SEC = 0.1 -- analysis block size (100ms)
local SAMPLE_RATE = 48000 local SAMPLE_RATE = 48000
local CHECK_TRACKS = {1, 2, 3, 4} -- 1-indexed: Host, Devon, Live Caller, AI Caller local CHECK_TRACKS = {1, 2, 3, 4} -- 1-indexed: Host, Devon, AI Caller, Live Caller
local IDENTS_TRACK = 6 -- 1-indexed: Idents track local IDENTS_TRACK = 6 -- 1-indexed: Idents track
local ADS_TRACK = 7 -- 1-indexed: Ads track local ADS_TRACK = 7 -- 1-indexed: Ads track
local MUSIC_TRACK = 8 -- 1-indexed: Music track local MUSIC_TRACK = 8 -- 1-indexed: Music track
@@ -25,7 +28,6 @@ local YIELD_INTERVAL = 200 -- yield to REAPER every N blocks (~20s of audio)
local BLOCK_SAMPLES = math.floor(SAMPLE_RATE * BLOCK_SEC) local BLOCK_SAMPLES = math.floor(SAMPLE_RATE * BLOCK_SEC)
local THRESHOLD = 10 ^ (SILENCE_DB / 20) local THRESHOLD = 10 ^ (SILENCE_DB / 20)
local MIN_VOICE_BLOCKS = math.ceil(MIN_VOICE_SEC / BLOCK_SEC) local MIN_VOICE_BLOCKS = math.ceil(MIN_VOICE_SEC / BLOCK_SEC)
local function log(msg) local function log(msg)
reaper.ShowConsoleMsg("[PostProd] " .. msg .. "\n") reaper.ShowConsoleMsg("[PostProd] " .. msg .. "\n")
end end
@@ -306,13 +308,17 @@ local function read_block_peak_rms(ta, project_time)
end end
-- find_loudest_track: returns 1-based index of the loudest track at a given time, or 0 if silent -- find_loudest_track: returns 1-based index of the loudest track at a given time, or 0 if silent
-- Uses RMS (not peak) for speaker identification — ambient mic noise has high peaks but low RMS
local function find_loudest_track(track_audios, project_time) local function find_loudest_track(track_audios, project_time)
local best_peak = 0 local best_peak = 0
local best_rms = 0
local best_idx = 0 local best_idx = 0
for i, ta in ipairs(track_audios) do for i, ta in ipairs(track_audios) do
local peak, _ = read_block_peak_rms(ta, project_time) local peak, sum_sq = read_block_peak_rms(ta, project_time)
if peak > best_peak then if peak > best_peak then best_peak = peak end
best_peak = peak local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
if rms > best_rms then
best_rms = rms
best_idx = i best_idx = i
end end
end end
@@ -340,12 +346,17 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
while t < region.end_pos do while t < region.end_pos do
local best_peak = 0 local best_peak = 0
local best_rms = 0
local best_sum = 0 local best_sum = 0
local best_track = 0 local best_track = 0
for i, ta in ipairs(track_audios) do for i, ta in ipairs(track_audios) do
local peak, sum_sq = read_block_peak_rms(ta, t) local peak, sum_sq = read_block_peak_rms(ta, t)
if peak > best_peak then if peak > best_peak then best_peak = peak end
best_peak = peak -- Use RMS for speaker identification (sustained energy, not transient peaks)
-- Host mic ambient noise has high peaks but low RMS; TTS speech has high RMS
local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
if rms > best_rms then
best_rms = rms
best_sum = sum_sq best_sum = sum_sq
best_track = i best_track = i
end end
@@ -375,8 +386,11 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
local dur = voice_start - silence_start local dur = voice_start - silence_start
local track_after = voice_run_track local track_after = voice_run_track
local is_transition = track_before_silence ~= 0 and track_after ~= 0 and track_before_silence ~= track_after local is_transition = track_before_silence ~= 0 and track_after ~= 0 and track_before_silence ~= track_after
local threshold = is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC local devon_involved = track_before_silence == DEVON_TRACK or track_after == DEVON_TRACK
if dur >= threshold then local threshold = devon_involved and MIN_SILENCE_DEVON_SEC
or (is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC)
if dur >= threshold and dur <= MAX_SILENCE_SEC then
table.insert(silences, { table.insert(silences, {
start_pos = silence_start, end_pos = voice_start, duration = dur, start_pos = silence_start, end_pos = voice_start, duration = dur,
is_transition = is_transition, is_transition = is_transition,
@@ -410,7 +424,7 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
if in_silence then if in_silence then
local dur = region.end_pos - silence_start local dur = region.end_pos - silence_start
if dur >= MIN_SILENCE_SEC then if dur >= MIN_SILENCE_SEC and dur <= MAX_SILENCE_SEC then
table.insert(silences, {start_pos = silence_start, end_pos = region.end_pos, duration = dur}) table.insert(silences, {start_pos = silence_start, end_pos = region.end_pos, duration = dur})
end end
end end
@@ -547,6 +561,7 @@ local function phase1_strip_silence(dialog_regions)
if (t + 1) == MUSIC_TRACK then goto next_track end if (t + 1) == MUSIC_TRACK then goto next_track end
local track = reaper.GetTrack(0, t) local track = reaper.GetTrack(0, t)
-- Split and delete the silent portion from items that span r.start_pos
local item = find_item_at(track, r.start_pos) local item = find_item_at(track, r.start_pos)
if item then if item then
local right = reaper.SplitMediaItem(item, r.start_pos) local right = reaper.SplitMediaItem(item, r.start_pos)
@@ -556,10 +571,36 @@ local function phase1_strip_silence(dialog_regions)
end end
end end
-- Handle sparse track items that START within the removal range
-- (not found by find_item_at since they don't contain r.start_pos).
-- Iterate from the last item down so deleting one does not change the
-- indices of the items still to be visited.
for j = reaper.CountTrackMediaItems(track) - 1, 0, -1 do
  local check = reaper.GetTrackMediaItem(track, j)
  local cpos = reaper.GetMediaItemInfo_Value(check, "D_POSITION")
  if cpos >= r.start_pos and cpos < r.end_pos then
    local clen = reaper.GetMediaItemInfo_Value(check, "D_LENGTH")
    local cend = cpos + clen
    if cend <= r.end_pos then
      -- Entirely within removal — delete
      reaper.DeleteTrackMediaItem(track, check)
    else
      -- Starts in removal but extends past — trim start to r.end_pos
      local trim = r.end_pos - cpos
      local take = reaper.GetActiveTake(check)
      if take then
        local offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
        -- D_STARTOFFS is measured in source-media seconds while `trim` is
        -- project time, so scale by the take's play rate (1.0 = unchanged).
        local playrate = reaper.GetMediaItemTakeInfo_Value(take, "D_PLAYRATE")
        if playrate <= 0 then playrate = 1 end
        reaper.SetMediaItemTakeInfo_Value(take, "D_STARTOFFS", offset + trim * playrate)
      end
      reaper.SetMediaItemInfo_Value(check, "D_LENGTH", cend - r.end_pos)
      reaper.SetMediaItemInfo_Value(check, "D_POSITION", r.end_pos)
    end
  end
end
-- Shift items AFTER the removal (use r.end_pos, not r.start_pos)
for j = 0, reaper.CountTrackMediaItems(track) - 1 do for j = 0, reaper.CountTrackMediaItems(track) - 1 do
local shift_item = reaper.GetTrackMediaItem(track, j) local shift_item = reaper.GetTrackMediaItem(track, j)
local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION") local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
if pos >= r.start_pos then if pos >= r.end_pos then
reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len) reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
end end
end end
@@ -766,6 +807,56 @@ local function phase3_trim_music()
local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1) local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
if not music_track then return end if not music_track then return end
-- Ensure music starts before first voice item.
-- Silence removal shifts voice/idents/ads but not music. If voice now starts before
-- music, nudge all non-music tracks forward so music has a lead-in.
local first_voice_start = math.huge
for _, tidx in ipairs(CHECK_TRACKS) do
  local tr = reaper.GetTrack(0, tidx - 1)
  if tr and reaper.CountTrackMediaItems(tr) > 0 then
    -- Item 0 is taken as the track's earliest item.
    local item = reaper.GetTrackMediaItem(tr, 0)
    local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
    if pos < first_voice_start then first_voice_start = pos end
  end
end

local MUSIC_LEAD_SEC = 3.0 -- seconds of music before first voice
if first_voice_start < math.huge then
  local first_music = reaper.GetTrackMediaItem(music_track, 0)
  if first_music then
    local music_start = reaper.GetMediaItemInfo_Value(first_music, "D_POSITION")
    local desired_voice_start = music_start + MUSIC_LEAD_SEC
    if first_voice_start < desired_voice_start then
      local nudge = desired_voice_start - first_voice_start

      -- Shift all non-music tracks forward. Items are visited last-to-first:
      -- moving an item forward past a later one could otherwise change which
      -- item a given index refers to part-way through the loop.
      for t = 0, reaper.CountTracks(0) - 1 do
        if (t + 1) ~= MUSIC_TRACK then
          local track = reaper.GetTrack(0, t)
          for i = reaper.CountTrackMediaItems(track) - 1, 0, -1 do
            local item = reaper.GetTrackMediaItem(track, i)
            local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
            reaper.SetMediaItemInfo_Value(item, "D_POSITION", pos + nudge)
          end
        end
      end

      -- Also shift all markers/regions forward. Snapshot them first: the
      -- enumeration index follows timeline order, so moving entries while
      -- enumerating can skip some and move others twice.
      local _, num_markers, num_regions = reaper.CountProjectMarkers(0)
      local total_m = num_markers + num_regions
      local snapshot = {}
      for i = 0, total_m - 1 do
        local retval, is_region, pos, rgnend, name, idx, color = reaper.EnumProjectMarkers3(0, i)
        -- 0 is truthy in Lua, so compare explicitly instead of `if retval`.
        if retval and retval > 0 then
          snapshot[#snapshot + 1] = {
            is_region = is_region, pos = pos, rgnend = rgnend,
            name = name, idx = idx, color = color,
          }
        end
      end
      for _, m in ipairs(snapshot) do
        if m.is_region then
          reaper.SetProjectMarker3(0, m.idx, true, m.pos + nudge, m.rgnend + nudge, m.name, m.color)
        else
          reaper.SetProjectMarker3(0, m.idx, false, m.pos + nudge, 0, m.name, m.color)
        end
      end

      log("Phase 3: Nudged non-music tracks forward " .. string.format("%.1f", nudge) .. "s for " .. MUSIC_LEAD_SEC .. "s music lead-in")
    end
  end
end
local last_end = 0 local last_end = 0
for _, tidx in ipairs(CHECK_TRACKS) do for _, tidx in ipairs(CHECK_TRACKS) do
local tr = reaper.GetTrack(0, tidx - 1) local tr = reaper.GetTrack(0, tidx - 1)