Show theme feature, Irish music genre, strip silence overhaul

- Add show theme UI in header bar + backend API (inject into caller prompts)
- Add Irish genre category for music dropdown
- Strip silence: RMS-based speaker detection (fixes Devon not being identified)
- Strip silence: Devon-specific 3s threshold for interjections
- Strip silence: sparse track item handling in shift logic
- Strip silence: music lead-in preservation after silence removal
- Strip silence: no max gap limit (IDENT/AD regions protect breaks)
- Add analyze_gaps.py tool for per-show threshold analysis

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Add show theme feature for themed episodes
2026-03-18 03:30:15 -06:00 · 2026-03-17 23:46:48 -06:00
6 changed files with 533 additions and 13 deletions
@@ -0,0 +1,260 @@
 #!/usr/bin/env python3
 """Analyze silence gaps in podcast stems to find optimal strip-silence thresholds.
 Usage: python analyze_gaps.py recordings/2026-03-17_235137/
 """
 import sys
 import numpy as np
 import soundfile as sf
 from pathlib import Path
 # Length of one analysis block, in seconds (100 ms).
 BLOCK_SEC = 0.1
 # Level, in dB, below which a block's peak counts as silence.
 SILENCE_DB = -30
 # SILENCE_DB converted to a linear amplitude (10^(dB/20)) so it can be
 # compared directly against per-block sample peaks.
 THRESHOLD = 10 ** (SILENCE_DB / 20)
 # A silence only ends once non-silent audio has lasted at least this long (seconds).
 MIN_VOICE_SEC = 0.3
 def load_stem(path: Path) -> tuple[np.ndarray, int]:
    """Read one stem as float32 samples and return ``(samples, sample_rate)``.

    When the file has more than one channel only the first channel is kept
    (no downmix is performed).
    """
    samples, sample_rate = sf.read(path, dtype="float32")
    mono = samples if samples.ndim <= 1 else samples[:, 0]
    return mono, sample_rate
 def compute_rms_blocks(audio: np.ndarray, sr: int) -> np.ndarray:
    """Return the RMS level of every whole BLOCK_SEC-long block of ``audio``.

    Samples left over after the last whole block are ignored. Input shorter
    than one block yields a single-element array containing 0.0.
    """
    hop = int(sr * BLOCK_SEC)
    whole_blocks = len(audio) // hop
    if not whole_blocks:
        return np.array([0.0])
    framed = np.reshape(audio[: whole_blocks * hop], (whole_blocks, hop))
    mean_square = np.mean(framed ** 2, axis=1)
    return np.sqrt(mean_square)
 def compute_peak_blocks(audio: np.ndarray, sr: int) -> np.ndarray:
    """Return the absolute peak of every whole BLOCK_SEC-long block of ``audio``.

    Samples left over after the last whole block are ignored. Input shorter
    than one block yields a single-element array containing 0.0.
    """
    hop = int(sr * BLOCK_SEC)
    whole_blocks = len(audio) // hop
    if not whole_blocks:
        return np.array([0.0])
    usable = audio[: whole_blocks * hop]
    magnitudes = np.abs(usable).reshape(whole_blocks, hop)
    return magnitudes.max(axis=1)
 def analyze(stems_dir: Path):
    """Detect, categorise and summarise silence gaps across the voice stems.

    Loads whichever of ``host.wav``, ``devon.wav`` and ``caller.wav`` exist in
    ``stems_dir`` and walks them block by block (peak level decides whether a
    block is silent, RMS decides which stem is speaking). It then prints
    per-transition gap statistics, suggested thresholds, and an estimate of
    how much audio the current thresholds would remove.

    Args:
        stems_dir: Directory containing the per-speaker WAV stems.

    Returns:
        None. All results are printed to stdout.
    """
    # Cut thresholds (seconds) used for the "would cut" estimate at the end.
    devon_cut_sec = 3.0         # gap with Devon on either side
    transition_cut_sec = 5.0    # gap between two different, known speakers
    same_speaker_cut_sec = 6.0  # same/unknown speaker, or trailing silence
    stems_dir = Path(stems_dir)
    voice_stems = {}
    sample_rates = {}
    for name in ["host", "devon", "caller"]:
        path = stems_dir / f"{name}.wav"
        if path.exists():
            print(f"Loading {name}...", end=" ", flush=True)
            audio, sr = load_stem(path)
            voice_stems[name] = audio
            sample_rates[name] = sr
            print(f"{len(audio)/sr:.0f}s @ {sr}Hz")
    if not voice_stems:
        print("No voice stems found")
        return
    # Block i must cover the same time span in every stem, which only holds
    # when all stems share one sample rate.
    if len(set(sample_rates.values())) > 1:
        rates = ", ".join(f"{n}={r}Hz" for n, r in sample_rates.items())
        print(f"Sample rates differ between stems ({rates}); cannot compare blocks")
        return
    sr_val = next(iter(sample_rates.values()))
    duration = max(len(a) for a in voice_stems.values()) / sr_val
    print(f"\nTotal duration: {duration/60:.1f} min")
    # Compute per-track RMS and peak blocks
    track_rms = {}
    track_peak = {}
    for name, audio in voice_stems.items():
        track_rms[name] = compute_rms_blocks(audio, sr_val)
        track_peak[name] = compute_peak_blocks(audio, sr_val)
    # Only the span covered by every stem is analysed.
    n_blocks = min(len(v) for v in track_peak.values())
    # Detect gaps using same logic as Lua script (RMS for speaker ID, peak for silence)
    # ceil, not int(): 0.3 / 0.1 evaluates to 2.9999999999999996, which int()
    # would truncate to 2 blocks instead of the intended 3.
    min_voice_blocks = int(np.ceil(MIN_VOICE_SEC / BLOCK_SEC))
    track_names = list(voice_stems.keys())
    gaps = []
    in_silence = False
    silence_start = 0
    track_before = None      # speaker active just before the current silence
    last_active = None       # speaker of the most recent non-silent block
    voice_run = 0            # consecutive non-silent blocks seen while in silence
    voice_run_track = None   # speaker of the first block of that run
    for i in range(n_blocks):
        # Peak for silence detection
        best_peak = max(track_peak[name][i] for name in track_names)
        # RMS for speaker identification
        best_rms = 0
        best_track = None
        for name in track_names:
            r = track_rms[name][i]
            if r > best_rms:
                best_rms = r
                best_track = name
        all_silent = best_peak < THRESHOLD
        if not all_silent:
            last_active = best_track
        if in_silence:
            if all_silent:
                # A burst shorter than min_voice_blocks does not end the silence.
                voice_run = 0
                voice_run_track = None
            else:
                if voice_run == 0:
                    voice_run_track = best_track
                voice_run += 1
                if voice_run >= min_voice_blocks:
                    # The gap ends where the confirmed voice run began.
                    voice_start_block = i - (voice_run - 1)
                    gap_start = silence_start * BLOCK_SEC
                    gap_end = voice_start_block * BLOCK_SEC
                    dur = gap_end - gap_start
                    if dur >= 0.5:  # log gaps >= 0.5s
                        gaps.append({
                            "start": gap_start,
                            "end": gap_end,
                            "dur": dur,
                            "before": track_before or "?",
                            "after": voice_run_track or "?",
                        })
                    in_silence = False
                    voice_run = 0
                    voice_run_track = None
        else:
            if all_silent:
                in_silence = True
                silence_start = i
                track_before = last_active
                voice_run = 0
                voice_run_track = None
    # Trailing silence
    if in_silence:
        dur = (n_blocks - silence_start) * BLOCK_SEC
        if dur >= 0.5:
            gaps.append({
                "start": silence_start * BLOCK_SEC,
                "end": n_blocks * BLOCK_SEC,
                "dur": dur,
                "before": track_before or "?",
                "after": "end",
            })
    if not gaps:
        print("No gaps detected")
        return
    # Categorize gaps
    categories = {
        "host_self": [],      # Host -> Host
        "host_to_caller": [], # Host -> Caller (TTS latency)
        "caller_to_host": [], # Caller -> Host
        "host_to_devon": [],  # Host -> Devon (TTS latency)
        "devon_to_host": [],  # Devon -> Host
        "caller_to_devon": [],# Caller -> Devon (interjection)
        "devon_to_caller": [],# Devon -> Caller
        "other": [],
    }
    for g in gaps:
        b, a = g["before"], g["after"]
        if b == "host" and a == "host":
            categories["host_self"].append(g)
        elif b == "host" and a == "caller":
            categories["host_to_caller"].append(g)
        elif b == "caller" and a == "host":
            categories["caller_to_host"].append(g)
        elif b == "host" and a == "devon":
            categories["host_to_devon"].append(g)
        elif b == "devon" and a == "host":
            categories["devon_to_host"].append(g)
        elif b == "caller" and a == "devon":
            categories["caller_to_devon"].append(g)
        elif b == "devon" and a == "caller":
            categories["devon_to_caller"].append(g)
        else:
            categories["other"].append(g)
    # Print results
    print(f"\n{'='*70}")
    print(f"GAP ANALYSIS — {len(gaps)} gaps detected")
    print(f"{'='*70}")
    total_silence = sum(g["dur"] for g in gaps)
    print(f"Total silence: {total_silence:.0f}s ({total_silence/60:.1f} min)")
    print(f"Content after removal: ~{(duration - total_silence)/60:.1f} min")
    # Largest categories first.
    for cat_name, cat_gaps in sorted(categories.items(), key=lambda x: -len(x[1])):
        if not cat_gaps:
            continue
        durs = sorted([g["dur"] for g in cat_gaps])
        print(f"\n--- {cat_name} ({len(cat_gaps)} gaps) ---")
        print(f"  Range: {durs[0]:.1f}s - {durs[-1]:.1f}s")
        print(f"  Median: {np.median(durs):.1f}s  Mean: {np.mean(durs):.1f}s")
        if len(durs) >= 5:
            print(f"  P25: {np.percentile(durs, 25):.1f}s  P75: {np.percentile(durs, 75):.1f}s")
        # Histogram
        brackets = [(0, 1), (1, 2), (2, 3), (3, 5), (5, 8), (8, 12), (12, 18), (18, 30), (30, 60), (60, 999)]
        print(f"  Distribution:")
        for lo, hi in brackets:
            count = sum(1 for d in durs if lo <= d < hi)
            if count > 0:
                bar = "#" * count
                label = f"{lo}-{hi}s" if hi < 999 else f"{lo}s+"
                print(f"    {label:>8s}: {bar} ({count})")
    # Find natural clusters and suggest thresholds
    print(f"\n{'='*70}")
    print("SUGGESTED THRESHOLDS")
    print(f"{'='*70}")
    # For each Devon-involved category, find the gap between interjection and TTS gaps
    devon_gaps = categories["host_to_devon"] + categories["devon_to_host"] + categories["caller_to_devon"] + categories["devon_to_caller"]
    if devon_gaps:
        devon_durs = sorted([g["dur"] for g in devon_gaps])
        # Look for a natural break between short (interjection) and long (TTS) gaps
        short = [d for d in devon_durs if d < 5]
        long = [d for d in devon_durs if d >= 5]
        if short and long:
            suggested = (max(short) + min(long)) / 2
            print(f"Devon threshold: {suggested:.1f}s  (short gaps: {len(short)} up to {max(short):.1f}s, long gaps: {len(long)} from {min(long):.1f}s)")
        elif short:
            print(f"Devon threshold: {max(short) + 1:.1f}s  (all gaps are short, max {max(short):.1f}s)")
        else:
            print(f"Devon threshold: 3.0s  (all gaps are long, min {min(long):.1f}s)")
    caller_gaps = categories["host_to_caller"] + categories["caller_to_host"]
    if caller_gaps:
        caller_durs = sorted([g["dur"] for g in caller_gaps])
        short = [d for d in caller_durs if d < 5]
        long = [d for d in caller_durs if d >= 5]
        if short and long:
            suggested = (max(short) + min(long)) / 2
            print(f"Caller transition threshold: {suggested:.1f}s  (short: {len(short)} up to {max(short):.1f}s, long: {len(long)} from {min(long):.1f}s)")
        elif long:
            print(f"Caller transition threshold: {min(long) - 1:.1f}s  (all gaps >= {min(long):.1f}s)")
    host_self = categories["host_self"]
    if host_self:
        host_durs = sorted([g["dur"] for g in host_self])
        short = [d for d in host_durs if d < 5]
        long = [d for d in host_durs if d >= 5]
        if short and long:
            suggested = (max(short) + min(long)) / 2
            print(f"Same-speaker threshold: {suggested:.1f}s  (short: {len(short)} up to {max(short):.1f}s, long: {len(long)} from {min(long):.1f}s)")
        elif long:
            print(f"Same-speaker threshold: {min(long) - 1:.1f}s  (all gaps >= {min(long):.1f}s)")

    def cut_threshold(gap):
        """Return the minimum duration at which this gap would be cut."""
        before, after = gap["before"], gap["after"]
        if after == "end":
            # Trailing silence is always judged by the same-speaker threshold.
            return same_speaker_cut_sec
        if "devon" in (before, after):
            return devon_cut_sec
        if before != "?" and after != "?" and before != after:
            return transition_cut_sec
        return same_speaker_cut_sec

    # Apply the threshold that matches each gap instead of a flat 3 s to all,
    # so the estimate agrees with the thresholds named in the message below.
    would_cut = [g["dur"] for g in gaps if g["dur"] >= cut_threshold(g)]
    print(f"\nWith current thresholds (Devon={devon_cut_sec:g}s, transition={transition_cut_sec:g}s, same-speaker={same_speaker_cut_sec:g}s):")
    print(f"  Would cut: ~{len(would_cut)} gaps, ~{sum(would_cut):.0f}s ({sum(would_cut)/60:.1f} min)")
    print(f"  Result: ~{(duration - sum(would_cut))/60:.1f} min")
 if __name__ == "__main__":
    # Command-line entry point: the first positional argument is the
    # directory holding the stems to analyse.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python analyze_gaps.py <stems_dir>")
        sys.exit(1)
    analyze(Path(cli_args[0]))
@@ -5314,6 +5314,7 @@ TIME: {time_ctx} {season_ctx}
 {fluency_hint}
 {f'SOME DETAILS ABOUT THEM: {seed_text}' if seed_text else ''}
 {f'CALLER ENERGY: {style_hint}' if style_hint else ''}
 {f"SHOW THEME: Tonight's show theme is '{session.show_theme}'. This caller might have a story or angle related to this theme — or they might not. Not every caller has to be about the theme, but if their reason for calling can naturally connect to it, lean into that connection. The theme should feel like a through-line, not a mandate." if session.show_theme else ''}
 Respond with a JSON object containing these fields:
@@ -6014,6 +6015,10 @@ def get_caller_prompt(caller: dict, show_history: str = "",
            parts.append(research_context)
        world_context = "\n".join(parts) + "\n"
    theme_context = ""
    if session.show_theme:
        theme_context = f"\nSHOW THEME: Tonight's show theme is \"{session.show_theme}\". You're aware of the theme — the host mentioned it at the top of the show. If your story or situation connects to it, you might bring it up naturally. But don't force it. Not every caller has to be about the theme. If the host steers you toward the theme, go with it.\n"
    now = datetime.now(_MST)
    date_str = now.strftime("%A, %B %d")
@@ -6060,7 +6065,7 @@ You are {caller['name']}. You are the CALLER. You are NOT Luke. Luke is the HOST
 YOUR BACKGROUND:
 {caller['vibe']}
-{relationship_context}{history}{world_context}{emotional_read}
+{relationship_context}{history}{world_context}{theme_context}{emotional_read}
 You're a real person calling a late-night radio show. You called because you've got something specific and you want to talk about it.
 {pacing_block}
@@ -6215,6 +6220,7 @@ class Session:
        self.caller_queue: list[str] = []  # Sorted presentation order of caller keys
        self.relationship_context: dict[str, str] = {}  # caller_key → relationship prompt injection
        self.intern_monitoring: bool = True  # Devon monitors conversations by default
        self.show_theme: str = ""  # Current show theme (e.g. "St. Patrick's Day")
    def start_call(self, caller_key: str):
        self.current_caller_key = caller_key
@@ -8501,6 +8507,9 @@ GENRE_KEYWORDS = {
    "valentine": "Ballad",
    "romantic": "Ballad",
    "ballad": "Ballad",
    "irish": "Irish",
    "ireland": "Irish",
    "patricks": "Irish",
 }
@@ -8759,6 +8768,25 @@ async def update_settings(data: dict):
    return llm_service.get_settings()
 # --- Show Theme ---
@app.get("/api/show-theme")
 async def get_show_theme():
    return {"theme": session.show_theme}
@app.post("/api/show-theme")
 async def set_show_theme(data: dict):
    theme = data.get("theme", "").strip()[:100]
    old_theme = session.show_theme
    session.show_theme = theme
    if theme:
        print(f"[Theme] Show theme set: {theme}")
    elif old_theme:
        print(f"[Theme] Show theme cleared (was: {old_theme})")
    return {"theme": session.show_theme}
 # --- Cost Tracking Endpoints ---
@app.get("/api/costs")
@@ -113,6 +113,69 @@ header button:hover {
    border-color: rgba(232, 121, 29, 0.3);
 }
 /* Show-theme control: a horizontal row holding label, text input and buttons. */
 .theme-bar {
    display: flex;
    align-items: center;
    gap: 6px;
    padding: 4px 12px;
    background: rgba(255, 255, 255, 0.05);
    border-radius: 6px;
 }
 /* Caption in front of the input; never wraps. */
 .theme-label {
    font-size: 0.8rem;
    color: #aaa;
    white-space: nowrap;
 }
 /* Fixed-width text field for the theme name. */
 .theme-input {
    background: rgba(255, 255, 255, 0.08);
    border: 1px solid rgba(255, 255, 255, 0.15);
    border-radius: 4px;
    color: #fff;
    padding: 4px 8px;
    font-size: 0.85rem;
    width: 200px;
 }
 /* The default focus outline is removed; the amber border is the focus indicator. */
 .theme-input:focus {
    outline: none;
    border-color: #f5a623;
 }
 /* Highlighted state: amber border plus a faint amber fill. */
 .theme-input.active {
    border-color: #f5a623;
    background: rgba(245, 166, 35, 0.1);
 }
 /* Shared base for both theme buttons. */
 .theme-btn {
    padding: 4px 10px;
    border-radius: 4px;
    border: none;
    cursor: pointer;
    font-size: 0.8rem;
 }
 /* "Set" variant: solid amber with dark text. */
 .theme-btn.set {
    background: #f5a623;
    color: #000;
 }
 .theme-btn.set:hover {
    background: #e6991a;
 }
 /* "Clear" variant: muted and narrower; turns red on hover. */
 .theme-btn.clear {
    background: rgba(255, 255, 255, 0.1);
    color: #aaa;
    padding: 4px 6px;
 }
 .theme-btn.clear:hover {
    background: rgba(255, 80, 80, 0.3);
    color: #ff5050;
 }
 .on-air-btn {
    font-weight: 700;
    text-transform: uppercase;
@@ -17,6 +17,12 @@
                <button id="export-session-btn">Export</button>
                <button id="settings-btn">Settings</button>
            </div>
            <div class="theme-bar">
                <label for="show-theme-input" class="theme-label">Theme:</label>
                <input type="text" id="show-theme-input" class="theme-input" placeholder="e.g. St. Patrick's Day" maxlength="100">
                <button id="set-theme-btn" class="theme-btn set" title="Set show theme">Set</button>
                <button id="clear-theme-btn" class="theme-btn clear hidden" title="Clear theme">&#x2715;</button>
            </div>
            <div id="show-clock" class="show-clock">
                <span class="clock-time" id="clock-time"></span>
                <span id="show-timers" class="show-timers hidden">
@@ -130,6 +130,7 @@ document.addEventListener('DOMContentLoaded', async () => {
        await loadSettings();
        initEventListeners();
        initClock();
        loadShowTheme();
        loadVoicemails();
        setInterval(loadVoicemails, 30000);
        loadEmails();
@@ -345,6 +346,13 @@ function initEventListeners() {
    document.getElementById('devon-play-btn')?.addEventListener('click', playDevonSuggestion);
    document.getElementById('devon-dismiss-btn')?.addEventListener('click', dismissDevonSuggestion);
    // Show Theme
    document.getElementById('set-theme-btn')?.addEventListener('click', setShowTheme);
    document.getElementById('clear-theme-btn')?.addEventListener('click', clearShowTheme);
    document.getElementById('show-theme-input')?.addEventListener('keydown', (e) => {
        if (e.key === 'Enter') setShowTheme();
    });
    // Settings
    document.getElementById('settings-btn')?.addEventListener('click', async () => {
        document.getElementById('settings-modal')?.classList.remove('hidden');
@@ -692,6 +700,7 @@ async function newSession() {
    // Reload callers to get new session ID
    await loadCallers();
    await loadShowTheme();
    log('New session started - all callers have fresh backgrounds');
 }
@@ -1159,6 +1168,69 @@ async function playSFX(soundFile) {
 }
 // --- Show Theme ---
 /**
  * Reflect a theme value in the header controls.
  * A non-empty theme fills and highlights the input, hides "Set" and shows
  * "Clear"; an empty theme resets all three. Missing elements are skipped.
  * @param {string} theme - Theme to display, or '' / undefined for none.
  */
 function applyShowThemeUI(theme) {
    const input = document.getElementById('show-theme-input');
    const setBtn = document.getElementById('set-theme-btn');
    const clearBtn = document.getElementById('clear-theme-btn');
    const hasTheme = Boolean(theme);
    if (input) {
        input.value = hasTheme ? theme : '';
        input.classList.toggle('active', hasTheme);
    }
    setBtn?.classList.toggle('hidden', hasTheme);
    clearBtn?.classList.toggle('hidden', !hasTheme);
 }

 /**
  * POST a theme to the backend and return the parsed JSON reply.
  * fetch() only rejects on network failure, so HTTP error statuses are
  * turned into exceptions here.
  * @param {string} theme - New theme; '' clears it.
  * @returns {Promise<Object>} Parsed response body.
  */
 async function postShowTheme(theme) {
    const res = await fetch('/api/show-theme', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ theme })
    });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    return res.json();
 }

 /** Fetch the current theme from the backend and update the header controls. */
 async function loadShowTheme() {
    try {
        const res = await fetch('/api/show-theme');
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data = await res.json();
        applyShowThemeUI(data.theme);
    } catch (e) {
        console.error('Failed to load show theme:', e);
    }
 }

 /** Send the trimmed input value as the new theme; blank input is ignored. */
 async function setShowTheme() {
    const input = document.getElementById('show-theme-input');
    const theme = input?.value.trim();
    if (!theme) return;
    try {
        const data = await postShowTheme(theme);
        // Show what the server actually stored, which may differ from what was typed.
        if (data.theme) applyShowThemeUI(data.theme);
    } catch (e) {
        console.error('Failed to set show theme:', e);
    }
 }

 /** Clear the theme on the backend; the UI is reset only if that succeeded. */
 async function clearShowTheme() {
    try {
        await postShowTheme('');
        applyShowThemeUI('');
    } catch (e) {
        console.error('Failed to clear show theme:', e);
    }
 }
 // --- Settings ---
 async function loadSettings() {
    try {
@@ -9,12 +9,15 @@
 ---------------------------------------------------------------------------
 local SILENCE_DB       = -30    -- dBFS — anything below this is "silence"
 local MIN_SILENCE_SEC  = 6.0   -- same-speaker gaps: only remove silences longer than this
-local MIN_SILENCE_TRANSITION_SEC = 2.5 -- cross-speaker gaps: shorter threshold for speaker transitions
+local MAX_SILENCE_SEC  = 999   -- no practical limit (IDENT/AD regions protect real breaks)
 local MIN_SILENCE_TRANSITION_SEC = 5.0 -- cross-speaker gaps: threshold for caller TTS latency
 local MIN_SILENCE_DEVON_SEC = 3.0 -- Devon gaps: interjections are prerendered (~2-3s gaps), conversational TTS is 6s+
 local DEVON_TRACK = 2 -- 1-indexed: Devon track number
 local MIN_VOICE_SEC    = 0.3   -- ignore non-silent bursts shorter than this (filters transients)
 local KEEP_PAD_SEC     = 0.5   -- leave this much silence on each side of a cut
 local BLOCK_SEC        = 0.1   -- analysis block size (100ms)
 local SAMPLE_RATE      = 48000
-local CHECK_TRACKS     = {1, 2, 3, 4} -- 1-indexed: Host, Devon, Live Caller, AI Caller
+local CHECK_TRACKS     = {1, 2, 3, 4} -- 1-indexed: Host, Devon, AI Caller, Live Caller
 local IDENTS_TRACK     = 6     -- 1-indexed: Idents track
 local ADS_TRACK        = 7     -- 1-indexed: Ads track
 local MUSIC_TRACK      = 8     -- 1-indexed: Music track
@@ -25,7 +28,6 @@ local YIELD_INTERVAL   = 200   -- yield to REAPER every N blocks (~20s of audio)
 local BLOCK_SAMPLES = math.floor(SAMPLE_RATE * BLOCK_SEC)
 local THRESHOLD = 10 ^ (SILENCE_DB / 20)
 local MIN_VOICE_BLOCKS = math.ceil(MIN_VOICE_SEC / BLOCK_SEC)
 -- Write one line to the REAPER console, tagged with the "[PostProd]" prefix.
 local function log(msg)
  local line = "[PostProd] " .. msg .. "\n"
  reaper.ShowConsoleMsg(line)
 end
@@ -306,13 +308,17 @@ local function read_block_peak_rms(ta, project_time)
 end
 -- find_loudest_track: returns 1-based index of the loudest track at a given time, or 0 if silent
 -- Uses RMS (not peak) for speaker identification — ambient mic noise has high peaks but low RMS
 local function find_loudest_track(track_audios, project_time)
  local best_peak = 0
  local best_rms = 0
  local best_idx = 0
  for i, ta in ipairs(track_audios) do
-    local peak, _ = read_block_peak_rms(ta, project_time)
+    local peak, sum_sq = read_block_peak_rms(ta, project_time)
-    if peak > best_peak then
+    if peak > best_peak then best_peak = peak end
-      best_peak = peak
+    local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
    if rms > best_rms then
      best_rms = rms
      best_idx = i
    end
  end
@@ -340,12 +346,17 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
  while t < region.end_pos do
    local best_peak = 0
    local best_rms = 0
    local best_sum = 0
    local best_track = 0
    for i, ta in ipairs(track_audios) do
      local peak, sum_sq = read_block_peak_rms(ta, t)
-      if peak > best_peak then
+      if peak > best_peak then best_peak = peak end
-        best_peak = peak
+      -- Use RMS for speaker identification (sustained energy, not transient peaks)
      -- Host mic ambient noise has high peaks but low RMS; TTS speech has high RMS
      local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
      if rms > best_rms then
        best_rms = rms
        best_sum = sum_sq
        best_track = i
      end
@@ -375,8 +386,11 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
          local dur = voice_start - silence_start
          local track_after = voice_run_track
          local is_transition = track_before_silence ~= 0 and track_after ~= 0 and track_before_silence ~= track_after
-          local threshold = is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC
+          local devon_involved = track_before_silence == DEVON_TRACK or track_after == DEVON_TRACK
-          if dur >= threshold then
+          local threshold = devon_involved and MIN_SILENCE_DEVON_SEC
                         or (is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC)
          if dur >= threshold and dur <= MAX_SILENCE_SEC then
            table.insert(silences, {
              start_pos = silence_start, end_pos = voice_start, duration = dur,
              is_transition = is_transition,
@@ -410,7 +424,7 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
  if in_silence then
    local dur = region.end_pos - silence_start
-    if dur >= MIN_SILENCE_SEC then
+    if dur >= MIN_SILENCE_SEC and dur <= MAX_SILENCE_SEC then
      table.insert(silences, {start_pos = silence_start, end_pos = region.end_pos, duration = dur})
    end
  end
@@ -547,6 +561,7 @@ local function phase1_strip_silence(dialog_regions)
      if (t + 1) == MUSIC_TRACK then goto next_track end
      local track = reaper.GetTrack(0, t)
      -- Split and delete the silent portion from items that span r.start_pos
      local item = find_item_at(track, r.start_pos)
      if item then
        local right = reaper.SplitMediaItem(item, r.start_pos)
@@ -556,10 +571,36 @@ local function phase1_strip_silence(dialog_regions)
        end
      end
      -- Handle sparse track items that START within the removal range
      -- (not found by find_item_at since they don't contain r.start_pos)
      for j = reaper.CountTrackMediaItems(track) - 1, 0, -1 do
        local check = reaper.GetTrackMediaItem(track, j)
        local cpos = reaper.GetMediaItemInfo_Value(check, "D_POSITION")
        if cpos >= r.start_pos and cpos < r.end_pos then
          local clen = reaper.GetMediaItemInfo_Value(check, "D_LENGTH")
          local cend = cpos + clen
          if cend <= r.end_pos then
            -- Entirely within removal — delete
            reaper.DeleteTrackMediaItem(track, check)
          else
            -- Starts in removal but extends past — trim start to r.end_pos
            local trim = r.end_pos - cpos
            local take = reaper.GetActiveTake(check)
            if take then
              local offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
              reaper.SetMediaItemTakeInfo_Value(take, "D_STARTOFFS", offset + trim)
            end
            reaper.SetMediaItemInfo_Value(check, "D_LENGTH", cend - r.end_pos)
            reaper.SetMediaItemInfo_Value(check, "D_POSITION", r.end_pos)
          end
        end
      end
      -- Shift items AFTER the removal (use r.end_pos, not r.start_pos)
      for j = 0, reaper.CountTrackMediaItems(track) - 1 do
        local shift_item = reaper.GetTrackMediaItem(track, j)
        local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
-        if pos >= r.start_pos then
+        if pos >= r.end_pos then
          reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
        end
      end
@@ -766,6 +807,56 @@ local function phase3_trim_music()
  local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
  if not music_track then return end
  -- Ensure music starts before first voice item.
  -- Silence removal shifts voice/idents/ads but not music. If voice now starts before
  -- music, nudge all non-music tracks forward so music has a lead-in.
  local first_voice_start = math.huge
  for _, tidx in ipairs(CHECK_TRACKS) do
    local tr = reaper.GetTrack(0, tidx - 1)
    if tr and reaper.CountTrackMediaItems(tr) > 0 then
      local item = reaper.GetTrackMediaItem(tr, 0)
      local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
      if pos < first_voice_start then first_voice_start = pos end
    end
  end
  local MUSIC_LEAD_SEC = 3.0  -- seconds of music before first voice
  if first_voice_start < math.huge then
    local first_music = reaper.GetTrackMediaItem(music_track, 0)
    if first_music then
      local music_start = reaper.GetMediaItemInfo_Value(first_music, "D_POSITION")
      local desired_voice_start = music_start + MUSIC_LEAD_SEC
      if first_voice_start < desired_voice_start then
        local nudge = desired_voice_start - first_voice_start
        -- Shift all non-music tracks forward
        for t = 0, reaper.CountTracks(0) - 1 do
          if (t + 1) == MUSIC_TRACK then goto skip_music end
          local track = reaper.GetTrack(0, t)
          for i = 0, reaper.CountTrackMediaItems(track) - 1 do
            local item = reaper.GetTrackMediaItem(track, i)
            local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
            reaper.SetMediaItemInfo_Value(item, "D_POSITION", pos + nudge)
          end
          ::skip_music::
        end
        -- Also shift all markers/regions forward.
        -- Snapshot every marker/region BEFORE moving anything: enumeration
        -- indices follow timeline order, so moving an entry while enumerating
        -- can re-order the list, shifting some entries twice and skipping others.
        local _, num_markers, num_regions = reaper.CountProjectMarkers(0)
        local pending = {}
        for i = 0, num_markers + num_regions - 1 do
          local retval, is_region, pos, rgnend, name, idx, color = reaper.EnumProjectMarkers3(0, i)
          -- retval is an integer (0 = no entry at this index); 0 is truthy in
          -- Lua, so it has to be compared explicitly.
          if retval and retval ~= 0 then
            pending[#pending + 1] = {
              is_region = is_region, pos = pos, rgnend = rgnend,
              name = name, idx = idx, color = color,
            }
          end
        end
        -- Apply the shift by displayed ID (idx), which does not depend on order.
        for _, m in ipairs(pending) do
          local new_end = m.is_region and (m.rgnend + nudge) or 0
          reaper.SetProjectMarker3(0, m.idx, m.is_region, m.pos + nudge, new_end, m.name, m.color)
        end
        log("Phase 3: Nudged non-music tracks forward " .. string.format("%.1f", nudge) .. "s for " .. MUSIC_LEAD_SEC .. "s music lead-in")
      end
    end
  end
  local last_end = 0
  for _, tidx in ipairs(CHECK_TRACKS) do
    local tr = reaper.GetTrack(0, tidx - 1)