diff --git a/analyze_gaps.py b/analyze_gaps.py
new file mode 100644
index 0000000..e6dcbca
--- /dev/null
+++ b/analyze_gaps.py
@@ -0,0 +1,260 @@
+#!/usr/bin/env python3
+"""Analyze silence gaps in podcast stems to find optimal strip-silence thresholds.
+
+Usage: python analyze_gaps.py recordings/2026-03-17_235137/
+"""
+import sys
+import numpy as np
+import soundfile as sf
+from pathlib import Path
+
+BLOCK_SEC = 0.1  # length of one analysis block, in seconds
+SILENCE_DB = -30  # silence level in dB; converted to a linear amplitude below
+THRESHOLD = 10 ** (SILENCE_DB / 20)  # linear amplitude that block peaks are compared against
+MIN_VOICE_SEC = 0.3  # seconds of continuous non-silent audio needed to end a gap
+
+
+def load_stem(path: Path) -> tuple[np.ndarray, int]:
+    audio, sr = sf.read(path, dtype="float32")
+    if audio.ndim > 1:
+        audio = audio[:, 0]
+    return audio, sr
+
+
+def compute_rms_blocks(audio: np.ndarray, sr: int) -> np.ndarray:
+    block_samples = int(sr * BLOCK_SEC)
+    n_blocks = len(audio) // block_samples
+    if n_blocks == 0:
+        return np.array([0.0])
+    trimmed = audio[:n_blocks * block_samples].reshape(n_blocks, block_samples)
+    return np.sqrt(np.mean(trimmed ** 2, axis=1))
+
+
+def compute_peak_blocks(audio: np.ndarray, sr: int) -> np.ndarray:
+    block_samples = int(sr * BLOCK_SEC)
+    n_blocks = len(audio) // block_samples
+    if n_blocks == 0:
+        return np.array([0.0])
+    trimmed = audio[:n_blocks * block_samples].reshape(n_blocks, block_samples)
+    return np.max(np.abs(trimmed), axis=1)
+
+
+def analyze(stems_dir: Path):
+    stems_dir = Path(stems_dir)
+    voice_stems = {}
+    for name in ["host", "devon", "caller"]:
+        path = stems_dir / f"{name}.wav"
+        if path.exists():
+            print(f"Loading {name}...", end=" ", flush=True)
+            audio, sr = load_stem(path)
+            voice_stems[name] = audio
+            print(f"{len(audio)/sr:.0f}s @ {sr}Hz")
+
+    if not voice_stems:
+        print("No voice stems found")
+        return
+
+    sr_val = sr
+    duration = max(len(a) for a in voice_stems.values()) / sr_val
+    print(f"\nTotal duration: {duration/60:.1f} min")
+
+    # Compute per-track RMS and peak blocks
+    track_rms = {}
+    track_peak = {}
+    for name, audio in voice_stems.items():
+        track_rms[name] = compute_rms_blocks(audio, sr_val)
+        track_peak[name] = compute_peak_blocks(audio, sr_val)
+
+    n_blocks = min(len(v) for v in track_peak.values())
+
+    # Detect gaps using same logic as Lua script (RMS for speaker ID, peak for silence)
+    min_voice_blocks = int(MIN_VOICE_SEC / BLOCK_SEC)
+    track_names = list(voice_stems.keys())
+
+    gaps = []
+    in_silence = False
+    silence_start = 0
+    track_before = None
+    last_active = None
+    voice_run = 0
+    voice_run_track = None
+
+    for i in range(n_blocks):
+        # Peak for silence detection
+        best_peak = max(track_peak[name][i] for name in track_names)
+        # RMS for speaker identification
+        best_rms = 0
+        best_track = None
+        for name in track_names:
+            r = track_rms[name][i]
+            if r > best_rms:
+                best_rms = r
+                best_track = name
+
+        all_silent = best_peak < THRESHOLD
+
+        if not all_silent:
+            last_active = best_track
+
+        if in_silence:
+            if all_silent:
+                voice_run = 0
+                voice_run_track = None
+            else:
+                if voice_run == 0:
+                    voice_run_track = best_track
+                voice_run += 1
+                if voice_run >= min_voice_blocks:
+                    voice_start_block = i - (voice_run - 1)
+                    gap_start = silence_start * BLOCK_SEC
+                    gap_end = voice_start_block * BLOCK_SEC
+                    dur = gap_end - gap_start
+                    if dur >= 0.5:  # log gaps >= 0.5s
+                        gaps.append({
+                            "start": gap_start,
+                            "end": gap_end,
+                            "dur": dur,
+                            "before": track_before or "?",
+                            "after": voice_run_track or "?",
+                        })
+                    in_silence = False
+                    voice_run = 0
+                    voice_run_track = None
+        else:
+            if all_silent:
+                in_silence = True
+                silence_start = i
+                track_before = last_active
+                voice_run = 0
+                voice_run_track = None
+
+    # Trailing silence
+    if in_silence:
+        dur = (n_blocks - silence_start) * BLOCK_SEC
+        if dur >= 0.5:
+            gaps.append({
+                "start": silence_start * BLOCK_SEC,
+                "end": n_blocks * BLOCK_SEC,
+                "dur": dur,
+                "before": track_before or "?",
+                "after": "end",
+            })
+
+    if not gaps:
+        print("No gaps detected")
+        return
+
+    # Categorize gaps
+    categories = {
+        "host_self": [],      # Host -> Host
+        "host_to_caller": [], # Host -> Caller (TTS latency)
+        "caller_to_host": [], # Caller -> Host
+        "host_to_devon": [],  # Host -> Devon (TTS latency)
+        "devon_to_host": [],  # Devon -> Host
+        "caller_to_devon": [],# Caller -> Devon (interjection)
+        "devon_to_caller": [],# Devon -> Caller
+        "other": [],
+    }
+
+    for g in gaps:
+        b, a = g["before"], g["after"]
+        if b == "host" and a == "host":
+            categories["host_self"].append(g)
+        elif b == "host" and a == "caller":
+            categories["host_to_caller"].append(g)
+        elif b == "caller" and a == "host":
+            categories["caller_to_host"].append(g)
+        elif b == "host" and a == "devon":
+            categories["host_to_devon"].append(g)
+        elif b == "devon" and a == "host":
+            categories["devon_to_host"].append(g)
+        elif b == "caller" and a == "devon":
+            categories["caller_to_devon"].append(g)
+        elif b == "devon" and a == "caller":
+            categories["devon_to_caller"].append(g)
+        else:
+            categories["other"].append(g)
+
+    # Print results
+    print(f"\n{'='*70}")
+    print(f"GAP ANALYSIS — {len(gaps)} gaps detected")
+    print(f"{'='*70}")
+
+    total_silence = sum(g["dur"] for g in gaps)
+    print(f"Total silence: {total_silence:.0f}s ({total_silence/60:.1f} min)")
+    print(f"Content after removal: ~{(duration - total_silence)/60:.1f} min")
+
+    for cat_name, cat_gaps in sorted(categories.items(), key=lambda x: -len(x[1])):
+        if not cat_gaps:
+            continue
+        durs = sorted([g["dur"] for g in cat_gaps])
+        print(f"\n--- {cat_name} ({len(cat_gaps)} gaps) ---")
+        print(f"  Range: {durs[0]:.1f}s - {durs[-1]:.1f}s")
+        print(f"  Median: {np.median(durs):.1f}s  Mean: {np.mean(durs):.1f}s")
+        if len(durs) >= 5:
+            print(f"  P25: {np.percentile(durs, 25):.1f}s  P75: {np.percentile(durs, 75):.1f}s")
+
+        # Histogram
+        brackets = [(0, 1), (1, 2), (2, 3), (3, 5), (5, 8), (8, 12), (12, 18), (18, 30), (30, 60), (60, 999)]
+        print(f"  Distribution:")
+        for lo, hi in brackets:
+            count = sum(1 for d in durs if lo <= d < hi)
+            if count > 0:
+                bar = "#" * count
+                label = f"{lo}-{hi}s" if hi < 999 else f"{lo}s+"
+                print(f"    {label:>8s}: {bar} ({count})")
+
+    # Find natural clusters and suggest thresholds
+    print(f"\n{'='*70}")
+    print("SUGGESTED THRESHOLDS")
+    print(f"{'='*70}")
+
+    # For each Devon-involved category, find the gap between interjection and TTS gaps
+    devon_gaps = categories["host_to_devon"] + categories["devon_to_host"] + categories["caller_to_devon"] + categories["devon_to_caller"]
+    if devon_gaps:
+        devon_durs = sorted([g["dur"] for g in devon_gaps])
+        # Look for a natural break between short (interjection) and long (TTS) gaps
+        short = [d for d in devon_durs if d < 5]
+        long = [d for d in devon_durs if d >= 5]
+        if short and long:
+            suggested = (max(short) + min(long)) / 2
+            print(f"Devon threshold: {suggested:.1f}s  (short gaps: {len(short)} up to {max(short):.1f}s, long gaps: {len(long)} from {min(long):.1f}s)")
+        elif short:
+            print(f"Devon threshold: {max(short) + 1:.1f}s  (all gaps are short, max {max(short):.1f}s)")
+        else:
+            print(f"Devon threshold: 3.0s  (all gaps are long, min {min(long):.1f}s)")
+
+    caller_gaps = categories["host_to_caller"] + categories["caller_to_host"]
+    if caller_gaps:
+        caller_durs = sorted([g["dur"] for g in caller_gaps])
+        short = [d for d in caller_durs if d < 5]
+        long = [d for d in caller_durs if d >= 5]
+        if short and long:
+            suggested = (max(short) + min(long)) / 2
+            print(f"Caller transition threshold: {suggested:.1f}s  (short: {len(short)} up to {max(short):.1f}s, long: {len(long)} from {min(long):.1f}s)")
+        elif long:
+            print(f"Caller transition threshold: {min(long) - 1:.1f}s  (all gaps >= {min(long):.1f}s)")
+
+    host_self = categories["host_self"]
+    if host_self:
+        host_durs = sorted([g["dur"] for g in host_self])
+        short = [d for d in host_durs if d < 5]
+        long = [d for d in host_durs if d >= 5]
+        if short and long:
+            suggested = (max(short) + min(long)) / 2
+            print(f"Same-speaker threshold: {suggested:.1f}s  (short: {len(short)} up to {max(short):.1f}s, long: {len(long)} from {min(long):.1f}s)")
+        elif long:
+            print(f"Same-speaker threshold: {min(long) - 1:.1f}s  (all gaps >= {min(long):.1f}s)")
+
+    all_durs = sorted([g["dur"] for g in gaps])
+    would_cut = [d for d in all_durs if d >= 3.0]
+    print(f"\nWith current thresholds (Devon=3s, others=6s):")
+    print(f"  Would cut: ~{len(would_cut)} gaps, ~{sum(would_cut):.0f}s ({sum(would_cut)/60:.1f} min)")
+    print(f"  Result: ~{(duration - sum(would_cut))/60:.1f} min")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python analyze_gaps.py <stems_dir>")
+        sys.exit(1)
+    analyze(Path(sys.argv[1]))
diff --git a/backend/main.py b/backend/main.py
index 33cd6c1..a4054d0 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -8507,6 +8507,9 @@ GENRE_KEYWORDS = {
     "valentine": "Ballad",
     "romantic": "Ballad",
     "ballad": "Ballad",
+    "irish": "Irish",
+    "ireland": "Irish",
+    "patricks": "Irish",
 }
 
 
diff --git a/reaper/strip_silence_dialog.lua b/reaper/strip_silence_dialog.lua
index d8eac8e..ab0e09d 100644
--- a/reaper/strip_silence_dialog.lua
+++ b/reaper/strip_silence_dialog.lua
@@ -9,12 +9,15 @@
 ---------------------------------------------------------------------------
 local SILENCE_DB       = -30    -- dBFS — anything below this is "silence"
 local MIN_SILENCE_SEC  = 6.0   -- same-speaker gaps: only remove silences longer than this
-local MIN_SILENCE_TRANSITION_SEC = 2.5 -- cross-speaker gaps: shorter threshold for speaker transitions
+local MAX_SILENCE_SEC  = 999   -- no practical limit (IDENT/AD regions protect real breaks)
+local MIN_SILENCE_TRANSITION_SEC = 5.0 -- cross-speaker gaps: threshold for caller TTS latency
+local MIN_SILENCE_DEVON_SEC = 3.0 -- Devon gaps: interjections are prerendered (~2-3s gaps), conversational TTS is 6s+
+local DEVON_TRACK = 2 -- 1-indexed: Devon track number
 local MIN_VOICE_SEC    = 0.3   -- ignore non-silent bursts shorter than this (filters transients)
 local KEEP_PAD_SEC     = 0.5   -- leave this much silence on each side of a cut
 local BLOCK_SEC        = 0.1   -- analysis block size (100ms)
 local SAMPLE_RATE      = 48000
-local CHECK_TRACKS     = {1, 2, 3, 4} -- 1-indexed: Host, Devon, Live Caller, AI Caller
+local CHECK_TRACKS     = {1, 2, 3, 4} -- 1-indexed: Host, Devon, AI Caller, Live Caller
 local IDENTS_TRACK     = 6     -- 1-indexed: Idents track
 local ADS_TRACK        = 7     -- 1-indexed: Ads track
 local MUSIC_TRACK      = 8     -- 1-indexed: Music track
@@ -25,7 +28,6 @@ local YIELD_INTERVAL   = 200   -- yield to REAPER every N blocks (~20s of audio)
 local BLOCK_SAMPLES = math.floor(SAMPLE_RATE * BLOCK_SEC)
 local THRESHOLD = 10 ^ (SILENCE_DB / 20)
 local MIN_VOICE_BLOCKS = math.ceil(MIN_VOICE_SEC / BLOCK_SEC)
-
 local function log(msg)
   reaper.ShowConsoleMsg("[PostProd] " .. msg .. "\n")
 end
@@ -306,13 +308,17 @@ local function read_block_peak_rms(ta, project_time)
 end
 
 -- find_loudest_track: returns 1-based index of the loudest track at a given time, or 0 if silent
+-- Uses RMS (not peak) for speaker identification — ambient mic noise has high peaks but low RMS
 local function find_loudest_track(track_audios, project_time)
   local best_peak = 0
+  local best_rms = 0
   local best_idx = 0
   for i, ta in ipairs(track_audios) do
-    local peak, _ = read_block_peak_rms(ta, project_time)
-    if peak > best_peak then
-      best_peak = peak
+    local peak, sum_sq = read_block_peak_rms(ta, project_time)
+    if peak > best_peak then best_peak = peak end
+    local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
+    if rms > best_rms then
+      best_rms = rms
       best_idx = i
     end
   end
@@ -340,12 +346,17 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
 
   while t < region.end_pos do
     local best_peak = 0
+    local best_rms = 0
     local best_sum = 0
     local best_track = 0
     for i, ta in ipairs(track_audios) do
       local peak, sum_sq = read_block_peak_rms(ta, t)
-      if peak > best_peak then
-        best_peak = peak
+      if peak > best_peak then best_peak = peak end
+      -- Use RMS for speaker identification (sustained energy, not transient peaks)
+      -- Host mic ambient noise has high peaks but low RMS; TTS speech has high RMS
+      local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
+      if rms > best_rms then
+        best_rms = rms
         best_sum = sum_sq
         best_track = i
       end
@@ -375,8 +386,11 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
           local dur = voice_start - silence_start
           local track_after = voice_run_track
           local is_transition = track_before_silence ~= 0 and track_after ~= 0 and track_before_silence ~= track_after
-          local threshold = is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC
-          if dur >= threshold then
+          local devon_involved = track_before_silence == DEVON_TRACK or track_after == DEVON_TRACK
+          local threshold = devon_involved and MIN_SILENCE_DEVON_SEC
+                         or (is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC)
+
+          if dur >= threshold and dur <= MAX_SILENCE_SEC then
             table.insert(silences, {
               start_pos = silence_start, end_pos = voice_start, duration = dur,
               is_transition = is_transition,
@@ -410,7 +424,7 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
 
   if in_silence then
     local dur = region.end_pos - silence_start
-    if dur >= MIN_SILENCE_SEC then
+    if dur >= MIN_SILENCE_SEC and dur <= MAX_SILENCE_SEC then
       table.insert(silences, {start_pos = silence_start, end_pos = region.end_pos, duration = dur})
     end
   end
@@ -547,6 +561,7 @@ local function phase1_strip_silence(dialog_regions)
       if (t + 1) == MUSIC_TRACK then goto next_track end
       local track = reaper.GetTrack(0, t)
 
+      -- Split and delete the silent portion from items that span r.start_pos
       local item = find_item_at(track, r.start_pos)
       if item then
         local right = reaper.SplitMediaItem(item, r.start_pos)
@@ -556,10 +571,36 @@ local function phase1_strip_silence(dialog_regions)
         end
       end
 
+      -- Handle sparse track items that START within the removal range
+      -- (not found by find_item_at since they don't contain r.start_pos)
+      for j = reaper.CountTrackMediaItems(track) - 1, 0, -1 do
+        local check = reaper.GetTrackMediaItem(track, j)
+        local cpos = reaper.GetMediaItemInfo_Value(check, "D_POSITION")
+        if cpos >= r.start_pos and cpos < r.end_pos then
+          local clen = reaper.GetMediaItemInfo_Value(check, "D_LENGTH")
+          local cend = cpos + clen
+          if cend <= r.end_pos then
+            -- Entirely within removal — delete
+            reaper.DeleteTrackMediaItem(track, check)
+          else
+            -- Starts in removal but extends past — trim start to r.end_pos
+            local trim = r.end_pos - cpos
+            local take = reaper.GetActiveTake(check)
+            if take then
+              local offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
+              reaper.SetMediaItemTakeInfo_Value(take, "D_STARTOFFS", offset + trim)
+            end
+            reaper.SetMediaItemInfo_Value(check, "D_LENGTH", cend - r.end_pos)
+            reaper.SetMediaItemInfo_Value(check, "D_POSITION", r.end_pos)
+          end
+        end
+      end
+
+      -- Shift items AFTER the removal (use r.end_pos, not r.start_pos)
       for j = 0, reaper.CountTrackMediaItems(track) - 1 do
         local shift_item = reaper.GetTrackMediaItem(track, j)
         local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
-        if pos >= r.start_pos then
+        if pos >= r.end_pos then
           reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
         end
       end
@@ -766,6 +807,56 @@ local function phase3_trim_music()
   local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
   if not music_track then return end
 
+  -- Ensure music starts before first voice item.
+  -- Silence removal shifts voice/idents/ads but not music. If voice now starts before
+  -- music, nudge all non-music tracks forward so music has a lead-in.
+  local first_voice_start = math.huge
+  for _, tidx in ipairs(CHECK_TRACKS) do
+    local tr = reaper.GetTrack(0, tidx - 1)
+    if tr and reaper.CountTrackMediaItems(tr) > 0 then
+      local item = reaper.GetTrackMediaItem(tr, 0)
+      local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
+      if pos < first_voice_start then first_voice_start = pos end
+    end
+  end
+
+  local MUSIC_LEAD_SEC = 3.0  -- seconds of music before first voice
+  if first_voice_start < math.huge then
+    local first_music = reaper.GetTrackMediaItem(music_track, 0)
+    if first_music then
+      local music_start = reaper.GetMediaItemInfo_Value(first_music, "D_POSITION")
+      local desired_voice_start = music_start + MUSIC_LEAD_SEC
+      if first_voice_start < desired_voice_start then
+        local nudge = desired_voice_start - first_voice_start
+        -- Shift all non-music tracks forward
+        for t = 0, reaper.CountTracks(0) - 1 do
+          if (t + 1) == MUSIC_TRACK then goto skip_music end
+          local track = reaper.GetTrack(0, t)
+          for i = 0, reaper.CountTrackMediaItems(track) - 1 do
+            local item = reaper.GetTrackMediaItem(track, i)
+            local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
+            reaper.SetMediaItemInfo_Value(item, "D_POSITION", pos + nudge)
+          end
+          ::skip_music::
+        end
+        -- Also shift all markers/regions forward
+        local _, num_markers, num_regions = reaper.CountProjectMarkers(0)
+        local total_m = num_markers + num_regions
+        for i = 0, total_m - 1 do
+          local retval, is_region, pos, rgnend, name, idx, color = reaper.EnumProjectMarkers3(0, i)
+          if retval then
+            if is_region then
+              reaper.SetProjectMarker3(0, idx, true, pos + nudge, rgnend + nudge, name, color)
+            else
+              reaper.SetProjectMarker3(0, idx, false, pos + nudge, 0, name, color)
+            end
+          end
+        end
+        log("Phase 3: Nudged non-music tracks forward " .. string.format("%.1f", nudge) .. "s for " .. MUSIC_LEAD_SEC .. "s music lead-in")
+      end
+    end
+  end
+
   local last_end = 0
   for _, tidx in ipairs(CHECK_TRACKS) do
     local tr = reaper.GetTrack(0, tidx - 1)