Strip silence: preserve music intro, fix ad normalization, smart loop range

- Preserve first silence in first DIALOG region (music intro before host speaks) - Fix ad/ident normalization using direct WAV reading (accessor failed after splits) - Loop range starts 0.5s before audible music, ends at last item - Disable broken music lead-in nudge (intro preservation handles it) - Caller dialog model set to Grok for testing Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 02:32:34 -06:00
parent 3dd6a83c68
commit 762b5efc3b
2 changed files with 147 additions and 122 deletions
@@ -37,7 +37,7 @@ class Settings(BaseSettings):
    # Categories: caller_dialog, devon_monitor, devon_ask, background_gen,
    #             call_summary, news_summary, topic_gen, unknown
    category_models: dict = {
-        "caller_dialog": "anthropic/claude-sonnet-4-5",       # quality matters — this IS the show
+        "caller_dialog": "x-ai/grok-4-fast",                   # testing edgier dialog — revert to anthropic/claude-sonnet-4-5
        "devon_ask": "google/gemini-2.5-flash",               # Devon direct questions
        "devon_monitor": "google/gemini-2.5-flash",           # Devon polling — biggest cost saver
        "background_gen": "google/gemini-2.5-flash",          # JSON caller backgrounds
@@ -466,7 +466,10 @@ local function phase1_strip_silence(dialog_regions)
  for _, r in ipairs(get_regions_by_type("^IDENT%s+%d+$")) do table.insert(protected_regions, r) end
  table.sort(protected_regions, function(a, b) return a.start_pos < b.start_pos end)
  if #protected_regions > 0 then
-    log("  Protecting " .. #protected_regions .. " AD/IDENT region(s) from silence removal")
+    log("  Protecting " .. #protected_regions .. " AD/IDENT region(s) from silence removal:")
    for _, pr in ipairs(protected_regions) do
      log("    " .. pr.name .. " at " .. string.format("%.1f", pr.start_pos) .. "-" .. string.format("%.1f", pr.end_pos) .. "s")
    end
  end
  log("Phase 1: Analyzing using " .. tracks_loaded .. "/" .. #CHECK_TRACKS .. " voice tracks")
@@ -512,6 +515,11 @@ local function phase1_strip_silence(dialog_regions)
            break
          end
        end
        -- Preserve the very first silence (music intro before host starts talking)
        if not protected and ri == 1 and #removals == 0 and s.start_pos <= rgn.start_pos + 1.0 then
          protected = true
          log("    KEEP " .. string.format("%.1f", rm_end - rm_start) .. "s at " .. string.format("%.1f", s.start_pos) .. "-" .. string.format("%.1f", s.end_pos) .. " (music intro)")
        end
        if not protected then
          table.insert(removals, {start_pos = rm_start, end_pos = rm_end})
          local tag = s.is_transition and " [transition]" or ""
@@ -561,7 +569,6 @@ local function phase1_strip_silence(dialog_regions)
      if (t + 1) == MUSIC_TRACK then goto next_track end
      local track = reaper.GetTrack(0, t)
      -- Split and delete the silent portion from items that span r.start_pos
      local item = find_item_at(track, r.start_pos)
      if item then
        local right = reaper.SplitMediaItem(item, r.start_pos)
@@ -571,36 +578,10 @@ local function phase1_strip_silence(dialog_regions)
        end
      end
      -- Handle sparse track items that START within the removal range
      -- (not found by find_item_at since they don't contain r.start_pos)
      for j = reaper.CountTrackMediaItems(track) - 1, 0, -1 do
        local check = reaper.GetTrackMediaItem(track, j)
        local cpos = reaper.GetMediaItemInfo_Value(check, "D_POSITION")
        if cpos >= r.start_pos and cpos < r.end_pos then
          local clen = reaper.GetMediaItemInfo_Value(check, "D_LENGTH")
          local cend = cpos + clen
          if cend <= r.end_pos then
            -- Entirely within removal — delete
            reaper.DeleteTrackMediaItem(track, check)
          else
            -- Starts in removal but extends past — trim start to r.end_pos
            local trim = r.end_pos - cpos
            local take = reaper.GetActiveTake(check)
            if take then
              local offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
              reaper.SetMediaItemTakeInfo_Value(take, "D_STARTOFFS", offset + trim)
            end
            reaper.SetMediaItemInfo_Value(check, "D_LENGTH", cend - r.end_pos)
            reaper.SetMediaItemInfo_Value(check, "D_POSITION", r.end_pos)
          end
        end
      end
      -- Shift items AFTER the removal (use r.end_pos, not r.start_pos)
      for j = 0, reaper.CountTrackMediaItems(track) - 1 do
        local shift_item = reaper.GetTrackMediaItem(track, j)
        local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
-        if pos >= r.end_pos then
+        if pos >= r.start_pos then
          reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
        end
      end
@@ -629,63 +610,58 @@ end
 -- Phase 2: Normalize AD/IDENT volume to match dialog
 ---------------------------------------------------------------------------
-local function normalize_track_regions(track_idx, regions, target_db)
+local function normalize_track_items(track_idx, target_db, label)
  -- Normalize all items on a track that have audible content.
  -- Uses direct WAV reading (not audio accessor) so it works after Phase 1 splits.
  local track = reaper.GetTrack(0, track_idx - 1)
  if not track or reaper.CountTrackMediaItems(track) == 0 then return end
-  for _, rgn in ipairs(regions) do
+  local ta = get_track_audio(track_idx)
-    local item = find_item_at(track, rgn.start_pos)
+  if not ta then
-    if not item then goto next_region end
+    log("  " .. label .. ": no audio found")
-
+    return
    local item_start = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
    local segment = item
    if item_start < rgn.start_pos - 0.01 then
      segment = reaper.SplitMediaItem(item, rgn.start_pos)
      if not segment then goto next_region end
    end
    local seg_end = reaper.GetMediaItemInfo_Value(segment, "D_POSITION")
                  + reaper.GetMediaItemInfo_Value(segment, "D_LENGTH")
    if rgn.end_pos < seg_end - 0.01 then
      reaper.SplitMediaItem(segment, rgn.end_pos)
  end
-    local take = reaper.GetActiveTake(segment)
+  local adjusted = 0
-    if not take then goto next_region end
+  for i = 0, reaper.CountTrackMediaItems(track) - 1 do
-
+    local item = reaper.GetTrackMediaItem(track, i)
-    local seg_pos = reaper.GetMediaItemInfo_Value(segment, "D_POSITION")
+    local item_pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
-    local seg_len = reaper.GetMediaItemInfo_Value(segment, "D_LENGTH")
+    local item_len = reaper.GetMediaItemInfo_Value(item, "D_LENGTH")
-    local seg_offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
+    local item_end = item_pos + item_len
    local accessor = reaper.CreateTakeAudioAccessor(take)
    -- Measure RMS of audible content in this item
    local sum_sq = 0
    local count = 0
-    local t = seg_pos
+    local t = item_pos
-    while t < seg_pos + seg_len do
+    while t < item_end do
-      local source_time = t - seg_pos + seg_offset
+      local peak, s_sq = read_block_peak_rms(ta, t)
-      local buf = reaper.new_array(BLOCK_SAMPLES)
+      if peak >= THRESHOLD then
-      reaper.GetAudioAccessorSamples(accessor, SAMPLE_RATE, 1, source_time, BLOCK_SAMPLES, buf)
+        sum_sq = sum_sq + s_sq
      for i = 1, BLOCK_SAMPLES do
        sum_sq = sum_sq + buf[i] * buf[i]
      end
        count = count + BLOCK_SAMPLES
      end
      t = t + BLOCK_SEC
    end
    reaper.DestroyAudioAccessor(accessor)
    if count > 0 then
      local item_rms = math.sqrt(sum_sq / count)
      if item_rms > 0 then
        local item_db = 20 * math.log(item_rms, 10)
        local gain_db = target_db - item_db
        -- Only adjust if the difference is significant (> 1dB)
        if math.abs(gain_db) > 1.0 then
          local gain_linear = 10 ^ (gain_db / 20)
-        local current_vol = reaper.GetMediaItemInfo_Value(segment, "D_VOL")
+          local current_vol = reaper.GetMediaItemInfo_Value(item, "D_VOL")
-        reaper.SetMediaItemInfo_Value(segment, "D_VOL", current_vol * gain_linear)
+          reaper.SetMediaItemInfo_Value(item, "D_VOL", current_vol * gain_linear)
-        log("  " .. rgn.name .. ": " .. string.format("%+.1f", gain_db) .. "dB adjustment")
+          log("  " .. label .. " item at " .. string.format("%.0f", item_pos) .. "s: " .. string.format("%+.1f", gain_db) .. "dB")
          adjusted = adjusted + 1
        end
      end
    end
  end
-    ::next_region::
+  destroy_track_audio(ta)
  if adjusted == 0 then
    log("  " .. label .. ": no adjustments needed")
  end
 end
@@ -776,19 +752,16 @@ local function phase2_normalize(dialog_regions, ad_regions, ident_regions, dialo
  local ad_ident_target = dialog_rms_db + AD_IDENT_OFFSET_DB
  log("Phase 2: AD/IDENT target = " .. string.format("%.1f", ad_ident_target) .. " dBFS (" .. AD_IDENT_OFFSET_DB .. "dB offset from dialog)")
  if #ad_regions > 0 then
  progress_detail = "Ads"
  coroutine.yield()
-    log("Phase 2: Normalizing " .. #ad_regions .. " AD region(s)...")
+  log("Phase 2: Normalizing ads track...")
-    normalize_track_regions(ADS_TRACK, ad_regions, ad_ident_target)
+  normalize_track_items(ADS_TRACK, ad_ident_target, "Ads")
-  end
+
  if #ident_regions > 0 then
  progress_detail = "Idents"
  progress_pct = 0.33
  coroutine.yield()
-    log("Phase 2: Normalizing " .. #ident_regions .. " IDENT region(s)...")
+  log("Phase 2: Normalizing idents track...")
-    normalize_track_regions(IDENTS_TRACK, ident_regions, ad_ident_target)
+  normalize_track_items(IDENTS_TRACK, ad_ident_target, "Idents")
  end
  progress_detail = "Music"
  progress_pct = 0.66
@@ -812,54 +785,73 @@ local function phase3_trim_music()
  local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
  if not music_track then return end
-  -- Ensure music starts before first voice item.
+  -- Music lead-in: ensure audible music plays before first voice.
-  -- Silence removal shifts voice/idents/ads but not music. If voice now starts before
+  -- Strategy: skip the silent intro in the music WAV (adjust take offset),
-  -- music, nudge all non-music tracks forward so music has a lead-in.
+  -- then nudge all non-music tracks forward by MUSIC_LEAD_SEC so music plays first.
-  local first_voice_start = math.huge
+  local MUSIC_LEAD_SEC = 3.0
-  for _, tidx in ipairs(CHECK_TRACKS) do
+
-    local tr = reaper.GetTrack(0, tidx - 1)
+  -- Find where music becomes audible in the source WAV
-    if tr and reaper.CountTrackMediaItems(tr) > 0 then
+  local music_audible_offset = nil
-      local item = reaper.GetTrackMediaItem(tr, 0)
+  local music_ta = get_track_audio(MUSIC_TRACK)
-      local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
+  if music_ta then
-      if pos < first_voice_start then first_voice_start = pos end
+    local t = music_ta.item_pos
    while t < music_ta.item_end do
      local peak, _ = read_block_peak_rms(music_ta, t)
      if peak >= THRESHOLD then
        music_audible_offset = t - music_ta.item_pos  -- offset into the WAV
        break
      end
      t = t + BLOCK_SEC
    end
    destroy_track_audio(music_ta)
  end
  if false then  -- Music lead-in disabled — intro silence is preserved instead
    -- Skip the silent intro: set take offset so audible music starts at position 0
    local first_music = reaper.GetTrackMediaItem(music_track, 0)
    if first_music then
      local take = reaper.GetActiveTake(first_music)
      if take then
        local current_offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
        reaper.SetMediaItemTakeInfo_Value(take, "D_STARTOFFS", current_offset + music_audible_offset)
        -- Trim item length to account for skipped intro
        local item_len = reaper.GetMediaItemInfo_Value(first_music, "D_LENGTH")
        reaper.SetMediaItemInfo_Value(first_music, "D_LENGTH", item_len - music_audible_offset)
        log("Phase 3: Skipped " .. string.format("%.1f", music_audible_offset) .. "s of silent music intro")
      end
    end
-  local MUSIC_LEAD_SEC = 3.0  -- seconds of music before first voice
+    -- Nudge all non-music tracks forward by MUSIC_LEAD_SEC
-  if first_voice_start < math.huge then
+    log("Phase 3: Nudging non-music tracks forward by " .. MUSIC_LEAD_SEC .. "s for music lead-in")
    local first_music = reaper.GetTrackMediaItem(music_track, 0)
    if first_music then
      local music_start = reaper.GetMediaItemInfo_Value(first_music, "D_POSITION")
      local desired_voice_start = music_start + MUSIC_LEAD_SEC
      if first_voice_start < desired_voice_start then
        local nudge = desired_voice_start - first_voice_start
        -- Shift all non-music tracks forward
    for t = 0, reaper.CountTracks(0) - 1 do
      if (t + 1) == MUSIC_TRACK then goto skip_music end
      local track = reaper.GetTrack(0, t)
      for i = 0, reaper.CountTrackMediaItems(track) - 1 do
        local item = reaper.GetTrackMediaItem(track, i)
        local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
-            reaper.SetMediaItemInfo_Value(item, "D_POSITION", pos + nudge)
+        reaper.SetMediaItemInfo_Value(item, "D_POSITION", pos + MUSIC_LEAD_SEC)
      end
      ::skip_music::
    end
-        -- Also shift all markers/regions forward
+
    -- Shift markers/regions forward too
    local markers_to_update = {}
    local _, num_markers, num_regions = reaper.CountProjectMarkers(0)
-        local total_m = num_markers + num_regions
+    for i = 0, num_markers + num_regions - 1 do
        for i = 0, total_m - 1 do
      local retval, is_region, pos, rgnend, name, idx, color = reaper.EnumProjectMarkers3(0, i)
      if retval then
-            if is_region then
+        table.insert(markers_to_update, {is_region=is_region, pos=pos, rgnend=rgnend, name=name, idx=idx, color=color})
-              reaper.SetProjectMarker3(0, idx, true, pos + nudge, rgnend + nudge, name, color)
+      end
    end
    for _, m in ipairs(markers_to_update) do
      if m.is_region then
        reaper.SetProjectMarker3(0, m.idx, true, m.pos + MUSIC_LEAD_SEC, m.rgnend + MUSIC_LEAD_SEC, m.name, m.color)
      else
-              reaper.SetProjectMarker3(0, idx, false, pos + nudge, 0, name, color)
+        reaper.SetProjectMarker3(0, m.idx, false, m.pos + MUSIC_LEAD_SEC, 0, m.name, m.color)
            end
          end
        end
        log("Phase 3: Nudged non-music tracks forward " .. string.format("%.1f", nudge) .. "s for " .. MUSIC_LEAD_SEC .. "s music lead-in")
      end
    end
  else
    log("Phase 3: No silent music intro detected — skipping lead-in adjustment")
  end
  local last_end = 0
@@ -1008,6 +1000,39 @@ local function do_work()
    log("Phase 4: No AD/IDENT regions found — skipping")
  end
  -- Set loop/time selection: start 0.5s before audible music, end at last item
  local loop_start = 0
  local music_ta = get_track_audio(MUSIC_TRACK)
  if music_ta then
    local t = music_ta.item_pos
    while t < music_ta.item_end do
      local peak, _ = read_block_peak_rms(music_ta, t)
      if peak >= THRESHOLD then
        loop_start = math.max(0, t - 0.5)
        break
      end
      t = t + BLOCK_SEC
    end
    destroy_track_audio(music_ta)
  end
  local project_end = 0
  for t = 0, reaper.CountTracks(0) - 1 do
    local track = reaper.GetTrack(0, t)
    local n = reaper.CountTrackMediaItems(track)
    if n > 0 then
      local last_item = reaper.GetTrackMediaItem(track, n - 1)
      local item_end = reaper.GetMediaItemInfo_Value(last_item, "D_POSITION")
                     + reaper.GetMediaItemInfo_Value(last_item, "D_LENGTH")
      if item_end > project_end then project_end = item_end end
    end
  end
  if project_end > 0 then
    reaper.GetSet_LoopTimeRange(true, true, loop_start, project_end, false)
    reaper.GetSet_LoopTimeRange(true, false, loop_start, project_end, false)
    log("Loop range set: " .. string.format("%.1f", loop_start) .. " to " .. string.format("%.1f", project_end) .. "s (" .. string.format("%.1f", (project_end - loop_start) / 60) .. " min)")
  end
  reaper.PreventUIRefresh(-1)
  reaper.Undo_EndBlock("Post-production: strip silence + music fades", -1)
  reaper.UpdateArrange()