Show theme feature, Irish music genre, strip silence overhaul
- Add show theme UI in header bar + backend API (inject into caller prompts) - Add Irish genre category for music dropdown - Strip silence: RMS-based speaker detection (fixes Devon not being identified) - Strip silence: Devon-specific 3s threshold for interjections - Strip silence: sparse track item handling in shift logic - Strip silence: music lead-in preservation after silence removal - Strip silence: no max gap limit (IDENT/AD regions protect breaks) - Add analyze_gaps.py tool for per-show threshold analysis Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+260
@@ -0,0 +1,260 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Analyze silence gaps in podcast stems to find optimal strip-silence thresholds.
|
||||||
|
|
||||||
|
Usage: python analyze_gaps.py recordings/2026-03-17_235137/
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Length of one analysis block in seconds; levels are measured per block.
BLOCK_SEC = 0.1
# Silence level in dB. A block whose peak is below the linear equivalent
# (THRESHOLD) on every track is treated as silent.
SILENCE_DB = -30
# Linear amplitude equivalent of SILENCE_DB (20*log10 convention).
THRESHOLD = 10 ** (SILENCE_DB / 20)
# Non-silent audio must last at least this long (seconds) to end a silence;
# shorter bursts are ignored.
MIN_VOICE_SEC = 0.3
|
||||||
|
|
||||||
|
|
||||||
|
def load_stem(path: Path) -> tuple[np.ndarray, int]:
    """Read one audio stem from ``path`` as float32 samples.

    Multi-channel files are reduced to their first channel (column 0);
    the remaining channels are discarded, not mixed down.

    Args:
        path: Location of the audio file, passed straight to ``sf.read``.

    Returns:
        ``(samples, sample_rate)`` where ``samples`` is a 1-D array.
    """
    audio, sr = sf.read(path, dtype="float32")
    if audio.ndim > 1:
        # Keep only the first channel of a multi-channel stem.
        audio = audio[:, 0]
    return audio, sr
|
||||||
|
|
||||||
|
|
||||||
|
def compute_rms_blocks(audio: np.ndarray, sr: int) -> np.ndarray:
    """Return the RMS level of each consecutive ``BLOCK_SEC`` block of ``audio``.

    Samples left over after the last whole block are ignored.

    Args:
        audio: 1-D array of samples.
        sr: Sample rate in Hz, used to convert ``BLOCK_SEC`` to a sample count.

    Returns:
        One RMS value per whole block. If ``audio`` is shorter than one block
        (or the block size rounds down to zero samples) a single ``0.0`` is
        returned so callers always get at least one block.
    """
    block_samples = int(sr * BLOCK_SEC)
    # A block size of zero samples would otherwise raise ZeroDivisionError below.
    if block_samples <= 0:
        return np.array([0.0])
    n_blocks = len(audio) // block_samples
    if n_blocks == 0:
        return np.array([0.0])
    trimmed = audio[:n_blocks * block_samples].reshape(n_blocks, block_samples)
    return np.sqrt(np.mean(trimmed ** 2, axis=1))
|
||||||
|
|
||||||
|
|
||||||
|
def compute_peak_blocks(audio: np.ndarray, sr: int) -> np.ndarray:
    """Return the peak absolute level of each ``BLOCK_SEC`` block of ``audio``.

    Samples left over after the last whole block are ignored.

    Args:
        audio: 1-D array of samples.
        sr: Sample rate in Hz, used to convert ``BLOCK_SEC`` to a sample count.

    Returns:
        One peak value per whole block. If ``audio`` is shorter than one block
        (or the block size rounds down to zero samples) a single ``0.0`` is
        returned so callers always get at least one block.
    """
    block_samples = int(sr * BLOCK_SEC)
    # A block size of zero samples would otherwise raise ZeroDivisionError below.
    if block_samples <= 0:
        return np.array([0.0])
    n_blocks = len(audio) // block_samples
    if n_blocks == 0:
        return np.array([0.0])
    trimmed = audio[:n_blocks * block_samples].reshape(n_blocks, block_samples)
    return np.max(np.abs(trimmed), axis=1)
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_gaps(track_rms, track_peak, n_blocks):
    """Find every all-track silence of at least 0.5s in the first ``n_blocks`` blocks.

    Peak level decides whether a block is silent; RMS decides which track is
    the active speaker. A silence ends only once non-silent audio has lasted
    ``MIN_VOICE_SEC``; shorter bursts are ignored and the silence continues.

    Returns a list of dicts with ``start``/``end``/``dur`` (seconds) and
    ``before``/``after`` (track name, ``"?"`` if unknown, ``"end"`` for a
    silence that runs to the end of the audio).
    """
    # ceil, not int(): 0.3 / 0.1 evaluates to 2.999... and int() would give 2
    # blocks, whereas the Lua script uses math.ceil and requires 3.
    min_voice_blocks = int(np.ceil(MIN_VOICE_SEC / BLOCK_SEC))
    track_names = list(track_rms.keys())

    gaps = []
    in_silence = False
    silence_start = 0
    track_before = None      # speaker active just before the current silence
    last_active = None       # speaker of the most recent non-silent block
    voice_run = 0            # consecutive non-silent blocks seen inside a silence
    voice_run_track = None   # speaker of the first block of that run

    for i in range(n_blocks):
        # Peak for silence detection
        best_peak = max(track_peak[name][i] for name in track_names)
        # RMS for speaker identification
        best_rms = 0
        best_track = None
        for name in track_names:
            r = track_rms[name][i]
            if r > best_rms:
                best_rms = r
                best_track = name

        all_silent = best_peak < THRESHOLD
        if not all_silent:
            last_active = best_track

        if in_silence:
            if all_silent:
                # Burst was too short to count as voice; keep the silence open.
                voice_run = 0
                voice_run_track = None
            else:
                if voice_run == 0:
                    voice_run_track = best_track
                voice_run += 1
                if voice_run >= min_voice_blocks:
                    voice_start_block = i - (voice_run - 1)
                    # Duration from the integer block count, so a gap of
                    # exactly 5 blocks is not lost to float subtraction error.
                    dur = (voice_start_block - silence_start) * BLOCK_SEC
                    if dur >= 0.5:  # log gaps >= 0.5s
                        gaps.append({
                            "start": silence_start * BLOCK_SEC,
                            "end": voice_start_block * BLOCK_SEC,
                            "dur": dur,
                            "before": track_before or "?",
                            "after": voice_run_track or "?",
                        })
                    in_silence = False
                    voice_run = 0
                    voice_run_track = None
        elif all_silent:
            in_silence = True
            silence_start = i
            track_before = last_active
            voice_run = 0
            voice_run_track = None

    # Trailing silence
    if in_silence:
        dur = (n_blocks - silence_start) * BLOCK_SEC
        if dur >= 0.5:
            gaps.append({
                "start": silence_start * BLOCK_SEC,
                "end": n_blocks * BLOCK_SEC,
                "dur": dur,
                "before": track_before or "?",
                "after": "end",
            })
    return gaps


def _categorize_gaps(gaps):
    """Group gaps by their (speaker before, speaker after) pair.

    Pairs not listed below (unknown speakers, trailing silence, devon -> devon,
    caller -> caller) go to ``"other"``.
    """
    pair_to_category = {
        ("host", "host"): "host_self",            # Host -> Host
        ("host", "caller"): "host_to_caller",     # Host -> Caller (TTS latency)
        ("caller", "host"): "caller_to_host",     # Caller -> Host
        ("host", "devon"): "host_to_devon",       # Host -> Devon (TTS latency)
        ("devon", "host"): "devon_to_host",       # Devon -> Host
        ("caller", "devon"): "caller_to_devon",   # Caller -> Devon (interjection)
        ("devon", "caller"): "devon_to_caller",   # Devon -> Caller
    }
    categories = {cat: [] for cat in pair_to_category.values()}
    categories["other"] = []
    for g in gaps:
        cat = pair_to_category.get((g["before"], g["after"]), "other")
        categories[cat].append(g)
    return categories


def _cut_threshold(gap):
    """Return the minimum duration (s) at which the Lua script would cut ``gap``.

    Mirrors the Lua constants: Devon-involved gaps 3s, other cross-speaker
    transitions 5s, everything else (same speaker, unknown speaker, trailing
    silence) 6s.
    """
    before, after = gap["before"], gap["after"]
    if after == "end":
        # Trailing silence is always compared against the same-speaker threshold.
        return 6.0
    if "devon" in (before, after):
        return 3.0
    if before != "?" and after != "?" and before != after:
        return 5.0
    return 6.0


def _print_split_threshold(label, gaps):
    """Print a suggested threshold for ``gaps`` by splitting durations at 5s.

    Prints nothing when ``gaps`` is empty or when every gap is shorter than 5s.
    """
    if not gaps:
        return
    durs = sorted(g["dur"] for g in gaps)
    short = [d for d in durs if d < 5]
    long = [d for d in durs if d >= 5]
    if short and long:
        suggested = (max(short) + min(long)) / 2
        print(f"{label}: {suggested:.1f}s (short: {len(short)} up to {max(short):.1f}s, long: {len(long)} from {min(long):.1f}s)")
    elif long:
        print(f"{label}: {min(long) - 1:.1f}s (all gaps >= {min(long):.1f}s)")


def analyze(stems_dir: Path):
    """Print a silence-gap report for the voice stems found in ``stems_dir``.

    Looks for ``host.wav``, ``devon.wav`` and ``caller.wav``; missing stems are
    skipped. Gaps (all loaded stems silent) are detected, grouped by which
    speaker precedes and follows them, summarised per group with a histogram,
    and finally turned into suggested strip-silence thresholds plus an
    estimate of what the current thresholds would remove.

    Args:
        stems_dir: Directory containing the stem ``.wav`` files.

    Returns:
        None. All output goes to stdout.
    """
    stems_dir = Path(stems_dir)
    voice_stems = {}
    sample_rates = {}
    for name in ["host", "devon", "caller"]:
        path = stems_dir / f"{name}.wav"
        if path.exists():
            print(f"Loading {name}...", end=" ", flush=True)
            audio, sr = load_stem(path)
            voice_stems[name] = audio
            sample_rates[name] = sr
            print(f"{len(audio)/sr:.0f}s @ {sr}Hz")

    if not voice_stems:
        print("No voice stems found")
        return

    # Each stem is measured with its own sample rate so that block i covers
    # the same time span on every track even if the rates differ.
    duration = max(len(a) / sample_rates[n] for n, a in voice_stems.items())
    print(f"\nTotal duration: {duration/60:.1f} min")

    # Compute per-track RMS and peak blocks
    track_rms = {}
    track_peak = {}
    for name, audio in voice_stems.items():
        track_rms[name] = compute_rms_blocks(audio, sample_rates[name])
        track_peak[name] = compute_peak_blocks(audio, sample_rates[name])

    # Only analyse the span covered by every loaded stem.
    n_blocks = min(len(v) for v in track_peak.values())

    # Detect gaps using same logic as Lua script (RMS for speaker ID, peak for silence)
    gaps = _detect_gaps(track_rms, track_peak, n_blocks)

    if not gaps:
        print("No gaps detected")
        return

    # Categorize gaps
    categories = _categorize_gaps(gaps)

    # Print results
    print(f"\n{'='*70}")
    print(f"GAP ANALYSIS — {len(gaps)} gaps detected")
    print(f"{'='*70}")

    total_silence = sum(g["dur"] for g in gaps)
    print(f"Total silence: {total_silence:.0f}s ({total_silence/60:.1f} min)")
    print(f"Content after removal: ~{(duration - total_silence)/60:.1f} min")

    # Last bracket is open-ended so arbitrarily long gaps are still counted.
    brackets = [(0, 1), (1, 2), (2, 3), (3, 5), (5, 8), (8, 12), (12, 18), (18, 30), (30, 60), (60, np.inf)]
    # Largest categories first; ties keep the declaration order.
    for cat_name, cat_gaps in sorted(categories.items(), key=lambda x: -len(x[1])):
        if not cat_gaps:
            continue
        durs = sorted([g["dur"] for g in cat_gaps])
        print(f"\n--- {cat_name} ({len(cat_gaps)} gaps) ---")
        print(f" Range: {durs[0]:.1f}s - {durs[-1]:.1f}s")
        print(f" Median: {np.median(durs):.1f}s Mean: {np.mean(durs):.1f}s")
        if len(durs) >= 5:
            print(f" P25: {np.percentile(durs, 25):.1f}s P75: {np.percentile(durs, 75):.1f}s")

        # Histogram
        print(f" Distribution:")
        for lo, hi in brackets:
            count = sum(1 for d in durs if lo <= d < hi)
            if count > 0:
                bar = "#" * count
                label = f"{lo}-{hi}s" if hi != np.inf else f"{lo}s+"
                print(f" {label:>8s}: {bar} ({count})")

    # Find natural clusters and suggest thresholds
    print(f"\n{'='*70}")
    print("SUGGESTED THRESHOLDS")
    print(f"{'='*70}")

    # For each Devon-involved category, find the gap between interjection and TTS gaps
    devon_gaps = (
        categories["host_to_devon"]
        + categories["devon_to_host"]
        + categories["caller_to_devon"]
        + categories["devon_to_caller"]
    )
    if devon_gaps:
        devon_durs = sorted([g["dur"] for g in devon_gaps])
        # Look for a natural break between short (interjection) and long (TTS) gaps
        short = [d for d in devon_durs if d < 5]
        long = [d for d in devon_durs if d >= 5]
        if short and long:
            suggested = (max(short) + min(long)) / 2
            print(f"Devon threshold: {suggested:.1f}s (short gaps: {len(short)} up to {max(short):.1f}s, long gaps: {len(long)} from {min(long):.1f}s)")
        elif short:
            print(f"Devon threshold: {max(short) + 1:.1f}s (all gaps are short, max {max(short):.1f}s)")
        else:
            print(f"Devon threshold: 3.0s (all gaps are long, min {min(long):.1f}s)")

    _print_split_threshold(
        "Caller transition threshold",
        categories["host_to_caller"] + categories["caller_to_host"],
    )
    _print_split_threshold("Same-speaker threshold", categories["host_self"])

    # Apply the per-gap threshold the Lua script would use, instead of a flat
    # 3s for every gap, so the estimate matches what would actually be cut.
    would_cut = [g["dur"] for g in gaps if g["dur"] >= _cut_threshold(g)]
    print(f"\nWith current thresholds (Devon=3s, transition=5s, same-speaker=6s):")
    print(f" Would cut: ~{len(would_cut)} gaps, ~{sum(would_cut):.0f}s ({sum(would_cut)/60:.1f} min)")
    print(f" Result: ~{(duration - sum(would_cut))/60:.1f} min")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # The first positional argument is the stems directory; any further
    # arguments are ignored.
    if len(sys.argv) < 2:
        print("Usage: python analyze_gaps.py <stems_dir>")
        sys.exit(1)
    analyze(Path(sys.argv[1]))
|
||||||
@@ -8507,6 +8507,9 @@ GENRE_KEYWORDS = {
|
|||||||
"valentine": "Ballad",
|
"valentine": "Ballad",
|
||||||
"romantic": "Ballad",
|
"romantic": "Ballad",
|
||||||
"ballad": "Ballad",
|
"ballad": "Ballad",
|
||||||
|
"irish": "Irish",
|
||||||
|
"ireland": "Irish",
|
||||||
|
"patricks": "Irish",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+103
-12
@@ -9,12 +9,15 @@
|
|||||||
---------------------------------------------------------------------------
|
---------------------------------------------------------------------------
|
||||||
local SILENCE_DB = -30 -- dBFS — anything below this is "silence"
|
local SILENCE_DB = -30 -- dBFS — anything below this is "silence"
|
||||||
local MIN_SILENCE_SEC = 6.0 -- same-speaker gaps: only remove silences longer than this
|
local MIN_SILENCE_SEC = 6.0 -- same-speaker gaps: only remove silences longer than this
|
||||||
local MIN_SILENCE_TRANSITION_SEC = 2.5 -- cross-speaker gaps: shorter threshold for speaker transitions
|
local MAX_SILENCE_SEC = 999 -- no practical limit (IDENT/AD regions protect real breaks)
|
||||||
|
local MIN_SILENCE_TRANSITION_SEC = 5.0 -- cross-speaker gaps: threshold for caller TTS latency
|
||||||
|
local MIN_SILENCE_DEVON_SEC = 3.0 -- Devon gaps: interjections are prerendered (~2-3s gaps), conversational TTS is 6s+
|
||||||
|
local DEVON_TRACK = 2 -- 1-indexed: Devon track number
|
||||||
local MIN_VOICE_SEC = 0.3 -- ignore non-silent bursts shorter than this (filters transients)
|
local MIN_VOICE_SEC = 0.3 -- ignore non-silent bursts shorter than this (filters transients)
|
||||||
local KEEP_PAD_SEC = 0.5 -- leave this much silence on each side of a cut
|
local KEEP_PAD_SEC = 0.5 -- leave this much silence on each side of a cut
|
||||||
local BLOCK_SEC = 0.1 -- analysis block size (100ms)
|
local BLOCK_SEC = 0.1 -- analysis block size (100ms)
|
||||||
local SAMPLE_RATE = 48000
|
local SAMPLE_RATE = 48000
|
||||||
local CHECK_TRACKS = {1, 2, 3, 4} -- 1-indexed: Host, Devon, Live Caller, AI Caller
|
local CHECK_TRACKS = {1, 2, 3, 4} -- 1-indexed: Host, Devon, AI Caller, Live Caller
|
||||||
local IDENTS_TRACK = 6 -- 1-indexed: Idents track
|
local IDENTS_TRACK = 6 -- 1-indexed: Idents track
|
||||||
local ADS_TRACK = 7 -- 1-indexed: Ads track
|
local ADS_TRACK = 7 -- 1-indexed: Ads track
|
||||||
local MUSIC_TRACK = 8 -- 1-indexed: Music track
|
local MUSIC_TRACK = 8 -- 1-indexed: Music track
|
||||||
@@ -25,7 +28,6 @@ local YIELD_INTERVAL = 200 -- yield to REAPER every N blocks (~20s of audio)
|
|||||||
local BLOCK_SAMPLES = math.floor(SAMPLE_RATE * BLOCK_SEC)
|
local BLOCK_SAMPLES = math.floor(SAMPLE_RATE * BLOCK_SEC)
|
||||||
local THRESHOLD = 10 ^ (SILENCE_DB / 20)
|
local THRESHOLD = 10 ^ (SILENCE_DB / 20)
|
||||||
local MIN_VOICE_BLOCKS = math.ceil(MIN_VOICE_SEC / BLOCK_SEC)
|
local MIN_VOICE_BLOCKS = math.ceil(MIN_VOICE_SEC / BLOCK_SEC)
|
||||||
|
|
||||||
local function log(msg)
|
local function log(msg)
|
||||||
reaper.ShowConsoleMsg("[PostProd] " .. msg .. "\n")
|
reaper.ShowConsoleMsg("[PostProd] " .. msg .. "\n")
|
||||||
end
|
end
|
||||||
@@ -306,13 +308,17 @@ local function read_block_peak_rms(ta, project_time)
|
|||||||
end
|
end
|
||||||
|
|
||||||
-- find_loudest_track: returns 1-based index of the loudest track at a given time, or 0 if silent
|
-- find_loudest_track: returns 1-based index of the loudest track at a given time, or 0 if silent
|
||||||
|
-- Uses RMS (not peak) for speaker identification — ambient mic noise has high peaks but low RMS
|
||||||
local function find_loudest_track(track_audios, project_time)
|
local function find_loudest_track(track_audios, project_time)
|
||||||
local best_peak = 0
|
local best_peak = 0
|
||||||
|
local best_rms = 0
|
||||||
local best_idx = 0
|
local best_idx = 0
|
||||||
for i, ta in ipairs(track_audios) do
|
for i, ta in ipairs(track_audios) do
|
||||||
local peak, _ = read_block_peak_rms(ta, project_time)
|
local peak, sum_sq = read_block_peak_rms(ta, project_time)
|
||||||
if peak > best_peak then
|
if peak > best_peak then best_peak = peak end
|
||||||
best_peak = peak
|
local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
|
||||||
|
if rms > best_rms then
|
||||||
|
best_rms = rms
|
||||||
best_idx = i
|
best_idx = i
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -340,12 +346,17 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
|
|||||||
|
|
||||||
while t < region.end_pos do
|
while t < region.end_pos do
|
||||||
local best_peak = 0
|
local best_peak = 0
|
||||||
|
local best_rms = 0
|
||||||
local best_sum = 0
|
local best_sum = 0
|
||||||
local best_track = 0
|
local best_track = 0
|
||||||
for i, ta in ipairs(track_audios) do
|
for i, ta in ipairs(track_audios) do
|
||||||
local peak, sum_sq = read_block_peak_rms(ta, t)
|
local peak, sum_sq = read_block_peak_rms(ta, t)
|
||||||
if peak > best_peak then
|
if peak > best_peak then best_peak = peak end
|
||||||
best_peak = peak
|
-- Use RMS for speaker identification (sustained energy, not transient peaks)
|
||||||
|
-- Host mic ambient noise has high peaks but low RMS; TTS speech has high RMS
|
||||||
|
local rms = math.sqrt(sum_sq / BLOCK_SAMPLES)
|
||||||
|
if rms > best_rms then
|
||||||
|
best_rms = rms
|
||||||
best_sum = sum_sq
|
best_sum = sum_sq
|
||||||
best_track = i
|
best_track = i
|
||||||
end
|
end
|
||||||
@@ -375,8 +386,11 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
|
|||||||
local dur = voice_start - silence_start
|
local dur = voice_start - silence_start
|
||||||
local track_after = voice_run_track
|
local track_after = voice_run_track
|
||||||
local is_transition = track_before_silence ~= 0 and track_after ~= 0 and track_before_silence ~= track_after
|
local is_transition = track_before_silence ~= 0 and track_after ~= 0 and track_before_silence ~= track_after
|
||||||
local threshold = is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC
|
local devon_involved = track_before_silence == DEVON_TRACK or track_after == DEVON_TRACK
|
||||||
if dur >= threshold then
|
local threshold = devon_involved and MIN_SILENCE_DEVON_SEC
|
||||||
|
or (is_transition and MIN_SILENCE_TRANSITION_SEC or MIN_SILENCE_SEC)
|
||||||
|
|
||||||
|
if dur >= threshold and dur <= MAX_SILENCE_SEC then
|
||||||
table.insert(silences, {
|
table.insert(silences, {
|
||||||
start_pos = silence_start, end_pos = voice_start, duration = dur,
|
start_pos = silence_start, end_pos = voice_start, duration = dur,
|
||||||
is_transition = is_transition,
|
is_transition = is_transition,
|
||||||
@@ -410,7 +424,7 @@ local function find_silences(region, track_audios, rms_acc, progress_fn)
|
|||||||
|
|
||||||
if in_silence then
|
if in_silence then
|
||||||
local dur = region.end_pos - silence_start
|
local dur = region.end_pos - silence_start
|
||||||
if dur >= MIN_SILENCE_SEC then
|
if dur >= MIN_SILENCE_SEC and dur <= MAX_SILENCE_SEC then
|
||||||
table.insert(silences, {start_pos = silence_start, end_pos = region.end_pos, duration = dur})
|
table.insert(silences, {start_pos = silence_start, end_pos = region.end_pos, duration = dur})
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -547,6 +561,7 @@ local function phase1_strip_silence(dialog_regions)
|
|||||||
if (t + 1) == MUSIC_TRACK then goto next_track end
|
if (t + 1) == MUSIC_TRACK then goto next_track end
|
||||||
local track = reaper.GetTrack(0, t)
|
local track = reaper.GetTrack(0, t)
|
||||||
|
|
||||||
|
-- Split and delete the silent portion from items that span r.start_pos
|
||||||
local item = find_item_at(track, r.start_pos)
|
local item = find_item_at(track, r.start_pos)
|
||||||
if item then
|
if item then
|
||||||
local right = reaper.SplitMediaItem(item, r.start_pos)
|
local right = reaper.SplitMediaItem(item, r.start_pos)
|
||||||
@@ -556,10 +571,36 @@ local function phase1_strip_silence(dialog_regions)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Handle sparse track items that START within the removal range
|
||||||
|
-- (not found by find_item_at since they don't contain r.start_pos)
|
||||||
|
for j = reaper.CountTrackMediaItems(track) - 1, 0, -1 do
|
||||||
|
local check = reaper.GetTrackMediaItem(track, j)
|
||||||
|
local cpos = reaper.GetMediaItemInfo_Value(check, "D_POSITION")
|
||||||
|
if cpos >= r.start_pos and cpos < r.end_pos then
|
||||||
|
local clen = reaper.GetMediaItemInfo_Value(check, "D_LENGTH")
|
||||||
|
local cend = cpos + clen
|
||||||
|
if cend <= r.end_pos then
|
||||||
|
-- Entirely within removal — delete
|
||||||
|
reaper.DeleteTrackMediaItem(track, check)
|
||||||
|
else
|
||||||
|
-- Starts in removal but extends past — trim start to r.end_pos
|
||||||
|
local trim = r.end_pos - cpos
|
||||||
|
local take = reaper.GetActiveTake(check)
|
||||||
|
if take then
|
||||||
|
local offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
|
||||||
|
reaper.SetMediaItemTakeInfo_Value(take, "D_STARTOFFS", offset + trim)
|
||||||
|
end
|
||||||
|
reaper.SetMediaItemInfo_Value(check, "D_LENGTH", cend - r.end_pos)
|
||||||
|
reaper.SetMediaItemInfo_Value(check, "D_POSITION", r.end_pos)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
-- Shift items AFTER the removal (use r.end_pos, not r.start_pos)
|
||||||
for j = 0, reaper.CountTrackMediaItems(track) - 1 do
|
for j = 0, reaper.CountTrackMediaItems(track) - 1 do
|
||||||
local shift_item = reaper.GetTrackMediaItem(track, j)
|
local shift_item = reaper.GetTrackMediaItem(track, j)
|
||||||
local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
|
local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
|
||||||
if pos >= r.start_pos then
|
if pos >= r.end_pos then
|
||||||
reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
|
reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -766,6 +807,56 @@ local function phase3_trim_music()
|
|||||||
local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
|
local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
|
||||||
if not music_track then return end
|
if not music_track then return end
|
||||||
|
|
||||||
|
-- Ensure music starts before first voice item.
|
||||||
|
-- Silence removal shifts voice/idents/ads but not music. If voice now starts before
|
||||||
|
-- music, nudge all non-music tracks forward so music has a lead-in.
|
||||||
|
local first_voice_start = math.huge
|
||||||
|
for _, tidx in ipairs(CHECK_TRACKS) do
|
||||||
|
local tr = reaper.GetTrack(0, tidx - 1)
|
||||||
|
if tr and reaper.CountTrackMediaItems(tr) > 0 then
|
||||||
|
local item = reaper.GetTrackMediaItem(tr, 0)
|
||||||
|
local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
|
||||||
|
if pos < first_voice_start then first_voice_start = pos end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
local MUSIC_LEAD_SEC = 3.0 -- seconds of music before first voice
|
||||||
|
if first_voice_start < math.huge then
|
||||||
|
local first_music = reaper.GetTrackMediaItem(music_track, 0)
|
||||||
|
if first_music then
|
||||||
|
local music_start = reaper.GetMediaItemInfo_Value(first_music, "D_POSITION")
|
||||||
|
local desired_voice_start = music_start + MUSIC_LEAD_SEC
|
||||||
|
if first_voice_start < desired_voice_start then
|
||||||
|
local nudge = desired_voice_start - first_voice_start
|
||||||
|
-- Shift all non-music tracks forward
|
||||||
|
for t = 0, reaper.CountTracks(0) - 1 do
|
||||||
|
if (t + 1) == MUSIC_TRACK then goto skip_music end
|
||||||
|
local track = reaper.GetTrack(0, t)
|
||||||
|
for i = 0, reaper.CountTrackMediaItems(track) - 1 do
|
||||||
|
local item = reaper.GetTrackMediaItem(track, i)
|
||||||
|
local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
|
||||||
|
reaper.SetMediaItemInfo_Value(item, "D_POSITION", pos + nudge)
|
||||||
|
end
|
||||||
|
::skip_music::
|
||||||
|
end
|
||||||
|
-- Also shift all markers/regions forward
|
||||||
|
local _, num_markers, num_regions = reaper.CountProjectMarkers(0)
|
||||||
|
local total_m = num_markers + num_regions
|
||||||
|
for i = 0, total_m - 1 do
|
||||||
|
local retval, is_region, pos, rgnend, name, idx, color = reaper.EnumProjectMarkers3(0, i)
|
||||||
|
if retval then
|
||||||
|
if is_region then
|
||||||
|
reaper.SetProjectMarker3(0, idx, true, pos + nudge, rgnend + nudge, name, color)
|
||||||
|
else
|
||||||
|
reaper.SetProjectMarker3(0, idx, false, pos + nudge, 0, name, color)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
log("Phase 3: Nudged non-music tracks forward " .. string.format("%.1f", nudge) .. "s for " .. MUSIC_LEAD_SEC .. "s music lead-in")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
local last_end = 0
|
local last_end = 0
|
||||||
for _, tidx in ipairs(CHECK_TRACKS) do
|
for _, tidx in ipairs(CHECK_TRACKS) do
|
||||||
local tr = reaper.GetTrack(0, tidx - 1)
|
local tr = reaper.GetTrack(0, tidx - 1)
|
||||||
|
|||||||
Reference in New Issue
Block a user