Ep13 publish, MLX whisper, voicemail system, hero redesign, massive topic expansion

- Switch whisper transcription from faster-whisper (CPU) to lightning-whisper-mlx (GPU)
- Fix word_timestamps hanging, use ffprobe for accurate duration
- Add Cloudflare Pages Worker for SignalWire voicemail fallback when server offline
- Add voicemail sync on startup, delete tracking, save feature
- Add /feed RSS proxy to _worker.js (was broken by worker taking over routing)
- Redesign website hero section: ghost buttons, compact phone, plain text links
- Rewrite caller prompts so callers get to the point faster and follow the host's lead
- Expand TOPIC_CALLIN from ~250 to 547 entries across 34 categories
- Add new categories: biology, psychology, engineering, math, geology, animals,
  work, money, books, movies, relationships, health, language, true crime,
  drunk/high/unhinged callers
- Remove bad Inworld voices (Pixie, Dominus), reduce repeat caller frequency
- Add audio monitor device routing, uvicorn --reload-dir fix
- Publish episode 13

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-16 01:56:47 -07:00
parent 8d3d67a177
commit 3164a70e48
23 changed files with 2944 additions and 512 deletions

View File

@@ -20,6 +20,7 @@ import re
import subprocess
import sys
import tempfile
import xml.etree.ElementTree as ET
from pathlib import Path
import requests
@@ -28,6 +29,8 @@ from dotenv import load_dotenv
# Load API keys and settings from the .env file that sits next to this script.
load_dotenv(Path(__file__).parent / ".env")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")  # required for all LLM calls
# Public Castopod RSS feed; used by --pick to list and download episodes.
RSS_FEED_URL = "https://podcast.macneilmediagroup.com/@LukeAtTheRoost/feed.xml"
# Downloaded episode audio is cached here so repeat runs skip the download.
EPISODE_CACHE_DIR = Path(__file__).parent / "clips" / ".episode-cache"
# Whisper model names: fast pass identifies clips, quality pass refines timestamps.
WHISPER_MODEL_FAST = "base"
WHISPER_MODEL_QUALITY = "large-v3"
COVER_ART = Path(__file__).parent / "website" / "images" / "cover.png"  # show cover art
@@ -273,7 +276,7 @@ Respond with ONLY a JSON array, no markdown or explanation:
"Content-Type": "application/json",
},
json={
"model": "anthropic/claude-3.5-sonnet",
"model": "anthropic/claude-sonnet-4-5",
"messages": [{"role": "user", "content": prompt}],
"max_tokens": 2048,
"temperature": 0.3,
@@ -309,6 +312,70 @@ Respond with ONLY a JSON array, no markdown or explanation:
return validated
def generate_social_metadata(clips: list[dict], labeled_transcript: str,
                             episode_number: int | None) -> list[dict]:
    """Generate social media descriptions and hashtags for each clip.

    Makes a single OpenRouter call covering all clips.  On any network, HTTP,
    or JSON-parse failure the clips are returned unmodified so the pipeline
    can continue without social metadata.  Exits only when the API key is
    missing entirely.
    """
    if not OPENROUTER_API_KEY:
        print("Error: OPENROUTER_API_KEY not set in .env")
        sys.exit(1)
    clips_summary = "\n".join(
        f'{i+1}. "{c["title"]}"{c["caption_text"]}'
        for i, c in enumerate(clips)
    )
    episode_context = f"This is Episode {episode_number} of " if episode_number else "This is an episode of "
    prompt = f"""{episode_context}the "Luke at the Roost" podcast — a late-night call-in show where AI-generated callers share stories, confessions, and hot takes with host Luke.
Here are {len(clips)} clips selected from this episode:
{clips_summary}
For each clip, generate:
1. description: A short, engaging description for social media (1-2 sentences, hook the viewer, conversational tone). Do NOT include hashtags in the description.
2. hashtags: An array of 5-8 hashtags. Always include #lukeattheroost and #podcast. Add topic-relevant and trending-style tags.
Respond with ONLY a JSON array matching the clip order:
[{{"description": "...", "hashtags": ["#tag1", "#tag2", ...]}}]"""
    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": "anthropic/claude-sonnet-4-5",
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": 2048,
                "temperature": 0.7,
            },
            # Other HTTP calls in this file pass timeouts (RSS fetch, episode
            # download); without one, a stalled connection hangs the pipeline.
            timeout=120,
        )
    except requests.RequestException as e:
        print(f"Error calling OpenRouter: {e}")
        return clips
    if response.status_code != 200:
        print(f"Error from OpenRouter: {response.text}")
        return clips
    content = response.json()["choices"][0]["message"]["content"].strip()
    # Strip markdown code fences the model sometimes wraps around its JSON.
    if content.startswith("```"):
        content = re.sub(r"^```(?:json)?\n?", "", content)
        content = re.sub(r"\n?```$", "", content)
    try:
        metadata = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"Error parsing social metadata: {e}")
        return clips
    # Merge per-clip metadata; extra clips beyond the model's output stay bare.
    for i, clip in enumerate(clips):
        if i < len(metadata):
            clip["description"] = metadata[i].get("description", "")
            clip["hashtags"] = metadata[i].get("hashtags", [])
    return clips
def snap_to_sentences(clips: list[dict], segments: list[dict]) -> list[dict]:
"""Snap clip start/end times to sentence boundaries.
@@ -398,11 +465,10 @@ def get_words_in_range(segments: list[dict], start: float, end: float) -> list[d
return words
def _words_similar(a: str, b: str, max_dist: int = 2) -> bool:
"""Check if two words are within edit distance max_dist (Levenshtein)."""
if abs(len(a) - len(b)) > max_dist:
return False
# Simple DP edit distance, bounded
def _edit_distance(a: str, b: str) -> int:
"""Levenshtein edit distance between two strings."""
if abs(len(a) - len(b)) > 5:
return max(len(a), len(b))
prev = list(range(len(b) + 1))
for i in range(1, len(a) + 1):
curr = [i] + [0] * len(b)
@@ -410,139 +476,204 @@ def _words_similar(a: str, b: str, max_dist: int = 2) -> bool:
cost = 0 if a[i - 1] == b[j - 1] else 1
curr[j] = min(curr[j - 1] + 1, prev[j] + 1, prev[j - 1] + cost)
prev = curr
return prev[len(b)] <= max_dist
return prev[len(b)]
def _find_labeled_section(labeled_transcript: str, range_text: str) -> str | None:
"""Find the section of labeled transcript matching a Whisper text range."""
# Strip speaker labels and punctuation from labeled transcript for matching
labeled_stripped = re.sub(r'^[A-Z][A-Z\s\'-]+?:\s*', '', labeled_transcript, flags=re.MULTILINE)
labeled_clean = re.sub(r'[^\w\s]', '', labeled_stripped.lower())
labeled_clean = re.sub(r'\s+', ' ', labeled_clean)
whisper_clean = re.sub(r'[^\w\s]', '', range_text.lower())
whisper_clean = re.sub(r'\s+', ' ', whisper_clean)
whisper_words_list = whisper_clean.split()
# Try progressively shorter phrases from different positions
for phrase_len in [10, 7, 5, 3]:
for start_offset in [0, len(whisper_words_list) // 3, len(whisper_words_list) // 2]:
words_slice = whisper_words_list[start_offset:start_offset + phrase_len]
phrase = " ".join(words_slice)
if len(phrase) < 8:
continue
pos = labeled_clean.find(phrase)
if pos != -1:
# Map back to original transcript — find first word near this position
match_pos = labeled_transcript.lower().find(
words_slice[0], max(0, pos - 300))
if match_pos == -1:
match_pos = max(0, pos)
else:
match_pos = max(0, match_pos - start_offset * 6)
context_start = max(0, match_pos - 400)
context_end = min(len(labeled_transcript), match_pos + len(range_text) + 600)
return labeled_transcript[context_start:context_end]
return None
def _word_score(a: str, b: str) -> int:
"""Alignment score: +2 exact, +1 fuzzy (edit dist ≤2), -1 mismatch."""
if a == b:
return 2
if len(a) >= 3 and len(b) >= 3 and _edit_distance(a, b) <= 2:
return 1
return -1
def _align_sequences(whisper_words: list[str],
                     labeled_words: list[str]) -> list[tuple[int | None, int | None]]:
    """Needleman-Wunsch DP alignment between whisper and labeled word sequences.

    Returns list of (whisper_idx, labeled_idx) pairs in sequence order, where
    None = gap (a word present on one side only).  Substitution cost comes
    from _word_score (exact/fuzzy/mismatch); every gap costs GAP.
    """
    n = len(whisper_words)
    m = len(labeled_words)
    GAP = -1
    # Score matrix with gap-penalized first row and column.
    score = [[0] * (m + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        score[i][0] = score[i - 1][0] + GAP
    for j in range(1, m + 1):
        score[0][j] = score[0][j - 1] + GAP
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            match = score[i - 1][j - 1] + _word_score(whisper_words[i - 1], labeled_words[j - 1])
            delete = score[i - 1][j] + GAP
            insert = score[i][j - 1] + GAP
            score[i][j] = max(match, delete, insert)
    # Traceback from the bottom-right corner; diagonal moves are preferred,
    # then whisper-side gaps, then labeled-side gaps.
    pairs = []
    i, j = n, m
    while i > 0 or j > 0:
        if i > 0 and j > 0 and score[i][j] == score[i - 1][j - 1] + _word_score(whisper_words[i - 1], labeled_words[j - 1]):
            pairs.append((i - 1, j - 1))
            i -= 1
            j -= 1
        elif i > 0 and score[i][j] == score[i - 1][j] + GAP:
            pairs.append((i - 1, None))
            i -= 1
        else:
            pairs.append((None, j - 1))
            j -= 1
    pairs.reverse()
    return pairs
def _parse_full_transcript(labeled_transcript: str) -> list[dict]:
"""Parse entire labeled transcript into flat word list with speaker metadata.
Returns list of {word: str, clean: str, speaker: str} for every word.
"""
result = []
for m in re.finditer(r'^([A-Z][A-Z\s\'-]+?):\s*(.+?)(?=\n[A-Z][A-Z\s\'-]+?:|\n\n|\Z)',
labeled_section, re.MULTILINE | re.DOTALL):
labeled_transcript, re.MULTILINE | re.DOTALL):
speaker = m.group(1).strip()
text = m.group(2)
for w in text.split():
original = w.strip()
clean = re.sub(r"[^\w']", '', original.lower())
if clean:
result.append((original, clean, speaker))
result.append({"word": original, "clean": clean, "speaker": speaker})
return result
def _find_transcript_region(labeled_words: list[dict], whisper_words: list[str],
                            ) -> tuple[int, int] | None:
    """Find the region of labeled_words that best matches the whisper words.

    Uses multi-anchor matching: tries phrases from start, middle, and end
    of the whisper words to find a consensus region.

    Returns (start, end) indices into labeled_words (end exclusive, padded
    with a margin on both sides), or None when no anchor phrase matches.
    """
    if not whisper_words or not labeled_words:
        return None
    labeled_clean = [w["clean"] for w in labeled_words]
    n_labeled = len(labeled_clean)
    def find_phrase(phrase_words: list[str], search_start: int = 0,
                    search_end: int | None = None) -> int | None:
        """Find a phrase in labeled_clean, return index of first word or None."""
        if search_end is None:
            search_end = n_labeled
        plen = len(phrase_words)
        for i in range(search_start, min(search_end, n_labeled - plen + 1)):
            match = True
            for k in range(plen):
                # _word_score >= 1 means exact or fuzzy match; < 1 is a mismatch.
                if _word_score(phrase_words[k], labeled_clean[i + k]) < 1:
                    match = False
                    break
            if match:
                return i
        return None
    # Try anchors from different positions in the whisper words
    anchors = []
    n_whisper = len(whisper_words)
    anchor_positions = [0, n_whisper // 2, max(0, n_whisper - 5)]
    # Deduplicate positions
    anchor_positions = sorted(set(anchor_positions))
    for pos in anchor_positions:
        # Prefer longer anchor phrases; fall back to shorter ones if not found.
        for phrase_len in [5, 4, 3]:
            phrase = whisper_words[pos:pos + phrase_len]
            if len(phrase) < 3:
                continue
            idx = find_phrase(phrase)
            if idx is not None:
                # Estimate region start based on anchor's position in whisper
                region_start = max(0, idx - pos)
                anchors.append(region_start)
                break
    if not anchors:
        return None
    # Use median anchor as region start for robustness
    anchors.sort()
    region_start = anchors[len(anchors) // 2]
    # Region extends to cover all whisper words plus margin
    margin = max(20, n_whisper // 4)
    region_start = max(0, region_start - margin)
    region_end = min(n_labeled, region_start + n_whisper + 2 * margin)
    return (region_start, region_end)
def add_speaker_labels(words: list[dict], labeled_transcript: str,
                       start_time: float, end_time: float,
                       segments: list[dict]) -> list[dict]:
    """Add speaker labels AND correct word text using labeled transcript.

    Uses Whisper only for timestamps.  Takes text from the labeled transcript,
    which has correct names and spelling.  Uses Needleman-Wunsch DP alignment
    to match Whisper words to the labeled transcript: one missed word becomes
    a single gap instead of cascading failures.

    start_time/end_time/segments are part of the established call signature;
    the DP alignment itself works from `words` and the full transcript.
    Mutates and returns `words`.
    """
    if not labeled_transcript or not words:
        return words
    # Parse full transcript into flat word list
    all_labeled = _parse_full_transcript(labeled_transcript)
    if not all_labeled:
        return words
    # Build whisper clean word list (fall back to raw lowercase when a token
    # is pure punctuation, so indices stay aligned with `words`)
    whisper_clean = []
    for w in words:
        clean = re.sub(r"[^\w']", '', w["word"].lower())
        whisper_clean.append(clean if clean else w["word"].lower())
    # Find the matching region in the transcript
    region = _find_transcript_region(all_labeled, whisper_clean)
    if region is None:
        return words
    region_start, region_end = region
    region_words = all_labeled[region_start:region_end]
    region_clean = [w["clean"] for w in region_words]
    # Run DP alignment
    pairs = _align_sequences(whisper_clean, region_clean)
    # Build speaker assignments from aligned pairs.
    # matched[whisper_idx] = (labeled_word_dict, score)
    matched = {}
    for w_idx, l_idx in pairs:
        if w_idx is not None and l_idx is not None:
            score = _word_score(whisper_clean[w_idx], region_clean[l_idx])
            if score > 0:
                matched[w_idx] = (region_words[l_idx], score)
    # Apply matches and interpolate speakers for gaps
    corrections = 0
    for i, word_entry in enumerate(words):
        if i in matched:
            labeled_word, score = matched[i]
            word_entry["speaker"] = labeled_word["speaker"]
            # Replace text only on confident matches — matched entries all
            # carry a positive alignment score (exact or fuzzy)
            corrected = re.sub(r'[^\w\s\'-]', '', labeled_word["word"])
            if corrected:
                if corrected.lower() != whisper_clean[i]:
                    corrections += 1
                word_entry["word"] = corrected
        else:
            # Interpolate speaker from nearest matched neighbor
            speaker = _interpolate_speaker(i, matched, len(words))
            if speaker:
                word_entry["speaker"] = speaker
    if corrections:
        print(f" Corrected {corrections} words from labeled transcript")
    return words
def _interpolate_speaker(idx: int, matched: dict, n_words: int) -> str | None:
"""Find speaker from nearest matched neighbor."""
# Search outward from idx
for dist in range(1, n_words):
before = idx - dist
after = idx + dist
if before >= 0 and before in matched:
return matched[before][0]["speaker"]
if after < n_words and after in matched:
return matched[after][0]["speaker"]
return None
def group_words_into_lines(words: list[dict], clip_start: float,
clip_duration: float) -> list[dict]:
"""Group words into timed caption lines for rendering.
@@ -894,9 +1038,123 @@ def detect_episode_number(audio_path: str) -> int | None:
return None
def fetch_episodes() -> list[dict]:
    """Fetch episode list from Castopod RSS feed.

    Exits the program on network or feed-parsing errors, matching the CLI's
    fail-fast style.  Items without an enclosure URL are skipped.  Returns a
    list of dicts with title, audio_url, duration, episode_number, pub_date.
    """
    print("Fetching episodes from Castopod...")
    try:
        resp = requests.get(RSS_FEED_URL, timeout=15)
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching RSS feed: {e}")
        sys.exit(1)
    try:
        root = ET.fromstring(resp.content)
    except ET.ParseError as e:
        # A captive portal or server error page is not valid XML; fail
        # cleanly instead of crashing with a raw traceback.
        print(f"Error parsing RSS feed: {e}")
        sys.exit(1)
    ns = {"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd"}
    episodes = []
    for item in root.findall(".//item"):
        title = item.findtext("title", "")
        enclosure = item.find("enclosure")
        audio_url = enclosure.get("url", "") if enclosure is not None else ""
        duration = item.findtext("itunes:duration", "", ns)
        ep_num = item.findtext("itunes:episode", "", ns)
        pub_date = item.findtext("pubDate", "")
        if not audio_url:
            continue  # no playable audio — nothing to clip
        episodes.append({
            "title": title,
            "audio_url": audio_url,
            "duration": duration,
            "episode_number": int(ep_num) if ep_num and ep_num.isdigit() else None,
            "pub_date": pub_date,
        })
    return episodes
def pick_episode(episodes: list[dict]) -> dict:
    """Display episode list and let user pick one.

    Sorts episodes by number (unnumbered last), prints a table, then prompts
    until the user enters a valid episode number.  'q' quits with status 0;
    a closed stdin (EOF) aborts with status 1.
    """
    if not episodes:
        print("No episodes found.")
        sys.exit(1)
    # Sort by episode number (episodes without numbers go to the end)
    episodes.sort(key=lambda e: (e["episode_number"] is None, e["episode_number"] or 0))
    print(f"\nFound {len(episodes)} episodes:\n")
    for ep in episodes:
        num = ep['episode_number']
        label = f"Ep{num}" if num else " "
        dur = ep['duration'] or "?"
        display_num = f"{num:>2}" if num else " ?"
        print(f" {display_num}. [{label:>4}] {ep['title']} ({dur})")
    print()
    while True:
        try:
            choice = input("Select episode number (or 'q' to quit): ").strip()
        except EOFError:
            # stdin closed (e.g. piped input exhausted).  Previously EOFError
            # was treated like bad input, so the loop spun forever re-printing
            # the prompt message; abort instead.
            sys.exit(1)
        if choice.lower() == 'q':
            sys.exit(0)
        try:
            num = int(choice)
        except ValueError:
            print(" Enter an episode number")
            continue
        # Match by episode number first
        match = next((ep for ep in episodes if ep["episode_number"] == num), None)
        if match:
            return match
        print(f" No episode #{num} found. Episodes: {', '.join(str(e['episode_number']) for e in episodes if e['episode_number'])}")
def download_episode(episode: dict) -> Path:
    """Download episode audio, using a cache to avoid re-downloading.

    Returns the path to the cached MP3.  Any failure — network error, disk
    error, or Ctrl-C — removes the partial file first: a truncated MP3 left
    in the cache would be treated as complete by the cached.exists() fast
    path on every later run.  Network errors exit the program.
    """
    EPISODE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    # Build a filename from episode number or title slug
    if episode["episode_number"]:
        filename = f"episode-{episode['episode_number']}.mp3"
    else:
        filename = slugify(episode["title"]) + ".mp3"
    cached = EPISODE_CACHE_DIR / filename
    if cached.exists():
        size_mb = cached.stat().st_size / (1024 * 1024)
        print(f"Using cached: {cached.name} ({size_mb:.1f} MB)")
        return cached
    print(f"Downloading: {episode['title']}...")
    try:
        resp = requests.get(episode["audio_url"], stream=True, timeout=30)
        resp.raise_for_status()
        total = int(resp.headers.get("content-length", 0))
        downloaded = 0
        with open(cached, "wb") as f:
            for chunk in resp.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
                downloaded += len(chunk)
                if total:
                    pct = downloaded / total * 100
                    print(f"\r {downloaded / (1024*1024):.1f} / {total / (1024*1024):.1f} MB ({pct:.0f}%)", end="", flush=True)
                else:
                    print(f"\r {downloaded / (1024*1024):.1f} MB", end="", flush=True)
        print()
    except requests.RequestException as e:
        if cached.exists():
            cached.unlink()
        print(f"\nError downloading episode: {e}")
        sys.exit(1)
    except BaseException:
        # KeyboardInterrupt or a disk/OS error mid-write: drop the partial
        # file, then re-raise so the interruption still surfaces.
        if cached.exists():
            cached.unlink()
        raise
    size_mb = cached.stat().st_size / (1024 * 1024)
    print(f"Saved: {cached.name} ({size_mb:.1f} MB)")
    return cached
def main():
parser = argparse.ArgumentParser(description="Extract short-form clips from podcast episodes")
parser.add_argument("audio_file", help="Path to episode MP3")
parser.add_argument("audio_file", nargs="?", help="Path to episode MP3 (optional if using --pick)")
parser.add_argument("--pick", "-p", action="store_true",
help="Pick an episode from Castopod to clip")
parser.add_argument("--transcript", "-t", help="Path to labeled transcript (.txt)")
parser.add_argument("--chapters", "-c", help="Path to chapters JSON")
parser.add_argument("--count", "-n", type=int, default=3, help="Number of clips to extract (default: 3)")
@@ -911,13 +1169,27 @@ def main():
help="Use quality model for everything (slower, no two-pass)")
args = parser.parse_args()
audio_path = Path(args.audio_file).expanduser().resolve()
if not audio_path.exists():
print(f"Error: Audio file not found: {audio_path}")
sys.exit(1)
# Default to --pick when no audio file provided
if not args.audio_file and not args.pick:
args.pick = True
if args.pick:
episodes = fetch_episodes()
selected = pick_episode(episodes)
audio_path = download_episode(selected)
episode_number = selected["episode_number"] or args.episode_number
else:
audio_path = Path(args.audio_file).expanduser().resolve()
if not audio_path.exists():
print(f"Error: Audio file not found: {audio_path}")
sys.exit(1)
episode_number = None
# Detect episode number
episode_number = args.episode_number or detect_episode_number(str(audio_path))
if not args.pick:
episode_number = args.episode_number or detect_episode_number(str(audio_path))
if args.episode_number:
episode_number = args.episode_number
# Resolve output directory
if args.output_dir:
@@ -959,9 +1231,9 @@ def main():
# Step 2: Fast transcription for clip identification
two_pass = not args.single_pass and args.fast_model != args.quality_model
if two_pass:
print(f"\n[2/6] Fast transcription for clip identification ({args.fast_model})...")
print(f"\n[2/7] Fast transcription for clip identification ({args.fast_model})...")
else:
print(f"\n[2/5] Transcribing with word-level timestamps ({args.quality_model})...")
print(f"\n[2/6] Transcribing with word-level timestamps ({args.quality_model})...")
identify_model = args.fast_model if two_pass else args.quality_model
segments = transcribe_with_timestamps(
str(audio_path), identify_model, labeled_transcript
@@ -980,7 +1252,7 @@ def main():
print(f" Chapters loaded: {chapters_path.name}")
# Step 3: LLM selects best moments
step_total = 6 if two_pass else 5
step_total = 7 if two_pass else 6
print(f"\n[3/{step_total}] Selecting {args.count} best moments with LLM...")
clips = select_clips_with_llm(transcript_text, labeled_transcript,
chapters_json, args.count)
@@ -994,10 +1266,19 @@ def main():
f"({clip['start_time']:.1f}s - {clip['end_time']:.1f}s, {duration:.0f}s)")
print(f" \"{clip['caption_text']}\"")
# Step 4: Refine clip timestamps with quality model (two-pass only)
# Generate social media metadata
meta_step = 4
print(f"\n[{meta_step}/{step_total}] Generating social media descriptions...")
clips = generate_social_metadata(clips, labeled_transcript, episode_number)
for i, clip in enumerate(clips):
if "description" in clip:
print(f" Clip {i+1}: {clip['description'][:80]}...")
print(f" {' '.join(clip.get('hashtags', []))}")
# Step 5: Refine clip timestamps with quality model (two-pass only)
refined = {}
if two_pass:
print(f"\n[4/{step_total}] Refining clips with {args.quality_model}...")
print(f"\n[5/{step_total}] Refining clips with {args.quality_model}...")
refined = refine_clip_timestamps(
str(audio_path), clips, args.quality_model, labeled_transcript
)
@@ -1008,7 +1289,7 @@ def main():
clips[i:i+1] = snap_to_sentences([clip], clip_segments)
# Step N: Extract audio clips
extract_step = 5 if two_pass else 4
extract_step = 6 if two_pass else 5
print(f"\n[{extract_step}/{step_total}] Extracting audio clips...")
for i, clip in enumerate(clips):
slug = slugify(clip["title"])
@@ -1020,7 +1301,7 @@ def main():
else:
print(f" Error extracting clip {i+1} audio")
video_step = 6 if two_pass else 5
video_step = 7 if two_pass else 6
if args.audio_only:
print(f"\n[{video_step}/{step_total}] Skipped video generation (--audio-only)")
print(f"\nDone! {len(clips)} audio clips saved to {output_dir}")
@@ -1071,6 +1352,27 @@ def main():
else:
print(f" Error generating clip {i+1} video")
# Save clips metadata for social upload
metadata_path = output_dir / "clips-metadata.json"
metadata = []
for i, clip in enumerate(clips):
slug = slugify(clip["title"])
metadata.append({
"title": clip["title"],
"clip_file": f"clip-{i+1}-{slug}.mp4",
"audio_file": f"clip-{i+1}-{slug}.mp3",
"caption_text": clip.get("caption_text", ""),
"description": clip.get("description", ""),
"hashtags": clip.get("hashtags", []),
"start_time": clip["start_time"],
"end_time": clip["end_time"],
"duration": round(clip["end_time"] - clip["start_time"], 1),
"episode_number": episode_number,
})
with open(metadata_path, "w") as f:
json.dump(metadata, f, indent=2)
print(f"\nSocial metadata: {metadata_path}")
# Summary
print(f"\nDone! {len(clips)} clips saved to {output_dir}")
for i, clip in enumerate(clips):