Make make_clips.py resilient — timeouts, retries, skip-on-failure

- 60s timeout + retry on all LLM calls
- 120-300s timeout on all subprocess/ffmpeg calls
- Per-clip error isolation (one failure doesn't kill the run)
- Progress indicators for each clip being processed

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 17:36:41 -06:00
parent 4589670b37
commit e0fb3cac68
1 changed file with 142 additions and 114 deletions
@@ -23,6 +23,8 @@ import tempfile
 import xml.etree.ElementTree as ET
 from pathlib import Path
 import time
 import requests
 from dotenv import load_dotenv
@@ -46,6 +48,50 @@ WIDTH = 1080
 HEIGHT = 1920
 def _llm_request(prompt: str, max_tokens: int = 2048, temperature: float = 0.3,
                  timeout: int = 60) -> str | None:
    """Make an LLM API call with timeout and retry. Returns content or None on failure."""
    for attempt in range(2):
        try:
            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-sonnet-4-5",
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": max_tokens,
                    "temperature": temperature,
                },
                timeout=timeout,
            )
            if response.status_code != 200:
                print(f"    LLM error (HTTP {response.status_code}): {response.text[:200]}")
                if attempt == 0:
                    print(f"    Retrying in 5s...")
                    time.sleep(5)
                    continue
                return None
            return response.json()["choices"][0]["message"]["content"].strip()
        except requests.Timeout:
            print(f"    LLM request timed out ({timeout}s)")
            if attempt == 0:
                print(f"    Retrying in 5s...")
                time.sleep(5)
                continue
            return None
        except Exception as e:
            print(f"    LLM request failed: {e}")
            if attempt == 0:
                print(f"    Retrying in 5s...")
                time.sleep(5)
                continue
            return None
    return None
 def _build_whisper_prompt(labeled_transcript: str) -> str:
    """Build an initial_prompt for Whisper from the labeled transcript.
@@ -186,7 +232,12 @@ def refine_clip_timestamps(audio_path: str, clips: list[dict],
                "ffmpeg", "-y", "-ss", str(seg_start), "-t", str(seg_end - seg_start),
                "-i", audio_path, "-ar", "16000", "-ac", "1", seg_path,
            ]
-            result = subprocess.run(cmd, capture_output=True, text=True)
+            try:
                result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
            except subprocess.TimeoutExpired:
                print(f"      Clip {i+1}: ffmpeg timed out (120s), skipping")
                refined[i] = []
                continue
            if result.returncode != 0:
                print(f"      Clip {i+1}: Failed to extract segment")
                refined[i] = []
@@ -279,25 +330,11 @@ IMPORTANT:
 Respond with ONLY a JSON array, no markdown or explanation:
 [{{"title": "...", "start_time": 0.0, "end_time": 0.0, "caption_text": "..."}}]"""
-    response = requests.post(
+    content = _llm_request(prompt, max_tokens=2048, temperature=0.3, timeout=60)
-        "https://openrouter.ai/api/v1/chat/completions",
+    if content is None:
-        headers={
+        print("    Failed to get clip selections from LLM — aborting")
-            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        return []
            "Content-Type": "application/json",
        },
        json={
            "model": "anthropic/claude-sonnet-4-5",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 2048,
            "temperature": 0.3,
        },
    )
    if response.status_code != 200:
        print(f"Error from OpenRouter: {response.text}")
        sys.exit(1)
    content = response.json()["choices"][0]["message"]["content"].strip()
    if content.startswith("```"):
        content = re.sub(r"^```(?:json)?\n?", "", content)
        content = re.sub(r"\n?```$", "", content)
@@ -307,7 +344,7 @@ Respond with ONLY a JSON array, no markdown or explanation:
    except json.JSONDecodeError as e:
        print(f"Error parsing LLM response: {e}")
        print(f"Response was: {content[:500]}")
-        sys.exit(1)
+        return []
    # Validate and clamp durations
    validated = []
@@ -349,25 +386,11 @@ For each clip, generate:
 Respond with ONLY a JSON array matching the clip order:
 [{{"description": "...", "hashtags": ["#tag1", "#tag2", ...]}}]"""
-    response = requests.post(
+    content = _llm_request(prompt, max_tokens=2048, temperature=0.7, timeout=60)
-        "https://openrouter.ai/api/v1/chat/completions",
+    if content is None:
-        headers={
+        print("    Failed to generate social metadata — skipping")
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "model": "anthropic/claude-sonnet-4-5",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 2048,
            "temperature": 0.7,
        },
    )
    if response.status_code != 200:
        print(f"Error from OpenRouter: {response.text}")
        return clips
    content = response.json()["choices"][0]["message"]["content"].strip()
    if content.startswith("```"):
        content = re.sub(r"^```(?:json)?\n?", "", content)
        content = re.sub(r"\n?```$", "", content)
@@ -777,43 +800,25 @@ RULES:
 RAW TEXT ({len(words)} words):
 {raw_text}"""
-    try:
+    polished = _llm_request(prompt, max_tokens=2048, temperature=0, timeout=30)
-        response = requests.post(
+    if polished is None:
-            "https://openrouter.ai/api/v1/chat/completions",
+        print(f"      Polish failed, using raw text")
-            headers={
+        return words
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": "anthropic/claude-sonnet-4-5",
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": 2048,
                "temperature": 0,
            },
            timeout=30,
        )
        if response.status_code != 200:
            print(f"      Polish failed ({response.status_code}), using raw text")
            return words
-        polished = response.json()["choices"][0]["message"]["content"].strip()
+    polished_words = polished.split()
        polished_words = polished.split()
-        if len(polished_words) != len(words):
+    if len(polished_words) != len(words):
-            print(f"      Polish word count mismatch ({len(polished_words)} vs {len(words)}), using raw text")
+        print(f"      Polish word count mismatch ({len(polished_words)} vs {len(words)}), using raw text")
-            return words
+        return words
-        changes = 0
+    changes = 0
-        for i, pw in enumerate(polished_words):
+    for i, pw in enumerate(polished_words):
-            if pw != words[i]["word"]:
+        if pw != words[i]["word"]:
-                changes += 1
+            changes += 1
-                words[i]["word"] = pw
+            words[i]["word"] = pw
-        if changes:
+    if changes:
-            print(f"      Polished {changes} words")
+        print(f"      Polished {changes} words")
    except Exception as e:
        print(f"      Polish error: {e}")
    return words
@@ -898,8 +903,12 @@ def extract_clip_audio(audio_path: str, start: float, end: float,
        output_path,
    ]
-    result = subprocess.run(cmd, capture_output=True, text=True)
+    try:
-    return result.returncode == 0
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        return result.returncode == 0
    except subprocess.TimeoutExpired:
        print(f"    ffmpeg audio extraction timed out (120s)")
        return False
 def generate_background_image(episode_number: int, clip_title: str,
@@ -1153,7 +1162,11 @@ def generate_clip_video(audio_path: str, background_path: str,
            output_path,
        ]
-    result = subprocess.run(cmd, capture_output=True, text=True)
+    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired:
        print(f"    ffmpeg video generation timed out (300s)")
        return False
    if result.returncode != 0:
        print(f"    ffmpeg error: {result.stderr[-300:]}")
        return False
@@ -1235,7 +1248,12 @@ def generate_clip_video_remotion(
        output_path,
    ]
-    result = subprocess.run(cmd, capture_output=True, text=True, cwd=str(REMOTION_DIR))
+    try:
        result = subprocess.run(cmd, capture_output=True, text=True, cwd=str(REMOTION_DIR), timeout=180)
    except subprocess.TimeoutExpired:
        props_path.unlink(missing_ok=True)
        print(f"    Remotion render timed out (180s)")
        return False
    props_path.unlink(missing_ok=True)
    if result.returncode != 0:
@@ -1488,6 +1506,9 @@ def main():
    print(f"\n[3/{step_total}] Selecting {args.count} best moments with LLM...")
    clips = select_clips_with_llm(transcript_text, labeled_transcript,
                                   chapters_json, args.count)
    if not clips:
        print("\nNo clips selected — aborting.")
        return
    # Snap to sentence boundaries so clips don't start/end mid-sentence
    clips = snap_to_sentences(clips, segments)
@@ -1524,14 +1545,18 @@ def main():
    extract_step = 6 if two_pass else 5
    print(f"\n[{extract_step}/{step_total}] Extracting audio clips...")
    for i, clip in enumerate(clips):
        print(f"    [{i+1}/{len(clips)}] \"{clip['title']}\"...")
        slug = slugify(clip["title"])
        mp3_path = output_dir / f"clip-{i+1}-{slug}.mp3"
-        if extract_clip_audio(str(audio_path), clip["start_time"], clip["end_time"],
+        try:
-                              str(mp3_path)):
+            if extract_clip_audio(str(audio_path), clip["start_time"], clip["end_time"],
-            print(f"    Clip {i+1} audio: {mp3_path.name}")
+                                  str(mp3_path)):
-        else:
+                print(f"    Clip {i+1} audio: {mp3_path.name}")
-            print(f"    Error extracting clip {i+1} audio")
+            else:
                print(f"    Error extracting clip {i+1} audio — skipping")
        except Exception as e:
            print(f"    Clip {i+1} audio failed: {e} — skipping")
    video_step = 7 if two_pass else 6
    if args.audio_only:
@@ -1553,49 +1578,52 @@ def main():
            mp4_path = output_dir / f"clip-{i+1}-{slug}.mp4"
            duration = clip["end_time"] - clip["start_time"]
-            print(f"    Clip {i+1}: Generating video...")
+            print(f"    [{i+1}/{len(clips)}] \"{clip['title']}\" ({duration:.0f}s)...")
-            # Get word timestamps — use refined segments if available
+            try:
-            word_source = refined[i] if (two_pass and i in refined and refined[i]) else segments
+                # Get word timestamps — use refined segments if available
-            clip_words = get_words_in_range(word_source, clip["start_time"], clip["end_time"])
+                word_source = refined[i] if (two_pass and i in refined and refined[i]) else segments
                clip_words = get_words_in_range(word_source, clip["start_time"], clip["end_time"])
-            # Add speaker labels
+                # Add speaker labels
-            clip_words = add_speaker_labels(clip_words, labeled_transcript,
+                clip_words = add_speaker_labels(clip_words, labeled_transcript,
-                                            clip["start_time"], clip["end_time"],
+                                                clip["start_time"], clip["end_time"],
-                                            word_source)
+                                                word_source)
-            # Polish text with LLM (fix punctuation, capitalization, mishearings)
+                # Polish text with LLM (fix punctuation, capitalization, mishearings)
-            clip_words = polish_clip_words(clip_words, labeled_transcript)
+                clip_words = polish_clip_words(clip_words, labeled_transcript)
-            # Group words into timed caption lines
+                # Group words into timed caption lines
-            caption_lines = group_words_into_lines(
+                caption_lines = group_words_into_lines(
-                clip_words, clip["start_time"], duration
+                    clip_words, clip["start_time"], duration
-            )
+                )
-            if use_remotion:
+                if use_remotion:
-                if generate_clip_video_remotion(
+                    if generate_clip_video_remotion(
-                    str(mp3_path), caption_lines, clip["start_time"],
+                        str(mp3_path), caption_lines, clip["start_time"],
-                    clip["title"], episode_number, str(mp4_path), duration
+                        clip["title"], episode_number, str(mp4_path), duration
-                ):
+                    ):
-                    file_size = mp4_path.stat().st_size / (1024 * 1024)
+                        file_size = mp4_path.stat().st_size / (1024 * 1024)
-                    print(f"    Clip {i+1} video: {mp4_path.name} ({file_size:.1f} MB)")
+                        print(f"    Clip {i+1} video: {mp4_path.name} ({file_size:.1f} MB)")
                    else:
                        print(f"    Clip {i+1} video failed (Remotion) — skipping")
                else:
-                    print(f"    Error generating clip {i+1} video (Remotion)")
+                    # Legacy PIL+ffmpeg renderer
-            else:
+                    bg_path = str(tmp_dir / f"bg_{i}.png")
-                # Legacy PIL+ffmpeg renderer
+                    generate_background_image(episode_number, clip["title"], bg_path)
                bg_path = str(tmp_dir / f"bg_{i}.png")
                generate_background_image(episode_number, clip["title"], bg_path)
-                clip_tmp = tmp_dir / f"clip_{i}"
+                    clip_tmp = tmp_dir / f"clip_{i}"
-                clip_tmp.mkdir(exist_ok=True)
+                    clip_tmp.mkdir(exist_ok=True)
-                if generate_clip_video(str(mp3_path), bg_path, caption_lines,
+                    if generate_clip_video(str(mp3_path), bg_path, caption_lines,
-                                       clip["start_time"], str(mp4_path),
+                                           clip["start_time"], str(mp4_path),
-                                       duration, clip_tmp):
+                                           duration, clip_tmp):
-                    file_size = mp4_path.stat().st_size / (1024 * 1024)
+                        file_size = mp4_path.stat().st_size / (1024 * 1024)
-                    print(f"    Clip {i+1} video: {mp4_path.name} ({file_size:.1f} MB)")
+                        print(f"    Clip {i+1} video: {mp4_path.name} ({file_size:.1f} MB)")
-                else:
+                    else:
-                    print(f"    Error generating clip {i+1} video")
+                        print(f"    Clip {i+1} video failed (ffmpeg) — skipping")
            except Exception as e:
                print(f"    Clip {i+1} video failed: {e} — skipping")
    # Save clips metadata for social upload
    metadata_path = output_dir / "clips-metadata.json"