- Returning callers now keep their voice across sessions (stored in regulars.json) - Backfilled voice assignments for all 11 existing regulars - Discord button on homepage + link in all page footers - REC and On-Air buttons now toggle together (both directions) - Fixed host mic double-stream bug (stem_mic vs host_stream conflict) - SEO: JSON-LD structured data on episode + how-it-works pages - SEO: noscript fallbacks, RSS links, twitter meta tags - Episode 9 transcript and sitemap update Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
728 lines
26 KiB
Python
Executable File
728 lines
26 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Podcast Episode Publisher
|
|
Transcribes audio, generates metadata, and publishes to Castopod.
|
|
|
|
Usage:
|
|
python publish_episode.py /path/to/episode.mp3
|
|
python publish_episode.py /path/to/episode.mp3 --episode-number 3
|
|
python publish_episode.py /path/to/episode.mp3 --dry-run
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import base64
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
import ssl
|
|
import requests
|
|
import urllib3
|
|
from requests.adapters import HTTPAdapter
|
|
from urllib3.util.ssl_ import create_urllib3_context
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
class TLSAdapter(HTTPAdapter):
    """Transport adapter for servers with older TLS configurations.

    The SSL context built here lowers the OpenSSL security level to 1 and
    switches off hostname checking and certificate verification, so the
    server certificate is NOT validated on connections that go through
    this adapter.
    """

    def init_poolmanager(self, *args, **kwargs):
        """Create the pool manager using the relaxed SSL context."""
        relaxed_context = create_urllib3_context()
        relaxed_context.set_ciphers('DEFAULT@SECLEVEL=1')
        relaxed_context.check_hostname = False
        relaxed_context.verify_mode = ssl.CERT_NONE
        kwargs.update(ssl_context=relaxed_context)
        return super().init_poolmanager(*args, **kwargs)

    def send(self, *args, **kwargs):
        """Send a request with ``verify`` forced to False."""
        return super().send(*args, **{**kwargs, 'verify': False})
|
|
|
|
|
|
# Shared session for Castopod requests: every https:// URL requested through
# it is handled by the TLS-compatibility adapter.
_session = requests.Session()
_session.mount('https://', TLSAdapter())

# Load environment variables from the .env file located next to this script
load_dotenv(Path(__file__).with_name(".env"))
|
|
|
|
# Configuration
#
# NOTE(review): the secrets below were committed to source control in plain
# text. Each one can be overridden through an environment variable of the same
# name (for example via the .env file). The literal fallbacks are kept only so
# existing setups keep working; they should be rotated and then removed.
CASTOPOD_URL = "https://podcast.macneilmediagroup.com"
CASTOPOD_USERNAME = "admin"
CASTOPOD_PASSWORD = os.getenv("CASTOPOD_PASSWORD") or "podcast2026api"
PODCAST_ID = 1
PODCAST_HANDLE = "LukeAtTheRoost"
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

WHISPER_MODEL = "base"  # Options: tiny, base, small, medium, large

# BunnyCDN Storage
BUNNY_STORAGE_ZONE = "lukeattheroost"
BUNNY_STORAGE_KEY = os.getenv("BUNNY_STORAGE_KEY") or "92749cd3-85df-4cff-938fe35eb994-30f8-4cf2"
BUNNY_STORAGE_REGION = "la"  # Los Angeles

# NAS Configuration for chapters upload
NAS_HOST = "mmgnas-10g"
NAS_USER = "luke"
NAS_SSH_PORT = 8001
DOCKER_PATH = "/share/CACHEDEV1_DATA/.qpkg/container-station/bin/docker"
CASTOPOD_CONTAINER = "castopod-castopod-1"
MARIADB_CONTAINER = "castopod-mariadb-1"
DB_USER = "castopod"
DB_PASS = os.getenv("DB_PASS") or "BYtbFfk3ndeVabb26xb0UyKU"
DB_NAME = "castopod"
|
|
|
|
|
|
def get_auth_header():
    """Build the HTTP Basic Auth header used for the Castopod API.

    Returns:
        A dict with a single "Authorization" entry whose value is "Basic "
        followed by the base64 encoding of "username:password".
    """
    user_and_password = f"{CASTOPOD_USERNAME}:{CASTOPOD_PASSWORD}"
    token = base64.b64encode(user_and_password.encode()).decode()
    return {"Authorization": f"Basic {token}"}
|
|
|
|
|
|
def label_transcript_speakers(text):
    """Add LUKE:/CALLER: speaker labels to transcript using LLM.

    The transcript is split into chunks of roughly 8000 characters, each chunk
    is sent to OpenRouter together with the labeling prompt, and the labeled
    chunks are joined back together. Labeling is best-effort: when a request
    fails, times out, or returns an unusable body, the raw chunk text is used
    for that chunk instead.

    Args:
        text: Unlabeled transcript text.

    Returns:
        The transcript with speaker labels inserted and turns separated by
        blank lines. An empty input yields an empty string.
    """
    import time as _time

    prompt = """Insert speaker labels into this radio show transcript. The show is "Luke at the Roost". The host is LUKE. Callers call in one at a time.

CRITICAL: Output EVERY SINGLE WORD from the input. Do NOT summarize, shorten, paraphrase, or skip ANY text. The output must contain the EXACT SAME words as the input, with ONLY speaker labels and line breaks added.

At each speaker change, insert a blank line and the new speaker's label (e.g., "LUKE:" or "REGGIE:").

Speaker identification:
- LUKE is the host — he introduces callers, asks questions, does sponsor reads, opens and closes the show
- Callers are introduced by name by Luke (e.g., "let's talk to Earl", "next up Brenda")
- Use caller FIRST NAME in caps as the label
- When Luke says "Tell me about..." or asks a question, that's LUKE
- When someone responds with their story/opinion/answer, that's the CALLER

Output format — ONLY the labeled transcript with blank lines between turns. No notes, no commentary.

TRANSCRIPT:
"""
    speaker_label = re.compile(r'^([A-Z][A-Z\s\'-]+?):')

    chunks = _split_transcript_chunks(text)

    labeled_parts = []
    last_speaker = ""
    for i, chunk in enumerate(chunks):
        full_prompt = prompt + chunk
        if last_speaker:
            full_prompt += f"\n\nCONTEXT: The previous section ended with speaker {last_speaker}"

        content = _request_speaker_labels(full_prompt)
        if content is None:
            print(f" Warning: Speaker labeling failed for chunk {i+1}, using raw text")
            labeled_parts.append(chunk)
        else:
            labeled_parts.append(content)

            # Remember the last speaker so the next chunk can be told who
            # was talking when this one ended.
            for line in reversed(content.strip().split('\n')):
                m = speaker_label.match(line.strip())
                if m:
                    last_speaker = m.group(1)
                    break

        # Short pause between consecutive API calls
        if i < len(chunks) - 1:
            _time.sleep(0.5)

    result = "\n\n".join(labeled_parts)
    result = re.sub(r'\n{3,}', '\n\n', result)
    # Normalize: SPEAKER:\ntext -> SPEAKER: text
    result = re.sub(r'^([A-Z][A-Z\s\'-]+?):\s*\n(?!\n)', r'\1: ', result, flags=re.MULTILINE)
    return result


def _split_transcript_chunks(text, max_chars=8000, min_chars=4000, merge_below=1000):
    """Split text into chunks of about max_chars, preferring sentence ends.

    A split point is searched in the first max_chars characters, trying
    '. ', '? ', '! ' and finally a plain space, and is accepted only when it
    lies at or beyond min_chars. If none qualifies the text is cut at
    max_chars. A final remainder shorter than merge_below is appended to the
    previous chunk instead of becoming its own chunk.
    """
    chunks = []
    remaining = text
    while remaining:
        if len(remaining) <= max_chars:
            if chunks and len(remaining) < merge_below:
                chunks[-1] = chunks[-1] + " " + remaining
            else:
                chunks.append(remaining)
            break

        window = remaining[:max_chars]
        for boundary in ('. ', '? ', '! ', ' '):
            pos = window.rfind(boundary)
            if pos >= min_chars:
                break
        else:
            # No usable boundary at all: fall back to a hard cut
            pos = max_chars

        chunks.append(remaining[:pos + 1].strip())
        remaining = remaining[pos + 1:].strip()
    return chunks


def _request_speaker_labels(full_prompt):
    """Send one labeling prompt to OpenRouter; return the text or None on failure."""
    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": "anthropic/claude-3.5-sonnet",
                "messages": [{"role": "user", "content": full_prompt}],
                "max_tokens": 8192,
                "temperature": 0
            },
            # (connect, read) seconds; without a timeout the call can hang forever
            timeout=(10, 300),
        )
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        content = response.json()["choices"][0]["message"]["content"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        return None

    # Strip a surrounding markdown code fence if the model added one
    if content.startswith("```"):
        content = re.sub(r'^```\w*\n?', '', content)
        content = re.sub(r'\n?```$', '', content)

    return content or None
|
|
|
|
|
|
def transcribe_audio(audio_path: str) -> dict:
    """Transcribe audio using faster-whisper with timestamps.

    Exits the process with status 1 when faster-whisper cannot be imported.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        A dict with "segments" (a list of dicts holding "start", "end" and
        the stripped "text" of each segment), "full_text" (all segment texts
        joined with single spaces) and "duration" (the reported audio
        duration truncated to an int).
    """
    print(f"[1/5] Transcribing {audio_path}...")

    try:
        from faster_whisper import WhisperModel
    except ImportError:
        print("Error: faster-whisper not installed. Run: pip install faster-whisper")
        sys.exit(1)

    whisper = WhisperModel(WHISPER_MODEL, compute_type="int8")
    raw_segments, info = whisper.transcribe(audio_path, word_timestamps=True)

    transcript_segments = [
        {"start": seg.start, "end": seg.end, "text": seg.text.strip()}
        for seg in raw_segments
    ]
    segment_texts = [entry["text"] for entry in transcript_segments]

    print(f" Transcribed {info.duration:.1f} seconds of audio")

    return {
        "segments": transcript_segments,
        "full_text": " ".join(segment_texts),
        "duration": int(info.duration)
    }
|
|
|
|
|
|
def generate_metadata(transcript: dict, episode_number: int) -> dict:
    """Use LLM to generate title, description, and chapters from transcript.

    Every failure (missing API key, network error, non-200 status, unexpected
    response shape, invalid JSON, missing keys) prints an error and exits the
    process with status 1.

    Args:
        transcript: Dict with a "segments" list; each segment provides
            "start" (seconds) and "text".
        episode_number: Number used for the "Episode N:" title prefix.

    Returns:
        The metadata dict parsed from the model's JSON answer. It is checked
        to contain at least "title" and "chapters".
    """
    print("[2/5] Generating metadata with LLM...")

    if not OPENROUTER_API_KEY:
        print("Error: OPENROUTER_API_KEY not set in .env")
        sys.exit(1)

    # Prepare transcript with [MM:SS] timestamps for chapter detection
    timestamped_text = "".join(
        f"[{int(seg['start'] // 60):02d}:{int(seg['start'] % 60):02d}] {seg['text']}\n"
        for seg in transcript["segments"]
    )

    prompt = f"""Analyze this podcast transcript and generate metadata.

TRANSCRIPT:
{timestamped_text}

Generate a JSON response with:
1. "title": A catchy episode title (include "Episode {episode_number}:" prefix)
2. "description": A 2-4 sentence description summarizing the episode's content. Mention callers by name and their topics. End with something engaging.
3. "chapters": An array of chapter objects with "startTime" (in seconds) and "title". Include:
   - "Intro" at 0 seconds
   - A chapter for each caller/topic (use caller names if mentioned)
   - "Outro" near the end

Respond with ONLY valid JSON, no markdown or explanation."""

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": "anthropic/claude-3-haiku",
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.7
            },
            # (connect, read) seconds; without a timeout the call can hang forever
            timeout=(10, 300),
        )
    except requests.RequestException as e:
        print(f"Error contacting OpenRouter: {e}")
        sys.exit(1)

    if response.status_code != 200:
        print(f"Error from OpenRouter: {response.text}")
        sys.exit(1)

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        print(f"Error: Unexpected response from OpenRouter: {response.text[:500]}")
        sys.exit(1)

    # Parse JSON from response (handle markdown code blocks)
    content = content.strip()
    if content.startswith("```"):
        content = re.sub(r"^```(?:json)?\n?", "", content)
        content = re.sub(r"\n?```$", "", content)

    try:
        metadata = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"Error parsing LLM response: {e}")
        print(f"Response was: {content}")
        sys.exit(1)

    # Both keys are read just below; fail with a clear message rather than a
    # KeyError traceback if the model left one out.
    if not isinstance(metadata, dict) or "title" not in metadata or "chapters" not in metadata:
        print('Error: LLM response is missing "title" or "chapters"')
        print(f"Response was: {content}")
        sys.exit(1)

    print(f" Title: {metadata['title']}")
    print(f" Chapters: {len(metadata['chapters'])}")

    return metadata
|
|
|
|
|
|
def create_episode(audio_path: str, metadata: dict, episode_number: int) -> dict:
    """Create episode on Castopod using curl (handles large file uploads better).

    Any failure (curl missing, timeout, non-zero exit, non-JSON response,
    response without an "id") prints an error and exits with status 1.

    Args:
        audio_path: Path of the audio file to upload.
        metadata: Dict providing "title" and "description".
        episode_number: Episode number sent to Castopod.

    Returns:
        The episode dict parsed from Castopod's JSON response.
    """
    print("[3/5] Creating episode on Castopod...")

    auth_value = get_auth_header()["Authorization"]
    slug = re.sub(r'[^a-z0-9]+', '-', metadata["title"].lower()).strip('-')

    # Free-text fields use --form-string so curl sends them literally. With
    # -F, a value starting with "@" or "<" is read from a file and ";type="
    # is parsed as an option, which would corrupt generated titles and
    # descriptions.
    cmd = [
        "curl", "-sk", "-X", "POST",
        f"{CASTOPOD_URL}/api/rest/v1/episodes",
        "-H", f"Authorization: {auth_value}",
        "-F", f"audio_file=@{audio_path};type=audio/mpeg",
        "--form-string", f"title={metadata['title']}",
        "--form-string", f"slug={slug}",
        "--form-string", f"description={metadata['description']}",
        "-F", "parental_advisory=explicit",
        "-F", "type=full",
        "-F", f"podcast_id={PODCAST_ID}",
        "-F", "created_by=1",
        "-F", "updated_by=1",
        "-F", f"episode_number={episode_number}",
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired:
        print("Error uploading: curl timed out after 300 seconds")
        sys.exit(1)
    except OSError as e:
        print(f"Error uploading: could not run curl: {e}")
        sys.exit(1)

    if result.returncode != 0:
        print(f"Error uploading: {result.stderr}")
        sys.exit(1)

    try:
        episode = json.loads(result.stdout)
    except json.JSONDecodeError:
        print(f"Error parsing response: {result.stdout[:500]}")
        sys.exit(1)

    if "id" not in episode:
        print(f"Error creating episode: {result.stdout[:500]}")
        sys.exit(1)

    print(f" Created episode ID: {episode['id']}")
    print(f" Slug: {episode['slug']}")

    return episode
|
|
|
|
|
|
def publish_episode(episode_id: int) -> dict:
    """Publish the episode.

    Posts to the episode's publish endpoint with publication_method "now".
    A non-200 status or a non-JSON body prints an error and exits with
    status 1.

    Args:
        episode_id: ID of the episode to publish.

    Returns:
        The episode dict parsed from the JSON response.
    """
    print("[4/5] Publishing episode...")

    response = _session.post(
        f"{CASTOPOD_URL}/api/rest/v1/episodes/{episode_id}/publish",
        headers=get_auth_header(),
        data={
            "publication_method": "now",
            "created_by": "1"
        },
        # Without a timeout the request can block indefinitely
        timeout=60,
    )

    if response.status_code != 200:
        print(f"Error publishing: {response.text}")
        sys.exit(1)

    try:
        episode = response.json()
    except ValueError:
        print(f"Error publishing: unexpected response: {response.text[:500]}")
        sys.exit(1)

    # published_at is handled both as a dict with a "date" entry and as a
    # plain value.
    published_at = episode.get("published_at", {})
    if isinstance(published_at, dict):
        print(f" Published at: {published_at.get('date', 'unknown')}")
    else:
        print(f" Published at: {published_at}")

    return episode
|
|
|
|
|
|
def save_chapters(metadata: dict, output_path: str):
    """Write the episode chapters to a JSON file.

    The file holds an object with "version" set to "1.2.0" and "chapters"
    taken from metadata["chapters"], indented by two spaces.

    Args:
        metadata: Dict containing a "chapters" entry.
        output_path: Destination file path; an existing file is overwritten.
    """
    payload = dict(version="1.2.0", chapters=metadata["chapters"])

    with open(output_path, "w") as out_file:
        json.dump(payload, out_file, indent=2)

    print(f" Chapters saved to: {output_path}")
|
|
|
|
|
|
def run_ssh_command(command: str, timeout: int = 30) -> tuple[bool, str]:
    """Run a command on the NAS via SSH.

    Args:
        command: Command string passed to ssh as the remote command.
        timeout: Seconds to wait before giving up.

    Returns:
        A (success, output) tuple. success is True when ssh exits with
        status 0. output is the stripped stdout, or the stripped stderr when
        stdout is empty. On a timeout or any other exception success is
        False and output is a description of the failure.
    """
    destination = f"{NAS_USER}@{NAS_HOST}"
    argv = ["ssh", "-p", str(NAS_SSH_PORT), destination, command]

    try:
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        return False, "SSH command timed out"
    except Exception as exc:
        return False, str(exc)

    output = proc.stdout.strip() or proc.stderr.strip()
    return proc.returncode == 0, output
|
|
|
|
|
|
def upload_chapters_to_castopod(episode_slug: str, episode_id: int, chapters_path: str) -> bool:
    """Upload chapters file to Castopod via SSH and link in database.

    Steps: write the file into the Castopod container's media directory,
    insert a cp_media row for it, point the episode's chapters_id at that
    row, then clear the Castopod cache.

    Args:
        episode_slug: Episode slug; used to build the remote file name.
        episode_id: ID of the cp_episodes row to update.
        chapters_path: Local path of the chapters JSON file.

    Returns:
        True when the file was uploaded and linked, False when any of those
        steps failed (a warning is printed). A failed cache clear only
        prints a warning.
    """
    print("[4.5/5] Uploading chapters to Castopod...")

    # Both values are interpolated into shell and SQL strings below, so only
    # accept a plain slug and an integer id.
    if not re.fullmatch(r"[A-Za-z0-9._-]+", str(episode_slug)):
        print(f" Warning: Unexpected characters in episode slug, skipping chapters upload: {episode_slug!r}")
        return False
    try:
        episode_id = int(episode_id)
    except (TypeError, ValueError):
        print(f" Warning: Invalid episode id, skipping chapters upload: {episode_id!r}")
        return False

    chapters_filename = f"{episode_slug}-chapters.json"
    remote_path = f"podcasts/{PODCAST_HANDLE}/{chapters_filename}"

    # Read raw bytes so the uploaded file and the recorded size match the
    # local file exactly.
    chapters_bytes = Path(chapters_path).read_bytes()

    # Base64 encode for safe transfer
    chapters_b64 = base64.b64encode(chapters_bytes).decode("ascii")

    # Upload file to container using base64 decode
    upload_cmd = (
        f'echo "{chapters_b64}" | base64 -d | {DOCKER_PATH} exec -i {CASTOPOD_CONTAINER} '
        f'tee /var/www/castopod/public/media/{remote_path} > /dev/null'
    )
    success, output = run_ssh_command(upload_cmd)
    if not success:
        print(f" Warning: Failed to upload chapters file: {output}")
        return False

    # Size in bytes, not characters
    file_size = len(chapters_bytes)

    mysql_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME}'

    # Insert into media table
    insert_sql = (
        "INSERT INTO cp_media (file_key, file_size, file_mimetype, type, uploaded_by, updated_by, uploaded_at, updated_at) "
        f"VALUES ('{remote_path}', {file_size}, 'application/json', 'chapters', 1, 1, NOW(), NOW())"
    )
    db_cmd = f'{mysql_cmd} -e "{insert_sql}; SELECT LAST_INSERT_ID();"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f" Warning: Failed to insert chapters in database: {output}")
        return False

    # Parse media ID from the last line of the output
    try:
        media_id = int(output.strip().split('\n')[-1])
    except (ValueError, IndexError):
        print(f" Warning: Could not parse media ID from: {output}")
        return False

    # Link chapters to episode
    update_sql = f"UPDATE cp_episodes SET chapters_id = {media_id} WHERE id = {episode_id}"
    db_cmd = f'{mysql_cmd} -e "{update_sql}"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f" Warning: Failed to link chapters to episode: {output}")
        return False

    # Clear Castopod cache
    cache_cmd = f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} php spark cache:clear'
    cache_cleared, cache_output = run_ssh_command(cache_cmd)
    if not cache_cleared:
        print(f" Warning: Could not clear Castopod cache: {cache_output}")

    print(f" Chapters uploaded and linked (media_id: {media_id})")
    return True
|
|
|
|
|
|
def upload_to_bunny(local_path: str, remote_path: str, content_type: str = None) -> bool:
    """Upload a file to BunnyCDN Storage.

    Args:
        local_path: Path of the local file to upload.
        remote_path: Destination path inside the storage zone.
        content_type: MIME type for the Content-Type header. When omitted it
            is derived from the file extension, falling back to
            "application/octet-stream".

    Returns:
        True when the storage API answers with status 201, otherwise False
        (a warning is printed). A network error also yields False.
    """
    if not content_type:
        ext = Path(local_path).suffix.lower()
        content_type = {
            ".mp3": "audio/mpeg", ".png": "image/png", ".jpg": "image/jpeg",
            ".json": "application/json", ".srt": "application/x-subrip",
        }.get(ext, "application/octet-stream")

    url = f"https://{BUNNY_STORAGE_REGION}.storage.bunnycdn.com/{BUNNY_STORAGE_ZONE}/{remote_path}"
    try:
        with open(local_path, "rb") as f:
            resp = requests.put(
                url,
                data=f,
                headers={
                    "AccessKey": BUNNY_STORAGE_KEY,
                    "Content-Type": content_type,
                },
                # (connect, read) seconds; without a timeout the call can hang forever
                timeout=(10, 300),
            )
    except requests.RequestException as e:
        print(f" Warning: BunnyCDN upload failed: {e}")
        return False

    if resp.status_code == 201:
        return True
    print(f" Warning: BunnyCDN upload failed ({resp.status_code}): {resp.text[:200]}")
    return False
|
|
|
|
|
|
def download_from_castopod(file_key: str, local_path: str) -> bool:
    """Download a file from Castopod's container storage to local filesystem.

    The file is first copied out of the container to /tmp on the NAS with
    "docker cp", then fetched with scp, and finally the temporary copy on the
    NAS is removed.

    Args:
        file_key: Media path relative to Castopod's public/media directory.
        local_path: Local destination path.

    Returns:
        True when both the container copy and the scp transfer succeeded.
    """
    import shlex

    remote_filename = Path(file_key).name
    remote_tmp = f"/tmp/castopod_{remote_filename}"

    # Quote the paths for the remote shell so names containing spaces or
    # shell metacharacters are passed through intact.
    container_src = f"{CASTOPOD_CONTAINER}:/var/www/castopod/public/media/{file_key}"
    cp_cmd = f'{DOCKER_PATH} cp {shlex.quote(container_src)} {shlex.quote(remote_tmp)}'
    success, _ = run_ssh_command(cp_cmd, timeout=120)
    if not success:
        return False

    scp_cmd = [
        "scp", "-P", str(NAS_SSH_PORT),
        f"{NAS_USER}@{NAS_HOST}:{remote_tmp}",
        local_path
    ]
    try:
        result = subprocess.run(scp_cmd, capture_output=True, text=True, timeout=300)
        ok = result.returncode == 0
    except (subprocess.SubprocessError, OSError):
        ok = False

    # Always remove the temporary copy on the NAS, even if scp failed
    run_ssh_command(f"rm -f {shlex.quote(remote_tmp)}")
    return ok
|
|
|
|
|
|
def sync_episode_media_to_bunny(episode_id: int, already_uploaded: set):
    """Ensure all media linked to an episode exists on BunnyCDN.

    Looks up the file keys of the episode's audio, cover, transcript and
    chapters media in the Castopod database. Each key that is not in
    already_uploaded and does not answer a HEAD request on the CDN with
    status 200 is downloaded from Castopod and uploaded to BunnyCDN.

    Args:
        episode_id: ID of the episode whose media should be synced.
        already_uploaded: File keys to skip.
    """
    def _is_on_cdn(key):
        # Any error during the HEAD check counts as "not on the CDN"
        try:
            return requests.head(f"https://cdn.lukeattheroost.com/media/{key}", timeout=10).status_code == 200
        except Exception:
            return False

    selects = [f"SELECT audio_id FROM cp_episodes WHERE id = {episode_id}"]
    selects.extend(
        f"UNION ALL SELECT {column} FROM cp_episodes WHERE id = {episode_id} AND {column} IS NOT NULL"
        for column in ("cover_id", "transcript_id", "chapters_id")
    )
    query = "SELECT DISTINCT m.file_key FROM cp_media m WHERE m.id IN (" + " ".join(selects) + ")"

    cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N -e "{query};"'
    success, output = run_ssh_command(cmd)
    if not (success and output):
        return

    file_keys = list(filter(None, map(str.strip, output.strip().split('\n'))))
    for file_key in file_keys:
        if file_key in already_uploaded or _is_on_cdn(file_key):
            continue

        # Reserve a local temp file name with the same extension
        with tempfile.NamedTemporaryFile(suffix=Path(file_key).suffix, delete=False) as tmp:
            tmp_path = tmp.name
        try:
            if not download_from_castopod(file_key, tmp_path):
                print(f" Warning: Could not sync {file_key} to CDN")
            else:
                print(f" Syncing to CDN: {file_key}")
                upload_to_bunny(tmp_path, f"media/{file_key}")
        finally:
            Path(tmp_path).unlink(missing_ok=True)
|
|
|
|
|
|
def add_episode_to_sitemap(slug: str):
    """Add episode transcript page to sitemap.xml.

    Does nothing when website/sitemap.xml (next to this script) does not
    exist or already lists the episode URL. Otherwise a new <url> entry is
    inserted before the last closing </urlset> tag.

    Args:
        slug: Episode slug used in the episode page URL.
    """
    from xml.sax.saxutils import escape

    sitemap_path = Path(__file__).parent / "website" / "sitemap.xml"
    if not sitemap_path.exists():
        return

    url = f"https://lukeattheroost.com/episode.html?slug={slug}"
    loc = escape(url)
    content = sitemap_path.read_text(encoding="utf-8")

    # Match the complete <loc> value: a plain substring test would treat
    # "episode-1" as present when only "episode-10" is listed.
    if re.search(re.escape(loc) + r"\s*</loc>", content):
        print(" Episode already in sitemap")
        return

    head, closing_tag, tail = content.rpartition("</urlset>")
    if not closing_tag:
        print(" Warning: sitemap.xml has no </urlset> tag, episode not added")
        return

    today = datetime.now().strftime("%Y-%m-%d")
    new_entry = f"""  <url>
    <loc>{loc}</loc>
    <lastmod>{today}</lastmod>
    <changefreq>never</changefreq>
    <priority>0.7</priority>
  </url>
</urlset>"""

    sitemap_path.write_text(head + new_entry + tail, encoding="utf-8")
    print(" Added episode to sitemap.xml")
|
|
|
|
|
|
|
|
def get_next_episode_number() -> int:
    """Get the next episode number from Castopod.

    Returns:
        One more than the highest "number" among the episodes whose
        "podcast_id" equals PODCAST_ID. Returns 1 when the API does not
        answer with status 200 (a warning is printed), when no episodes are
        returned, or when none belong to this podcast.
    """
    response = _session.get(
        f"{CASTOPOD_URL}/api/rest/v1/episodes",
        headers=get_auth_header(),
        # Without a timeout the request can block indefinitely
        timeout=30,
    )

    if response.status_code != 200:
        # Make the fallback visible: publishing as episode 1 by accident is
        # easy to miss otherwise.
        print(f"Warning: Could not list episodes (HTTP {response.status_code}), assuming episode 1")
        return 1

    episodes = response.json()
    if not episodes:
        return 1

    # Filter to our podcast; a missing or null "number" counts as 0
    our_numbers = [
        ep.get("number", 0) or 0
        for ep in episodes
        if ep.get("podcast_id") == PODCAST_ID
    ]
    if not our_numbers:
        return 1

    return max(our_numbers) + 1
|
|
|
|
|
|
def main():
    """Command-line entry point: transcribe, describe, upload and publish.

    Steps: parse arguments, transcribe the audio, generate metadata, write
    the chapters and transcript files next to the audio file, then (unless
    --dry-run is given) create the episode on Castopod, upload media to
    BunnyCDN, copy the transcript into the website directory, update the
    sitemap, publish the episode, attach the chapters and sync remaining
    media to the CDN.
    """
    parser = argparse.ArgumentParser(description="Publish podcast episode to Castopod")
    parser.add_argument("audio_file", help="Path to the audio file (MP3)")
    parser.add_argument("--episode-number", "-n", type=int, help="Episode number (auto-detected if not provided)")
    parser.add_argument("--dry-run", "-d", action="store_true", help="Generate metadata but don't publish")
    parser.add_argument("--title", "-t", help="Override generated title")
    parser.add_argument("--description", help="Override generated description")
    parser.add_argument("--session-data", "-s", help="Path to session export JSON (from /api/session/export)")
    args = parser.parse_args()

    audio_path = Path(args.audio_file).expanduser().resolve()
    if not audio_path.exists():
        print(f"Error: Audio file not found: {audio_path}")
        sys.exit(1)

    # Determine episode number ("is not None" so an explicit 0 is honoured)
    if args.episode_number is not None:
        episode_number = args.episode_number
    else:
        episode_number = get_next_episode_number()
    print(f"Episode number: {episode_number}")

    # Load session data if provided
    session_data = None
    if args.session_data:
        session_path = Path(args.session_data).expanduser().resolve()
        if session_path.exists():
            with open(session_path, encoding="utf-8") as f:
                session_data = json.load(f)
            print(f"Loaded session data: {session_data.get('call_count', 0)} calls")
        else:
            print(f"Warning: Session data file not found: {session_path}")

    # Step 1: Transcribe
    transcript = transcribe_audio(str(audio_path))

    # Step 2: Generate metadata
    metadata = generate_metadata(transcript, episode_number)

    # Use session chapters if available (more accurate than LLM-generated)
    if session_data and session_data.get("chapters"):
        metadata["chapters"] = session_data["chapters"]
        print(f" Using {len(metadata['chapters'])} chapters from session data")

    # Apply overrides
    if args.title:
        metadata["title"] = args.title
    if args.description:
        metadata["description"] = args.description

    # Save chapters file
    chapters_path = audio_path.with_suffix(".chapters.json")
    save_chapters(metadata, str(chapters_path))

    # Save transcript text file with LUKE:/CALLER: speaker labels.
    # Written as UTF-8 explicitly so non-ASCII characters in the transcript
    # do not depend on the platform's default encoding.
    transcript_path = audio_path.with_suffix(".transcript.txt")
    raw_text = transcript["full_text"]
    labeled_text = label_transcript_speakers(raw_text)
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(labeled_text)
    print(f" Transcript saved to: {transcript_path}")

    # Save session transcript alongside episode if available (has speaker labels)
    if session_data and session_data.get("transcript"):
        session_transcript_path = audio_path.with_suffix(".session_transcript.txt")
        with open(session_transcript_path, "w", encoding="utf-8") as f:
            f.write(session_data["transcript"])
        print(f" Session transcript saved to: {session_transcript_path}")

    if args.dry_run:
        print("\n[DRY RUN] Would publish with:")
        print(f" Title: {metadata['title']}")
        print(f" Description: {metadata['description']}")
        print(f" Chapters: {json.dumps(metadata['chapters'], indent=2)}")
        print("\nChapters file saved. Run without --dry-run to publish.")
        return

    # Step 3: Create episode
    episode = create_episode(str(audio_path), metadata, episode_number)
    # Keep the id and slug from the creation response; later steps use these
    # rather than relying on the publish response to repeat them.
    ep_id = episode["id"]
    ep_slug = episode["slug"]

    # Step 3.5: Upload to BunnyCDN
    print("[3.5/5] Uploading to BunnyCDN...")
    # Keys are recorded only after a successful upload so that the final
    # sync step retries anything that failed here.
    uploaded_keys = set()

    # Audio: download Castopod's copy (ensures byte-exact match with RSS metadata)
    audio_media_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N -e "SELECT m.file_key FROM cp_media m JOIN cp_episodes e ON e.audio_id = m.id WHERE e.id = {ep_id};"'
    success, audio_file_key = run_ssh_command(audio_media_cmd)
    if success and audio_file_key:
        audio_file_key = audio_file_key.strip()
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            tmp_audio = tmp.name
        try:
            print(f" Downloading from Castopod: {audio_file_key}")
            if download_from_castopod(audio_file_key, tmp_audio):
                print(" Uploading audio to BunnyCDN")
                audio_on_cdn = upload_to_bunny(tmp_audio, f"media/{audio_file_key}", "audio/mpeg")
            else:
                print(" Castopod download failed, uploading original file")
                audio_on_cdn = upload_to_bunny(str(audio_path), f"media/{audio_file_key}", "audio/mpeg")
        finally:
            Path(tmp_audio).unlink(missing_ok=True)
        if audio_on_cdn:
            uploaded_keys.add(audio_file_key)
    else:
        print(" Error: Could not determine audio file_key from Castopod DB")
        print(" Audio will be served from Castopod directly (not CDN)")

    # Chapters
    chapters_key = f"podcasts/{PODCAST_HANDLE}/{ep_slug}-chapters.json"
    print(" Uploading chapters to BunnyCDN")
    if upload_to_bunny(str(chapters_path), f"media/{chapters_key}"):
        uploaded_keys.add(chapters_key)

    # Transcript
    print(" Uploading transcript to BunnyCDN")
    upload_to_bunny(str(transcript_path), f"transcripts/{ep_slug}.txt", "text/plain")

    # Copy transcript to website dir for Cloudflare Pages
    import shutil
    website_transcript_dir = Path(__file__).parent / "website" / "transcripts"
    # parents=True: do not abort a half-finished publish because the
    # website directory is missing.
    website_transcript_dir.mkdir(parents=True, exist_ok=True)
    website_transcript_path = website_transcript_dir / f"{ep_slug}.txt"
    shutil.copy2(str(transcript_path), str(website_transcript_path))
    print(" Transcript copied to website/transcripts/")

    # Add to sitemap
    add_episode_to_sitemap(ep_slug)

    # Step 4: Publish
    publish_episode(ep_id)

    # Step 4.5: Upload chapters via SSH
    chapters_uploaded = upload_chapters_to_castopod(
        ep_slug,
        ep_id,
        str(chapters_path)
    )

    # Sync any remaining episode media to BunnyCDN (cover art, transcripts, etc.)
    print(" Syncing episode media to CDN...")
    sync_episode_media_to_bunny(ep_id, uploaded_keys)

    # Step 5: Summary
    print("\n[5/5] Done!")
    print("=" * 50)
    print(f"Episode URL: {CASTOPOD_URL}/@{PODCAST_HANDLE}/episodes/{ep_slug}")
    print(f"RSS Feed: {CASTOPOD_URL}/@{PODCAST_HANDLE}/feed.xml")
    print("=" * 50)
    if not chapters_uploaded:
        print("\nNote: Chapters upload failed. Add manually via Castopod admin UI")
        print(f" Chapters file: {chapters_path}")


if __name__ == "__main__":
    main()
|