TTS fixes, Inworld improvements, footer redesign, episodes 15-25, invoice script fix

- Fix TTS text pipeline: new caps handling (spell out unknown acronyms, lowercase
  emphasis words), action-word lookahead for parenthetical stripping, abbreviation
  expansions (US→United States, NM→New Mexico), pronunciation fixes
- Inworld TTS: camelCase API fields, speakingRate per-voice overrides, retry logic
  with exponential backoff (3 attempts)
- Footer redesign: SVG icons for social/podcast links across all pages
- Stats page: show "Rate us on Spotify" instead of "not public" placeholder
- New voices, expanded caller prompts and problem scenarios
- Social posting via Postiz, YouTube upload in publish pipeline
- Episode transcripts 15-25, terms page, sitemap updates
- Fix invoice script: match Timing totals using merged Task+App intervals

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-02 12:38:58 -07:00
parent 08a35bddeb
commit 6eeab58464
34 changed files with 6545 additions and 512 deletions

View File

@@ -11,6 +11,7 @@ Usage:
import argparse
import base64
import fcntl
import json
import os
import re
@@ -82,6 +83,10 @@ POSTIZ_INTEGRATIONS = {
"bluesky": {"id": "cmlk29h780001p76qa7sstp5h"},
"mastodon": {"id": "cmlk2r3mf0001le6vx9ey0k5a"},
"nostr": {"id": "cmlll3y78000cuc6vh8dcpl2w"},
"linkedin": {"id": "cmluar6cn0004o46x5a1u07vc"},
"threads": {"id": "cmm13sxhq001mo46x24com5p7"},
# TikTok excluded — requires video, not image posts. Use upload_clips.py instead.
# "tiktok": {"id": "cmm2ggsno0001md7134cam9t9"},
}
# NAS Configuration for chapters upload
@@ -90,7 +95,7 @@ BUNNY_STORAGE_ZONE = "lukeattheroost"
BUNNY_STORAGE_KEY = "92749cd3-85df-4cff-938fe35eb994-30f8-4cf2"
BUNNY_STORAGE_REGION = "la" # Los Angeles
NAS_HOST = "mmgnas-10g"
NAS_HOST = "mmgnas"
NAS_USER = "luke"
NAS_SSH_PORT = 8001
DOCKER_PATH = "/share/CACHEDEV1_DATA/.qpkg/container-station/bin/docker"
@@ -100,6 +105,8 @@ DB_USER = "castopod"
DB_PASS = "BYtbFfk3ndeVabb26xb0UyKU"
DB_NAME = "castopod"
LOCK_FILE = Path(__file__).parent / ".publish.lock"
def get_auth_header():
"""Get Basic Auth header for Castopod API."""
@@ -494,6 +501,19 @@ def publish_episode(episode_id: int) -> dict:
return episode
def generate_srt(segments: list, output_path: str):
    """Generate an SRT subtitle file from whisper transcription segments.

    Args:
        segments: List of dicts with "start"/"end" times in seconds (float)
            and "text" for each cue.
        output_path: Destination path for the .srt file.
    """
    def _fmt(t: float) -> str:
        # The SRT spec uses a comma as the millisecond separator:
        # HH:MM:SS,mmm. Work in integer milliseconds so float rounding
        # can never render an invalid "60.000" seconds field.
        total_ms = int(round(t * 1000))
        h, rem = divmod(total_ms, 3_600_000)
        m, rem = divmod(rem, 60_000)
        s, ms = divmod(rem, 1000)
        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

    with open(output_path, "w", encoding="utf-8") as f:
        for i, seg in enumerate(segments, 1):
            f.write(f"{i}\n")
            f.write(f"{_fmt(seg['start'])} --> {_fmt(seg['end'])}\n")
            f.write(f"{seg['text']}\n\n")
def save_chapters(metadata: dict, output_path: str):
"""Save chapters to JSON file."""
chapters_data = {
@@ -523,6 +543,135 @@ def run_ssh_command(command: str, timeout: int = 30) -> tuple[bool, str]:
return False, str(e)
def _check_episode_exists_in_db(episode_number: int) -> bool:
    """Check if an episode with this number already exists in Castopod DB.

    Queries the MariaDB container over SSH. Any failure — SSH error, empty
    output, or unparseable output (e.g. mysql warnings mixed into stdout) —
    is treated as "does not exist" so the duplicate guard stays best-effort
    instead of crashing the publish run on a transient DB issue.
    """
    cmd = (f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} '
           f'-N -e "SELECT COUNT(*) FROM cp_episodes WHERE number = {episode_number};"')
    success, output = run_ssh_command(cmd)
    if success and output.strip():
        try:
            # mysql -N prints the bare count on success.
            return int(output.strip()) > 0
        except ValueError:
            return False
    return False
def _srt_to_castopod_json(srt_path: str) -> str:
"""Parse SRT to JSON matching Castopod's TranscriptParser format."""
with open(srt_path, "r") as f:
srt_text = f.read()
subs = []
blocks = re.split(r'\n\n+', srt_text.strip())
for block in blocks:
lines = block.strip().split('\n')
if len(lines) < 3:
continue
try:
num = int(lines[0].strip())
except ValueError:
continue
time_match = re.match(
r'(\d{2}:\d{2}:\d{2}[.,]\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}[.,]\d{3})',
lines[1].strip()
)
if not time_match:
continue
text = '\n'.join(lines[2:]).strip()
def ts_to_seconds(ts):
ts = ts.replace(',', '.')
parts = ts.split(':')
return int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
subs.append({
"number": num,
"startTime": ts_to_seconds(time_match.group(1)),
"endTime": ts_to_seconds(time_match.group(2)),
"text": text,
})
return json.dumps(subs, indent=4)
def upload_transcript_to_castopod(episode_slug: str, episode_id: int, transcript_path: str) -> bool:
    """Upload SRT transcript + JSON to Castopod via SSH and link in database.

    Flow: SCP the transcript to a NAS temp path, `docker cp` it into the
    Castopod container's public media directory, optionally generate and
    upload the JSON form Castopod's frontend renders (_srt_to_castopod_json),
    then INSERT a cp_media row and point cp_episodes.transcript_id at it.
    Finishes by clearing Castopod's cache.

    Args:
        episode_slug: URL slug of the episode (used for remote filenames).
        episode_id: Castopod episode row id to link the transcript to.
        transcript_path: Local path to the .srt (or .txt) transcript.

    Returns:
        True if uploaded and linked; False on SCP failure, DB insert/update
        failure, or an unparseable media id. Intermediate SSH commands
        (docker cp / chown / cleanup) are best-effort and not checked.
    """
    print(" Uploading transcript to Castopod...")

    is_srt = transcript_path.endswith(".srt")
    ext = ".srt" if is_srt else ".txt"
    mimetype = "application/x-subrip" if is_srt else "text/plain"
    transcript_filename = f"{episode_slug}{ext}"
    remote_path = f"podcasts/{PODCAST_HANDLE}/{transcript_filename}"
    json_key = f"podcasts/{PODCAST_HANDLE}/{episode_slug}.json"

    # Upload SRT via SCP + docker cp (handles large files)
    nas_tmp = f"/share/CACHEDEV1_DATA/tmp/_transcript_{episode_slug}{ext}"
    scp_cmd = ["scp", "-P", str(NAS_SSH_PORT), transcript_path, f"{NAS_USER}@{NAS_HOST}:{nas_tmp}"]
    result = subprocess.run(scp_cmd, capture_output=True, text=True, timeout=60)
    if result.returncode != 0:
        print(f" Warning: SCP transcript failed: {result.stderr}")
        return False
    media_path = f"/var/www/castopod/public/media/{remote_path}"
    run_ssh_command(f'{DOCKER_PATH} cp {nas_tmp} {CASTOPOD_CONTAINER}:{media_path}', timeout=60)
    run_ssh_command(f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} chown www-data:www-data {media_path}')
    run_ssh_command(f'rm -f {nas_tmp}')

    # Generate and upload JSON for Castopod's frontend rendering
    if is_srt:
        json_content = _srt_to_castopod_json(transcript_path)
        json_tmp_local = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
        json_tmp_local.write(json_content)
        json_tmp_local.close()
        nas_json_tmp = f"/share/CACHEDEV1_DATA/tmp/_transcript_{episode_slug}.json"
        scp_json = ["scp", "-P", str(NAS_SSH_PORT), json_tmp_local.name, f"{NAS_USER}@{NAS_HOST}:{nas_json_tmp}"]
        subprocess.run(scp_json, capture_output=True, text=True, timeout=60)
        os.remove(json_tmp_local.name)
        json_media_path = f"/var/www/castopod/public/media/{json_key}"
        run_ssh_command(f'{DOCKER_PATH} cp {nas_json_tmp} {CASTOPOD_CONTAINER}:{json_media_path}', timeout=60)
        run_ssh_command(f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} chown www-data:www-data {json_media_path}')
        run_ssh_command(f'rm -f {nas_json_tmp}')

    # Stat the file for its size instead of reading it fully into memory.
    file_size = os.path.getsize(transcript_path)

    # Build file_metadata with json_key — escape double quotes for shell embedding
    metadata_json = json.dumps({"json_key": json_key}) if is_srt else "NULL"
    metadata_sql = f"'{metadata_json}'" if is_srt else "NULL"
    metadata_sql_escaped = metadata_sql.replace('"', '\\"')
    insert_sql = (
        f"INSERT INTO cp_media (file_key, file_size, file_mimetype, file_metadata, type, "
        f"uploaded_by, updated_by, uploaded_at, updated_at) VALUES "
        f"('{remote_path}', {file_size}, '{mimetype}', {metadata_sql_escaped}, 'transcript', 1, 1, NOW(), NOW())"
    )
    db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{insert_sql}; SELECT LAST_INSERT_ID();"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f" Warning: Failed to insert transcript in database: {output}")
        return False
    try:
        # LAST_INSERT_ID() is the final line of the mysql output.
        lines = output.strip().split('\n')
        media_id = int(lines[-1])
    except (ValueError, IndexError):
        print(f" Warning: Could not parse media ID from: {output}")
        return False

    update_sql = f"UPDATE cp_episodes SET transcript_id = {media_id} WHERE id = {episode_id}"
    db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{update_sql}"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f" Warning: Failed to link transcript to episode: {output}")
        return False

    # Clear Castopod's cache so the new transcript is visible immediately.
    cache_cmd = f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} php spark cache:clear'
    run_ssh_command(cache_cmd)
    print(f" Transcript uploaded and linked (media_id: {media_id})")
    return True
def upload_chapters_to_castopod(episode_slug: str, episode_id: int, chapters_path: str) -> bool:
"""Upload chapters file to Castopod via SSH and link in database."""
print("[4.5/5] Uploading chapters to Castopod...")
@@ -799,10 +948,10 @@ def post_to_social(metadata: dict, episode_slug: str, image_path: str = None):
base_content = f"{metadata['title']}\n\n{metadata['description']}\n\n{episode_url}"
hashtags = "#podcast #LukeAtTheRoost #talkradio #callinshow #newepisode"
hashtag_platforms = {"instagram", "facebook", "bluesky", "mastodon", "nostr"}
hashtag_platforms = {"instagram", "facebook", "bluesky", "mastodon", "nostr", "linkedin", "threads", "tiktok"}
# Platform-specific content length limits
PLATFORM_MAX_LENGTH = {"bluesky": 300}
PLATFORM_MAX_LENGTH = {"bluesky": 300, "threads": 500, "tiktok": 2200}
# Post to each platform individually so one failure doesn't block others
posted = 0
@@ -902,7 +1051,7 @@ def upload_to_youtube(audio_path: str, metadata: dict, chapters: list,
"-c:a", "aac", "-b:a", "192k",
"-pix_fmt", "yuv420p", "-shortest",
"-movflags", "+faststart", str(video_path)
], capture_output=True, text=True, timeout=600)
], capture_output=True, text=True, timeout=1800)
if result.returncode != 0:
print(f" Warning: ffmpeg failed: {result.stderr[-200:]}")
return None
@@ -987,22 +1136,32 @@ def upload_to_youtube(audio_path: str, metadata: dict, chapters: list,
def get_next_episode_number() -> int:
"""Get the next episode number from Castopod."""
headers = get_auth_header()
"""Get the next episode number from Castopod (DB first, API fallback)."""
# Query DB directly — the REST API is unreliable
cmd = (f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} '
f'-N -e "SELECT COALESCE(MAX(number), 0) FROM cp_episodes WHERE podcast_id = {PODCAST_ID};"')
success, output = run_ssh_command(cmd)
if success and output.strip():
try:
return int(output.strip()) + 1
except ValueError:
pass
# Fallback to API
headers = get_auth_header()
response = _session.get(
f"{CASTOPOD_URL}/api/rest/v1/episodes",
headers=headers,
)
if response.status_code != 200:
return 1
print("Warning: Could not determine episode number from API or DB")
sys.exit(1)
episodes = response.json()
if not episodes:
return 1
# Filter to our podcast
our_episodes = [ep for ep in episodes if ep.get("podcast_id") == PODCAST_ID]
if not our_episodes:
return 1
@@ -1026,6 +1185,36 @@ def main():
print(f"Error: Audio file not found: {audio_path}")
sys.exit(1)
# Acquire exclusive lock to prevent concurrent/duplicate runs
lock_fp = open(LOCK_FILE, "w")
try:
fcntl.flock(lock_fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
print("Error: Another publish is already running (lock file held)")
sys.exit(1)
lock_fp.write(str(os.getpid()))
lock_fp.flush()
# Kill the backend server to free memory for transcription
server_was_running = False
try:
result = subprocess.run(
["lsof", "-ti", ":8000"], capture_output=True, text=True
)
pids = result.stdout.strip().split('\n') if result.stdout.strip() else []
if pids:
server_was_running = True
print("Stopping backend server for resources...")
for pid in pids:
try:
os.kill(int(pid), 9)
except (ProcessLookupError, ValueError):
pass
import time as _time
_time.sleep(1)
except Exception:
pass
# Determine episode number
if args.episode_number:
episode_number = args.episode_number
@@ -1033,6 +1222,14 @@ def main():
episode_number = get_next_episode_number()
print(f"Episode number: {episode_number}")
# Guard against duplicate publish
if not args.dry_run and _check_episode_exists_in_db(episode_number):
print(f"Error: Episode {episode_number} already exists in Castopod. "
f"Use --episode-number to specify a different number, or remove the existing episode first.")
lock_fp.close()
LOCK_FILE.unlink(missing_ok=True)
sys.exit(1)
# Load session data if provided
session_data = None
if args.session_data:
@@ -1073,6 +1270,11 @@ def main():
f.write(labeled_text)
print(f" Transcript saved to: {transcript_path}")
# Generate SRT from whisper segments (for Castopod/podcast apps)
srt_path = audio_path.with_suffix(".srt")
generate_srt(transcript["segments"], str(srt_path))
print(f" SRT saved to: {srt_path}")
# Save session transcript alongside episode if available (has speaker labels)
if session_data and session_data.get("transcript"):
session_transcript_path = audio_path.with_suffix(".session_transcript.txt")
@@ -1156,13 +1358,20 @@ def main():
else:
raise
# Step 4.5: Upload chapters via SSH
# Step 4.5: Upload chapters and transcript via SSH
chapters_uploaded = upload_chapters_to_castopod(
episode["slug"],
episode["id"],
str(chapters_path)
)
# Upload SRT transcript to Castopod (preferred for podcast apps)
transcript_uploaded = upload_transcript_to_castopod(
episode["slug"],
episode["id"],
str(srt_path)
)
# Sync any remaining episode media to BunnyCDN (cover art, transcripts, etc.)
print(" Syncing episode media to CDN...")
sync_episode_media_to_bunny(episode["id"], uploaded_keys)
@@ -1202,9 +1411,29 @@ def main():
if not chapters_uploaded:
print("\nNote: Chapters upload failed. Add manually via Castopod admin UI")
print(f" Chapters file: {chapters_path}")
if not transcript_uploaded:
print("\nNote: Transcript upload to Castopod failed")
print(f" Transcript file: {srt_path}")
if not yt_video_id:
print("\nNote: YouTube upload failed. Run 'python yt_auth.py' if token expired")
# Restart the backend server if it was running before
if server_was_running:
print("Restarting backend server...")
project_dir = Path(__file__).parent
subprocess.Popen(
[sys.executable, "-m", "uvicorn", "backend.main:app",
"--reload", "--reload-dir", "backend", "--host", "0.0.0.0", "--port", "8000"],
cwd=project_dir,
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
start_new_session=True,
)
print(" Server restarted on port 8000")
# Release lock
lock_fp.close()
LOCK_FILE.unlink(missing_ok=True)
if __name__ == "__main__":
main()