TTS fixes, Inworld improvements, footer redesign, episodes 15-25, invoice script fix
- Fix TTS text pipeline: new caps handling (spell out unknown acronyms, lowercase emphasis words), action-word lookahead for parenthetical stripping, abbreviation expansions (US→United States, NM→New Mexico), pronunciation fixes - Inworld TTS: camelCase API fields, speakingRate per-voice overrides, retry logic with exponential backoff (3 attempts) - Footer redesign: SVG icons for social/podcast links across all pages - Stats page: show "Rate us on Spotify" instead of "not public" placeholder - New voices, expanded caller prompts and problem scenarios - Social posting via Postiz, YouTube upload in publish pipeline - Episode transcripts 15-25, terms page, sitemap updates - Fix invoice script: match Timing totals using merged Task+App intervals Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@ Usage:
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import fcntl
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -82,6 +83,10 @@ POSTIZ_INTEGRATIONS = {
|
||||
"bluesky": {"id": "cmlk29h780001p76qa7sstp5h"},
|
||||
"mastodon": {"id": "cmlk2r3mf0001le6vx9ey0k5a"},
|
||||
"nostr": {"id": "cmlll3y78000cuc6vh8dcpl2w"},
|
||||
"linkedin": {"id": "cmluar6cn0004o46x5a1u07vc"},
|
||||
"threads": {"id": "cmm13sxhq001mo46x24com5p7"},
|
||||
# TikTok excluded — requires video, not image posts. Use upload_clips.py instead.
|
||||
# "tiktok": {"id": "cmm2ggsno0001md7134cam9t9"},
|
||||
}
|
||||
|
||||
# NAS Configuration for chapters upload
|
||||
@@ -90,7 +95,7 @@ BUNNY_STORAGE_ZONE = "lukeattheroost"
|
||||
BUNNY_STORAGE_KEY = "92749cd3-85df-4cff-938fe35eb994-30f8-4cf2"
|
||||
BUNNY_STORAGE_REGION = "la" # Los Angeles
|
||||
|
||||
NAS_HOST = "mmgnas-10g"
|
||||
NAS_HOST = "mmgnas"
|
||||
NAS_USER = "luke"
|
||||
NAS_SSH_PORT = 8001
|
||||
DOCKER_PATH = "/share/CACHEDEV1_DATA/.qpkg/container-station/bin/docker"
|
||||
@@ -100,6 +105,8 @@ DB_USER = "castopod"
|
||||
DB_PASS = "BYtbFfk3ndeVabb26xb0UyKU"
|
||||
DB_NAME = "castopod"
|
||||
|
||||
LOCK_FILE = Path(__file__).parent / ".publish.lock"
|
||||
|
||||
|
||||
def get_auth_header():
|
||||
"""Get Basic Auth header for Castopod API."""
|
||||
@@ -494,6 +501,19 @@ def publish_episode(episode_id: int) -> dict:
|
||||
return episode
|
||||
|
||||
|
||||
def generate_srt(segments: list, output_path: str):
    """Generate an SRT subtitle file from whisper segments.

    Args:
        segments: List of dicts, each with "start" and "end" times in
            seconds (float) and a "text" string.
        output_path: Destination path for the .srt file.
    """
    def fmt(t: float) -> str:
        # SRT timestamps are HH:MM:SS,mmm — the spec uses a comma as the
        # decimal separator; some players reject a period here.
        h, m, s = int(t // 3600), int((t % 3600) // 60), t % 60
        return f"{h:02d}:{m:02d}:{s:06.3f}".replace(".", ",")

    with open(output_path, "w", encoding="utf-8") as f:
        for i, seg in enumerate(segments, 1):
            f.write(f"{i}\n")
            f.write(f"{fmt(seg['start'])} --> {fmt(seg['end'])}\n")
            f.write(f"{seg['text']}\n\n")
|
||||
|
||||
|
||||
def save_chapters(metadata: dict, output_path: str):
|
||||
"""Save chapters to JSON file."""
|
||||
chapters_data = {
|
||||
@@ -523,6 +543,135 @@ def run_ssh_command(command: str, timeout: int = 30) -> tuple[bool, str]:
|
||||
return False, str(e)
|
||||
|
||||
|
||||
def _check_episode_exists_in_db(episode_number: int) -> bool:
    """Return True when Castopod's DB already has an episode with this number."""
    query = f"SELECT COUNT(*) FROM cp_episodes WHERE number = {episode_number};"
    cmd = (f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} '
           f'-N -e "{query}"')
    ok, output = run_ssh_command(cmd)
    count = output.strip() if ok else ""
    # Any non-empty result is the COUNT(*) value; treat failures as "not found".
    return bool(count) and int(count) > 0
|
||||
|
||||
|
||||
def _srt_to_castopod_json(srt_path: str) -> str:
|
||||
"""Parse SRT to JSON matching Castopod's TranscriptParser format."""
|
||||
with open(srt_path, "r") as f:
|
||||
srt_text = f.read()
|
||||
|
||||
subs = []
|
||||
blocks = re.split(r'\n\n+', srt_text.strip())
|
||||
for block in blocks:
|
||||
lines = block.strip().split('\n')
|
||||
if len(lines) < 3:
|
||||
continue
|
||||
try:
|
||||
num = int(lines[0].strip())
|
||||
except ValueError:
|
||||
continue
|
||||
time_match = re.match(
|
||||
r'(\d{2}:\d{2}:\d{2}[.,]\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}[.,]\d{3})',
|
||||
lines[1].strip()
|
||||
)
|
||||
if not time_match:
|
||||
continue
|
||||
text = '\n'.join(lines[2:]).strip()
|
||||
|
||||
def ts_to_seconds(ts):
|
||||
ts = ts.replace(',', '.')
|
||||
parts = ts.split(':')
|
||||
return int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
|
||||
|
||||
subs.append({
|
||||
"number": num,
|
||||
"startTime": ts_to_seconds(time_match.group(1)),
|
||||
"endTime": ts_to_seconds(time_match.group(2)),
|
||||
"text": text,
|
||||
})
|
||||
return json.dumps(subs, indent=4)
|
||||
|
||||
|
||||
def upload_transcript_to_castopod(episode_slug: str, episode_id: int, transcript_path: str) -> bool:
    """Upload an SRT (or plain-text) transcript to Castopod and link it.

    Copies the transcript into the Castopod container's media directory
    (SCP to the NAS, then `docker cp`), generates the companion JSON that
    Castopod's frontend renders, inserts a cp_media row, and points the
    episode's transcript_id at the new media row.

    Args:
        episode_slug: URL slug used to name the remote transcript files.
        episode_id: Castopod episode row id to link the transcript to.
        transcript_path: Local path; ".srt" suffix selects SRT handling.

    Returns:
        True on success, False on failure (warnings are printed, nothing raises).
    """
    print(" Uploading transcript to Castopod...")

    is_srt = transcript_path.endswith(".srt")
    ext = ".srt" if is_srt else ".txt"
    mimetype = "application/x-subrip" if is_srt else "text/plain"

    transcript_filename = f"{episode_slug}{ext}"
    remote_path = f"podcasts/{PODCAST_HANDLE}/{transcript_filename}"
    json_key = f"podcasts/{PODCAST_HANDLE}/{episode_slug}.json"

    # Upload SRT via SCP + docker cp (handles large files)
    nas_tmp = f"/share/CACHEDEV1_DATA/tmp/_transcript_{episode_slug}{ext}"
    scp_cmd = ["scp", "-P", str(NAS_SSH_PORT), transcript_path, f"{NAS_USER}@{NAS_HOST}:{nas_tmp}"]
    result = subprocess.run(scp_cmd, capture_output=True, text=True, timeout=60)
    if result.returncode != 0:
        print(f" Warning: SCP transcript failed: {result.stderr}")
        return False

    media_path = f"/var/www/castopod/public/media/{remote_path}"
    run_ssh_command(f'{DOCKER_PATH} cp {nas_tmp} {CASTOPOD_CONTAINER}:{media_path}', timeout=60)
    run_ssh_command(f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} chown www-data:www-data {media_path}')
    run_ssh_command(f'rm -f {nas_tmp}')

    # Generate and upload JSON for Castopod's frontend rendering
    if is_srt:
        json_content = _srt_to_castopod_json(transcript_path)
        json_tmp_local = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
        try:
            json_tmp_local.write(json_content)
            json_tmp_local.close()

            nas_json_tmp = f"/share/CACHEDEV1_DATA/tmp/_transcript_{episode_slug}.json"
            scp_json = ["scp", "-P", str(NAS_SSH_PORT), json_tmp_local.name, f"{NAS_USER}@{NAS_HOST}:{nas_json_tmp}"]
            json_result = subprocess.run(scp_json, capture_output=True, text=True, timeout=60)
            if json_result.returncode != 0:
                # Non-fatal: the SRT itself is already in place; surface the
                # failure instead of silently proceeding.
                print(f" Warning: SCP transcript JSON failed: {json_result.stderr}")
        finally:
            # Always remove the delete=False temp file, even if SCP raised.
            os.remove(json_tmp_local.name)

        json_media_path = f"/var/www/castopod/public/media/{json_key}"
        run_ssh_command(f'{DOCKER_PATH} cp {nas_json_tmp} {CASTOPOD_CONTAINER}:{json_media_path}', timeout=60)
        run_ssh_command(f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} chown www-data:www-data {json_media_path}')
        run_ssh_command(f'rm -f {nas_json_tmp}')

    # stat() the file rather than reading it fully into memory just to
    # measure it — transcripts can be multiple megabytes.
    file_size = os.path.getsize(transcript_path)

    # Build file_metadata with json_key — escape double quotes for shell embedding
    metadata_json = json.dumps({"json_key": json_key}) if is_srt else "NULL"
    metadata_sql = f"'{metadata_json}'" if is_srt else "NULL"
    metadata_sql_escaped = metadata_sql.replace('"', '\\"')

    insert_sql = (
        f"INSERT INTO cp_media (file_key, file_size, file_mimetype, file_metadata, type, "
        f"uploaded_by, updated_by, uploaded_at, updated_at) VALUES "
        f"('{remote_path}', {file_size}, '{mimetype}', {metadata_sql_escaped}, 'transcript', 1, 1, NOW(), NOW())"
    )
    db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{insert_sql}; SELECT LAST_INSERT_ID();"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f" Warning: Failed to insert transcript in database: {output}")
        return False

    try:
        # LAST_INSERT_ID() is the final line of mysql's output.
        media_id = int(output.strip().split('\n')[-1])
    except (ValueError, IndexError):
        print(f" Warning: Could not parse media ID from: {output}")
        return False

    update_sql = f"UPDATE cp_episodes SET transcript_id = {media_id} WHERE id = {episode_id}"
    db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{update_sql}"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f" Warning: Failed to link transcript to episode: {output}")
        return False

    # Clear Castopod's page cache so the new transcript shows immediately.
    cache_cmd = f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} php spark cache:clear'
    run_ssh_command(cache_cmd)

    print(f" Transcript uploaded and linked (media_id: {media_id})")
    return True
|
||||
|
||||
|
||||
def upload_chapters_to_castopod(episode_slug: str, episode_id: int, chapters_path: str) -> bool:
|
||||
"""Upload chapters file to Castopod via SSH and link in database."""
|
||||
print("[4.5/5] Uploading chapters to Castopod...")
|
||||
@@ -799,10 +948,10 @@ def post_to_social(metadata: dict, episode_slug: str, image_path: str = None):
|
||||
base_content = f"{metadata['title']}\n\n{metadata['description']}\n\n{episode_url}"
|
||||
|
||||
hashtags = "#podcast #LukeAtTheRoost #talkradio #callinshow #newepisode"
|
||||
hashtag_platforms = {"instagram", "facebook", "bluesky", "mastodon", "nostr"}
|
||||
hashtag_platforms = {"instagram", "facebook", "bluesky", "mastodon", "nostr", "linkedin", "threads", "tiktok"}
|
||||
|
||||
# Platform-specific content length limits
|
||||
PLATFORM_MAX_LENGTH = {"bluesky": 300}
|
||||
PLATFORM_MAX_LENGTH = {"bluesky": 300, "threads": 500, "tiktok": 2200}
|
||||
|
||||
# Post to each platform individually so one failure doesn't block others
|
||||
posted = 0
|
||||
@@ -902,7 +1051,7 @@ def upload_to_youtube(audio_path: str, metadata: dict, chapters: list,
|
||||
"-c:a", "aac", "-b:a", "192k",
|
||||
"-pix_fmt", "yuv420p", "-shortest",
|
||||
"-movflags", "+faststart", str(video_path)
|
||||
], capture_output=True, text=True, timeout=600)
|
||||
], capture_output=True, text=True, timeout=1800)
|
||||
if result.returncode != 0:
|
||||
print(f" Warning: ffmpeg failed: {result.stderr[-200:]}")
|
||||
return None
|
||||
@@ -987,22 +1136,32 @@ def upload_to_youtube(audio_path: str, metadata: dict, chapters: list,
|
||||
|
||||
|
||||
def get_next_episode_number() -> int:
|
||||
"""Get the next episode number from Castopod."""
|
||||
headers = get_auth_header()
|
||||
"""Get the next episode number from Castopod (DB first, API fallback)."""
|
||||
# Query DB directly — the REST API is unreliable
|
||||
cmd = (f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} '
|
||||
f'-N -e "SELECT COALESCE(MAX(number), 0) FROM cp_episodes WHERE podcast_id = {PODCAST_ID};"')
|
||||
success, output = run_ssh_command(cmd)
|
||||
if success and output.strip():
|
||||
try:
|
||||
return int(output.strip()) + 1
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# Fallback to API
|
||||
headers = get_auth_header()
|
||||
response = _session.get(
|
||||
f"{CASTOPOD_URL}/api/rest/v1/episodes",
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
return 1
|
||||
print("Warning: Could not determine episode number from API or DB")
|
||||
sys.exit(1)
|
||||
|
||||
episodes = response.json()
|
||||
if not episodes:
|
||||
return 1
|
||||
|
||||
# Filter to our podcast
|
||||
our_episodes = [ep for ep in episodes if ep.get("podcast_id") == PODCAST_ID]
|
||||
if not our_episodes:
|
||||
return 1
|
||||
@@ -1026,6 +1185,36 @@ def main():
|
||||
print(f"Error: Audio file not found: {audio_path}")
|
||||
sys.exit(1)
|
||||
|
||||
# Acquire exclusive lock to prevent concurrent/duplicate runs
|
||||
lock_fp = open(LOCK_FILE, "w")
|
||||
try:
|
||||
fcntl.flock(lock_fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
except OSError:
|
||||
print("Error: Another publish is already running (lock file held)")
|
||||
sys.exit(1)
|
||||
lock_fp.write(str(os.getpid()))
|
||||
lock_fp.flush()
|
||||
|
||||
# Kill the backend server to free memory for transcription
|
||||
server_was_running = False
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["lsof", "-ti", ":8000"], capture_output=True, text=True
|
||||
)
|
||||
pids = result.stdout.strip().split('\n') if result.stdout.strip() else []
|
||||
if pids:
|
||||
server_was_running = True
|
||||
print("Stopping backend server for resources...")
|
||||
for pid in pids:
|
||||
try:
|
||||
os.kill(int(pid), 9)
|
||||
except (ProcessLookupError, ValueError):
|
||||
pass
|
||||
import time as _time
|
||||
_time.sleep(1)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Determine episode number
|
||||
if args.episode_number:
|
||||
episode_number = args.episode_number
|
||||
@@ -1033,6 +1222,14 @@ def main():
|
||||
episode_number = get_next_episode_number()
|
||||
print(f"Episode number: {episode_number}")
|
||||
|
||||
# Guard against duplicate publish
|
||||
if not args.dry_run and _check_episode_exists_in_db(episode_number):
|
||||
print(f"Error: Episode {episode_number} already exists in Castopod. "
|
||||
f"Use --episode-number to specify a different number, or remove the existing episode first.")
|
||||
lock_fp.close()
|
||||
LOCK_FILE.unlink(missing_ok=True)
|
||||
sys.exit(1)
|
||||
|
||||
# Load session data if provided
|
||||
session_data = None
|
||||
if args.session_data:
|
||||
@@ -1073,6 +1270,11 @@ def main():
|
||||
f.write(labeled_text)
|
||||
print(f" Transcript saved to: {transcript_path}")
|
||||
|
||||
# Generate SRT from whisper segments (for Castopod/podcast apps)
|
||||
srt_path = audio_path.with_suffix(".srt")
|
||||
generate_srt(transcript["segments"], str(srt_path))
|
||||
print(f" SRT saved to: {srt_path}")
|
||||
|
||||
# Save session transcript alongside episode if available (has speaker labels)
|
||||
if session_data and session_data.get("transcript"):
|
||||
session_transcript_path = audio_path.with_suffix(".session_transcript.txt")
|
||||
@@ -1156,13 +1358,20 @@ def main():
|
||||
else:
|
||||
raise
|
||||
|
||||
# Step 4.5: Upload chapters via SSH
|
||||
# Step 4.5: Upload chapters and transcript via SSH
|
||||
chapters_uploaded = upload_chapters_to_castopod(
|
||||
episode["slug"],
|
||||
episode["id"],
|
||||
str(chapters_path)
|
||||
)
|
||||
|
||||
# Upload SRT transcript to Castopod (preferred for podcast apps)
|
||||
transcript_uploaded = upload_transcript_to_castopod(
|
||||
episode["slug"],
|
||||
episode["id"],
|
||||
str(srt_path)
|
||||
)
|
||||
|
||||
# Sync any remaining episode media to BunnyCDN (cover art, transcripts, etc.)
|
||||
print(" Syncing episode media to CDN...")
|
||||
sync_episode_media_to_bunny(episode["id"], uploaded_keys)
|
||||
@@ -1202,9 +1411,29 @@ def main():
|
||||
if not chapters_uploaded:
|
||||
print("\nNote: Chapters upload failed. Add manually via Castopod admin UI")
|
||||
print(f" Chapters file: {chapters_path}")
|
||||
if not transcript_uploaded:
|
||||
print("\nNote: Transcript upload to Castopod failed")
|
||||
print(f" Transcript file: {srt_path}")
|
||||
if not yt_video_id:
|
||||
print("\nNote: YouTube upload failed. Run 'python yt_auth.py' if token expired")
|
||||
|
||||
# Restart the backend server if it was running before
|
||||
if server_was_running:
|
||||
print("Restarting backend server...")
|
||||
project_dir = Path(__file__).parent
|
||||
subprocess.Popen(
|
||||
[sys.executable, "-m", "uvicorn", "backend.main:app",
|
||||
"--reload", "--reload-dir", "backend", "--host", "0.0.0.0", "--port", "8000"],
|
||||
cwd=project_dir,
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
start_new_session=True,
|
||||
)
|
||||
print(" Server restarted on port 8000")
|
||||
|
||||
# Release lock
|
||||
lock_fp.close()
|
||||
LOCK_FILE.unlink(missing_ok=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user