Expand all caller topic pools, add cross-episode topic dedup, publish ep35

Massively expanded all 8 caller topic pools from ~1200 to ~2500 entries to
reduce repeat calls. Added persistent topic history (data/used_topics_history.json)
with 30-day aging to prevent cross-episode duplicates. Published episode 35.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-13 05:45:22 -06:00
parent 0c2201fab5
commit d3490e1521
11 changed files with 2316 additions and 195 deletions

View File

@@ -31,8 +31,8 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from dotenv import load_dotenv
class TLSAdapter(HTTPAdapter):
"""Adapter to handle servers with older TLS configurations."""
class CastopodTLSAdapter(HTTPAdapter):
"""Adapter for Castopod's older TLS configuration (scoped to Castopod only)."""
def init_poolmanager(self, *args, **kwargs):
ctx = create_urllib3_context()
ctx.set_ciphers('DEFAULT@SECLEVEL=1')
@@ -46,9 +46,10 @@ class TLSAdapter(HTTPAdapter):
return super().send(*args, **kwargs)
# Use a session with TLS compatibility for all Castopod requests
# TLS compatibility only for Castopod domain — all other HTTPS uses default secure verification
_session = requests.Session()
_session.mount('https://', TLSAdapter())
_CASTOPOD_ORIGIN = "https://podcast.macneilmediagroup.com"
_session.mount(_CASTOPOD_ORIGIN, CastopodTLSAdapter())
# Load environment variables
load_dotenv(Path(__file__).parent / ".env")
@@ -485,7 +486,7 @@ def _create_episode_direct(audio_path: str, metadata: dict, episode_number: int,
# Copy SQL into MariaDB container and execute
run_ssh_command(f'{DOCKER_PATH} cp {nas_sql_path} {MARIADB_CONTAINER}:/tmp/_insert.sql')
exec_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} sh -c "mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N < /tmp/_insert.sql"'
exec_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} sh -c "mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} -N < /tmp/_insert.sql"'
success, output = run_ssh_command(exec_cmd, timeout=30)
run_ssh_command(f'rm -f {nas_sql_path}')
run_ssh_command(f'{DOCKER_PATH} exec {MARIADB_CONTAINER} rm -f /tmp/_insert.sql')
@@ -496,7 +497,7 @@ def _create_episode_direct(audio_path: str, metadata: dict, episode_number: int,
episode_id = int(output.strip().split('\n')[-1])
# Get the audio media ID for CDN upload
audio_id_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N -e "SELECT audio_id FROM cp_episodes WHERE id = {episode_id};"'
audio_id_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} -N -e "SELECT audio_id FROM cp_episodes WHERE id = {episode_id};"'
success, audio_id_str = run_ssh_command(audio_id_cmd)
audio_id = int(audio_id_str.strip()) if success else None
if audio_id:
@@ -582,10 +583,29 @@ def run_ssh_command(command: str, timeout: int = 30) -> tuple[bool, str]:
return False, str(e)
def _setup_mysql_auth():
    """Write /tmp/.my.cnf inside the MariaDB container with the DB password.

    Using a MySQL defaults file keeps the password off the mysql command
    line, where it would otherwise be visible to other users via ps.
    The content is shipped base64-encoded so shell quoting cannot mangle it.

    Returns:
        True if the file was created, False otherwise (a warning is printed).
    """
    defaults_file = f"[client]\npassword={DB_PASS}\n"
    encoded = base64.b64encode(defaults_file.encode()).decode()
    setup_cmd = (f'{DOCKER_PATH} exec {MARIADB_CONTAINER} sh -c '
                 f'"echo {encoded} | base64 -d > /tmp/.my.cnf && chmod 600 /tmp/.my.cnf"')
    ok, out = run_ssh_command(setup_cmd)
    if ok:
        return True
    print(f"Warning: Failed to set up MySQL auth file: {out}")
    return False
def _cleanup_mysql_auth():
    """Delete the temporary MySQL defaults file (/tmp/.my.cnf) from the container.

    Best-effort: the result of the remote rm is intentionally ignored.
    """
    rm_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} rm -f /tmp/.my.cnf'
    run_ssh_command(rm_cmd)
def _check_episode_exists_in_db(episode_number: int) -> bool | None:
"""Check if an episode with this number already exists in Castopod DB.
Returns True/False on success, None if the check itself failed."""
cmd = (f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} '
cmd = (f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} '
f'-N -e "SELECT COUNT(*) FROM cp_episodes WHERE number = {episode_number};"')
success, output = run_ssh_command(cmd)
if success and output.strip():
@@ -685,7 +705,7 @@ def upload_transcript_to_castopod(episode_slug: str, episode_id: int, transcript
f"uploaded_by, updated_by, uploaded_at, updated_at) VALUES "
f"('{remote_path}', {file_size}, '{mimetype}', {metadata_sql_escaped}, 'transcript', 1, 1, NOW(), NOW())"
)
db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{insert_sql}; SELECT LAST_INSERT_ID();"'
db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} -e "{insert_sql}; SELECT LAST_INSERT_ID();"'
success, output = run_ssh_command(db_cmd)
if not success:
print(f" Warning: Failed to insert transcript in database: {output}")
@@ -699,7 +719,7 @@ def upload_transcript_to_castopod(episode_slug: str, episode_id: int, transcript
return False
update_sql = f"UPDATE cp_episodes SET transcript_id = {media_id} WHERE id = {episode_id}"
db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{update_sql}"'
db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} -e "{update_sql}"'
success, output = run_ssh_command(db_cmd)
if not success:
print(f" Warning: Failed to link transcript to episode: {output}")
@@ -739,7 +759,7 @@ def upload_chapters_to_castopod(episode_slug: str, episode_id: int, chapters_pat
# Insert into media table
insert_sql = f"""INSERT INTO cp_media (file_key, file_size, file_mimetype, type, uploaded_by, updated_by, uploaded_at, updated_at)
VALUES ('{remote_path}', {file_size}, 'application/json', 'chapters', 1, 1, NOW(), NOW())"""
db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{insert_sql}; SELECT LAST_INSERT_ID();"'
db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} -e "{insert_sql}; SELECT LAST_INSERT_ID();"'
success, output = run_ssh_command(db_cmd)
if not success:
print(f" Warning: Failed to insert chapters in database: {output}")
@@ -755,7 +775,7 @@ def upload_chapters_to_castopod(episode_slug: str, episode_id: int, chapters_pat
# Link chapters to episode
update_sql = f"UPDATE cp_episodes SET chapters_id = {media_id} WHERE id = {episode_id}"
db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{update_sql}"'
db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} -e "{update_sql}"'
success, output = run_ssh_command(db_cmd)
if not success:
print(f" Warning: Failed to link chapters to episode: {output}")
@@ -822,7 +842,7 @@ def sync_episode_media_to_bunny(episode_id: int, already_uploaded: set):
f"UNION ALL SELECT transcript_id FROM cp_episodes WHERE id = {ep_id} AND transcript_id IS NOT NULL "
f"UNION ALL SELECT chapters_id FROM cp_episodes WHERE id = {ep_id} AND chapters_id IS NOT NULL)"
)
cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N -e "{query};"'
cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} -N -e "{query};"'
success, output = run_ssh_command(cmd)
if not success or not output:
return
@@ -1209,7 +1229,7 @@ def upload_to_youtube(audio_path: str, metadata: dict, chapters: list,
def get_next_episode_number() -> int:
"""Get the next episode number from Castopod (DB first, API fallback)."""
# Query DB directly — the REST API is unreliable
cmd = (f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} '
cmd = (f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} '
f'-N -e "SELECT COALESCE(MAX(number), 0) FROM cp_episodes WHERE podcast_id = {PODCAST_ID};"')
success, output = run_ssh_command(cmd)
if success and output.strip():
@@ -1286,6 +1306,9 @@ def main():
except Exception:
pass
# Set up MySQL auth (avoids password on command line)
_setup_mysql_auth()
# Determine episode number
if args.episode_number:
episode_number = args.episode_number
@@ -1299,12 +1322,14 @@ def main():
if exists is None:
print(f"Error: Could not reach Castopod DB to check for duplicates. "
f"Aborting to prevent duplicate uploads. Fix NAS connectivity and retry.")
_cleanup_mysql_auth()
lock_fp.close()
LOCK_FILE.unlink(missing_ok=True)
sys.exit(1)
if exists:
print(f"Error: Episode {episode_number} already exists in Castopod. "
f"Use --episode-number to specify a different number, or remove the existing episode first.")
_cleanup_mysql_auth()
lock_fp.close()
LOCK_FILE.unlink(missing_ok=True)
sys.exit(1)
@@ -1374,20 +1399,38 @@ def main():
episode = create_episode(str(audio_path), metadata, episode_number, duration=transcript["duration"])
_mark_step_done(episode_number, "castopod", {"episode_id": episode["id"], "slug": episode.get("slug")})
# Step 3.5: Upload to BunnyCDN
print("[3.5/5] Uploading to BunnyCDN...")
# Step 3.5: Upload chapters and transcript to Castopod
# (must happen before CDN sync so media records exist for syncing)
chapters_uploaded = upload_chapters_to_castopod(
episode["slug"],
episode["id"],
str(chapters_path)
)
transcript_uploaded = upload_transcript_to_castopod(
episode["slug"],
episode["id"],
str(srt_path)
)
# Step 3.7: Upload to BunnyCDN
# All media must be on CDN before publish triggers RSS rebuild
print("[3.7/5] Uploading to BunnyCDN...")
uploaded_keys = set()
# Audio: query file_key from DB, then upload to CDN
ep_id = episode["id"]
audio_media_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N -e "SELECT m.file_key FROM cp_media m JOIN cp_episodes e ON e.audio_id = m.id WHERE e.id = {ep_id};"'
audio_media_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql --defaults-extra-file=/tmp/.my.cnf -u {DB_USER} {DB_NAME} -N -e "SELECT m.file_key FROM cp_media m JOIN cp_episodes e ON e.audio_id = m.id WHERE e.id = {ep_id};"'
success, audio_file_key = run_ssh_command(audio_media_cmd)
if success and audio_file_key:
audio_file_key = audio_file_key.strip()
if direct_upload:
# Direct upload: we have the original file locally, upload straight to CDN
print(f" Uploading audio to BunnyCDN")
upload_to_bunny(str(audio_path), f"media/{audio_file_key}", "audio/mpeg")
if upload_to_bunny(str(audio_path), f"media/{audio_file_key}", "audio/mpeg"):
uploaded_keys.add(audio_file_key)
else:
print(f" Warning: Audio CDN upload failed, will be served from Castopod")
else:
# API upload: download Castopod's copy (ensures byte-exact match with RSS metadata)
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
@@ -1396,13 +1439,18 @@ def main():
print(f" Downloading from Castopod: {audio_file_key}")
if download_from_castopod(audio_file_key, tmp_audio):
print(f" Uploading audio to BunnyCDN")
upload_to_bunny(tmp_audio, f"media/{audio_file_key}", "audio/mpeg")
if upload_to_bunny(tmp_audio, f"media/{audio_file_key}", "audio/mpeg"):
uploaded_keys.add(audio_file_key)
else:
print(f" Warning: Audio CDN upload failed, will be served from Castopod")
else:
print(f" Castopod download failed, uploading original file")
upload_to_bunny(str(audio_path), f"media/{audio_file_key}", "audio/mpeg")
if upload_to_bunny(str(audio_path), f"media/{audio_file_key}", "audio/mpeg"):
uploaded_keys.add(audio_file_key)
else:
print(f" Warning: Audio CDN upload failed, will be served from Castopod")
finally:
Path(tmp_audio).unlink(missing_ok=True)
uploaded_keys.add(audio_file_key)
else:
print(f" Error: Could not determine audio file_key from Castopod DB")
print(f" Audio will be served from Castopod directly (not CDN)")
@@ -1410,8 +1458,8 @@ def main():
# Chapters
chapters_key = f"podcasts/{PODCAST_HANDLE}/{episode['slug']}-chapters.json"
print(f" Uploading chapters to BunnyCDN")
upload_to_bunny(str(chapters_path), f"media/{chapters_key}")
uploaded_keys.add(chapters_key)
if upload_to_bunny(str(chapters_path), f"media/{chapters_key}"):
uploaded_keys.add(chapters_key)
# Transcript
print(f" Uploading transcript to BunnyCDN")
@@ -1427,7 +1475,12 @@ def main():
# Add to sitemap
add_episode_to_sitemap(episode["slug"])
# Sync any remaining episode media to BunnyCDN (cover art, etc.)
print(" Syncing remaining episode media to CDN...")
sync_episode_media_to_bunny(episode["id"], uploaded_keys)
# Step 4: Publish via API (triggers RSS rebuild, federation, etc.)
# All media is now on CDN, so RSS links will resolve immediately
try:
published = publish_episode(episode["id"])
if "slug" in published:
@@ -1438,24 +1491,6 @@ def main():
else:
raise
# Step 4.5: Upload chapters and transcript via SSH
chapters_uploaded = upload_chapters_to_castopod(
episode["slug"],
episode["id"],
str(chapters_path)
)
# Upload SRT transcript to Castopod (preferred for podcast apps)
transcript_uploaded = upload_transcript_to_castopod(
episode["slug"],
episode["id"],
str(srt_path)
)
# Sync any remaining episode media to BunnyCDN (cover art, transcripts, etc.)
print(" Syncing episode media to CDN...")
sync_episode_media_to_bunny(episode["id"], uploaded_keys)
# Step 5: Deploy website (transcript + sitemap must be live before social links go out)
print("[5/5] Deploying website...")
project_dir = Path(__file__).parent
@@ -1521,6 +1556,9 @@ def main():
)
print(" Server restarted on port 8000")
# Clean up MySQL auth file
_cleanup_mysql_auth()
# Release lock
lock_fp.close()
LOCK_FILE.unlink(missing_ok=True)