#!/usr/bin/env python3
"""
Podcast Episode Publisher
Transcribes audio, generates metadata, and publishes to Castopod.

Usage:
    python publish_episode.py /path/to/episode.mp3
    python publish_episode.py /path/to/episode.mp3 --episode-number 3
    python publish_episode.py /path/to/episode.mp3 --dry-run
"""

import argparse
import base64
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path

import ssl
import requests
import urllib3
from requests.adapters import HTTPAdapter
from urllib3.util.ssl_ import create_urllib3_context
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from dotenv import load_dotenv


class TLSAdapter(HTTPAdapter):
    """Transport adapter for servers with older TLS configurations.

    Every connection pool created by this adapter uses an SSL context with
    the cipher string ``DEFAULT@SECLEVEL=1``, hostname checking disabled and
    certificate verification turned off. Every request sent through it is
    forced to ``verify=False``.
    """

    def init_poolmanager(self, *args, **kwargs):
        """Create the pool manager with the relaxed SSL context injected."""
        relaxed_ctx = create_urllib3_context()
        relaxed_ctx.set_ciphers('DEFAULT@SECLEVEL=1')
        # Hostname checking has to be switched off before verify_mode may be
        # set to CERT_NONE, so keep these two assignments in this order.
        relaxed_ctx.check_hostname = False
        relaxed_ctx.verify_mode = ssl.CERT_NONE
        pool_kwargs = {**kwargs, 'ssl_context': relaxed_ctx}
        return super().init_poolmanager(*args, **pool_kwargs)

    def send(self, *args, **kwargs):
        """Send the request with certificate verification always disabled."""
        send_kwargs = {**kwargs, 'verify': False}
        return super().send(*args, **send_kwargs)


# Shared session for Castopod requests; all https:// traffic on it goes
# through TLSAdapter (relaxed ciphers, no certificate verification).
_session = requests.Session()
_session.mount('https://', TLSAdapter())

# Load environment variables from the .env file that sits next to this script
load_dotenv(Path(__file__).parent / ".env")

# Castopod configuration
# NOTE(review): the credentials, keys and secrets in this section are
# hard-coded in source. Only OPENROUTER_API_KEY is read from the environment;
# consider moving the others to .env as well and rotating them.
CASTOPOD_URL = "https://podcast.macneilmediagroup.com"
CASTOPOD_USERNAME = "admin"
CASTOPOD_PASSWORD = "podcast2026api"
PODCAST_ID = 1
PODCAST_HANDLE = "LukeAtTheRoost"
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

WHISPER_MODEL = "base"  # Options: tiny, base, small, medium, large

# Postiz (social media posting)
POSTIZ_URL = "https://social.lukeattheroost.com"
POSTIZ_JWT_SECRET = "9d499bab97b303506af6ae18b29a60e6b5a0b1049177f533232ad14dd9729814"
POSTIZ_USER_ID = "00c14319-9eac-42c3-a467-68d3c1634fe1"
# Per-platform Postiz integration IDs; an optional "channel" entry is copied
# into the post settings by post_to_social.
POSTIZ_INTEGRATIONS = {
    "facebook": {"id": "cmll9hwqj0001mt6xnas2f17w"},
    "instagram": {"id": "cmlljn8920001pk6qqzutqwik"},
    "discord": {"id": "cmllkprk90001uc6v6fwd5y9p", "channel": "1471386314447519754"},
    "bluesky": {"id": "cmlk29h780001p76qa7sstp5h"},
    "mastodon": {"id": "cmlk2r3mf0001le6vx9ey0k5a"},
    "nostr": {"id": "cmlll3y78000cuc6vh8dcpl2w"},
}

# BunnyCDN Storage (used by upload_to_bunny)
BUNNY_STORAGE_ZONE = "lukeattheroost"
BUNNY_STORAGE_KEY = "92749cd3-85df-4cff-938fe35eb994-30f8-4cf2"
BUNNY_STORAGE_REGION = "la"  # Los Angeles

# NAS configuration: SSH target, Docker binary and container names, and the
# Castopod database credentials used for direct uploads and chapter linking.
NAS_HOST = "mmgnas-10g"
NAS_USER = "luke"
NAS_SSH_PORT = 8001
DOCKER_PATH = "/share/CACHEDEV1_DATA/.qpkg/container-station/bin/docker"
CASTOPOD_CONTAINER = "castopod-castopod-1"
MARIADB_CONTAINER = "castopod-mariadb-1"
DB_USER = "castopod"
DB_PASS = "BYtbFfk3ndeVabb26xb0UyKU"
DB_NAME = "castopod"


def get_auth_header():
    """Return the HTTP Basic ``Authorization`` header dict for the Castopod API."""
    user_and_pass = f"{CASTOPOD_USERNAME}:{CASTOPOD_PASSWORD}"
    token = base64.b64encode(user_and_pass.encode()).decode()
    return {"Authorization": f"Basic {token}"}


def label_transcript_speakers(text):
    """Add LUKE:/CALLER: speaker labels to transcript using LLM.

    The transcript is split into chunks of at most roughly 8000 characters,
    preferring to cut at a sentence end in the second half of the window.
    Each chunk is sent to OpenRouter together with the name of the last
    speaker seen in the previous chunk. If labeling a chunk fails for any
    reason (network error, timeout, non-200 status, malformed or empty
    response) the raw chunk text is kept so no transcript content is lost.

    Args:
        text: Unlabeled transcript text.

    Returns:
        The labeled transcript with blank lines between speaker turns, or an
        empty string when ``text`` is empty.
    """
    import time as _time

    prompt = """Insert speaker labels into this radio show transcript. The show is "Luke at the Roost". The host is LUKE. Callers call in one at a time.

CRITICAL: Output EVERY SINGLE WORD from the input. Do NOT summarize, shorten, paraphrase, or skip ANY text. The output must contain the EXACT SAME words as the input, with ONLY speaker labels and line breaks added.

At each speaker change, insert a blank line and the new speaker's label (e.g., "LUKE:" or "REGGIE:").

Speaker identification:
- LUKE is the host — he introduces callers, asks questions, does sponsor reads, opens and closes the show
- Callers are introduced by name by Luke (e.g., "let's talk to Earl", "next up Brenda")
- Use caller FIRST NAME in caps as the label
- When Luke says "Tell me about..." or asks a question, that's LUKE
- When someone responds with their story/opinion/answer, that's the CALLER

Output format — ONLY the labeled transcript with blank lines between turns. No notes, no commentary.

TRANSCRIPT:
"""
    # Chunk text into ~8000 char segments
    chunks = []
    remaining = text
    while remaining:
        if len(remaining) <= 8000:
            # A short tail is merged into the previous chunk instead of
            # costing a separate LLM call.
            if chunks and len(remaining) < 1000:
                chunks[-1] = chunks[-1] + " " + remaining
            else:
                chunks.append(remaining)
            break
        window = remaining[:8000]
        # Prefer a sentence boundary in the second half of the window; fall
        # back to a hard cut at the window size.
        pos = window.rfind('. ')
        if pos < 4000:
            pos = window.rfind('? ')
        if pos < 4000:
            pos = window.rfind('! ')
        if pos < 4000:
            pos = 8000
        chunks.append(remaining[:pos + 1].strip())
        remaining = remaining[pos + 1:].strip()

    speaker_label = re.compile(r'^([A-Z][A-Z\s\'-]+?):')
    labeled_parts = []
    context = ""
    for i, chunk in enumerate(chunks):
        full_prompt = prompt + chunk
        if context:
            full_prompt += f"\n\nCONTEXT: The previous section ended with speaker {context}"

        content = None
        try:
            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "anthropic/claude-3.5-sonnet",
                    "messages": [{"role": "user", "content": full_prompt}],
                    "max_tokens": 8192,
                    "temperature": 0
                },
                # Without a timeout a stalled connection would hang the
                # whole publish run indefinitely.
                timeout=300,
            )
            if response.status_code == 200:
                content = response.json()["choices"][0]["message"]["content"].strip()
        except (requests.RequestException, ValueError, KeyError, IndexError,
                TypeError, AttributeError):
            # Network failure or unexpected response shape: fall back below.
            content = None

        if not content:
            print(f"    Warning: Speaker labeling failed for chunk {i+1}, using raw text")
            labeled_parts.append(chunk)
        else:
            # Strip a markdown code fence if the model wrapped its answer.
            if content.startswith("```"):
                content = re.sub(r'^```\w*\n?', '', content)
                content = re.sub(r'\n?```$', '', content)
            labeled_parts.append(content)

            # Extract last speaker for context
            for line in reversed(content.strip().split('\n')):
                m = speaker_label.match(line.strip())
                if m:
                    context = m.group(1)
                    break

        # Small pause between consecutive API calls.
        if i < len(chunks) - 1:
            _time.sleep(0.5)

    result = "\n\n".join(labeled_parts)
    result = re.sub(r'\n{3,}', '\n\n', result)
    # Normalize: SPEAKER:\ntext -> SPEAKER: text
    result = re.sub(r'^([A-Z][A-Z\s\'-]+?):\s*\n(?!\n)', r'\1: ', result, flags=re.MULTILINE)
    return result


def transcribe_audio(audio_path: str) -> dict:
    """Transcribe an audio file with faster-whisper.

    Exits the process with status 1 if faster-whisper is not installed.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        A dict with ``segments`` (list of ``start``/``end``/``text`` dicts),
        ``full_text`` (segment texts joined by single spaces) and
        ``duration`` (audio duration truncated to an int).
    """
    print(f"[1/5] Transcribing {audio_path}...")

    try:
        from faster_whisper import WhisperModel
    except ImportError:
        print("Error: faster-whisper not installed. Run: pip install faster-whisper")
        sys.exit(1)

    whisper = WhisperModel(WHISPER_MODEL, compute_type="int8")
    raw_segments, info = whisper.transcribe(audio_path, word_timestamps=True)

    transcript_segments = [
        {"start": piece.start, "end": piece.end, "text": piece.text.strip()}
        for piece in raw_segments
    ]
    joined_text = " ".join(entry["text"] for entry in transcript_segments)

    print(f"    Transcribed {info.duration:.1f} seconds of audio")

    return {
        "segments": transcript_segments,
        "full_text": joined_text,
        "duration": int(info.duration),
    }


def generate_metadata(transcript: dict, episode_number: int) -> dict:
    """Use LLM to generate title, description, and chapters from transcript.

    Sends the timestamped transcript to OpenRouter and parses the JSON the
    model returns. Any failure (missing API key, network error or timeout,
    non-200 status, unexpected response shape, invalid JSON, or missing
    ``title``/``chapters`` keys) prints an error and exits with status 1.

    Args:
        transcript: Dict with a ``segments`` list whose items have ``start``
            (seconds) and ``text``.
        episode_number: Number used for the "Episode N:" title prefix.

    Returns:
        The metadata dict parsed from the LLM response.
    """
    print("[2/5] Generating metadata with LLM...")

    if not OPENROUTER_API_KEY:
        print("Error: OPENROUTER_API_KEY not set in .env")
        sys.exit(1)

    # Prepare transcript with timestamps for chapter detection
    timestamped_text = "".join(
        f"[{int(seg['start'] // 60):02d}:{int(seg['start'] % 60):02d}] {seg['text']}\n"
        for seg in transcript["segments"]
    )

    prompt = f"""Analyze this podcast transcript and generate metadata.

TRANSCRIPT:
{timestamped_text}

Generate a JSON response with:
1. "title": A catchy episode title (include "Episode {episode_number}:" prefix)
2. "description": A 2-4 sentence description summarizing the episode's content. Mention callers by name and their topics. End with something engaging.
3. "chapters": An array of chapter objects with "startTime" (in seconds) and "title". Include:
   - "Intro" at 0 seconds
   - A chapter for each caller/topic (use caller names if mentioned)
   - "Outro" near the end

Respond with ONLY valid JSON, no markdown or explanation."""

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": "anthropic/claude-3-haiku",
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.7
            },
            # Without a timeout a stalled connection would hang forever.
            timeout=300,
        )
    except requests.RequestException as e:
        print(f"Error from OpenRouter: {e}")
        sys.exit(1)

    if response.status_code != 200:
        print(f"Error from OpenRouter: {response.text}")
        sys.exit(1)

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        print(f"Error from OpenRouter: unexpected response: {response.text[:500]}")
        sys.exit(1)

    # Parse JSON from response (handle markdown code blocks)
    content = (content or "").strip()
    if content.startswith("```"):
        content = re.sub(r"^```(?:json)?\n?", "", content)
        content = re.sub(r"\n?```$", "", content)

    try:
        metadata = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"Error parsing LLM response: {e}")
        print(f"Response was: {content}")
        sys.exit(1)

    # The rest of the pipeline indexes these keys directly, so fail here with
    # a readable message instead of a KeyError further down.
    if not isinstance(metadata, dict) or "title" not in metadata or "chapters" not in metadata:
        print("Error parsing LLM response: missing 'title' or 'chapters'")
        print(f"Response was: {content}")
        sys.exit(1)

    print(f"    Title: {metadata['title']}")
    print(f"    Chapters: {len(metadata['chapters'])}")

    return metadata


# Size threshold used by create_episode: audio files larger than this are
# uploaded directly to the NAS instead of through the Castopod REST API.
CLOUDFLARE_UPLOAD_LIMIT = 100 * 1024 * 1024  # 100 MB


def create_episode(audio_path: str, metadata: dict, episode_number: int, duration: int = 0) -> dict:
    """Create the episode on Castopod, choosing the upload route by file size.

    Files at or below CLOUDFLARE_UPLOAD_LIMIT go through the REST API; larger
    files are uploaded directly (NAS copy plus database insert).

    Args:
        audio_path: Path of the audio file to upload.
        metadata: Episode metadata (title, description, ...).
        episode_number: Episode number to assign.
        duration: Playtime in seconds; only passed on to the direct route.

    Returns:
        The episode dict returned by the chosen creation routine.
    """
    size_bytes = os.path.getsize(audio_path)

    if size_bytes <= CLOUDFLARE_UPLOAD_LIMIT:
        print("[3/5] Creating episode on Castopod...")
        return _create_episode_api(audio_path, metadata, episode_number)

    print(f"[3/5] Creating episode on Castopod (direct, {size_bytes / 1024 / 1024:.0f} MB > 100 MB limit)...")
    return _create_episode_direct(audio_path, metadata, episode_number, size_bytes, duration)


def _create_episode_api(audio_path: str, metadata: dict, episode_number: int) -> dict:
    """Create episode via Castopod REST API (through Cloudflare).

    The upload is done with ``curl`` as a multipart form POST. Any failure
    (curl error or timeout, non-JSON response, response without an ``id``)
    prints an error and exits with status 1.

    Args:
        audio_path: Path of the MP3 file to upload.
        metadata: Dict with at least ``title`` and ``description``.
        episode_number: Episode number to assign.

    Returns:
        The episode dict parsed from the API response.
    """
    auth_value = get_auth_header()["Authorization"]
    slug = re.sub(r'[^a-z0-9]+', '-', metadata["title"].lower()).strip('-')

    # NOTE(review): the Authorization header is visible in the process list
    # while curl runs; consider passing it via a header file instead.
    # NOTE(review): an audio path containing ',' or ';' would need curl's
    # double-quote syntax in the -F value.
    cmd = [
        "curl", "-sk", "-X", "POST",
        f"{CASTOPOD_URL}/api/rest/v1/episodes",
        "-H", f"Authorization: {auth_value}",
        "-F", f"audio_file=@{audio_path};type=audio/mpeg",
        # Free-text fields use --form-string so curl sends them literally:
        # with -F a leading '@' or '<' is treated as a file reference and
        # ';type=' inside the value is parsed as a content-type directive.
        "--form-string", f"title={metadata['title']}",
        "--form-string", f"slug={slug}",
        "--form-string", f"description={metadata['description']}",
        "-F", "parental_advisory=explicit",
        "-F", "type=full",
        "-F", f"podcast_id={PODCAST_ID}",
        "-F", "created_by=1",
        "-F", "updated_by=1",
        "-F", f"episode_number={episode_number}",
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=900)
    except subprocess.TimeoutExpired:
        print("Error uploading: curl timed out after 900 seconds")
        sys.exit(1)
    if result.returncode != 0:
        print(f"Error uploading: {result.stderr}")
        sys.exit(1)

    try:
        episode = json.loads(result.stdout)
    except json.JSONDecodeError:
        print(f"Error parsing response: {result.stdout[:500]}")
        sys.exit(1)

    if not isinstance(episode, dict) or "id" not in episode:
        print(f"Error creating episode: {result.stdout[:500]}")
        sys.exit(1)

    print(f"    Created episode ID: {episode['id']}")
    # Use the locally computed slug if the API response omits it.
    print(f"    Slug: {episode.get('slug', slug)}")

    return episode


def _create_episode_direct(audio_path: str, metadata: dict, episode_number: int,
                           file_size: int, duration: int) -> dict:
    """Create episode by uploading directly to NAS and inserting into DB.

    Steps: SCP the audio to the NAS, ``docker cp`` it into the Castopod
    container, write the media/episode INSERT statements to a SQL file,
    transfer and execute that file inside the MariaDB container, then clear
    the Castopod cache. Failures in the upload or insert steps print an error
    and exit with status 1.

    Args:
        audio_path: Local path of the audio file.
        metadata: Dict with at least ``title`` and ``description``.
        episode_number: Episode number to store.
        file_size: Size of the audio file in bytes.
        duration: Playtime in seconds, stored in the media metadata JSON.

    Returns:
        Dict with the new episode's ``id`` and ``slug``.
    """
    import time as _time
    slug = re.sub(r'[^a-z0-9]+', '-', metadata["title"].lower()).strip('-')
    timestamp = int(_time.time())
    rand_hex = os.urandom(10).hex()
    filename = f"{timestamp}_{rand_hex}.mp3"
    file_key = f"podcasts/{PODCAST_HANDLE}/{filename}"
    nas_tmp = f"/share/CACHEDEV1_DATA/tmp/{filename}"
    guid = f"{CASTOPOD_URL}/@{PODCAST_HANDLE}/episodes/{slug}"
    desc_md = metadata["description"]
    # NOTE(review): the description is wrapped in <p> without HTML escaping;
    # confirm that is acceptable for descriptions containing '<' or '&'.
    desc_html = f"<p>{desc_md}</p>"
    duration_json = json.dumps({"playtime_seconds": duration, "avdataoffset": 85})

    # SCP audio to NAS
    print("    Uploading audio to NAS...")
    scp_cmd = ["scp", "-P", str(NAS_SSH_PORT), audio_path, f"{NAS_USER}@{NAS_HOST}:{nas_tmp}"]
    try:
        result = subprocess.run(scp_cmd, capture_output=True, text=True, timeout=600)
    except subprocess.TimeoutExpired:
        print("Error: SCP failed: timed out after 600 seconds")
        sys.exit(1)
    if result.returncode != 0:
        print(f"Error: SCP failed: {result.stderr}")
        sys.exit(1)

    # Docker cp into Castopod container
    print("    Copying into Castopod container...")
    media_path = f"/var/www/castopod/public/media/{file_key}"
    cp_cmd = f'{DOCKER_PATH} cp {nas_tmp} {CASTOPOD_CONTAINER}:{media_path}'
    success, output = run_ssh_command(cp_cmd, timeout=120)
    if not success:
        print(f"Error: docker cp failed: {output}")
        sys.exit(1)
    run_ssh_command(f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} chown www-data:www-data {media_path}')
    run_ssh_command(f"rm -f {nas_tmp}")

    # Build SQL and transfer it as a file to avoid shell escaping issues
    print("    Inserting media and episode records...")

    def _mysql_escape(s: str) -> str:
        """Escape a string for MySQL single-quoted literals."""
        return s.replace("\\", "\\\\").replace("'", "\\'")

    title_esc = _mysql_escape(metadata["title"])
    desc_md_esc = _mysql_escape(desc_md)
    desc_html_esc = _mysql_escape(desc_html)
    duration_json_esc = _mysql_escape(duration_json)

    # file_key, guid and slug are built from [a-z0-9-], the podcast handle and
    # generated hex, so they are embedded without escaping.
    sql = (
        f"INSERT INTO cp_media (file_key, file_size, file_mimetype, file_metadata, type, "
        f"uploaded_by, updated_by, uploaded_at, updated_at) VALUES "
        f"('{file_key}', {file_size}, 'audio/mpeg', '{duration_json_esc}', 'audio', 1, 1, NOW(), NOW());\n"
        f"SET @audio_id = LAST_INSERT_ID();\n"
        f"INSERT INTO cp_episodes (podcast_id, guid, title, slug, audio_id, "
        f"description_markdown, description_html, parental_advisory, number, type, "
        f"is_blocked, is_published_on_hubs, is_premium, created_by, updated_by, "
        f"published_at, created_at, updated_at) VALUES "
        f"({PODCAST_ID}, '{guid}', '{title_esc}', '{slug}', @audio_id, "
        f"'{desc_md_esc}', '{desc_html_esc}', 'explicit', {episode_number}, 'full', "
        f"0, 0, 0, 1, 1, NOW(), NOW(), NOW());\n"
        f"SELECT LAST_INSERT_ID();\n"
    )

    # Write SQL to a private local temp directory, SCP to NAS, docker cp into
    # MariaDB. The directory (and file) is removed when the block exits.
    nas_sql_path = "/share/CACHEDEV1_DATA/tmp/_castopod_insert.sql"
    with tempfile.TemporaryDirectory() as sql_dir:
        local_sql_path = os.path.join(sql_dir, "_castopod_insert.sql")
        with open(local_sql_path, "w") as f:
            f.write(sql)
        scp_sql = ["scp", "-P", str(NAS_SSH_PORT), local_sql_path, f"{NAS_USER}@{NAS_HOST}:{nas_sql_path}"]
        try:
            result = subprocess.run(scp_sql, capture_output=True, text=True, timeout=30)
        except subprocess.TimeoutExpired:
            print("Error: failed to SCP SQL file: timed out after 30 seconds")
            sys.exit(1)
    if result.returncode != 0:
        print(f"Error: failed to SCP SQL file: {result.stderr}")
        sys.exit(1)

    # Copy SQL into MariaDB container and execute
    run_ssh_command(f'{DOCKER_PATH} cp {nas_sql_path} {MARIADB_CONTAINER}:/tmp/_insert.sql')
    exec_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} sh -c "mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N < /tmp/_insert.sql"'
    success, output = run_ssh_command(exec_cmd, timeout=30)
    # Remove the SQL file on both sides whether or not the insert succeeded.
    run_ssh_command(f'rm -f {nas_sql_path}')
    run_ssh_command(f'{DOCKER_PATH} exec {MARIADB_CONTAINER} rm -f /tmp/_insert.sql')

    if not success:
        print(f"Error: DB insert failed: {output}")
        sys.exit(1)

    # The last output line is the result of SELECT LAST_INSERT_ID().
    try:
        episode_id = int(output.strip().split('\n')[-1])
    except (ValueError, IndexError):
        print(f"Error: could not parse episode ID from DB output: {output}")
        sys.exit(1)

    # Get the audio media ID (informational only)
    audio_id_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N -e "SELECT audio_id FROM cp_episodes WHERE id = {episode_id};"'
    success, audio_id_str = run_ssh_command(audio_id_cmd)
    audio_id = None
    if success:
        try:
            audio_id = int(audio_id_str.strip())
        except ValueError:
            # Unparseable output must not abort an already-created episode.
            audio_id = None
    if audio_id:
        print(f"    Audio media ID: {audio_id}")

    # Clear cache
    run_ssh_command(f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} php spark cache:clear')

    print(f"    Created episode ID: {episode_id}")
    print(f"    Slug: {slug}")

    return {"id": episode_id, "slug": slug}


def publish_episode(episode_id: int) -> dict:
    """Publish an existing Castopod episode immediately.

    Exits with status 1 if the API does not answer with HTTP 200.

    Args:
        episode_id: ID of the episode to publish.

    Returns:
        The episode dict parsed from the API response.
    """
    print("[4/5] Publishing episode...")

    publish_url = f"{CASTOPOD_URL}/api/rest/v1/episodes/{episode_id}/publish"
    form_fields = {
        "publication_method": "now",
        "created_by": "1"
    }
    response = _session.post(publish_url, headers=get_auth_header(), data=form_fields)

    if response.status_code != 200:
        print(f"Error publishing: {response.text}")
        sys.exit(1)

    episode = response.json()
    published_at = episode.get("published_at", {})
    # The API may return the timestamp either as a dict with a 'date' key or
    # as a plain value; print whichever form was received.
    shown = published_at.get('date', 'unknown') if isinstance(published_at, dict) else published_at
    print(f"    Published at: {shown}")

    return episode


def save_chapters(metadata: dict, output_path: str):
    """Write the episode chapters to ``output_path`` as a chapters JSON file.

    The file holds a ``version`` of "1.2.0" and the ``chapters`` list taken
    from ``metadata``, indented by two spaces.
    """
    payload = {"version": "1.2.0", "chapters": metadata["chapters"]}
    serialized = json.dumps(payload, indent=2)

    with open(output_path, "w") as out_file:
        out_file.write(serialized)

    print(f"    Chapters saved to: {output_path}")


def run_ssh_command(command: str, timeout: int = 30) -> tuple[bool, str]:
    """Execute ``command`` on the NAS over SSH.

    Args:
        command: Shell command line to run remotely.
        timeout: Seconds to wait before giving up.

    Returns:
        ``(ok, output)`` where ``ok`` is True when the exit status was 0 and
        ``output`` is the stripped stdout, or the stripped stderr when stdout
        is empty. On timeout or any other exception ``ok`` is False and
        ``output`` describes the failure.
    """
    destination = f"{NAS_USER}@{NAS_HOST}"
    argv = ["ssh", "-p", str(NAS_SSH_PORT), destination, command]
    try:
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        return False, "SSH command timed out"
    except Exception as exc:
        return False, str(exc)

    text_out = proc.stdout.strip() or proc.stderr.strip()
    return proc.returncode == 0, text_out


def upload_chapters_to_castopod(episode_slug: str, episode_id: int, chapters_path: str) -> bool:
    """Upload chapters file to Castopod via SSH and link in database.

    The file is written into the Castopod container, registered in
    ``cp_media``, linked to the episode through ``chapters_id`` and the
    Castopod cache is cleared.

    Args:
        episode_slug: Episode slug, used to name the remote chapters file.
        episode_id: ID of the episode row to link the chapters to.
        chapters_path: Local path of the chapters JSON file.

    Returns:
        True when the file was uploaded and linked; False (after printing a
        warning) when any step failed.
    """
    print("[4.5/5] Uploading chapters to Castopod...")

    chapters_filename = f"{episode_slug}-chapters.json"
    remote_path = f"podcasts/{PODCAST_HANDLE}/{chapters_filename}"

    # Read the file as raw bytes: exactly these bytes are transferred, and
    # their count is the file size recorded in the database.
    with open(chapters_path, "rb") as f:
        chapters_bytes = f.read()

    # Base64 encode for safe transfer
    chapters_b64 = base64.b64encode(chapters_bytes).decode()

    # Upload file to container using base64 decode
    upload_cmd = f'echo "{chapters_b64}" | base64 -d | {DOCKER_PATH} exec -i {CASTOPOD_CONTAINER} tee /var/www/castopod/public/media/{remote_path} > /dev/null'
    success, output = run_ssh_command(upload_cmd)
    if not success:
        print(f"    Warning: Failed to upload chapters file: {output}")
        return False

    # File size in bytes (not characters)
    file_size = len(chapters_bytes)

    # Insert into media table
    insert_sql = f"""INSERT INTO cp_media (file_key, file_size, file_mimetype, type, uploaded_by, updated_by, uploaded_at, updated_at)
        VALUES ('{remote_path}', {file_size}, 'application/json', 'chapters', 1, 1, NOW(), NOW())"""
    db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{insert_sql}; SELECT LAST_INSERT_ID();"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f"    Warning: Failed to insert chapters in database: {output}")
        return False

    # Parse media ID from output (last line is the LAST_INSERT_ID() value)
    try:
        lines = output.strip().split('\n')
        media_id = int(lines[-1])
    except (ValueError, IndexError):
        print(f"    Warning: Could not parse media ID from: {output}")
        return False

    # Link chapters to episode
    update_sql = f"UPDATE cp_episodes SET chapters_id = {media_id} WHERE id = {episode_id}"
    db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{update_sql}"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f"    Warning: Failed to link chapters to episode: {output}")
        return False

    # Clear Castopod cache
    cache_cmd = f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} php spark cache:clear'
    run_ssh_command(cache_cmd)

    print(f"    Chapters uploaded and linked (media_id: {media_id})")
    return True


def upload_to_bunny(local_path: str, remote_path: str, content_type: str = None) -> bool:
    """Upload a local file to the BunnyCDN storage zone.

    Args:
        local_path: File to upload.
        remote_path: Destination path inside the storage zone.
        content_type: MIME type; when omitted it is derived from the file
            extension, defaulting to ``application/octet-stream``.

    Returns:
        True when the storage API answers 201, otherwise False after
        printing a warning.
    """
    if not content_type:
        mime_by_suffix = {
            ".mp3": "audio/mpeg", ".png": "image/png", ".jpg": "image/jpeg",
            ".json": "application/json", ".srt": "application/x-subrip",
        }
        suffix = Path(local_path).suffix.lower()
        content_type = mime_by_suffix.get(suffix, "application/octet-stream")

    url = f"https://{BUNNY_STORAGE_REGION}.storage.bunnycdn.com/{BUNNY_STORAGE_ZONE}/{remote_path}"
    request_headers = {
        "AccessKey": BUNNY_STORAGE_KEY,
        "Content-Type": content_type,
    }
    with open(local_path, "rb") as payload:
        resp = requests.put(url, data=payload, headers=request_headers, timeout=600)

    if resp.status_code != 201:
        print(f"    Warning: BunnyCDN upload failed ({resp.status_code}): {resp.text[:200]}")
        return False
    return True


def download_from_castopod(file_key: str, local_path: str) -> bool:
    """Fetch a media file out of the Castopod container to the local machine.

    The file is first copied from the container to a staging path on the NAS
    with ``docker cp`` and then pulled with ``scp``. The staging file is
    removed afterwards whether or not the ``scp`` succeeded.

    Returns:
        True when both the container copy and the ``scp`` succeeded.
    """
    staging_path = f"/share/CACHEDEV1_DATA/tmp/castopod_{Path(file_key).name}"
    container_source = f"{CASTOPOD_CONTAINER}:/var/www/castopod/public/media/{file_key}"

    copied, _ = run_ssh_command(f'{DOCKER_PATH} cp {container_source} {staging_path}', timeout=120)
    if not copied:
        return False

    fetch_cmd = ["scp", "-P", str(NAS_SSH_PORT), f"{NAS_USER}@{NAS_HOST}:{staging_path}", local_path]
    try:
        fetched = subprocess.run(fetch_cmd, capture_output=True, text=True, timeout=300).returncode == 0
    except Exception:
        # Covers subprocess.TimeoutExpired as well as any launch failure.
        fetched = False

    run_ssh_command(f"rm -f {staging_path}")
    return fetched


def sync_episode_media_to_bunny(episode_id: int, already_uploaded: set):
    """Copy any episode media that is missing from the CDN to BunnyCDN.

    Looks up the file keys of the episode's audio, cover, transcript and
    chapters media, skips keys listed in ``already_uploaded`` or already
    served by the CDN (HEAD returns 200), and for the rest downloads the file
    from Castopod and uploads it to BunnyCDN.

    Args:
        episode_id: ID of the episode whose media should be synced.
        already_uploaded: File keys known to be on the CDN already.
    """
    query = (
        "SELECT DISTINCT m.file_key FROM cp_media m WHERE m.id IN ("
        f"SELECT audio_id FROM cp_episodes WHERE id = {episode_id} "
        f"UNION ALL SELECT cover_id FROM cp_episodes WHERE id = {episode_id} AND cover_id IS NOT NULL "
        f"UNION ALL SELECT transcript_id FROM cp_episodes WHERE id = {episode_id} AND transcript_id IS NOT NULL "
        f"UNION ALL SELECT chapters_id FROM cp_episodes WHERE id = {episode_id} AND chapters_id IS NOT NULL)"
    )
    lookup_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N -e "{query};"'
    ok, listing = run_ssh_command(lookup_cmd)
    if not (ok and listing):
        return

    stripped_rows = (row.strip() for row in listing.strip().split('\n'))
    for file_key in [row for row in stripped_rows if row]:
        if file_key in already_uploaded:
            continue

        # Best-effort existence probe; a failed probe falls through to a sync.
        try:
            if requests.head(f"https://cdn.lukeattheroost.com/media/{file_key}", timeout=10).status_code == 200:
                continue
        except Exception:
            pass

        # Reserve a local temp filename; the handle is closed right away.
        with tempfile.NamedTemporaryFile(suffix=Path(file_key).suffix, delete=False) as holder:
            local_copy = holder.name
        try:
            if not download_from_castopod(file_key, local_copy):
                print(f"    Warning: Could not sync {file_key} to CDN")
            else:
                print(f"    Syncing to CDN: {file_key}")
                upload_to_bunny(local_copy, f"media/{file_key}")
        finally:
            Path(local_copy).unlink(missing_ok=True)


def add_episode_to_sitemap(slug: str):
    """Append the episode page URL to ``website/sitemap.xml``.

    Does nothing when the sitemap file does not exist, and only prints a
    notice when the URL is already present.
    """
    sitemap_file = Path(__file__).parent / "website" / "sitemap.xml"
    if not sitemap_file.exists():
        return

    page_url = f"https://lukeattheroost.com/episode.html?slug={slug}"
    xml_text = sitemap_file.read_text()
    if page_url in xml_text:
        print(f"    Episode already in sitemap")
        return

    stamp = datetime.now().strftime("%Y-%m-%d")
    # The new <url> element carries its own closing </urlset>, so replacing
    # the existing closing tag inserts the entry just before the end.
    entry_with_close = f"""  <url>
    <loc>{page_url}</loc>
    <lastmod>{stamp}</lastmod>
    <changefreq>never</changefreq>
    <priority>0.7</priority>
  </url>
</urlset>"""

    sitemap_file.write_text(xml_text.replace("</urlset>", entry_with_close))
    print(f"    Added episode to sitemap.xml")


def generate_social_image(episode_number: int, description: str, output_path: str) -> str:
    """Generate a social media image with cover art, episode number, and description.

    The cover art is resized to 1080x1080, darkened with a gradient over the
    lower half, and overlaid with "EPISODE N", up to four wrapped lines of
    the description and the site URL. The result is saved as a JPEG.

    Args:
        episode_number: Number shown in the "EPISODE N" heading.
        description: Text to wrap below the heading.
        output_path: Where the JPEG is written.

    Returns:
        ``output_path``.
    """
    from PIL import Image, ImageDraw, ImageFont
    import textwrap

    COVER_ART = Path(__file__).parent / "website" / "images" / "cover.png"
    SIZE = 1080

    img = Image.open(COVER_ART).convert("RGBA")
    img = img.resize((SIZE, SIZE), Image.LANCZOS)

    # Dark gradient overlay on the bottom half, fading from transparent to
    # alpha 210 at the bottom edge.
    gradient = Image.new("RGBA", (SIZE, SIZE), (0, 0, 0, 0))
    draw_grad = ImageDraw.Draw(gradient)
    gradient_start = int(SIZE * 0.50)
    for y in range(gradient_start, SIZE):
        progress = (y - gradient_start) / (SIZE - gradient_start)
        alpha = int(210 * progress)
        draw_grad.line([(0, y), (SIZE, y)], fill=(0, 0, 0, alpha))

    img = Image.alpha_composite(img, gradient)
    draw = ImageDraw.Draw(img)

    # Fonts: prefer Montserrat, fall back to Arial Unicode if it is missing.
    try:
        font_episode = ImageFont.truetype("/Library/Fonts/Montserrat-ExtraBold.ttf", 64)
        font_desc = ImageFont.truetype("/Library/Fonts/Montserrat-Medium.ttf", 36)
        font_url = ImageFont.truetype("/Library/Fonts/Montserrat-SemiBold.ttf", 28)
    except OSError:
        font_episode = ImageFont.truetype("/Library/Fonts/Arial Unicode.ttf", 64)
        font_desc = ImageFont.truetype("/Library/Fonts/Arial Unicode.ttf", 36)
        font_url = ImageFont.truetype("/Library/Fonts/Arial Unicode.ttf", 28)

    margin = 60

    # Episode number
    ep_text = f"EPISODE {episode_number}"
    draw.text((margin, SIZE - 300), ep_text, font=font_episode, fill=(255, 200, 80))

    # Description — word-wrap to fit, at most 4 lines
    wrapped_lines = textwrap.wrap(description, width=45)
    lines = wrapped_lines[:4]
    if len(wrapped_lines) > 4:
        # Drop the last word to make room for the ellipsis. If the line has
        # no space, keep it whole rather than slicing off its last character.
        cut = lines[-1].rfind(" ")
        lines[-1] = (lines[-1][:cut] if cut > 0 else lines[-1]) + "..."
    desc_text = "\n".join(lines)
    draw.text((margin, SIZE - 220), desc_text, font=font_desc, fill=(255, 255, 255, 230),
              spacing=8)

    # Website URL — bottom right, right-aligned to the margin
    url_text = "lukeattheroost.com"
    bbox = draw.textbbox((0, 0), url_text, font=font_url)
    url_width = bbox[2] - bbox[0]
    draw.text((SIZE - margin - url_width, SIZE - 50), url_text, font=font_url,
              fill=(255, 200, 80, 200))

    # JPEG has no alpha channel, so flatten to RGB before saving.
    img = img.convert("RGB")
    img.save(output_path, "JPEG", quality=92)
    print(f"    Social image saved: {output_path}")
    return output_path


def _get_postiz_token():
    """Build the HS256-signed JWT sent in the ``auth`` header of Postiz API calls."""
    import jwt

    claims = {
        "id": POSTIZ_USER_ID,
        "email": "luke@macneilmediagroup.com",
        "providerName": "LOCAL",
        "activated": True,
        "isSuperAdmin": False,
    }
    return jwt.encode(claims, POSTIZ_JWT_SECRET, algorithm="HS256")


def upload_image_to_postiz(image_path: str) -> dict | None:
    """Upload an image file to Postiz.

    Returns:
        The media object parsed from the response on HTTP 200/201, or None
        (after printing a warning) on any other status or on an exception.
    """
    auth_token = _get_postiz_token()
    try:
        with open(image_path, "rb") as image_file:
            resp = requests.post(
                f"{POSTIZ_URL}/api/media/upload-simple",
                headers={"auth": auth_token},
                files={"file": ("social.jpg", image_file, "image/jpeg")},
                timeout=30,
            )
        if resp.status_code not in (200, 201):
            print(f"    Warning: Postiz image upload returned {resp.status_code}: {resp.text[:200]}")
            return None
        media = resp.json()
        print(f"    Uploaded image to Postiz (id: {media.get('id', 'unknown')})")
        return media
    except Exception as exc:
        print(f"    Warning: Postiz image upload failed: {exc}")
    return None


def post_to_social(metadata: dict, episode_slug: str, image_path: str = None):
    """Announce the episode on every configured Postiz integration.

    One request is made per platform so that a failure on one does not block
    the others. Hashtags are appended for selected platforms, and content
    over a platform's length limit is replaced by title + URL (+ hashtags)
    cut to that limit.

    Args:
        metadata: Dict with ``title`` and ``description``.
        episode_slug: Slug used to build the episode page URL.
        image_path: Optional image to upload and attach to each post.
    """
    print("[5.5/5] Posting to social media...")

    token = _get_postiz_token()

    # Attach the uploaded image only when Postiz returned a media id.
    image_ids = []
    if image_path:
        uploaded = upload_image_to_postiz(image_path)
        if uploaded and uploaded.get("id"):
            image_ids = [{"id": uploaded["id"], "path": uploaded.get("path", "")}]

    episode_url = f"https://lukeattheroost.com/episode.html?slug={episode_slug}"
    base_content = f"{metadata['title']}\n\n{metadata['description']}\n\n{episode_url}"

    hashtags = "#podcast #LukeAtTheRoost #talkradio #callinshow #newepisode"
    hashtag_platforms = {"instagram", "facebook", "bluesky", "mastodon", "nostr"}

    # Platform-specific content length limits
    PLATFORM_MAX_LENGTH = {"bluesky": 300}

    success_count = 0
    for platform, intg_config in POSTIZ_INTEGRATIONS.items():
        wants_hashtags = platform in hashtag_platforms
        content = f"{base_content}\n\n{hashtags}" if wants_hashtags else base_content

        limit = PLATFORM_MAX_LENGTH.get(platform)
        if limit and len(content) > limit:
            # Too long: drop the description, keep title + URL (+ hashtags).
            compact = f"{metadata['title']}\n\n{episode_url}"
            if wants_hashtags:
                compact += f"\n\n{hashtags}"
            content = compact[:limit]

        settings = {"post_type": "post"}
        if "channel" in intg_config:
            settings["channel"] = intg_config["channel"]

        payload = {
            "type": "now",
            "shortLink": False,
            "date": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z"),
            "tags": [],
            "posts": [{
                "integration": {"id": intg_config["id"]},
                "value": [{"content": content, "image": image_ids}],
                "settings": settings,
            }],
        }

        try:
            resp = requests.post(
                f"{POSTIZ_URL}/api/posts",
                headers={"auth": token, "Content-Type": "application/json"},
                json=payload,
                timeout=60,
            )
            if resp.status_code not in (200, 201):
                print(f"    Warning: {platform} failed ({resp.status_code}): {resp.text[:150]}")
            else:
                success_count += 1
                print(f"    Posted to {platform}")
        except Exception as exc:
            print(f"    Warning: {platform} failed: {exc}")

    print(f"    Posted to {success_count}/{len(POSTIZ_INTEGRATIONS)} channels")


def get_next_episode_number() -> int:
    """Get the next episode number from Castopod.

    Fetches the episode list from the Castopod REST API, keeps the entries
    whose ``podcast_id`` equals ``PODCAST_ID``, and returns one more than the
    highest ``number`` among them (missing or null numbers count as 0).

    Returns:
        The next episode number. Falls back to 1 when the API answers with a
        non-200 status, when the episode list is empty, or when none of the
        episodes belong to this podcast.

    Raises:
        requests.RequestException: If the request fails or exceeds the timeout.
    """
    response = _session.get(
        f"{CASTOPOD_URL}/api/rest/v1/episodes",
        headers=get_auth_header(),
        # Without a timeout an unresponsive server would hang the CLI forever.
        timeout=60,
    )

    if response.status_code != 200:
        # Make the fallback visible: silently numbering as episode 1 is
        # indistinguishable from a genuinely empty podcast.
        print(
            f"Warning: Could not list episodes ({response.status_code}); "
            "defaulting to episode number 1"
        )
        return 1

    episodes = response.json()
    if not episodes:
        return 1

    # The endpoint is not scoped to one podcast, so filter to ours.
    our_numbers = [
        ep.get("number", 0) or 0
        for ep in episodes
        if ep.get("podcast_id") == PODCAST_ID
    ]
    if not our_numbers:
        return 1

    return max(our_numbers) + 1


def main():
    """Command-line entry point: transcribe, describe, and publish one episode.

    Pipeline, in order:
      1. Parse arguments and resolve the audio file path.
      2. Pick the episode number (``--episode-number`` or auto-detected).
      3. Optionally load a session export JSON (``--session-data``).
      4. Transcribe the audio and generate metadata; session chapters and the
         ``--title`` / ``--description`` overrides replace generated values.
      5. Write ``<audio>.chapters.json`` and ``<audio>.transcript.txt`` (plus
         ``<audio>.session_transcript.txt`` when the session has a transcript)
         next to the audio file.
      6. With ``--dry-run``: print the metadata and stop here.
      7. Create the episode, upload audio/chapters/transcript to BunnyCDN,
         copy the transcript into ``website/transcripts/``, update the sitemap.
      8. Publish the episode, upload chapters to Castopod, sync remaining
         media to the CDN.
      9. Deploy the website, generate the social image, post to social media,
         and print a summary.

    Raises:
        SystemExit: With code 1 when the audio file does not exist; also
            propagated from ``publish_episode`` unless the audio exceeded
            ``CLOUDFLARE_UPLOAD_LIMIT`` (direct-upload path).
    """
    parser = argparse.ArgumentParser(description="Publish podcast episode to Castopod")
    parser.add_argument("audio_file", help="Path to the audio file (MP3)")
    parser.add_argument("--episode-number", "-n", type=int, help="Episode number (auto-detected if not provided)")
    parser.add_argument("--dry-run", "-d", action="store_true", help="Generate metadata but don't publish")
    parser.add_argument("--title", "-t", help="Override generated title")
    parser.add_argument("--description", help="Override generated description")
    parser.add_argument("--session-data", "-s", help="Path to session export JSON (from /api/session/export)")
    args = parser.parse_args()

    audio_path = Path(args.audio_file).expanduser().resolve()
    if not audio_path.exists():
        print(f"Error: Audio file not found: {audio_path}")
        sys.exit(1)

    # Determine episode number. Compare against None so an explicit
    # "--episode-number 0" is honoured instead of triggering auto-detection.
    if args.episode_number is not None:
        episode_number = args.episode_number
    else:
        episode_number = get_next_episode_number()
    print(f"Episode number: {episode_number}")

    # Load session data if provided
    session_data = None
    if args.session_data:
        session_path = Path(args.session_data).expanduser().resolve()
        if session_path.exists():
            with open(session_path, encoding="utf-8") as f:
                session_data = json.load(f)
            print(f"Loaded session data: {session_data.get('call_count', 0)} calls")
        else:
            print(f"Warning: Session data file not found: {session_path}")

    # Step 1: Transcribe
    transcript = transcribe_audio(str(audio_path))

    # Step 2: Generate metadata
    metadata = generate_metadata(transcript, episode_number)

    # Use session chapters if available (more accurate than LLM-generated)
    if session_data and session_data.get("chapters"):
        metadata["chapters"] = session_data["chapters"]
        print(f"    Using {len(metadata['chapters'])} chapters from session data")

    # Apply overrides
    if args.title:
        metadata["title"] = args.title
    if args.description:
        metadata["description"] = args.description

    # Save chapters file
    chapters_path = audio_path.with_suffix(".chapters.json")
    save_chapters(metadata, str(chapters_path))

    # Save transcript text file with LUKE:/CALLER: speaker labels.
    # Explicit UTF-8 so non-ASCII text does not depend on the platform locale.
    transcript_path = audio_path.with_suffix(".transcript.txt")
    raw_text = transcript["full_text"]
    labeled_text = label_transcript_speakers(raw_text)
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(labeled_text)
    print(f"    Transcript saved to: {transcript_path}")

    # Save session transcript alongside episode if available (has speaker labels)
    if session_data and session_data.get("transcript"):
        session_transcript_path = audio_path.with_suffix(".session_transcript.txt")
        with open(session_transcript_path, "w", encoding="utf-8") as f:
            f.write(session_data["transcript"])
        print(f"    Session transcript saved to: {session_transcript_path}")

    if args.dry_run:
        print("\n[DRY RUN] Would publish with:")
        print(f"  Title: {metadata['title']}")
        print(f"  Description: {metadata['description']}")
        print(f"  Chapters: {json.dumps(metadata['chapters'], indent=2)}")
        print("\nChapters file saved. Run without --dry-run to publish.")
        return

    # Step 3: Create episode
    direct_upload = os.path.getsize(str(audio_path)) > CLOUDFLARE_UPLOAD_LIMIT
    episode = create_episode(str(audio_path), metadata, episode_number, duration=transcript["duration"])

    # Step 3.5: Upload to BunnyCDN
    print("[3.5/5] Uploading to BunnyCDN...")
    # Keys already pushed here are handed to the later media sync.
    uploaded_keys = set()

    # Audio: query file_key from DB, then upload to CDN
    ep_id = episode["id"]
    audio_media_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -N -e "SELECT m.file_key FROM cp_media m JOIN cp_episodes e ON e.audio_id = m.id WHERE e.id = {ep_id};"'
    success, audio_file_key = run_ssh_command(audio_media_cmd)
    if success and audio_file_key:
        audio_file_key = audio_file_key.strip()
        if direct_upload:
            # Direct upload: we have the original file locally, upload straight to CDN
            print(f"    Uploading audio to BunnyCDN")
            upload_to_bunny(str(audio_path), f"media/{audio_file_key}", "audio/mpeg")
        else:
            # API upload: download Castopod's copy (ensures byte-exact match with RSS metadata)
            # Only the temp file's name is needed; it is filled by the download
            # and always removed in the finally block.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
                tmp_audio = tmp.name
            try:
                print(f"    Downloading from Castopod: {audio_file_key}")
                if download_from_castopod(audio_file_key, tmp_audio):
                    print(f"    Uploading audio to BunnyCDN")
                    upload_to_bunny(tmp_audio, f"media/{audio_file_key}", "audio/mpeg")
                else:
                    print(f"    Castopod download failed, uploading original file")
                    upload_to_bunny(str(audio_path), f"media/{audio_file_key}", "audio/mpeg")
            finally:
                Path(tmp_audio).unlink(missing_ok=True)
        uploaded_keys.add(audio_file_key)
    else:
        print(f"    Error: Could not determine audio file_key from Castopod DB")
        print(f"    Audio will be served from Castopod directly (not CDN)")

    # Chapters
    chapters_key = f"podcasts/{PODCAST_HANDLE}/{episode['slug']}-chapters.json"
    print(f"    Uploading chapters to BunnyCDN")
    upload_to_bunny(str(chapters_path), f"media/{chapters_key}")
    uploaded_keys.add(chapters_key)

    # Transcript
    print(f"    Uploading transcript to BunnyCDN")
    upload_to_bunny(str(transcript_path), f"transcripts/{episode['slug']}.txt", "text/plain")

    # Copy transcript to website dir for Cloudflare Pages.
    # parents=True so a fresh checkout without website/ does not crash here.
    website_transcript_dir = Path(__file__).parent / "website" / "transcripts"
    website_transcript_dir.mkdir(parents=True, exist_ok=True)
    website_transcript_path = website_transcript_dir / f"{episode['slug']}.txt"
    shutil.copy2(str(transcript_path), str(website_transcript_path))
    print(f"    Transcript copied to website/transcripts/")

    # Add to sitemap
    add_episode_to_sitemap(episode["slug"])

    # Step 4: Publish via API (triggers RSS rebuild, federation, etc.)
    try:
        published = publish_episode(episode["id"])
        if "slug" in published:
            episode = published
    except SystemExit:
        # A publish failure is only tolerated on the direct-upload path.
        if direct_upload:
            print("    Warning: Publish API failed, but episode is in DB with published_at set")
        else:
            raise

    # Step 4.5: Upload chapters via SSH
    chapters_uploaded = upload_chapters_to_castopod(
        episode["slug"],
        episode["id"],
        str(chapters_path)
    )

    # Sync any remaining episode media to BunnyCDN (cover art, transcripts, etc.)
    print("    Syncing episode media to CDN...")
    sync_episode_media_to_bunny(episode["id"], uploaded_keys)

    # Step 5: Deploy website (transcript + sitemap must be live before social links go out)
    print("[5/5] Deploying website...")
    project_dir = Path(__file__).parent
    try:
        deploy_result = subprocess.run(
            ["npx", "wrangler", "pages", "deploy", "website/",
             "--project-name=lukeattheroost", "--branch=main", "--commit-dirty=true"],
            capture_output=True, text=True, cwd=project_dir, timeout=120
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        # The episode is already published at this point; a hung deploy or a
        # missing npx binary is reported like any other deploy failure rather
        # than aborting before the social post and summary.
        print(f"    Warning: Website deploy failed: {e}")
    else:
        if deploy_result.returncode == 0:
            print("    Website deployed")
        else:
            print(f"    Warning: Website deploy failed: {deploy_result.stderr[:200]}")

    # Step 5.5: Generate social image and post
    social_image_path = str(audio_path.with_suffix(".social.jpg"))
    generate_social_image(episode_number, metadata["description"], social_image_path)
    post_to_social(metadata, episode["slug"], social_image_path)

    # Step 6: Summary
    print("\n[6/6] Done!")
    print("=" * 50)
    print(f"Episode URL: {CASTOPOD_URL}/@{PODCAST_HANDLE}/episodes/{episode['slug']}")
    print(f"RSS Feed: {CASTOPOD_URL}/@{PODCAST_HANDLE}/feed.xml")
    print("=" * 50)
    if not chapters_uploaded:
        print("\nNote: Chapters upload failed. Add manually via Castopod admin UI")
        print(f"      Chapters file: {chapters_path}")

# Run the publish pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()