Initial commit: AI Radio Show web application

- FastAPI backend with multiple TTS providers (Inworld, ElevenLabs, Kokoro, F5-TTS, etc.)
- Web frontend with caller management, music, and soundboard
- Whisper transcription integration
- OpenRouter/Ollama LLM support
- Castopod podcast publishing script

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-04 23:11:20 -07:00
commit 029ce6d689
25 changed files with 6817 additions and 0 deletions

54
.gitignore vendored Normal file
View File

@@ -0,0 +1,54 @@
# Environment
.env
*.env
# Python
__pycache__/
*.py[cod]
*$py.class
.venv/
venv/
env/
*.egg-info/
# Audio/Media (large files)
*.mp3
*.wav
*.m4a
*.ogg
# Sessions
sessions/
# IDE
.idea/
.vscode/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Whisper models (downloaded automatically)
*.pt
# Temporary
*.tmp
*.log
# Large model files (download separately)
*.onnx
*.safetensors
*.tar.bz2
*.bin
models/
asset/
kokoro-v1.0.onnx
voices-v1.0.bin
# Reference voices for TTS
ref_audio/
# Claude settings (local)
.claude/

9
audio_settings.json Normal file
View File

@@ -0,0 +1,9 @@
{
"input_device": 13,
"input_channel": 1,
"output_device": 13,
"caller_channel": 3,
"music_channel": 5,
"sfx_channel": 7,
"phone_filter": false
}

1
backend/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Backend package

41
backend/config.py Normal file
View File

@@ -0,0 +1,41 @@
"""Configuration settings for the AI Radio Show backend"""
import os
from pathlib import Path
from pydantic_settings import BaseSettings
from dotenv import load_dotenv
# Load .env from parent directory
load_dotenv(Path(__file__).parent.parent / ".env")
class Settings(BaseSettings):
    """Application configuration for the AI Radio Show backend.

    API keys default to values read from the process environment (the .env
    file one directory up is loaded at import time above); all other fields
    can also be overridden via environment variables through BaseSettings.
    """
    # API Keys
    elevenlabs_api_key: str = os.getenv("ELEVENLABS_API_KEY", "")
    openrouter_api_key: str = os.getenv("OPENROUTER_API_KEY", "")
    inworld_api_key: str = os.getenv("INWORLD_API_KEY", "")
    # LLM Settings
    llm_provider: str = "openrouter"  # "openrouter" or "ollama"
    openrouter_model: str = "anthropic/claude-3-haiku"
    ollama_model: str = "llama3.2"
    ollama_host: str = "http://localhost:11434"
    # TTS Settings
    tts_provider: str = "kokoro"  # "kokoro", "elevenlabs", "vits", or "bark"
    # Audio Settings
    sample_rate: int = 24000  # TTS output sample rate in Hz
    # Paths (relative to the repository root, one level above this package)
    base_dir: Path = Path(__file__).parent.parent
    sounds_dir: Path = base_dir / "sounds"
    music_dir: Path = base_dir / "music"
    sessions_dir: Path = base_dir / "sessions"

    class Config:
        env_file = ".env"
        extra = "ignore"  # silently ignore unknown env vars instead of raising


# Module-level singleton imported by the rest of the backend.
settings = Settings()

787
backend/main.py Normal file
View File

@@ -0,0 +1,787 @@
"""AI Radio Show - Control Panel Backend"""
import uuid
import asyncio
from pathlib import Path
from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional
from .config import settings
from .services.transcription import transcribe_audio
from .services.llm import llm_service
from .services.tts import generate_speech
from .services.audio import audio_service
app = FastAPI(title="AI Radio Show")
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# --- Callers ---
# Base caller info (name, voice) - backgrounds generated dynamically per session
import random
CALLER_BASES = {
"1": {"name": "Tony", "voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)},
"2": {"name": "Jasmine", "voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)},
"3": {"name": "Rick", "voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)},
"4": {"name": "Megan", "voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)},
"5": {"name": "Dennis", "voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)},
"6": {"name": "Tanya", "voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)},
"7": {"name": "Earl", "voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)},
"8": {"name": "Carla", "voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)},
"9": {"name": "Marcus", "voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)},
"0": {"name": "Brenda", "voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)},
}
# Background components for dynamic generation
JOBS_MALE = [
"runs a small HVAC business", "works as a long-haul trucker", "is a high school football coach",
"works construction, mostly commercial jobs", "is a paramedic", "manages a warehouse",
"is a line cook at a decent restaurant", "works IT for the city", "is a union electrician",
"owns a small landscaping company", "is a cop, 12 years on the force", "works at a car dealership",
"is a freelance photographer", "teaches middle school history", "is a firefighter",
"works as a hospital security guard", "runs a food truck", "is a session musician",
"works at a brewery", "is a physical therapist", "drives for UPS", "is a tattoo artist",
"works in insurance, hates it", "is a youth pastor", "manages a gym",
]
JOBS_FEMALE = [
"works as an ER nurse", "is a social worker", "runs a small bakery", "is a dental hygienist",
"works in HR for a hospital", "is a real estate agent", "teaches kindergarten",
"works as a bartender at a nice place", "is a paralegal", "runs a daycare out of her home",
"works retail management", "is a hairstylist, owns her chair", "is a vet tech",
"works in hospital billing", "is a massage therapist", "manages a restaurant",
"is a flight attendant", "works as a 911 dispatcher", "is a personal trainer",
"works at a nonprofit", "is an accountant at a small firm", "does medical transcription from home",
"is a court reporter", "works in pharmaceutical sales", "is a wedding planner",
]
PROBLEMS = [
# Family drama
"hasn't talked to their father in years and just got a call that he's dying",
"found out they were adopted and doesn't know how to process it",
"is being pressured to take care of an aging parent who was never there for them",
"just discovered a family secret that changes everything they thought they knew",
"has a sibling who's destroying themselves and nobody will intervene",
"is estranged from their kids and it's killing them",
"found out their parent had a whole other family nobody knew about",
"is watching their parents' marriage fall apart after 40 years",
# Career and purpose
"woke up and realized they've been in the wrong career for 15 years",
"got passed over for a promotion they deserved and is questioning everything",
"has a dream they gave up on years ago and it's haunting them",
"is successful on paper but feels completely empty inside",
"hates their job but can't afford to leave and it's breaking them",
"just got fired and doesn't know who they are without their work",
"is being asked to do something unethical at work and doesn't know what to do",
"watches their boss take credit for everything and is losing their mind",
# Mental health and inner struggles
"has been putting on a brave face but is barely holding it together",
"can't shake the feeling that their best years are behind them",
"keeps self-sabotaging every good thing in their life and doesn't know why",
"has been numb for months and is starting to scare themselves",
"can't stop comparing themselves to everyone else and it's destroying them",
"has intrusive thoughts they've never told anyone about",
"feels like a fraud and is waiting to be found out",
"is exhausted from being the strong one for everyone else",
# Grief and loss
"lost someone close and hasn't really dealt with it",
"is grieving someone who's still alive but is no longer the person they knew",
"never got closure with someone who died and it's eating at them",
"is watching their best friend slowly die and doesn't know how to be there",
"had a miscarriage nobody knows about and carries it alone",
# Regrets and past mistakes
"made a choice years ago that changed everything and wonders what if",
"hurt someone badly and never apologized, and it haunts them",
"let the one that got away go and thinks about them constantly",
"gave up on something important to make someone else happy and resents it",
"said something they can never take back and the guilt won't fade",
"was a bully growing up and is finally reckoning with it",
# Relationships (non-sexual)
"is falling out of love with their spouse and doesn't know what to do",
"married the wrong person and everyone knows it but them",
"feels invisible in their own relationship",
"is staying for the kids but dying inside",
"realized they don't actually like their partner as a person",
"is jealous of their partner's success and it's poisoning everything",
"found out their partner has been lying about something big",
# Friendship and loneliness
"realized they don't have any real friends, just people who need things from them",
"had a falling out with their best friend and the silence is deafening",
"is surrounded by people but has never felt more alone",
"is jealous of a friend's life and hates themselves for it",
"suspects a close friend is talking shit behind their back",
# Big life decisions
"is thinking about leaving everything behind and starting over somewhere new",
"has to make a choice that will hurt someone no matter what",
"is being pressured into something they don't want but can't say no",
"has been offered an opportunity that would change everything but they're terrified",
"knows they need to end something but can't pull the trigger",
# Addiction and bad habits
"is hiding how much they drink from everyone",
"can't stop gambling and is in deeper than anyone knows",
"is watching themselves become someone they don't recognize",
"keeps making the same mistake over and over expecting different results",
# Attraction and affairs (keep some of the original)
"is attracted to someone they shouldn't be and it's getting harder to ignore",
"has been seeing {affair_person} on the side",
"caught feelings for someone at work and it's fucking everything up",
# Sexual/desire (keep some but less dominant)
"can't stop thinking about {fantasy_subject}",
"discovered something about their own desires that surprised them",
"is questioning their sexuality after something that happened recently",
# General late-night confessions
"can't sleep and has been thinking too much about their life choices",
"had a weird day and needs to process it with someone",
"has been keeping a secret that's eating them alive",
"finally ready to admit something they've never said out loud",
]
PROBLEM_FILLS = {
"time": ["a few weeks", "months", "six months", "a year", "way too long"],
# Affairs (all adults)
"affair_person": ["their partner's best friend", "a coworker", "their ex", "a neighbor", "their boss", "their trainer", "someone they met online", "an old flame"],
# Fantasies and kinks (consensual adult stuff)
"fantasy_subject": ["a threesome", "being dominated", "dominating someone", "their partner with someone else", "a specific coworker", "group sex", "rough sex", "being watched", "exhibitionism"],
"kink": ["anal", "BDSM", "roleplay", "a threesome", "toys", "being tied up", "public sex", "swinging", "filming themselves", "bondage"],
# Secret behaviors (legal adult stuff)
"secret_behavior": ["hooking up with strangers", "sexting people online", "using dating apps behind their partner's back", "having an affair", "going to sex clubs", "watching way too much porn"],
"double_life": ["vanilla at home, freak elsewhere", "straight to their family, not so much in private", "married but on dating apps", "in a relationship but seeing other people"],
"hookup_person": ["their roommate", "a coworker", "their ex", "a friend's spouse", "a stranger from an app", "multiple people", "someone from the gym"],
# Discovery and identity (adult experiences)
"new_discovery": ["the same sex", "being submissive", "being dominant", "kink", "casual sex", "exhibitionism", "that they're bi"],
"unexpected_person": ["the same sex for the first time", "more than one person", "a complete stranger", "someone they never expected to be attracted to", "a friend"],
"sexuality_trigger": ["a specific hookup", "watching certain porn", "a drunk encounter", "realizing they're attracted to a friend", "an unexpected experience"],
"first_time": ["anal", "a threesome", "same-sex stuff", "BDSM", "an open relationship", "casual hookups", "being dominant", "being submissive"],
# Relationship issues
"partner_wants": ["an open relationship", "to bring someone else in", "things they're not sure about", "to watch them with someone else", "to try new things"],
"caught_doing": ["sexting someone", "on a dating app", "watching porn they'd never admit to", "flirting with someone else", "looking at someone's pics"],
# Attractions (appropriate adult scenarios)
"taboo_fantasy": ["someone they work with", "a friend's partner", "a specific scenario", "something they've never said out loud"],
"taboo_attraction": ["someone they work with", "a friend's partner", "their partner's friend", "someone they see all the time"],
}
INTERESTS = [
# General interests (normal people)
"really into true crime podcasts", "watches a lot of reality TV", "into fitness",
"follows sports", "big movie person", "reads a lot", "into music, has opinions",
"goes out a lot, active social life", "homebody, prefers staying in",
"into cooking and food", "outdoorsy type", "gamer", "works a lot, career focused",
# Relationship/psychology focused
"listens to relationship podcasts", "has done therapy, believes in it",
"reads about psychology and why people do what they do", "very online, knows all the discourse",
"into self-improvement stuff", "follows dating advice content",
# Sexually open (not the focus, but present)
"sex-positive, doesn't judge", "has experimented, open about it",
"comfortable with their body", "has stories if you ask",
]
QUIRKS = [
# Conversational style
"says 'honestly' and 'I mean' a lot", "trails off when thinking, then picks back up",
"laughs nervously when things get real", "very direct, doesn't sugarcoat",
"rambles a bit when nervous", "gets quiet when the topic hits close to home",
"deflects with humor when uncomfortable", "asks the host questions back",
# Openness about sex
"comfortable talking about sex when it comes up", "no shame about their desires",
"gets more explicit as they get comfortable", "treats sex like a normal topic",
"will share details if you ask", "surprisingly open once they start talking",
"has stories they've never told anyone", "testing how the host reacts before going deeper",
# Personality
"self-aware about their own bullshit", "confessional, needed to tell someone",
"a little drunk and honest because of it", "can't believe they're saying this out loud",
]
LOCATIONS = [
"outside Chicago", "in Phoenix", "near Atlanta", "in the Detroit area", "outside Boston",
"in North Jersey", "near Austin", "in the Bay Area", "outside Philadelphia", "in Denver",
"near Seattle", "in South Florida", "outside Nashville", "in Cleveland", "near Portland",
"in the Twin Cities", "outside Dallas", "in Baltimore", "near Sacramento", "in Pittsburgh",
]
def generate_caller_background(base: dict) -> str:
    """Generate a unique background blurb for a caller.

    Args:
        base: Caller base record; reads "gender" ("male"/"female") to pick a
            job pool and "age_range" as an inclusive (lo, hi) tuple.

    Returns:
        A one-paragraph description: age, job, location, current problem,
        two interests, and two conversational quirks.
    """
    age = random.randint(*base["age_range"])
    jobs = JOBS_MALE if base["gender"] == "male" else JOBS_FEMALE
    job = random.choice(jobs)
    location = random.choice(LOCATIONS)

    # Pick a problem template and fill any {placeholder} slots it contains.
    problem = random.choice(PROBLEMS)
    for key, options in PROBLEM_FILLS.items():
        placeholder = "{" + key + "}"
        if placeholder in problem:
            problem = problem.replace(placeholder, random.choice(options))

    interest1, interest2 = random.sample(INTERESTS, 2)
    quirk1, quirk2 = random.sample(QUIRKS, 2)

    def _cap(text: str) -> str:
        # Upper-case only the first character. str.capitalize() also
        # lower-cases the rest, which mangled fills such as "BDSM" or "ER".
        return text[:1].upper() + text[1:]

    return (
        f"{age}, {job} {location}. {_cap(problem)}. "
        f"{_cap(interest1)}, {interest2}. {_cap(quirk1)}, {quirk2}."
    )
def get_caller_prompt(caller: dict, conversation_summary: str = "") -> str:
    """Generate a natural system prompt for a caller.

    Args:
        caller: Dict with "name" and "vibe" (session-generated background).
        conversation_summary: Optional recap of recent exchanges; when
            non-empty it is embedded so the model keeps continuity.

    Returns:
        The complete system prompt string passed to the LLM.
    """
    context = ""
    if conversation_summary:
        # Kept flush-left: this text is embedded verbatim in the prompt below.
        context = f"""
CONVERSATION SO FAR:
{conversation_summary}
Continue naturally. Don't repeat yourself.
"""
    return f"""You're {caller['name']}, calling a late-night radio show. You trust this host.
{caller['vibe']}
{context}
HOW TO TALK:
- Sound like a real person chatting, not writing.
- Keep responses to 2-3 sentences. Enough to make your point, short enough for back-and-forth.
- ALWAYS finish your thought completely. Never stop mid-sentence.
- Swear naturally if it fits: fuck, shit, damn, etc.
SPELLING FOR TEXT-TO-SPEECH (use proper spelling so TTS pronounces correctly):
- Write "you know" not "yanno" or "y'know"
- Write "kind of" not "kinda"
- Write "sort of" not "sorta"
- Write "going to" not "gonna"
- Write "want to" not "wanna"
- Write "I don't know" not "dunno"
- Use full words, not phonetic slang spellings
GOOD examples:
- "I mean, it started innocent enough, but now I can't stop thinking about her."
- "Honestly I don't even know why I'm calling. I just needed to tell someone."
- "Look, my marriage is fine on paper, but something's missing, you know?"
BAD examples (too formal/literary):
- "I have this confession about my obsession"
- "It has been consuming all of my thoughts"
ABOUT SEX:
- You don't have to jump straight to explicit stuff
- But if the host asks or the conversation goes there, you're completely open about it
- When talking about sex, be direct and explicit - say exactly what happened, what you want, what turns you on
- No euphemisms when being explicit - use real words: fuck, cock, pussy, etc.
HARD LIMITS - NEVER mention or reference:
- Children, minors, or anyone under 18 in any sexual context
- Violence, harm, or non-consensual acts
- Illegal activities beyond normal adult behavior
- All sexual content must be between consenting adults only
OUTPUT: Spoken words only. No (actions), no *gestures*, no stage directions."""
# --- Session State ---
class Session:
    """In-memory state for one radio-show session.

    Tracks the active caller, the running conversation, and the caller
    backgrounds generated lazily for this session.
    """

    def __init__(self):
        # Short random id so log lines are easy to correlate.
        self.id = str(uuid.uuid4())[:8]
        self.current_caller_key: Optional[str] = None  # was annotated str but holds None when idle
        self.conversation: list[dict] = []
        # Generated backgrounds for this session, keyed by caller key.
        self.caller_backgrounds: dict[str, str] = {}

    def start_call(self, caller_key: str):
        """Begin a call with the given caller, clearing prior conversation."""
        self.current_caller_key = caller_key
        self.conversation = []

    def end_call(self):
        """End the active call and drop its conversation history."""
        self.current_caller_key = None
        self.conversation = []

    def add_message(self, role: str, content: str):
        """Append a chat message ("user" = host, "assistant" = caller)."""
        self.conversation.append({"role": role, "content": content})

    def get_caller_background(self, caller_key: str) -> str:
        """Get or generate background for a caller in this session."""
        if caller_key not in self.caller_backgrounds:
            base = CALLER_BASES.get(caller_key)
            if base:
                self.caller_backgrounds[caller_key] = generate_caller_background(base)
                print(f"[Session {self.id}] Generated background for {base['name']}: {self.caller_backgrounds[caller_key][:100]}...")
        return self.caller_backgrounds.get(caller_key, "")

    def get_conversation_summary(self) -> str:
        """Get a brief summary of conversation so far for context.

        Returns the last few exchanges as quoted lines, truncating long
        messages to 100 characters. Empty string for short conversations.
        """
        if len(self.conversation) <= 2:
            return ""
        caller = self.caller
        # Guard: previously crashed (None["name"]) if a summary was
        # requested while no call was active but history remained.
        caller_name = caller["name"] if caller else "Caller"
        summary_parts = []
        for msg in self.conversation[-6:]:  # Last 3 exchanges
            role = "Host" if msg["role"] == "user" else caller_name
            content = msg["content"]
            if len(content) > 100:
                summary_parts.append(f'{role}: "{content[:100]}..."')
            else:
                summary_parts.append(f'{role}: "{content}"')
        return "\n".join(summary_parts)

    @property
    def caller(self) -> Optional[dict]:
        """The active caller's name/voice/vibe dict, or None when idle."""
        if self.current_caller_key:
            base = CALLER_BASES.get(self.current_caller_key)
            if base:
                return {
                    "name": base["name"],
                    "voice": base["voice"],
                    "vibe": self.get_caller_background(self.current_caller_key),
                }
        return None

    def reset(self):
        """Reset session - clears all caller backgrounds for fresh personalities"""
        self.caller_backgrounds = {}
        self.current_caller_key = None
        self.conversation = []
        self.id = str(uuid.uuid4())[:8]
        print(f"[Session] Reset - new session ID: {self.id}")
session = Session()
# --- Static Files ---
frontend_dir = Path(__file__).parent.parent / "frontend"
app.mount("/css", StaticFiles(directory=frontend_dir / "css"), name="css")
app.mount("/js", StaticFiles(directory=frontend_dir / "js"), name="js")
@app.get("/")
async def index():
return FileResponse(frontend_dir / "index.html")
# --- Request Models ---
class ChatRequest(BaseModel):
text: str
class TTSRequest(BaseModel):
text: str
voice_id: str
phone_filter: bool = True
class AudioDeviceSettings(BaseModel):
input_device: Optional[int] = None
input_channel: Optional[int] = None
output_device: Optional[int] = None
caller_channel: Optional[int] = None
music_channel: Optional[int] = None
sfx_channel: Optional[int] = None
phone_filter: Optional[bool] = None
class MusicRequest(BaseModel):
track: str
action: str # "play", "stop", "volume"
volume: Optional[float] = None
class SFXRequest(BaseModel):
sound: str
# --- Audio Device Endpoints ---
@app.get("/api/audio/devices")
async def list_audio_devices():
"""List all available audio devices"""
return {"devices": audio_service.list_devices()}
@app.get("/api/audio/settings")
async def get_audio_settings():
"""Get current audio device configuration"""
return audio_service.get_device_settings()
@app.post("/api/audio/settings")
async def set_audio_settings(settings: AudioDeviceSettings):
"""Configure audio devices and channels"""
audio_service.set_devices(
input_device=settings.input_device,
input_channel=settings.input_channel,
output_device=settings.output_device,
caller_channel=settings.caller_channel,
music_channel=settings.music_channel,
sfx_channel=settings.sfx_channel,
phone_filter=settings.phone_filter
)
return audio_service.get_device_settings()
# --- Recording Endpoints ---
@app.post("/api/record/start")
async def start_recording():
"""Start recording from configured input device"""
if audio_service.input_device is None:
raise HTTPException(400, "No input device configured. Set one in /api/audio/settings")
success = audio_service.start_recording()
if not success:
raise HTTPException(400, "Failed to start recording (already recording?)")
return {"status": "recording"}
@app.post("/api/record/stop")
async def stop_recording():
    """Stop recording and transcribe the captured audio.

    Returns {"text": ..., "status": "transcribed"} on success, or
    {"text": "", "status": "no_audio"} when nothing usable was captured.
    """
    audio_bytes = audio_service.stop_recording()
    # Fewer than 100 bytes of raw PCM is effectively no capture at all.
    if len(audio_bytes) < 100:
        return {"text": "", "status": "no_audio"}
    # Transcribe the recorded audio (16kHz raw PCM from audio service)
    text = await transcribe_audio(audio_bytes, source_sample_rate=16000)
    return {"text": text, "status": "transcribed"}
# --- Caller Endpoints ---
@app.get("/api/callers")
async def get_callers():
"""Get list of available callers"""
return {
"callers": [
{"key": k, "name": v["name"]}
for k, v in CALLER_BASES.items()
],
"current": session.current_caller_key,
"session_id": session.id
}
@app.post("/api/session/reset")
async def reset_session():
"""Reset session - all callers get fresh backgrounds"""
session.reset()
return {"status": "reset", "session_id": session.id}
@app.post("/api/call/{caller_key}")
async def start_call(caller_key: str):
"""Start a call with a caller"""
if caller_key not in CALLER_BASES:
raise HTTPException(404, "Caller not found")
session.start_call(caller_key)
caller = session.caller # This generates the background if needed
return {
"status": "connected",
"caller": caller["name"],
"background": caller["vibe"] # Send background so you can see who you're talking to
}
@app.post("/api/hangup")
async def hangup():
    """Hang up the current call and play the hangup sound effect."""
    # Stop any playing caller audio immediately
    audio_service.stop_caller_audio()
    # Capture the name BEFORE end_call() clears the active caller.
    caller_name = session.caller["name"] if session.caller else None
    session.end_call()
    # Play hangup sound (best-effort: skipped if the file is missing)
    hangup_sound = settings.sounds_dir / "hangup.wav"
    if hangup_sound.exists():
        audio_service.play_sfx(str(hangup_sound))
    return {"status": "disconnected", "caller": caller_name}
# --- Chat & TTS Endpoints ---
import re
# Phonetic/slang spellings -> full words so TTS pronounces them correctly.
# Compiled once at import time; applied in order.
_SLANG_FIXES = [
    (re.compile(r"\by'know\b", re.IGNORECASE), "you know"),
    (re.compile(r"\byanno\b", re.IGNORECASE), "you know"),
    (re.compile(r"\byknow\b", re.IGNORECASE), "you know"),
    (re.compile(r"\bkinda\b", re.IGNORECASE), "kind of"),
    (re.compile(r"\bsorta\b", re.IGNORECASE), "sort of"),
    (re.compile(r"\bgonna\b", re.IGNORECASE), "going to"),
    (re.compile(r"\bwanna\b", re.IGNORECASE), "want to"),
    (re.compile(r"\bgotta\b", re.IGNORECASE), "got to"),
    (re.compile(r"\bdunno\b", re.IGNORECASE), "don't know"),
    (re.compile(r"\blemme\b", re.IGNORECASE), "let me"),
    (re.compile(r"\bcuz\b", re.IGNORECASE), "because"),
    # BUGFIX: the old pattern \b'cause\b could never match after a space or
    # at the start of a string (there is no word boundary between whitespace
    # and an apostrophe). A lookbehind does what was intended.
    (re.compile(r"(?<!\w)'cause\b", re.IGNORECASE), "because"),
    (re.compile(r"\blotta\b", re.IGNORECASE), "lot of"),
    (re.compile(r"\boutta\b", re.IGNORECASE), "out of"),
    (re.compile(r"\bimma\b", re.IGNORECASE), "I'm going to"),
    (re.compile(r"\btryna\b", re.IGNORECASE), "trying to"),
]


def clean_for_tts(text: str) -> str:
    """Strip non-speakable content and fix phonetic spellings for TTS.

    Removes stage directions in (), *...*, [], <>, and "He sighs"-style
    phrases, strips LLM-added surrounding quotes, normalizes slang
    spellings to full words, and tidies whitespace/punctuation.
    """
    # Remove content in parentheses: (laughs), (pausing), (looking away), etc.
    text = re.sub(r'\s*\([^)]*\)\s*', ' ', text)
    # Remove content in asterisks: *laughs*, *sighs*, etc.
    text = re.sub(r'\s*\*[^*]*\*\s*', ' ', text)
    # Remove content in brackets: [laughs], [pause], etc. (only Bark uses these)
    text = re.sub(r'\s*\[[^\]]*\]\s*', ' ', text)
    # Remove content in angle brackets: <laughs>, <sigh>, etc.
    text = re.sub(r'\s*<[^>]*>\s*', ' ', text)
    # Remove "He/She sighs" style stage directions (full phrase)
    text = re.sub(r'\b(He|She|I|They)\s+(sighs?|laughs?|pauses?|smiles?|chuckles?|grins?|nods?|shrugs?|frowns?)[^.]*\.\s*', '', text, flags=re.IGNORECASE)
    # Remove standalone stage direction words only if they look like directions (with adverbs)
    text = re.sub(r'\b(sighs?|laughs?|pauses?|chuckles?)\s+(heavily|softly|deeply|quietly|loudly|nervously|sadly)\b[.,]?\s*', '', text, flags=re.IGNORECASE)
    # Remove quotes around the response if LLM wrapped it
    text = re.sub(r'^["\']|["\']$', '', text.strip())
    # Fix phonetic spellings for proper TTS pronunciation
    for pattern, replacement in _SLANG_FIXES:
        text = pattern.sub(replacement, text)
    # Clean up extra whitespace
    text = re.sub(r'\s+', ' ', text)
    # Fix spaces before punctuation
    text = re.sub(r'\s+([.,!?])', r'\1', text)
    # Remove orphaned punctuation at start
    text = re.sub(r'^[.,]\s*', '', text)
    return text.strip()
@app.post("/api/chat")
async def chat(request: ChatRequest):
    """Chat with the current caller.

    Sends the host's text to the LLM as the active caller, cleans the reply
    for TTS, and returns the text plus the caller's voice id. Raises 400
    when no call is active.
    """
    if not session.caller:
        raise HTTPException(400, "No active call")
    session.add_message("user", request.text)
    # Include conversation summary for context
    conversation_summary = session.get_conversation_summary()
    system_prompt = get_caller_prompt(session.caller, conversation_summary)
    response = await llm_service.generate(
        messages=session.conversation[-10:],  # Reduced history for speed
        system_prompt=system_prompt
    )
    print(f"[Chat] Raw LLM: {response[:100] if response else '(empty)'}...")
    # Clean response for TTS (remove parenthetical actions, asterisks, etc.)
    response = clean_for_tts(response)
    print(f"[Chat] Cleaned: {response[:100] if response else '(empty)'}...")
    # Ensure we have a valid response (cleaning can strip everything)
    if not response or not response.strip():
        response = "Uh... sorry, what was that?"
    session.add_message("assistant", response)
    return {
        "text": response,
        "caller": session.caller["name"],
        "voice_id": session.caller["voice"]
    }
@app.post("/api/tts")
async def text_to_speech(request: TTSRequest):
    """Generate and play speech on caller output device (non-blocking).

    Synthesizes the text, starts playback in a daemon thread, and returns
    immediately. Raises 400 on empty text.
    """
    # Validate text is not empty
    if not request.text or not request.text.strip():
        raise HTTPException(400, "Text cannot be empty")
    # Phone filter disabled - always use "none"
    audio_bytes = await generate_speech(
        request.text,
        request.voice_id,
        "none"
    )
    # Play in background thread - returns immediately, can be interrupted by hangup
    import threading
    thread = threading.Thread(
        target=audio_service.play_caller_audio,
        args=(audio_bytes, 24000),
        daemon=True
    )
    thread.start()
    # Duration assumes 16-bit mono PCM at 24 kHz (2 bytes/sample) —
    # TODO confirm against generate_speech's actual output format.
    return {"status": "playing", "duration": len(audio_bytes) / 2 / 24000}
@app.post("/api/tts/stop")
async def stop_tts():
"""Stop any playing caller audio"""
audio_service.stop_caller_audio()
return {"status": "stopped"}
# --- Music Endpoints ---
@app.get("/api/music")
async def get_music():
"""Get available music tracks"""
tracks = []
if settings.music_dir.exists():
for ext in ['*.wav', '*.mp3', '*.flac']:
for f in settings.music_dir.glob(ext):
tracks.append({
"name": f.stem,
"file": f.name,
"path": str(f)
})
return {
"tracks": tracks,
"playing": audio_service.is_music_playing()
}
@app.post("/api/music/play")
async def play_music(request: MusicRequest):
"""Load and play a music track"""
track_path = settings.music_dir / request.track
if not track_path.exists():
raise HTTPException(404, "Track not found")
audio_service.load_music(str(track_path))
audio_service.play_music()
return {"status": "playing", "track": request.track}
@app.post("/api/music/stop")
async def stop_music():
"""Stop music playback"""
audio_service.stop_music()
return {"status": "stopped"}
@app.post("/api/music/volume")
async def set_music_volume(request: MusicRequest):
"""Set music volume"""
if request.volume is not None:
audio_service.set_music_volume(request.volume)
return {"status": "ok", "volume": request.volume}
# --- Sound Effects Endpoints ---
@app.get("/api/sounds")
async def get_sounds():
"""Get available sound effects"""
sounds = []
if settings.sounds_dir.exists():
for f in settings.sounds_dir.glob('*.wav'):
sounds.append({
"name": f.stem,
"file": f.name,
"path": str(f)
})
return {"sounds": sounds}
@app.post("/api/sfx/play")
async def play_sfx(request: SFXRequest):
"""Play a sound effect"""
sound_path = settings.sounds_dir / request.sound
if not sound_path.exists():
raise HTTPException(404, "Sound not found")
audio_service.play_sfx(str(sound_path))
return {"status": "playing", "sound": request.sound}
# --- LLM Settings Endpoints ---
@app.get("/api/settings")
async def get_settings():
"""Get LLM settings"""
return await llm_service.get_settings_async()
@app.post("/api/settings")
async def update_settings(data: dict):
"""Update LLM and TTS settings"""
llm_service.update_settings(
provider=data.get("provider"),
openrouter_model=data.get("openrouter_model"),
ollama_model=data.get("ollama_model"),
ollama_host=data.get("ollama_host"),
tts_provider=data.get("tts_provider")
)
return llm_service.get_settings()
# --- Server Control Endpoints ---
import subprocess
from collections import deque
# In-memory log buffer
_log_buffer = deque(maxlen=500)
def add_log(message: str):
    """Add a timestamped message to the in-memory log ring buffer.

    The buffer is a deque with maxlen=500, so old entries are discarded
    automatically once it fills.
    """
    import datetime
    timestamp = datetime.datetime.now().strftime("%H:%M:%S")
    _log_buffer.append(f"[{timestamp}] {message}")
# Override print to also log to buffer
import builtins
_original_print = builtins.print
def _logging_print(*args, **kwargs):
    """Replacement for builtins.print that also mirrors output to the log buffer.

    Installed globally (builtins.print is rebound below) so existing print
    calls throughout the app show up in /api/logs without changes.
    """
    try:
        _original_print(*args, **kwargs)
    except (BrokenPipeError, OSError):
        pass  # Ignore broken pipe errors from traceback printing
    try:
        # Join positional args the same way print would (default sep).
        message = " ".join(str(a) for a in args)
        if message.strip():
            add_log(message)
    except Exception:
        pass  # Don't let logging errors break the app
builtins.print = _logging_print
@app.get("/api/logs")
async def get_logs(lines: int = 100):
"""Get recent log lines"""
log_lines = list(_log_buffer)[-lines:]
return {"logs": log_lines}
@app.post("/api/server/restart")
async def restart_server():
"""Signal the server to restart (requires run.sh wrapper)"""
restart_flag = Path("/tmp/ai-radio-show.restart")
restart_flag.touch()
add_log("Restart signal sent - server will restart shortly")
return {"status": "restarting"}
@app.post("/api/server/stop")
async def stop_server():
"""Signal the server to stop (requires run.sh wrapper)"""
stop_flag = Path("/tmp/ai-radio-show.stop")
stop_flag.touch()
add_log("Stop signal sent - server will stop shortly")
return {"status": "stopping"}
@app.get("/api/server/status")
async def server_status():
"""Get server status info"""
return {
"status": "running",
"tts_provider": settings.tts_provider,
"llm_provider": llm_service.provider,
"session_id": session.id
}

View File

@@ -0,0 +1 @@
# Services package

479
backend/services/audio.py Normal file
View File

@@ -0,0 +1,479 @@
"""Server-side audio service for Loopback routing"""
import sounddevice as sd
import numpy as np
import threading
import queue
import json
from pathlib import Path
from typing import Optional, Callable
import wave
import time
# Settings file path
SETTINGS_FILE = Path(__file__).parent.parent.parent / "audio_settings.json"
class AudioService:
    """Manages audio I/O with multi-channel support for Loopback routing.

    One multi-channel output device carries three logical buses (caller TTS,
    music, SFX) on separate 1-indexed channels; input is recorded from a
    single channel of a capture device. Settings persist to a JSON file.
    """

    def __init__(self):
        # Device configuration
        self.input_device: Optional[int] = None
        self.input_channel: int = 1  # 1-indexed channel
        self.output_device: Optional[int] = None  # Single output device (multi-channel)
        self.caller_channel: int = 1  # Channel for caller TTS
        self.music_channel: int = 2  # Channel for music
        self.sfx_channel: int = 3  # Channel for SFX
        self.phone_filter: bool = False  # Phone filter on caller voices
        # Recording state
        self._recording = False
        self._record_thread: Optional[threading.Thread] = None
        self._audio_queue: queue.Queue = queue.Queue()
        self._recorded_audio: list = []
        self._record_device_sr: int = 48000  # Updated to the device rate when recording starts
        # Music playback state
        self._music_stream: Optional[sd.OutputStream] = None
        self._music_data: Optional[np.ndarray] = None
        self._music_resampled: Optional[np.ndarray] = None
        self._music_position: int = 0
        self._music_playing: bool = False
        self._music_volume: float = 0.3
        self._music_loop: bool = True
        # Caller playback state
        self._caller_stop_event = threading.Event()
        self._caller_thread: Optional[threading.Thread] = None
        # Sample rates
        self.input_sample_rate = 16000  # For Whisper
        self.output_sample_rate = 24000  # For TTS
        # Load saved settings
        self._load_settings()

    def _load_settings(self):
        """Load settings from disk (no-op when the settings file is absent)."""
        if SETTINGS_FILE.exists():
            try:
                with open(SETTINGS_FILE) as f:
                    data = json.load(f)
                self.input_device = data.get("input_device")
                self.input_channel = data.get("input_channel", 1)
                self.output_device = data.get("output_device")
                self.caller_channel = data.get("caller_channel", 1)
                self.music_channel = data.get("music_channel", 2)
                self.sfx_channel = data.get("sfx_channel", 3)
                self.phone_filter = data.get("phone_filter", False)
                print(f"Loaded audio settings: output={self.output_device}, channels={self.caller_channel}/{self.music_channel}/{self.sfx_channel}, phone_filter={self.phone_filter}")
            except Exception as e:
                # Corrupt/unreadable settings fall back to the defaults above.
                print(f"Failed to load audio settings: {e}")

    def _save_settings(self):
        """Save settings to disk"""
        try:
            data = {
                "input_device": self.input_device,
                "input_channel": self.input_channel,
                "output_device": self.output_device,
                "caller_channel": self.caller_channel,
                "music_channel": self.music_channel,
                "sfx_channel": self.sfx_channel,
                "phone_filter": self.phone_filter,
            }
            with open(SETTINGS_FILE, "w") as f:
                json.dump(data, f, indent=2)
            print(f"Saved audio settings")
        except Exception as e:
            print(f"Failed to save audio settings: {e}")

    def list_devices(self) -> list[dict]:
        """List all available audio devices"""
        devices = sd.query_devices()
        result = []
        for i, d in enumerate(devices):
            result.append({
                "id": i,
                "name": d["name"],
                "inputs": d["max_input_channels"],
                "outputs": d["max_output_channels"],
                "default_sr": d["default_samplerate"]
            })
        return result

    def set_devices(
        self,
        input_device: Optional[int] = None,
        input_channel: Optional[int] = None,
        output_device: Optional[int] = None,
        caller_channel: Optional[int] = None,
        music_channel: Optional[int] = None,
        sfx_channel: Optional[int] = None,
        phone_filter: Optional[bool] = None
    ):
        """Configure audio devices and channels.

        Only non-None arguments are applied; the result is persisted to disk.
        """
        if input_device is not None:
            self.input_device = input_device
        if input_channel is not None:
            self.input_channel = input_channel
        if output_device is not None:
            self.output_device = output_device
        if caller_channel is not None:
            self.caller_channel = caller_channel
        if music_channel is not None:
            self.music_channel = music_channel
        if sfx_channel is not None:
            self.sfx_channel = sfx_channel
        if phone_filter is not None:
            self.phone_filter = phone_filter
        # Persist to disk
        self._save_settings()

    def get_device_settings(self) -> dict:
        """Get current device configuration"""
        return {
            "input_device": self.input_device,
            "input_channel": self.input_channel,
            "output_device": self.output_device,
            "caller_channel": self.caller_channel,
            "music_channel": self.music_channel,
            "sfx_channel": self.sfx_channel,
            "phone_filter": self.phone_filter,
        }

    # --- Recording ---
    def start_recording(self) -> bool:
        """Start recording from input device.

        Returns:
            True if a recording thread was started, False if already
            recording or no input device is configured.
        """
        if self._recording:
            return False
        if self.input_device is None:
            print("No input device configured")
            return False
        self._recording = True
        self._recorded_audio = []
        self._record_thread = threading.Thread(target=self._record_worker)
        self._record_thread.start()
        print(f"Recording started from device {self.input_device}")
        return True

    def stop_recording(self) -> bytes:
        """Stop recording and return audio data resampled to 16kHz for Whisper.

        Returns:
            16-bit signed PCM bytes at 16 kHz mono; b"" when nothing was
            recorded.
        """
        import librosa
        if not self._recording:
            return b""
        self._recording = False
        if self._record_thread:
            self._record_thread.join(timeout=2.0)
        if not self._recorded_audio:
            return b""
        # Combine all chunks
        audio = np.concatenate(self._recorded_audio)
        device_sr = getattr(self, '_record_device_sr', 48000)
        print(f"Recording stopped: {len(audio)} samples @ {device_sr}Hz ({len(audio)/device_sr:.2f}s)")
        # Resample to 16kHz for Whisper
        if device_sr != 16000:
            audio = librosa.resample(audio, orig_sr=device_sr, target_sr=16000)
            print(f"Resampled to 16kHz: {len(audio)} samples")
        # Convert to bytes (16-bit PCM)
        audio_int16 = (audio * 32767).astype(np.int16)
        return audio_int16.tobytes()

    def _record_worker(self):
        """Background thread for recording from specific channel"""
        try:
            # Get device info
            device_info = sd.query_devices(self.input_device)
            max_channels = device_info['max_input_channels']
            device_sr = int(device_info['default_samplerate'])
            # Clamp the 1-indexed channel to the device and make it 0-indexed.
            record_channel = min(self.input_channel, max_channels) - 1
            # Store device sample rate for later resampling
            self._record_device_sr = device_sr
            print(f"Recording from device {self.input_device} ch {self.input_channel} @ {device_sr}Hz")

            def callback(indata, frames, time_info, status):
                if status:
                    print(f"Record status: {status}")
                if self._recording:
                    # Keep only the configured channel; copy because indata
                    # is reused by the audio backend.
                    self._recorded_audio.append(indata[:, record_channel].copy())

            with sd.InputStream(
                device=self.input_device,
                channels=max_channels,
                samplerate=device_sr,  # Use device's native rate
                dtype=np.float32,
                callback=callback,
                blocksize=1024
            ):
                # Keep the stream alive until stop_recording() clears the flag.
                while self._recording:
                    time.sleep(0.05)
        except Exception as e:
            print(f"Recording error: {e}")
            self._recording = False

    # --- Caller TTS Playback ---
    def _apply_fade(self, audio: np.ndarray, sample_rate: int, fade_ms: int = 15) -> np.ndarray:
        """Apply fade-in and fade-out to avoid clicks.

        Mutates and returns *audio*; clips shorter than two fade windows are
        returned unchanged.
        """
        fade_samples = int(sample_rate * fade_ms / 1000)
        if len(audio) < fade_samples * 2:
            return audio
        # Fade in
        fade_in = np.linspace(0, 1, fade_samples)
        audio[:fade_samples] *= fade_in
        # Fade out
        fade_out = np.linspace(1, 0, fade_samples)
        audio[-fade_samples:] *= fade_out
        return audio

    def play_caller_audio(self, audio_bytes: bytes, sample_rate: int = 24000):
        """Play caller TTS audio to specific channel of output device (interruptible).

        Args:
            audio_bytes: 16-bit signed PCM mono.
            sample_rate: Sample rate of *audio_bytes*.
        """
        import librosa
        # Stop any existing caller audio
        self.stop_caller_audio()
        self._caller_stop_event.clear()
        # Convert bytes to numpy
        audio = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        if self.output_device is None:
            # Fallback: blocking playback on the default device (not interruptible).
            print("No output device configured, using default")
            audio = self._apply_fade(audio, sample_rate)
            with sd.OutputStream(samplerate=sample_rate, channels=1, dtype=np.float32) as stream:
                stream.write(audio.reshape(-1, 1))
            return
        try:
            # Get device info and resample to device's native rate
            device_info = sd.query_devices(self.output_device)
            num_channels = device_info['max_output_channels']
            device_sr = int(device_info['default_samplerate'])
            channel_idx = min(self.caller_channel, num_channels) - 1
            # Resample if needed
            if sample_rate != device_sr:
                audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=device_sr)
            # Apply fade to prevent clicks
            audio = self._apply_fade(audio, device_sr)
            # Create multi-channel output with audio only on target channel
            multi_ch = np.zeros((len(audio), num_channels), dtype=np.float32)
            multi_ch[:, channel_idx] = audio
            print(f"Playing caller audio to device {self.output_device} ch {self.caller_channel} @ {device_sr}Hz")
            # Play in chunks so we can interrupt
            chunk_size = int(device_sr * 0.1)  # 100ms chunks
            pos = 0
            with sd.OutputStream(
                device=self.output_device,
                samplerate=device_sr,
                channels=num_channels,
                dtype=np.float32
            ) as stream:
                while pos < len(multi_ch) and not self._caller_stop_event.is_set():
                    end = min(pos + chunk_size, len(multi_ch))
                    stream.write(multi_ch[pos:end])
                    pos = end
            if self._caller_stop_event.is_set():
                print("Caller audio stopped early")
            else:
                print(f"Played caller audio: {len(audio)/device_sr:.2f}s")
        except Exception as e:
            print(f"Caller playback error: {e}")

    def stop_caller_audio(self):
        """Stop any playing caller audio"""
        self._caller_stop_event.set()

    # --- Music Playback ---
    def load_music(self, file_path: str) -> bool:
        """Load a music file for playback.

        Returns:
            True when the file was decoded successfully.
        """
        path = Path(file_path)
        if not path.exists():
            print(f"Music file not found: {file_path}")
            return False
        try:
            import librosa
            audio, sr = librosa.load(str(path), sr=self.output_sample_rate, mono=True)
            self._music_data = audio.astype(np.float32)
            self._music_position = 0
            print(f"Loaded music: {path.name} ({len(audio)/sr:.1f}s)")
            return True
        except Exception as e:
            print(f"Failed to load music: {e}")
            return False

    def play_music(self):
        """Start music playback to specific channel.

        Restarts from the beginning if already playing; looping is
        controlled by self._music_loop.
        """
        import librosa
        if self._music_data is None:
            print("No music loaded")
            return
        if self._music_playing:
            self.stop_music()
        self._music_playing = True
        self._music_position = 0
        if self.output_device is None:
            print("No output device configured, using default")
            num_channels = 2
            device = None
            device_sr = self.output_sample_rate
            channel_idx = 0
        else:
            device_info = sd.query_devices(self.output_device)
            num_channels = device_info['max_output_channels']
            device_sr = int(device_info['default_samplerate'])
            device = self.output_device
            channel_idx = min(self.music_channel, num_channels) - 1
        # Resample music to device sample rate if needed
        if self.output_sample_rate != device_sr:
            self._music_resampled = librosa.resample(
                self._music_data, orig_sr=self.output_sample_rate, target_sr=device_sr
            )
        else:
            self._music_resampled = self._music_data.copy()
        # Apply fade-in at start of track
        fade_samples = int(device_sr * 0.015)  # 15ms fade
        if len(self._music_resampled) > fade_samples:
            fade_in = np.linspace(0, 1, fade_samples).astype(np.float32)
            self._music_resampled[:fade_samples] *= fade_in

        def callback(outdata, frames, time_info, status):
            # Runs on the audio thread: fill the block with silence, then
            # copy the next slice of music onto the configured channel.
            outdata.fill(0)
            if not self._music_playing or self._music_resampled is None:
                return
            end_pos = self._music_position + frames
            if end_pos <= len(self._music_resampled):
                outdata[:, channel_idx] = self._music_resampled[self._music_position:end_pos] * self._music_volume
                self._music_position = end_pos
            else:
                # End of track reached inside this block.
                remaining = len(self._music_resampled) - self._music_position
                if remaining > 0:
                    outdata[:remaining, channel_idx] = self._music_resampled[self._music_position:] * self._music_volume
                if self._music_loop:
                    # Wrap around and fill the rest of the block from the start.
                    self._music_position = 0
                    wrap_frames = frames - remaining
                    if wrap_frames > 0:
                        outdata[remaining:, channel_idx] = self._music_resampled[:wrap_frames] * self._music_volume
                        self._music_position = wrap_frames
                else:
                    self._music_playing = False

        try:
            self._music_stream = sd.OutputStream(
                device=device,
                channels=num_channels,
                samplerate=device_sr,
                dtype=np.float32,
                callback=callback,
                blocksize=2048
            )
            self._music_stream.start()
            print(f"Music playback started on ch {self.music_channel} @ {device_sr}Hz")
        except Exception as e:
            print(f"Music playback error: {e}")
            self._music_playing = False

    def stop_music(self):
        """Stop music playback"""
        self._music_playing = False
        if self._music_stream:
            self._music_stream.stop()
            self._music_stream.close()
            self._music_stream = None
        self._music_position = 0
        print("Music stopped")

    def set_music_volume(self, volume: float):
        """Set music volume (0.0 to 1.0); values outside the range are clamped."""
        self._music_volume = max(0.0, min(1.0, volume))

    def is_music_playing(self) -> bool:
        """Check if music is currently playing"""
        return self._music_playing

    # --- SFX Playback ---
    def play_sfx(self, file_path: str):
        """Play a sound effect to specific channel using dedicated stream.

        Playback happens on a daemon thread so it never blocks the caller
        and never interrupts music/caller audio.
        """
        path = Path(file_path)
        if not path.exists():
            print(f"SFX file not found: {file_path}")
            return
        try:
            import librosa
            if self.output_device is None:
                audio, sr = librosa.load(str(path), sr=None, mono=True)
                audio = self._apply_fade(audio, sr)

                def play():
                    # Use a dedicated stream instead of sd.play()
                    with sd.OutputStream(samplerate=sr, channels=1, dtype=np.float32) as stream:
                        stream.write(audio.reshape(-1, 1))
            else:
                device_info = sd.query_devices(self.output_device)
                num_channels = device_info['max_output_channels']
                device_sr = int(device_info['default_samplerate'])
                channel_idx = min(self.sfx_channel, num_channels) - 1
                audio, _ = librosa.load(str(path), sr=device_sr, mono=True)
                audio = self._apply_fade(audio, device_sr)
                multi_ch = np.zeros((len(audio), num_channels), dtype=np.float32)
                multi_ch[:, channel_idx] = audio

                def play():
                    # Use dedicated stream to avoid interrupting other audio
                    with sd.OutputStream(
                        device=self.output_device,
                        samplerate=device_sr,
                        channels=num_channels,
                        dtype=np.float32
                    ) as stream:
                        stream.write(multi_ch)

            threading.Thread(target=play, daemon=True).start()
            print(f"Playing SFX: {path.name} on ch {self.sfx_channel}")
        except Exception as e:
            print(f"SFX playback error: {e}")


# Global instance
audio_service = AudioService()

View File

@@ -0,0 +1,112 @@
"""Edge TTS service - free Microsoft TTS API"""
import asyncio
import io
import numpy as np
from typing import Optional
try:
    import edge_tts
    EDGE_TTS_AVAILABLE = True
except ImportError:
    # edge-tts is an optional dependency; callers probe is_available().
    EDGE_TTS_AVAILABLE = False


class EdgeTTSService:
    """TTS using Microsoft Edge's free API"""

    def __init__(self):
        # Edge TTS streams MP3; we convert to PCM at this rate on the way out.
        self.sample_rate = 24000  # Edge TTS outputs 24kHz

    def is_available(self) -> bool:
        """True when the optional edge-tts package is importable."""
        return EDGE_TTS_AVAILABLE

    async def generate_speech(self, text: str, voice: str = "en-US-JennyNeural") -> bytes:
        """Generate speech from text using Edge TTS

        Args:
            text: Text to synthesize
            voice: Edge TTS voice name (e.g., "en-US-JennyNeural")
        Returns:
            Raw PCM audio bytes (16-bit signed int, 24kHz mono)
        Raises:
            RuntimeError: edge-tts is not installed or no audio was returned.
        """
        if not EDGE_TTS_AVAILABLE:
            raise RuntimeError("edge-tts not installed. Run: pip install edge-tts")
        communicate = edge_tts.Communicate(text, voice)
        # Collect MP3 audio data
        mp3_data = b''
        async for chunk in communicate.stream():
            if chunk['type'] == 'audio':
                mp3_data += chunk['data']
        if not mp3_data:
            raise RuntimeError("No audio generated")
        # Convert MP3 to PCM
        pcm_data = await self._mp3_to_pcm(mp3_data)
        return pcm_data

    async def _mp3_to_pcm(self, mp3_data: bytes) -> bytes:
        """Convert MP3 to raw PCM using ffmpeg or pydub"""
        # Fix: asyncio.get_event_loop() is deprecated inside coroutines;
        # use the currently running loop instead.
        loop = asyncio.get_running_loop()

        def convert():
            try:
                # Try pydub first (more reliable)
                from pydub import AudioSegment
                audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
                # Convert to 24kHz mono 16-bit
                audio = audio.set_frame_rate(24000).set_channels(1).set_sample_width(2)
                return audio.raw_data
            except ImportError:
                pass
            # Fallback to ffmpeg subprocess
            import subprocess
            process = subprocess.Popen(
                [
                    'ffmpeg', '-i', 'pipe:0',
                    '-f', 's16le',
                    '-acodec', 'pcm_s16le',
                    '-ar', '24000',
                    '-ac', '1',
                    'pipe:1'
                ],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            pcm_data, stderr = process.communicate(input=mp3_data)
            if process.returncode != 0:
                raise RuntimeError(f"ffmpeg failed: {stderr.decode()}")
            return pcm_data

        # MP3 decoding is blocking work; run it in the default executor.
        return await loop.run_in_executor(None, convert)

    async def list_voices(self) -> list[dict]:
        """List available Edge TTS voices (English locales only)."""
        if not EDGE_TTS_AVAILABLE:
            return []
        voices = await edge_tts.list_voices()
        return [
            {
                "id": v["ShortName"],
                "name": v["ShortName"].replace("Neural", ""),
                "gender": v["Gender"],
                "locale": v["Locale"],
            }
            for v in voices
            if v["Locale"].startswith("en-")
        ]
# Module-level singleton shared by the API layer.
edge_tts_service = EdgeTTSService()


def is_edge_tts_available() -> bool:
    """Convenience wrapper: report the singleton's availability."""
    return edge_tts_service.is_available()

175
backend/services/llm.py Normal file
View File

@@ -0,0 +1,175 @@
"""LLM service with OpenRouter and Ollama support"""
import httpx
from typing import Optional
from ..config import settings
# Available OpenRouter models
# Curated presets offered in the settings UI; the service accepts any
# OpenRouter model ID, these are just the defaults shown to the user.
OPENROUTER_MODELS = [
    "anthropic/claude-3-haiku",
    "anthropic/claude-3.5-sonnet",
    "openai/gpt-4o-mini",
    "openai/gpt-4o",
    "google/gemini-flash-1.5",
    "google/gemini-pro-1.5",
    "meta-llama/llama-3.1-8b-instruct",
    "mistralai/mistral-7b-instruct",
]
class LLMService:
    """Abstraction layer for LLM providers (OpenRouter or a local Ollama).

    Also tracks the currently selected TTS provider so the settings UI can
    read and write all runtime-tunable options through one object.
    """

    def __init__(self):
        # Seed runtime-tunable options from the static config defaults.
        self.provider = settings.llm_provider
        self.openrouter_model = settings.openrouter_model
        self.ollama_model = settings.ollama_model
        self.ollama_host = settings.ollama_host
        self.tts_provider = settings.tts_provider

    def update_settings(
        self,
        provider: Optional[str] = None,
        openrouter_model: Optional[str] = None,
        ollama_model: Optional[str] = None,
        ollama_host: Optional[str] = None,
        tts_provider: Optional[str] = None
    ):
        """Update LLM settings.

        Only truthy values are applied, so passing None (or "") leaves the
        corresponding field unchanged.
        """
        if provider:
            self.provider = provider
        if openrouter_model:
            self.openrouter_model = openrouter_model
        if ollama_model:
            self.ollama_model = ollama_model
        if ollama_host:
            self.ollama_host = ollama_host
        if tts_provider:
            self.tts_provider = tts_provider
            # Also update the global settings so TTS service picks it up
            settings.tts_provider = tts_provider

    async def get_ollama_models(self) -> list[str]:
        """Fetch available models from Ollama"""
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(f"{self.ollama_host}/api/tags")
                response.raise_for_status()
                data = response.json()
                return [model["name"] for model in data.get("models", [])]
        except Exception as e:
            # Ollama may simply not be running; treat that as "no models".
            print(f"Failed to fetch Ollama models: {e}")
            return []

    def get_settings(self) -> dict:
        """Get current settings (sync version without Ollama models)"""
        return {
            "provider": self.provider,
            "openrouter_model": self.openrouter_model,
            "ollama_model": self.ollama_model,
            "ollama_host": self.ollama_host,
            "tts_provider": self.tts_provider,
            "available_openrouter_models": OPENROUTER_MODELS,
            "available_ollama_models": []  # Fetched separately
        }

    async def get_settings_async(self) -> dict:
        """Get current settings with Ollama models"""
        ollama_models = await self.get_ollama_models()
        return {
            "provider": self.provider,
            "openrouter_model": self.openrouter_model,
            "ollama_model": self.ollama_model,
            "ollama_host": self.ollama_host,
            "tts_provider": self.tts_provider,
            "available_openrouter_models": OPENROUTER_MODELS,
            "available_ollama_models": ollama_models
        }

    async def generate(
        self,
        messages: list[dict],
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Generate a response from the LLM.
        Args:
            messages: List of message dicts with 'role' and 'content'
            system_prompt: Optional system prompt to prepend
        Returns:
            Generated text response
        """
        if system_prompt:
            messages = [{"role": "system", "content": system_prompt}] + messages
        if self.provider == "openrouter":
            return await self._call_openrouter(messages)
        else:
            return await self._call_ollama(messages)

    async def _call_openrouter(self, messages: list[dict]) -> str:
        """Call OpenRouter API with retry.

        On timeout the request is retried once; on any failure an in-character
        fallback line is returned instead of raising.
        """
        for attempt in range(2):  # Try twice
            try:
                async with httpx.AsyncClient(timeout=30.0) as client:
                    response = await client.post(
                        "https://openrouter.ai/api/v1/chat/completions",
                        headers={
                            "Authorization": f"Bearer {settings.openrouter_api_key}",
                            "Content-Type": "application/json",
                        },
                        json={
                            "model": self.openrouter_model,
                            "messages": messages,
                            "max_tokens": 100,
                        },
                    )
                    response.raise_for_status()
                    data = response.json()
                    return data["choices"][0]["message"]["content"]
            except (httpx.TimeoutException, httpx.ReadTimeout):
                print(f"OpenRouter timeout (attempt {attempt + 1})")
                if attempt == 0:
                    continue  # Retry once
                return "Uh, sorry, I lost you there for a second. What was that?"
            except Exception as e:
                print(f"OpenRouter error: {e}")
                return "Yeah... I don't know, man."
        return "Uh, hold on a sec..."

    async def _call_ollama(self, messages: list[dict]) -> str:
        """Call Ollama API.

        Failures return an in-character fallback line rather than raising.
        """
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    f"{self.ollama_host}/api/chat",
                    json={
                        "model": self.ollama_model,
                        "messages": messages,
                        "stream": False,
                        "options": {
                            "num_predict": 100,  # Allow complete thoughts
                            "temperature": 0.8,  # Balanced creativity/coherence
                            "top_p": 0.9,  # Focused word choices
                            "repeat_penalty": 1.3,  # Avoid repetition
                            "top_k": 50,  # Reasonable token variety
                        },
                    },
                    timeout=30.0
                )
                response.raise_for_status()
                data = response.json()
                return data["message"]["content"]
        except httpx.TimeoutException:
            print("Ollama timeout")
            return "Uh, sorry, I lost you there for a second. What was that?"
        except Exception as e:
            print(f"Ollama error: {e}")
            return "Yeah... I don't know, man."


# Global instance
llm_service = LLMService()

View File

@@ -0,0 +1,144 @@
"""Piper TTS service using sherpa-onnx for fast local voice synthesis"""
import asyncio
import numpy as np
from pathlib import Path
from typing import Optional
# Models directory
MODELS_DIR = Path(__file__).parent.parent.parent / "models" / "sherpa"

# Try to import sherpa-onnx (optional dependency; availability is reported
# via is_available()).
try:
    import sherpa_onnx
    SHERPA_AVAILABLE = True
except ImportError:
    SHERPA_AVAILABLE = False
    sherpa_onnx = None

# Available sherpa-onnx Piper models
PIPER_MODELS = {
    "amy": {
        "dir": "vits-piper-en_US-amy-low",
        "model": "en_US-amy-low.onnx",
        "name": "Amy (US Female)",
        "sample_rate": 16000,
    },
    "joe": {
        "dir": "vits-piper-en_US-joe-medium",
        "model": "en_US-joe-medium.onnx",
        "name": "Joe (US Male)",
        "sample_rate": 22050,
    },
    "lessac": {
        "dir": "vits-piper-en_US-lessac-medium",
        "model": "en_US-lessac-medium.onnx",
        "name": "Lessac (US Female)",
        "sample_rate": 22050,
    },
    "alan": {
        "dir": "vits-piper-en_GB-alan-medium",
        "model": "en_GB-alan-medium.onnx",
        "name": "Alan (UK Male)",
        "sample_rate": 22050,
    },
}


class PiperTTSService:
    """Fast local TTS using sherpa-onnx with Piper models"""

    def __init__(self):
        self.output_sample_rate = 24000  # Our standard output rate
        # Cache of loaded engines keyed by model key.
        # Fix: annotation previously used the builtin function `any` instead
        # of a type; the values are sherpa_onnx.OfflineTts instances.
        self._tts_engines: dict[str, object] = {}

    def is_available(self) -> bool:
        """Check if sherpa-onnx is available"""
        return SHERPA_AVAILABLE

    def _get_engine(self, model_key: str):
        """Get or create a TTS engine for the given model.

        Returns:
            (engine, model_sample_rate) tuple.
        Raises:
            ValueError: unknown model key.
            RuntimeError: model files are missing on disk.
        """
        if model_key in self._tts_engines:
            return self._tts_engines[model_key], PIPER_MODELS[model_key]["sample_rate"]
        if model_key not in PIPER_MODELS:
            raise ValueError(f"Unknown model: {model_key}")
        model_info = PIPER_MODELS[model_key]
        model_dir = MODELS_DIR / model_info["dir"]
        if not model_dir.exists():
            raise RuntimeError(f"Model not found: {model_dir}")
        config = sherpa_onnx.OfflineTtsConfig(
            model=sherpa_onnx.OfflineTtsModelConfig(
                vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                    model=str(model_dir / model_info["model"]),
                    tokens=str(model_dir / "tokens.txt"),
                    data_dir=str(model_dir / "espeak-ng-data"),
                ),
                num_threads=2,
            ),
        )
        tts = sherpa_onnx.OfflineTts(config)
        self._tts_engines[model_key] = tts
        return tts, model_info["sample_rate"]

    async def generate_speech(self, text: str, model_key: str = "amy") -> bytes:
        """Generate speech from text using sherpa-onnx

        Args:
            text: Text to synthesize
            model_key: Model key (amy, joe, lessac, alan)
        Returns:
            Raw PCM audio bytes (16-bit signed int, 24kHz mono)
        Raises:
            RuntimeError: sherpa-onnx is not installed.
        """
        if not SHERPA_AVAILABLE:
            raise RuntimeError("sherpa-onnx not installed. Run: pip install sherpa-onnx")
        # Fix: asyncio.get_event_loop() is deprecated inside coroutines;
        # use the currently running loop.
        loop = asyncio.get_running_loop()

        def run_tts():
            tts, model_sample_rate = self._get_engine(model_key)
            audio = tts.generate(text)
            samples = np.array(audio.samples, dtype=np.float32)
            # Resample to 24kHz if needed (simple linear interpolation)
            if model_sample_rate != self.output_sample_rate:
                ratio = self.output_sample_rate / model_sample_rate
                new_length = int(len(samples) * ratio)
                samples = np.interp(
                    np.linspace(0, len(samples) - 1, new_length),
                    np.arange(len(samples)),
                    samples
                ).astype(np.float32)
            # Convert to int16
            audio_int16 = (samples * 32767).astype(np.int16)
            return audio_int16.tobytes()

        # Synthesis is CPU-bound; keep it off the event loop thread.
        return await loop.run_in_executor(None, run_tts)

    def list_available_models(self) -> list[dict]:
        """List models whose files are actually present on disk."""
        available = []
        for key, info in PIPER_MODELS.items():
            model_dir = MODELS_DIR / info["dir"]
            if model_dir.exists():
                available.append({
                    "id": key,
                    "name": info["name"],
                    "sample_rate": info["sample_rate"],
                })
        return available


# Global instance
piper_service = PiperTTSService()


def is_piper_available() -> bool:
    """Check if Piper (sherpa-onnx) is available"""
    return piper_service.is_available()

View File

@@ -0,0 +1,116 @@
"""Whisper transcription service"""
import tempfile
import numpy as np
from faster_whisper import WhisperModel
import librosa
# Global model instance (loaded once)
_whisper_model = None


def get_whisper_model() -> WhisperModel:
    """Get or create Whisper model instance (lazy singleton; first call loads)."""
    global _whisper_model
    if _whisper_model is None:
        print("Loading Whisper tiny model for fast transcription...")
        # Use tiny model for speed - about 3-4x faster than base
        # beam_size=1 and best_of=1 for fastest inference
        # int8 quantization keeps CPU inference fast and memory small.
        _whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
        print("Whisper model loaded")
    return _whisper_model
def decode_audio(audio_data: bytes, source_sample_rate: int = None) -> tuple[np.ndarray, int]:
    """
    Decode audio from various formats to numpy array.

    Args:
        audio_data: Raw audio bytes
        source_sample_rate: If provided, treat as raw PCM at this sample rate
    Returns:
        Tuple of (audio array as float32, sample rate)
    """
    # If sample rate is provided, assume raw PCM (from server-side recording)
    if source_sample_rate is not None:
        print(f"Decoding raw PCM at {source_sample_rate}Hz, {len(audio_data)} bytes")
        return _pcm16_to_float(audio_data), source_sample_rate
    print(f"First 20 bytes: {audio_data[:20].hex()}")
    # Try to decode with librosa first (handles webm, ogg, wav, mp3, etc via ffmpeg)
    try:
        import os
        with tempfile.NamedTemporaryFile(suffix='.webm', delete=False) as f:
            f.write(audio_data)
            temp_path = f.name
        try:
            audio, sample_rate = librosa.load(temp_path, sr=None, mono=True)
        finally:
            # Fix: previously the temp file was only unlinked on the success
            # path, so every failed decode leaked a file in the temp dir.
            os.unlink(temp_path)
        print(f"Decoded with librosa: {len(audio)} samples at {sample_rate}Hz")
        return audio.astype(np.float32), sample_rate
    except Exception as e:
        print(f"librosa decode failed: {e}, trying raw PCM at 16kHz...")
    # Fall back to raw PCM (16-bit signed int, 16kHz mono - Whisper's rate)
    return _pcm16_to_float(audio_data), 16000


def _pcm16_to_float(audio_data: bytes) -> np.ndarray:
    """Interpret bytes as 16-bit signed PCM scaled to [-1, 1); pads odd lengths."""
    if len(audio_data) % 2 != 0:
        audio_data = audio_data + b'\x00'
    return np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
async def transcribe_audio(audio_data: bytes, source_sample_rate: int = None) -> str:
    """
    Transcribe audio data to text using Whisper.
    Args:
        audio_data: Audio bytes (webm, ogg, wav, or raw PCM)
        source_sample_rate: If provided, treat audio_data as raw PCM at this rate
    Returns:
        Transcribed text ("" when the audio is effectively silent)
    """
    model = get_whisper_model()
    print(f"Transcribing audio: {len(audio_data)} bytes")
    # Decode audio from whatever format
    audio, detected_sample_rate = decode_audio(audio_data, source_sample_rate)
    print(f"Audio samples: {len(audio)}, duration: {len(audio)/detected_sample_rate:.2f}s")
    print(f"Audio range: min={audio.min():.4f}, max={audio.max():.4f}")
    # Check if audio is too quiet
    # (peak below ~1% of full scale is treated as silence; skip transcription)
    if np.abs(audio).max() < 0.01:
        print("Warning: Audio appears to be silent or very quiet")
        return ""
    # Resample to 16kHz for Whisper
    if detected_sample_rate != 16000:
        audio_16k = librosa.resample(audio, orig_sr=detected_sample_rate, target_sr=16000)
        print(f"Resampled to {len(audio_16k)} samples at 16kHz")
    else:
        audio_16k = audio
    # Transcribe with speed optimizations
    segments, info = model.transcribe(
        audio_16k,
        beam_size=1,  # Faster, slightly less accurate
        best_of=1,
        language="en",  # Skip language detection
        vad_filter=True,  # Skip silence
    )
    # segments is a lazy generator; materialize it before joining.
    segments_list = list(segments)
    text = " ".join([s.text for s in segments_list]).strip()
    print(f"Transcription result: '{text}' (language: {info.language}, prob: {info.language_probability:.2f})")
    return text

701
backend/services/tts.py Normal file
View File

@@ -0,0 +1,701 @@
"""TTS service with ElevenLabs, F5-TTS, MLX Kokoro, StyleTTS2, VITS, and Bark support"""
import os
import numpy as np
from scipy.signal import butter, filtfilt
from pathlib import Path
import tempfile
import torch
from ..config import settings
# Patch torch.load for compatibility with PyTorch 2.6+ (its default flipped
# to weights_only=True, which breaks loading the pickled TTS checkpoints
# used by the local models below).
_original_torch_load = torch.load


def _patched_torch_load(*args, **kwargs):
    """torch.load wrapper that defaults weights_only to False.

    Fix: use setdefault instead of an unconditional assignment, so a caller
    that explicitly passes weights_only (e.g. True for untrusted files) is
    no longer silently overridden.
    NOTE(security): weights_only=False unpickles arbitrary objects — only
    load checkpoint files from trusted sources.
    """
    kwargs.setdefault('weights_only', False)
    return _original_torch_load(*args, **kwargs)


torch.load = _patched_torch_load
# Global clients (lazily initialized by the provider-specific code paths)
_elevenlabs_client = None
_vits_tts = None
_bark_loaded = False
_kokoro_model = None
_styletts2_model = None
_f5tts_model = None
_chattts_model = None
_chattts_speakers = {}  # Cache for speaker embeddings

# Kokoro voice mapping - using highest-graded voices
# Grades from https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md
# Keys are the ElevenLabs voice IDs used throughout the app; values are the
# Kokoro voice names they map to when the Kokoro provider is selected.
KOKORO_VOICES = {
    # Male voices (best available are C+ grade)
    "VR6AewLTigWG4xSOukaG": "am_fenrir",   # Tony - deep/powerful (C+)
    "TxGEqnHWrfWFTfGW9XjX": "am_michael",  # Rick - solid male voice (C+)
    "pNInz6obpgDQGcFmaJgB": "am_puck",     # Dennis - anxious dad (C+)
    "ODq5zmih8GrVes37Dizd": "bm_george",   # Earl - older/distinguished British (C)
    "IKne3meq5aSn9XLyUdCD": "bm_fable",    # Marcus - young British (C)
    # Female voices (much better quality available)
    "jBpfuIE2acCO8z3wKNLl": "af_heart",    # Jasmine - best quality (A)
    "EXAVITQu4vr4xnSDxMaL": "af_bella",    # Megan - warm/friendly (A-)
    "21m00Tcm4TlvDq8ikWAM": "bf_emma",     # Tanya - professional British (B-)
    "XB0fDUnXU5powFXDhCwa": "af_nicole",   # Carla - Jersey mom (B-)
    "pFZP5JQG7iQjIQuC4Bku": "af_sarah",    # Brenda - overthinker (C+)
}

# Speed adjustments per voice (1.0 = normal, lower = slower/more natural)
# Slower speeds (0.85-0.95) generally sound more natural
KOKORO_SPEEDS = {
    # Male voices - slower speeds help with C+ grade voices
    "VR6AewLTigWG4xSOukaG": 0.9,   # Tony (am_fenrir) - deep voice, slower
    "TxGEqnHWrfWFTfGW9XjX": 0.92,  # Rick (am_michael) - solid pace
    "pNInz6obpgDQGcFmaJgB": 0.95,  # Dennis (am_puck) - anxious but not rushed
    "ODq5zmih8GrVes37Dizd": 0.85,  # Earl (bm_george) - older, slower British
    "IKne3meq5aSn9XLyUdCD": 0.95,  # Marcus (bm_fable) - young, natural
    # Female voices - A-grade voices can handle faster speeds
    "jBpfuIE2acCO8z3wKNLl": 0.95,  # Jasmine (af_heart) - best voice, natural pace
    "EXAVITQu4vr4xnSDxMaL": 0.95,  # Megan (af_bella) - warm
    "21m00Tcm4TlvDq8ikWAM": 0.9,   # Tanya (bf_emma) - professional British
    "XB0fDUnXU5powFXDhCwa": 0.95,  # Carla (af_nicole) - animated but clear
    "pFZP5JQG7iQjIQuC4Bku": 0.92,  # Brenda (af_sarah) - overthinker, measured
}
# Fallbacks for voice IDs not present in the mappings above.
DEFAULT_KOKORO_VOICE = "af_heart"
DEFAULT_KOKORO_SPEED = 0.95

# VCTK speaker mapping - different voices for different callers
VITS_SPEAKERS = {
    # Male voices
    "VR6AewLTigWG4xSOukaG": "p226",  # Tony
    "TxGEqnHWrfWFTfGW9XjX": "p251",  # Rick
    "pNInz6obpgDQGcFmaJgB": "p245",  # Dennis
    "ODq5zmih8GrVes37Dizd": "p232",  # Earl
    "IKne3meq5aSn9XLyUdCD": "p252",  # Marcus
    # Female voices
    "jBpfuIE2acCO8z3wKNLl": "p225",  # Jasmine
    "EXAVITQu4vr4xnSDxMaL": "p228",  # Megan
    "21m00Tcm4TlvDq8ikWAM": "p229",  # Tanya
    "XB0fDUnXU5powFXDhCwa": "p231",  # Carla
    "pFZP5JQG7iQjIQuC4Bku": "p233",  # Brenda
}
DEFAULT_VITS_SPEAKER = "p225"

# Inworld voice mapping - maps ElevenLabs voice IDs to Inworld voices
# Full voice list from API: Alex, Ashley, Blake, Carter, Clive, Craig, Deborah,
# Dennis, Dominus, Edward, Elizabeth, Hades, Hana, Julia, Luna, Mark, Olivia,
# Pixie, Priya, Ronald, Sarah, Shaun, Theodore, Timothy, Wendy
INWORLD_VOICES = {
    # Male voices - each caller gets a unique voice matching their personality
    "VR6AewLTigWG4xSOukaG": "Edward",   # Tony - fast-talking, emphatic, streetwise
    "TxGEqnHWrfWFTfGW9XjX": "Shaun",    # Rick - friendly, dynamic, conversational
    "pNInz6obpgDQGcFmaJgB": "Alex",     # Dennis - energetic, expressive, mildly nasal
    "ODq5zmih8GrVes37Dizd": "Craig",    # Earl - older British, refined, articulate
    "IKne3meq5aSn9XLyUdCD": "Timothy",  # Marcus - lively, upbeat American
    # Female voices - each caller gets a unique voice matching their personality
    "jBpfuIE2acCO8z3wKNLl": "Hana",     # Jasmine - bright, expressive young female
    "EXAVITQu4vr4xnSDxMaL": "Ashley",   # Megan - warm, natural female
    "21m00Tcm4TlvDq8ikWAM": "Wendy",    # Tanya - posh, middle-aged British
    "XB0fDUnXU5powFXDhCwa": "Sarah",    # Carla - fast-talking, questioning tone
    "pFZP5JQG7iQjIQuC4Bku": "Deborah",  # Brenda - gentle, elegant
}
DEFAULT_INWORLD_VOICE = "Dennis"
def preprocess_text_for_kokoro(text: str) -> str:
    """
    Preprocess text to improve Kokoro prosody and naturalness.

    - Adds slight pauses via punctuation after common transition words
    - Expands contractions/abbreviations (Dr., Mr., w/, w/o, ...) so they
      are pronounced as full words
    - Normalizes whitespace and spacing after punctuation

    Args:
        text: Raw text to be spoken.

    Returns:
        The preprocessed text, ready for Kokoro synthesis.
    """
    import re
    # Normalize whitespace
    text = ' '.join(text.split())
    # Add comma pauses after common transition words (if no punctuation follows)
    transitions = [
        r'\b(Well)\s+(?=[A-Za-z])',
        r'\b(So)\s+(?=[A-Za-z])',
        r'\b(Now)\s+(?=[A-Za-z])',
        r'\b(Look)\s+(?=[A-Za-z])',
        r'\b(See)\s+(?=[A-Za-z])',
        r'\b(Anyway)\s+(?=[A-Za-z])',
        r'\b(Actually)\s+(?=[A-Za-z])',
        r'\b(Honestly)\s+(?=[A-Za-z])',
        r'\b(Basically)\s+(?=[A-Za-z])',
    ]
    for pattern in transitions:
        text = re.sub(pattern, r'\1, ', text)
    # Add pause after "I mean" at start of sentence
    text = re.sub(r'^(I mean)\s+', r'\1, ', text)
    text = re.sub(r'\.\s+(I mean)\s+', r'. \1, ', text)
    # Expand common abbreviations for better pronunciation.
    # BUG FIX: "w/o" must be expanded BEFORE "w/"; substitutions run in dict
    # order, and the old order let r'\bw/' match the prefix of "w/o",
    # producing "witho" instead of "without".
    abbreviations = {
        r'\bDr\.': 'Doctor',
        r'\bMr\.': 'Mister',
        r'\bMrs\.': 'Missus',
        r'\bMs\.': 'Miss',
        r'\bSt\.': 'Street',
        r'\bAve\.': 'Avenue',
        r'\betc\.': 'etcetera',
        r'\bvs\.': 'versus',
        r'\bw/o': 'without',
        r'\bw/': 'with',
    }
    for abbr, expansion in abbreviations.items():
        text = re.sub(abbr, expansion, text, flags=re.IGNORECASE)
    # Add breath pause (comma) before conjunctions in long sentences
    text = re.sub(r'(\w{20,})\s+(and|but|or)\s+', r'\1, \2 ', text)
    # Ensure proper spacing after punctuation
    text = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', text)
    return text
# StyleTTS2 reference voice files (place .wav files in voices/ directory for voice cloning)
# Maps voice_id to reference audio filename - if file doesn't exist, uses default voice
STYLETTS2_VOICES = {
    # Male voices
    "VR6AewLTigWG4xSOukaG": "tony.wav",  # Tony
    "TxGEqnHWrfWFTfGW9XjX": "rick.wav",  # Rick
    "pNInz6obpgDQGcFmaJgB": "dennis.wav",  # Dennis
    "ODq5zmih8GrVes37Dizd": "earl.wav",  # Earl
    "IKne3meq5aSn9XLyUdCD": "marcus.wav",  # Marcus
    # Female voices
    "jBpfuIE2acCO8z3wKNLl": "jasmine.wav",  # Jasmine
    "EXAVITQu4vr4xnSDxMaL": "megan.wav",  # Megan
    "21m00Tcm4TlvDq8ikWAM": "tanya.wav",  # Tanya
    "XB0fDUnXU5powFXDhCwa": "carla.wav",  # Carla
    "pFZP5JQG7iQjIQuC4Bku": "brenda.wav",  # Brenda
}
# F5-TTS reference voices (same files as StyleTTS2, reuses voices/ directory)
# Requires: mono, 24kHz, 5-10 seconds, with transcript in .txt file
F5TTS_VOICES = STYLETTS2_VOICES.copy()
# ChatTTS speaker seeds - different seeds produce different voices
# These are used to generate consistent speaker embeddings
# (see get_chattts_speaker / generate_speech_chattts, which call torch.manual_seed)
CHATTTS_SEEDS = {
    # Male voices
    "VR6AewLTigWG4xSOukaG": 42,  # Tony - deep voice
    "TxGEqnHWrfWFTfGW9XjX": 123,  # Rick
    "pNInz6obpgDQGcFmaJgB": 456,  # Dennis
    "ODq5zmih8GrVes37Dizd": 789,  # Earl
    "IKne3meq5aSn9XLyUdCD": 1011,  # Marcus
    # Female voices
    "jBpfuIE2acCO8z3wKNLl": 2024,  # Jasmine
    "EXAVITQu4vr4xnSDxMaL": 3033,  # Megan
    "21m00Tcm4TlvDq8ikWAM": 4042,  # Tanya
    "XB0fDUnXU5powFXDhCwa": 5051,  # Carla
    "pFZP5JQG7iQjIQuC4Bku": 6060,  # Brenda
}
DEFAULT_CHATTTS_SEED = 42  # fallback seed when voice_id is unmapped
def get_elevenlabs_client():
    """Return the module-wide ElevenLabs client, creating it lazily on first use."""
    global _elevenlabs_client
    if _elevenlabs_client is not None:
        return _elevenlabs_client
    from elevenlabs.client import ElevenLabs
    _elevenlabs_client = ElevenLabs(api_key=settings.elevenlabs_api_key)
    return _elevenlabs_client
def get_vits_tts():
    """Return the shared VITS VCTK TTS engine, loading it lazily on first use."""
    global _vits_tts
    if _vits_tts is not None:
        return _vits_tts
    from TTS.api import TTS
    _vits_tts = TTS("tts_models/en/vctk/vits")
    return _vits_tts
def get_kokoro_model():
    """Return the shared Kokoro MLX model, loading it lazily on first use."""
    global _kokoro_model
    if _kokoro_model is not None:
        return _kokoro_model
    from mlx_audio.tts.utils import load_model
    _kokoro_model = load_model(model_path='mlx-community/Kokoro-82M-bf16')
    print("Kokoro MLX model loaded")
    return _kokoro_model
def ensure_bark_loaded():
    """Load Bark models exactly once, preferring the Apple Silicon GPU (MPS)."""
    global _bark_loaded
    if _bark_loaded:
        return
    os.environ['SUNO_USE_SMALL_MODELS'] = '1'
    use_mps = torch.backends.mps.is_available()
    # Force Bark to use MPS (Apple Silicon GPU) instead of offloading to CPU
    if use_mps:
        os.environ['SUNO_OFFLOAD_CPU'] = '0'
        os.environ['SUNO_ENABLE_MPS'] = '1'
    from bark import preload_models
    preload_models()
    _bark_loaded = True
    print(f"Bark loaded on device: {'MPS' if use_mps else 'CPU'}")
def get_styletts2_model():
    """Return the shared StyleTTS2 model, loading it lazily on first use."""
    global _styletts2_model
    if _styletts2_model is not None:
        return _styletts2_model
    from styletts2 import tts
    _styletts2_model = tts.StyleTTS2()
    print("StyleTTS2 model loaded")
    return _styletts2_model
def get_f5tts_generate():
    """Return the F5-TTS MLX generate function, importing it lazily on first use."""
    global _f5tts_model
    if _f5tts_model is not None:
        return _f5tts_model
    # Disable tqdm progress bars to avoid BrokenPipeError in server context
    import os
    os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
    os.environ['TQDM_DISABLE'] = '1'
    from f5_tts_mlx.generate import generate
    _f5tts_model = generate
    print("F5-TTS MLX loaded")
    return _f5tts_model
def get_chattts_model():
    """Return the shared ChatTTS model, loading it lazily on first use."""
    global _chattts_model
    if _chattts_model is not None:
        return _chattts_model
    import ChatTTS
    _chattts_model = ChatTTS.Chat()
    _chattts_model.load(compile=False)
    print("ChatTTS model loaded")
    return _chattts_model
def get_chattts_speaker(voice_id: str):
    """Return a cached speaker embedding for a voice.

    The embedding is sampled once per voice under a fixed torch seed so the
    same voice_id always maps to the same ChatTTS speaker.
    """
    global _chattts_speakers
    if voice_id in _chattts_speakers:
        return _chattts_speakers[voice_id]
    chat = get_chattts_model()
    seed = CHATTTS_SEEDS.get(voice_id, DEFAULT_CHATTTS_SEED)
    # Seed the RNG so speaker sampling is reproducible
    torch.manual_seed(seed)
    _chattts_speakers[voice_id] = chat.sample_random_speaker()
    print(f"[ChatTTS] Created speaker for voice {voice_id} with seed {seed}")
    return _chattts_speakers[voice_id]
def phone_filter(audio: np.ndarray, sample_rate: int = 24000, quality: str = "normal") -> np.ndarray:
    """Apply phone filter with variable quality.

    Band-limits, soft-clips, and (for the noisier presets) adds bursty static
    so speech sounds like it came over a phone line. Unknown quality values
    fall back to the "normal" preset.
    """
    samples = audio.flatten()
    # Presets: (low cut Hz, high cut Hz, distortion drive, noise level)
    presets = {
        "good": (200, 7000, 1.0, 0.0),
        "normal": (300, 3400, 1.5, 0.005),
        "bad": (400, 2800, 2.0, 0.015),
        "terrible": (500, 2200, 2.5, 0.03),
    }
    low_hz, high_hz, drive, noise_level = presets.get(quality, presets["normal"])
    nyquist = sample_rate / 2
    # 4th-order Butterworth band-pass, applied forward and backward (zero phase)
    b, a = butter(4, [low_hz / nyquist, high_hz / nyquist], btype='band')
    shaped = filtfilt(b, a, samples)
    # Soft-clip for the overdriven phone-line sound, leaving headroom
    shaped = np.tanh(shaped * drive) * 0.8
    if noise_level > 0:
        # Bursty static: Gaussian noise gated by a coarse 1000-sample envelope
        static = np.random.normal(0, noise_level, len(shaped)).astype(np.float32)
        envelope = np.random.random(len(shaped) // 1000 + 1)
        envelope = np.repeat(envelope, 1000)[:len(shaped)]
        static *= (envelope > 0.7).astype(np.float32)
        shaped = shaped + static
    return shaped.astype(np.float32)
async def generate_speech_elevenlabs(text: str, voice_id: str) -> tuple[np.ndarray, int]:
    """Generate speech using ElevenLabs"""
    client = get_elevenlabs_client()
    pcm_chunks = client.text_to_speech.convert(
        voice_id=voice_id,
        text=text,
        model_id="eleven_v3",
        output_format="pcm_24000"
    )
    raw = b"".join(pcm_chunks)
    # 16-bit signed PCM -> float32 in [-1, 1)
    samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
    return samples, 24000
async def generate_speech_kokoro(text: str, voice_id: str) -> tuple[np.ndarray, int]:
    """Generate speech using MLX Kokoro (fast, good quality, Apple Silicon optimized)"""
    import librosa
    from mlx_audio.tts.generate import generate_audio
    model = get_kokoro_model()
    voice = KOKORO_VOICES.get(voice_id, DEFAULT_KOKORO_VOICE)
    speed = KOKORO_SPEEDS.get(voice_id, DEFAULT_KOKORO_SPEED)
    # Clean the text up first for better prosody
    text = preprocess_text_for_kokoro(text)
    # Kokoro voice names are prefixed: a* = American English, b* = British
    lang_code = 'b' if voice.startswith('b') else 'a'
    with tempfile.TemporaryDirectory() as workdir:
        generate_audio(
            text,
            model=model,
            voice=voice,
            speed=speed,
            lang_code=lang_code,
            output_path=workdir,
            file_prefix='tts',
            verbose=False
        )
        wav_path = Path(workdir) / 'tts_000.wav'
        if not wav_path.exists():
            raise RuntimeError("Kokoro failed to generate audio")
        samples, sr = librosa.load(str(wav_path), sr=None, mono=True)
    # Resample to the app-wide 24 kHz rate if needed
    if sr != 24000:
        samples = librosa.resample(samples, orig_sr=sr, target_sr=24000)
    return samples.astype(np.float32), 24000
async def generate_speech_vits(text: str, voice_id: str) -> tuple[np.ndarray, int]:
    """Generate speech using VITS VCTK (fast, multiple speakers)"""
    import librosa
    engine = get_vits_tts()
    speaker = VITS_SPEAKERS.get(voice_id, DEFAULT_VITS_SPEAKER)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    try:
        engine.tts_to_file(text=text, file_path=wav_path, speaker=speaker)
        samples, sr = librosa.load(wav_path, sr=None, mono=True)
        if sr != 24000:
            samples = librosa.resample(samples, orig_sr=sr, target_sr=24000)
        return samples.astype(np.float32), 24000
    finally:
        # Always remove the temp file, even on synthesis failure
        Path(wav_path).unlink(missing_ok=True)
async def generate_speech_bark(text: str, voice_id: str) -> tuple[np.ndarray, int]:
    """Generate speech using Bark (slow but expressive, supports emotes like [laughs])"""
    import librosa
    from bark import SAMPLE_RATE, generate_audio
    ensure_bark_loaded()
    samples = generate_audio(text)
    # Bark can exceed [-1, 1]; scale down to a 0.95 peak to prevent clipping
    peak = np.abs(samples).max()
    if peak > 0.95:
        samples = samples * (0.95 / peak)
    # Resample to the app-wide 24 kHz rate if needed
    if SAMPLE_RATE != 24000:
        samples = librosa.resample(samples, orig_sr=SAMPLE_RATE, target_sr=24000)
    return samples.astype(np.float32), 24000
async def generate_speech_styletts2(text: str, voice_id: str) -> tuple[np.ndarray, int]:
    """Generate speech using StyleTTS2 (high quality, supports voice cloning)"""
    import librosa
    model = get_styletts2_model()
    # Resolve an optional reference clip for voice cloning; fall back to the
    # default voice when the mapped file is absent.
    clone_path = None
    ref_name = STYLETTS2_VOICES.get(voice_id)
    if ref_name:
        candidate = settings.base_dir / "voices" / ref_name
        if candidate.exists():
            clone_path = candidate
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name
    try:
        if clone_path:
            print(f"[StyleTTS2] Using voice clone: {clone_path}")
            model.inference(
                text,
                target_voice_path=str(clone_path),
                output_wav_file=out_path,
                output_sample_rate=24000,
                diffusion_steps=5,  # Balance quality/speed
                alpha=0.3,  # More voice-like than text-like
                beta=0.7,  # Good prosody
            )
        else:
            print("[StyleTTS2] Using default voice")
            model.inference(
                text,
                output_wav_file=out_path,
                output_sample_rate=24000,
                diffusion_steps=5,
            )
        # Load the generated audio back from disk
        samples, sr = librosa.load(out_path, sr=None, mono=True)
        if sr != 24000:
            samples = librosa.resample(samples, orig_sr=sr, target_sr=24000)
        return samples.astype(np.float32), 24000
    finally:
        Path(out_path).unlink(missing_ok=True)
async def generate_speech_f5tts(text: str, voice_id: str) -> tuple[np.ndarray, int]:
    """Generate speech using F5-TTS MLX (very natural, supports voice cloning)"""
    import librosa
    synthesize = get_f5tts_generate()
    # Voice cloning needs both the reference clip and its transcript (.txt)
    ref_audio_path = None
    ref_text = None
    ref_name = F5TTS_VOICES.get(voice_id)
    if ref_name:
        clip = settings.base_dir / "voices" / ref_name
        transcript = clip.with_suffix('.txt')
        if clip.exists() and transcript.exists():
            ref_audio_path = str(clip)
            ref_text = transcript.read_text().strip()
            print(f"[F5-TTS] Using voice clone: {clip}")
    if not ref_audio_path:
        print("[F5-TTS] Using default voice")
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name
    try:
        synthesize(
            generation_text=text,
            ref_audio_path=ref_audio_path,
            ref_audio_text=ref_text,
            steps=8,
            speed=1.0,
            output_path=out_path,
        )
        # Load the generated audio back from disk
        samples, sr = librosa.load(out_path, sr=None, mono=True)
        # Resample to the app-wide 24 kHz rate if needed
        if sr != 24000:
            samples = librosa.resample(samples, orig_sr=sr, target_sr=24000)
        return samples.astype(np.float32), 24000
    finally:
        Path(out_path).unlink(missing_ok=True)
async def generate_speech_chattts(text: str, voice_id: str) -> tuple[np.ndarray, int]:
    """Generate speech using ChatTTS (natural conversational speech, multiple speakers)

    Args:
        text: Text to speak; blank input is replaced with "Hello." so the
            model always receives something.
        voice_id: ElevenLabs-style voice ID, mapped to a torch seed via
            CHATTTS_SEEDS for per-caller consistency.

    Returns:
        (float32 mono audio, 24000) tuple.

    Raises:
        RuntimeError: If ChatTTS returns no audio.
    """
    import ChatTTS
    chat = get_chattts_model()
    # Ensure text is not empty and has reasonable content
    text = text.strip()
    if not text:
        text = "Hello."
    print(f"[ChatTTS] Generating speech for: {text[:50]}...")
    # Get consistent speaker for this voice
    # NOTE(review): only the torch RNG is seeded here; the cached embedding
    # from get_chattts_speaker() is never passed to chat.infer(), so voice
    # consistency relies on seeding alone - confirm this is intentional.
    seed = CHATTTS_SEEDS.get(voice_id, DEFAULT_CHATTTS_SEED)
    torch.manual_seed(seed)
    # Configure inference parameters (low temperature -> steadier delivery)
    params_infer_code = ChatTTS.Chat.InferCodeParams(
        temperature=0.3,
        top_P=0.7,
        top_K=20,
    )
    # Generate audio (skip text refinement to avoid narrow() error with this version)
    wavs = chat.infer(
        [text],
        params_infer_code=params_infer_code,
        skip_refine_text=True,
    )
    if wavs is None or len(wavs) == 0:
        raise RuntimeError("ChatTTS failed to generate audio")
    audio = wavs[0]
    # Handle different output shapes
    if audio.ndim > 1:
        audio = audio.squeeze()
    # Normalize peaks above 0.95 to avoid clipping downstream
    max_val = np.abs(audio).max()
    if max_val > 0.95:
        audio = audio * (0.95 / max_val)
    return audio.astype(np.float32), 24000
async def generate_speech_inworld(text: str, voice_id: str) -> tuple[np.ndarray, int]:
    """Generate speech using Inworld TTS API (high quality, natural voices)

    Args:
        text: Text to synthesize.
        voice_id: ElevenLabs-style voice ID, mapped to an Inworld voice name
            via INWORLD_VOICES.

    Returns:
        (float32 mono audio resampled to 24 kHz, 24000) tuple.

    Raises:
        RuntimeError: If the API key is missing or the API returns no audio.
        httpx.HTTPStatusError: On a non-2xx API response.
    """
    import httpx
    import base64
    import librosa
    voice = INWORLD_VOICES.get(voice_id, DEFAULT_INWORLD_VOICE)
    api_key = settings.inworld_api_key
    if not api_key:
        raise RuntimeError("INWORLD_API_KEY not set in environment")
    print(f"[Inworld TTS] Voice: {voice}, Text: {text[:50]}...")
    url = "https://api.inworld.ai/tts/v1/voice"
    headers = {
        "Content-Type": "application/json",
        # Basic auth with the key passed through verbatim
        "Authorization": f"Basic {api_key}",
    }
    payload = {
        "text": text,
        "voice_id": voice,
        "model_id": "inworld-tts-1.5-mini",
        "audio_config": {
            "encoding": "LINEAR16",
            "sample_rate_hertz": 48000,
        },
    }
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(url, json=payload, headers=headers)
        response.raise_for_status()
        data = response.json()
    # Decode base64 audio
    audio_b64 = data.get("audioContent")
    if not audio_b64:
        raise RuntimeError("Inworld TTS returned no audio content")
    audio_bytes = base64.b64decode(audio_b64)
    # Parse audio using soundfile (handles WAV, MP3, etc.)
    import soundfile as sf
    import io
    # soundfile can read WAV, FLAC, OGG, and with ffmpeg: MP3
    # MP3 files start with ID3 tag or 0xff sync bytes
    try:
        audio, sr = sf.read(io.BytesIO(audio_bytes))
    except Exception as e:
        print(f"[Inworld TTS] soundfile failed: {e}, trying raw PCM")
        # Fallback to raw PCM; drop a trailing odd byte so the int16 view aligns
        if len(audio_bytes) % 2 != 0:
            audio_bytes = audio_bytes[:-1]
        audio = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        sr = 48000  # matches the sample_rate_hertz requested above
    # Resample to 24kHz to match other providers
    if sr != 24000:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=24000)
    return audio.astype(np.float32), 24000
async def generate_speech(
    text: str,
    voice_id: str,
    phone_quality: str = "normal",
    apply_filter: bool = True
) -> bytes:
    """
    Generate speech from text using the provider selected in settings.

    Args:
        text: Text to speak
        voice_id: ElevenLabs voice ID (mapped to local voice if using local TTS)
        phone_quality: Quality of phone filter ("none" to disable)
        apply_filter: Whether to apply phone filter
    Returns:
        Raw PCM audio bytes (16-bit signed int, 24kHz)
    """
    provider = settings.tts_provider
    print(f"[TTS] Provider: {provider}, Text: {text[:50]}...")
    # Dispatch table instead of an if/elif ladder
    synthesizers = {
        "kokoro": generate_speech_kokoro,
        "f5tts": generate_speech_f5tts,
        "inworld": generate_speech_inworld,
        "chattts": generate_speech_chattts,
        "styletts2": generate_speech_styletts2,
        "bark": generate_speech_bark,
        "vits": generate_speech_vits,
        "elevenlabs": generate_speech_elevenlabs,
    }
    synthesize = synthesizers.get(provider)
    if synthesize is None:
        raise ValueError(f"Unknown TTS provider: {provider}")
    audio, sample_rate = await synthesize(text, voice_id)
    # Apply phone filter if requested.
    # Bark is skipped - its output already has rough audio quality.
    if apply_filter and provider != "bark" and phone_quality not in ("none", "studio"):
        audio = phone_filter(audio, sample_rate, phone_quality)
    # float [-1, 1] -> 16-bit signed PCM bytes
    pcm = (audio * 32768).clip(-32768, 32767).astype(np.int16)
    return pcm.tobytes()
# Voice IDs for cohost and announcer
COHOST_VOICE_ID = "nPczCjzI2devNBz1zQrb"  # cohost "Bobby" (see generate_cohost_speech)
ANNOUNCER_VOICE_ID = "ErXwobaYiN019PkySvjV"  # station announcer


async def generate_cohost_speech(text: str) -> bytes:
    """Generate speech for cohost Bobby (no phone filter)"""
    return await generate_speech(text, COHOST_VOICE_ID, apply_filter=False)


async def generate_announcer_speech(text: str) -> bytes:
    """Generate speech for announcer (no phone filter)"""
    return await generate_speech(text, ANNOUNCER_VOICE_ID, apply_filter=False)

200
backend/services/voices.py Normal file
View File

@@ -0,0 +1,200 @@
"""Voice configuration and TTS provider management"""
from dataclasses import dataclass
from typing import Optional
from enum import Enum
class TTSProvider(str, Enum):
    """Identifies which TTS backend serves a Voice (str-valued for easy JSON use)."""
    ELEVENLABS = "elevenlabs"
    EDGE = "edge"  # Microsoft Edge TTS (free)
    PIPER = "piper"  # Local Piper via sherpa-onnx (free, fast)
@dataclass
class Voice:
    """Voice configuration"""
    # Internal app-wide voice ID (e.g. "el_tony")
    id: str
    # Human-readable display name
    name: str
    # Backend that serves this voice
    provider: TTSProvider
    provider_voice_id: str  # The actual ID used by the provider
    description: str = ""
    language: str = "en"
    gender: str = "neutral"
# ElevenLabs voices
# One Voice per show character; provider_voice_id is the ElevenLabs voice ID.
ELEVENLABS_VOICES = [
    Voice("el_tony", "Tony (ElevenLabs)", TTSProvider.ELEVENLABS, "IKne3meq5aSn9XLyUdCD",
          "Male, New York accent, expressive", "en", "male"),
    Voice("el_jasmine", "Jasmine (ElevenLabs)", TTSProvider.ELEVENLABS, "FGY2WhTYpPnrIDTdsKH5",
          "Female, confident, direct", "en", "female"),
    Voice("el_rick", "Rick (ElevenLabs)", TTSProvider.ELEVENLABS, "JBFqnCBsd6RMkjVDRZzb",
          "Male, Texas accent, older", "en", "male"),
    Voice("el_megan", "Megan (ElevenLabs)", TTSProvider.ELEVENLABS, "XrExE9yKIg1WjnnlVkGX",
          "Female, young, casual", "en", "female"),
    Voice("el_dennis", "Dennis (ElevenLabs)", TTSProvider.ELEVENLABS, "cjVigY5qzO86Huf0OWal",
          "Male, middle-aged, anxious", "en", "male"),
    Voice("el_tanya", "Tanya (ElevenLabs)", TTSProvider.ELEVENLABS, "N2lVS1w4EtoT3dr4eOWO",
          "Female, Miami, sassy", "en", "female"),
    Voice("el_earl", "Earl (ElevenLabs)", TTSProvider.ELEVENLABS, "EXAVITQu4vr4xnSDxMaL",
          "Male, elderly, Southern", "en", "male"),
    Voice("el_carla", "Carla (ElevenLabs)", TTSProvider.ELEVENLABS, "CwhRBWXzGAHq8TQ4Fs17",
          "Female, Jersey, sharp", "en", "female"),
    Voice("el_marcus", "Marcus (ElevenLabs)", TTSProvider.ELEVENLABS, "bIHbv24MWmeRgasZH58o",
          "Male, young, urban", "en", "male"),
    Voice("el_brenda", "Brenda (ElevenLabs)", TTSProvider.ELEVENLABS, "Xb7hH8MSUJpSbSDYk0k2",
          "Female, middle-aged, worried", "en", "female"),
    Voice("el_jake", "Jake (ElevenLabs)", TTSProvider.ELEVENLABS, "SOYHLrjzK2X1ezoPC6cr",
          "Male, Boston, insecure", "en", "male"),
    Voice("el_diane", "Diane (ElevenLabs)", TTSProvider.ELEVENLABS, "cgSgspJ2msm6clMCkdW9",
          "Female, mature, conflicted", "en", "female"),
    Voice("el_bobby", "Bobby (ElevenLabs)", TTSProvider.ELEVENLABS, "nPczCjzI2devNBz1zQrb",
          "Male, sidekick, wisecracking", "en", "male"),
    Voice("el_announcer", "Announcer (ElevenLabs)", TTSProvider.ELEVENLABS, "ErXwobaYiN019PkySvjV",
          "Male, radio announcer", "en", "male"),
]
# Edge TTS voices (Microsoft, free)
# provider_voice_id is the Microsoft neural voice name.
EDGE_VOICES = [
    # US voices
    Voice("edge_jenny", "Jenny (Edge)", TTSProvider.EDGE, "en-US-JennyNeural",
          "Female, American, friendly", "en", "female"),
    Voice("edge_guy", "Guy (Edge)", TTSProvider.EDGE, "en-US-GuyNeural",
          "Male, American, casual", "en", "male"),
    Voice("edge_aria", "Aria (Edge)", TTSProvider.EDGE, "en-US-AriaNeural",
          "Female, American, professional", "en", "female"),
    Voice("edge_davis", "Davis (Edge)", TTSProvider.EDGE, "en-US-DavisNeural",
          "Male, American, calm", "en", "male"),
    Voice("edge_amber", "Amber (Edge)", TTSProvider.EDGE, "en-US-AmberNeural",
          "Female, American, warm", "en", "female"),
    Voice("edge_andrew", "Andrew (Edge)", TTSProvider.EDGE, "en-US-AndrewNeural",
          "Male, American, confident", "en", "male"),
    Voice("edge_ashley", "Ashley (Edge)", TTSProvider.EDGE, "en-US-AshleyNeural",
          "Female, American, cheerful", "en", "female"),
    Voice("edge_brian", "Brian (Edge)", TTSProvider.EDGE, "en-US-BrianNeural",
          "Male, American, narrator", "en", "male"),
    Voice("edge_christopher", "Christopher (Edge)", TTSProvider.EDGE, "en-US-ChristopherNeural",
          "Male, American, reliable", "en", "male"),
    Voice("edge_cora", "Cora (Edge)", TTSProvider.EDGE, "en-US-CoraNeural",
          "Female, American, older", "en", "female"),
    Voice("edge_elizabeth", "Elizabeth (Edge)", TTSProvider.EDGE, "en-US-ElizabethNeural",
          "Female, American, elegant", "en", "female"),
    Voice("edge_eric", "Eric (Edge)", TTSProvider.EDGE, "en-US-EricNeural",
          "Male, American, friendly", "en", "male"),
    Voice("edge_jacob", "Jacob (Edge)", TTSProvider.EDGE, "en-US-JacobNeural",
          "Male, American, young", "en", "male"),
    Voice("edge_michelle", "Michelle (Edge)", TTSProvider.EDGE, "en-US-MichelleNeural",
          "Female, American, clear", "en", "female"),
    Voice("edge_monica", "Monica (Edge)", TTSProvider.EDGE, "en-US-MonicaNeural",
          "Female, American, expressive", "en", "female"),
    Voice("edge_roger", "Roger (Edge)", TTSProvider.EDGE, "en-US-RogerNeural",
          "Male, American, mature", "en", "male"),
    Voice("edge_steffan", "Steffan (Edge)", TTSProvider.EDGE, "en-US-SteffanNeural",
          "Male, American, formal", "en", "male"),
    Voice("edge_tony", "Tony (Edge)", TTSProvider.EDGE, "en-US-TonyNeural",
          "Male, American, conversational", "en", "male"),
    # UK voices
    Voice("edge_sonia", "Sonia (Edge UK)", TTSProvider.EDGE, "en-GB-SoniaNeural",
          "Female, British, professional", "en", "female"),
    Voice("edge_ryan", "Ryan (Edge UK)", TTSProvider.EDGE, "en-GB-RyanNeural",
          "Male, British, clear", "en", "male"),
    Voice("edge_libby", "Libby (Edge UK)", TTSProvider.EDGE, "en-GB-LibbyNeural",
          "Female, British, warm", "en", "female"),
    Voice("edge_thomas", "Thomas (Edge UK)", TTSProvider.EDGE, "en-GB-ThomasNeural",
          "Male, British, friendly", "en", "male"),
    # Australian voices
    Voice("edge_natasha", "Natasha (Edge AU)", TTSProvider.EDGE, "en-AU-NatashaNeural",
          "Female, Australian, friendly", "en", "female"),
    Voice("edge_william", "William (Edge AU)", TTSProvider.EDGE, "en-AU-WilliamNeural",
          "Male, Australian, casual", "en", "male"),
]
# Piper voices (local, via sherpa-onnx)
# provider_voice_id is the local Piper model/voice name.
PIPER_VOICES = [
    Voice("piper_amy", "Amy (Piper)", TTSProvider.PIPER, "amy",
          "Female, American, clear", "en", "female"),
    Voice("piper_joe", "Joe (Piper)", TTSProvider.PIPER, "joe",
          "Male, American, natural", "en", "male"),
    Voice("piper_lessac", "Lessac (Piper)", TTSProvider.PIPER, "lessac",
          "Female, American, expressive", "en", "female"),
    Voice("piper_alan", "Alan (Piper)", TTSProvider.PIPER, "alan",
          "Male, British, clear", "en", "male"),
]
# All voices combined (order determines listing order in the UI/API)
ALL_VOICES = ELEVENLABS_VOICES + EDGE_VOICES + PIPER_VOICES
# Voice lookup by ID
VOICES_BY_ID = {v.id: v for v in ALL_VOICES}
# Default voice assignments for callers (maps caller key to voice ID)
# Keys are the keyboard keys used to pick a caller, plus the two fixed roles.
DEFAULT_CALLER_VOICES = {
    "1": "el_tony",  # Tony from Staten Island
    "2": "el_jasmine",  # Jasmine from Atlanta
    "3": "el_rick",  # Rick from Texas
    "4": "el_megan",  # Megan from Portland
    "5": "el_dennis",  # Dennis from Long Island
    "6": "el_tanya",  # Tanya from Miami
    "7": "el_earl",  # Earl from Tennessee
    "8": "el_carla",  # Carla from Jersey
    "9": "el_marcus",  # Marcus from Detroit
    "0": "el_brenda",  # Brenda from Phoenix
    "-": "el_jake",  # Jake from Boston
    "=": "el_diane",  # Diane from Chicago
    "bobby": "el_bobby",
    "announcer": "el_announcer",
}
class VoiceManager:
    """Manages voice assignments and TTS provider selection"""

    def __init__(self):
        # Runtime-mutable copy of the default caller -> voice mapping
        self.caller_voices = dict(DEFAULT_CALLER_VOICES)

    def get_voice(self, voice_id: str) -> Optional[Voice]:
        """Look up a voice by its internal ID; None if unknown."""
        return VOICES_BY_ID.get(voice_id)

    def get_caller_voice(self, caller_key: str) -> Voice:
        """Return the voice assigned to a caller, falling back to el_tony / the first ElevenLabs voice."""
        assigned = self.caller_voices.get(caller_key, "el_tony")
        return VOICES_BY_ID.get(assigned, ELEVENLABS_VOICES[0])

    def set_caller_voice(self, caller_key: str, voice_id: str):
        """Assign a voice to a caller; unknown voice IDs are silently ignored."""
        if voice_id in VOICES_BY_ID:
            self.caller_voices[caller_key] = voice_id

    def get_all_voices(self) -> list[dict]:
        """Serialize every available voice for the API."""
        serialized = []
        for voice in ALL_VOICES:
            serialized.append({
                "id": voice.id,
                "name": voice.name,
                "provider": voice.provider.value,
                "description": voice.description,
                "gender": voice.gender,
            })
        return serialized

    def get_voices_by_provider(self, provider: TTSProvider) -> list[Voice]:
        """Return all voices served by one backend."""
        matches = []
        for voice in ALL_VOICES:
            if voice.provider == provider:
                matches.append(voice)
        return matches

    def get_caller_voice_assignments(self) -> dict[str, str]:
        """Snapshot of the current caller -> voice ID mapping."""
        return dict(self.caller_voices)

    def set_caller_voice_assignments(self, assignments: dict[str, str]):
        """Apply several caller -> voice assignments, skipping unknown voice IDs."""
        for key, vid in assignments.items():
            if vid in VOICES_BY_ID:
                self.caller_voices[key] = vid


# Global instance
voice_manager = VoiceManager()

109
download_sounds.py Normal file
View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""
Download free sound effects for the radio show soundboard.
Uses sounds from freesound.org and other free sources.
"""
import os
import urllib.request
import ssl
from pathlib import Path
# Bypass SSL issues
# NOTE(review): this disables TLS certificate verification for every HTTPS
# request made by this process. Acceptable for a one-off download helper,
# but do not copy into server code.
ssl._create_default_https_context = ssl._create_unverified_context
# Destination directory for soundboard clips, created on import
SOUNDS_DIR = Path(__file__).parent / "sounds"
SOUNDS_DIR.mkdir(exist_ok=True)
# Free sound effect URLs (public domain / CC0)
# These are from various free sources
SOUND_URLS = {
    # Using pixabay free sounds (no attribution required)
    'rimshot.wav': 'https://cdn.pixabay.com/audio/2022/03/15/audio_7a569d6dde.mp3',
    'laugh.wav': 'https://cdn.pixabay.com/audio/2024/02/14/audio_70fa4b1f7c.mp3',
    'sad_trombone.wav': 'https://cdn.pixabay.com/audio/2022/03/15/audio_cce0f1f0f1.mp3',
    'cheer.wav': 'https://cdn.pixabay.com/audio/2021/08/04/audio_0625c1539c.mp3',
    'boo.wav': 'https://cdn.pixabay.com/audio/2022/10/30/audio_f2a4d3d7db.mp3',
    'drumroll.wav': 'https://cdn.pixabay.com/audio/2022/03/24/audio_52a6ef9129.mp3',
    'crickets.wav': 'https://cdn.pixabay.com/audio/2022/03/09/audio_691875e05c.mp3',
    'phone_ring.wav': 'https://cdn.pixabay.com/audio/2022/03/15/audio_0f66b49312.mp3',
}
def _convert_mp3_to_wav(data, name, output_path):
    """Write MP3 bytes to a temp file and convert to 24 kHz mono WAV via ffmpeg."""
    import subprocess
    temp_mp3 = SOUNDS_DIR / f"temp_{name}.mp3"
    try:
        with open(temp_mp3, 'wb') as f:
            f.write(data)
        result = subprocess.run([
            'ffmpeg', '-y', '-i', str(temp_mp3),
            '-ar', '24000', '-ac', '1',
            str(output_path)
        ], capture_output=True)
        if result.returncode == 0:
            print(f"{name}")
            return True
        # BUG FIX: remove any partial output ffmpeg left behind; otherwise the
        # next run would see the file and report "(already exists)".
        output_path.unlink(missing_ok=True)
        print(f"{name} (ffmpeg conversion failed)")
        return False
    finally:
        # BUG FIX: previously the temp MP3 leaked when subprocess.run raised
        # (e.g. FileNotFoundError because ffmpeg is not installed).
        temp_mp3.unlink(missing_ok=True)


def download_sound(name, url):
    """Download a sound file into SOUNDS_DIR, converting MP3 sources to WAV.

    Args:
        name: Target filename (e.g. "rimshot.wav").
        url: Source URL; .mp3 sources are converted with ffmpeg.

    Returns:
        True if the file exists (downloaded now or previously), False on failure.
    """
    output_path = SOUNDS_DIR / name
    if output_path.exists():
        print(f"{name} (already exists)")
        return True
    try:
        print(f" Downloading {name}...")
        # Download the file (some CDNs reject requests without a User-Agent)
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req, timeout=30) as response:
            data = response.read()
        # If it's an MP3, we need to convert it
        if url.endswith('.mp3'):
            return _convert_mp3_to_wav(data, name, output_path)
        with open(output_path, 'wb') as f:
            f.write(data)
        print(f"{name}")
        return True
    except Exception as e:
        # Best-effort downloader: report and move on to the next sound
        print(f"{name} ({e})")
        return False
def main():
    """Download every soundboard effect listed in SOUND_URLS and report results."""
    print("Downloading sound effects for radio show soundboard...")
    print(f"Saving to: {SOUNDS_DIR}\n")
    # Check for ffmpeg (needed to convert MP3 downloads to WAV)
    import subprocess
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
    except (FileNotFoundError, subprocess.CalledProcessError, OSError):
        # BUG FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; only the failure modes of
        # subprocess.run are handled now.
        print("WARNING: ffmpeg not found. Install it with: brew install ffmpeg")
        print("Some sounds may not download correctly.\n")
    success = 0
    for name, url in SOUND_URLS.items():
        if download_sound(name, url):
            success += 1
    print(f"\nDownloaded {success}/{len(SOUND_URLS)} sounds.")
    print("\nTo add more sounds:")
    print(" 1. Find free .wav files online")
    print(" 2. Name them according to the SOUNDBOARD mapping in radio_show.py")
    print(" 3. Place them in the sounds/ directory")
    print("\nRecommended free sound sources:")
    print(" - freesound.org")
    print(" - pixabay.com/sound-effects")
    print(" - zapsplat.com")
    print(" - soundbible.com")


if __name__ == "__main__":
    main()

543
frontend/css/style.css Normal file
View File

@@ -0,0 +1,543 @@
/* AI Radio Show - Clean CSS */
:root {
--bg: #1a1a2e;
--bg-light: #252547;
--accent: #e94560;
--text: #fff;
--text-muted: #888;
--radius: 8px;
}
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
background: var(--bg);
color: var(--text);
min-height: 100vh;
}
#app {
max-width: 900px;
margin: 0 auto;
padding: 20px;
}
/* Header */
header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 20px;
}
header h1 {
font-size: 1.5rem;
}
.header-buttons {
display: flex;
gap: 8px;
}
header button {
background: var(--bg-light);
color: var(--text);
border: none;
padding: 8px 16px;
border-radius: var(--radius);
cursor: pointer;
}
.new-session-btn {
background: var(--accent) !important;
}
.session-id {
font-size: 0.7rem;
color: var(--text-muted);
font-weight: normal;
}
.caller-background {
font-size: 0.85rem;
color: var(--text-muted);
padding: 10px;
background: var(--bg);
border-radius: var(--radius);
margin-bottom: 12px;
line-height: 1.4;
}
/* Caller background blurb (revealed only while a call is active) */
.caller-background.hidden {
    display: none;
}
/* Main layout: two equal columns, collapsing to one on narrow screens */
main {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 20px;
}
@media (max-width: 700px) {
    main {
        grid-template-columns: 1fr;
    }
}
/* Sections */
section {
    background: var(--bg-light);
    padding: 16px;
    border-radius: var(--radius);
}
section h2 {
    font-size: 1rem;
    margin-bottom: 12px;
    color: var(--text-muted);
}
/* Callers */
.caller-grid {
    display: grid;
    grid-template-columns: repeat(5, 1fr);
    gap: 8px;
    margin-bottom: 12px;
}
.caller-btn {
    background: var(--bg);
    color: var(--text);
    border: 2px solid transparent;
    padding: 10px 8px;
    border-radius: var(--radius);
    cursor: pointer;
    font-size: 0.85rem;
    transition: all 0.2s;
}
.caller-btn:hover {
    border-color: var(--accent);
}
/* Highlights the caller currently on the line */
.caller-btn.active {
    background: var(--accent);
    border-color: var(--accent);
}
.call-status {
    text-align: center;
    padding: 8px;
    color: var(--text-muted);
    margin-bottom: 12px;
}
.hangup-btn {
    width: 100%;
    background: #c0392b;
    color: white;
    border: none;
    padding: 12px;
    border-radius: var(--radius);
    cursor: pointer;
    font-weight: bold;
}
.hangup-btn:disabled {
    opacity: 0.5;
    cursor: not-allowed;
}
/* Chat */
.chat-section {
    grid-column: span 2; /* full width of the two-column grid */
}
@media (max-width: 700px) {
    .chat-section {
        grid-column: span 1;
    }
}
.chat-log {
    height: 300px;
    overflow-y: auto;
    background: var(--bg);
    border-radius: var(--radius);
    padding: 12px;
    margin-bottom: 12px;
}
.message {
    padding: 8px 12px;
    margin-bottom: 8px;
    border-radius: var(--radius);
    line-height: 1.4;
}
.message.host {
    background: #2c5282; /* blue bubble: the host ("You") */
}
.message.caller {
    background: #553c9a; /* purple bubble: the AI caller */
}
.message strong {
    display: block;
    font-size: 0.8rem;
    opacity: 0.7;
    margin-bottom: 4px;
}
.talk-controls {
    display: flex;
    gap: 10px;
}
.talk-btn {
    flex: 1;
    background: var(--accent);
    color: white;
    border: none;
    padding: 16px;
    border-radius: var(--radius);
    font-size: 1rem;
    font-weight: bold;
    cursor: pointer;
    transition: all 0.2s;
}
.talk-btn:hover {
    filter: brightness(1.1);
}
/* Pulses red while the server is recording the host mic */
.talk-btn.recording {
    background: #c0392b;
    animation: pulse 1s infinite;
}
@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.7; }
}
.type-btn {
    background: var(--bg);
    color: var(--text);
    border: none;
    padding: 16px 24px;
    border-radius: var(--radius);
    cursor: pointer;
}
/* Transient status banner ("… is thinking…") */
.status {
    text-align: center;
    padding: 12px;
    color: var(--accent);
    font-weight: bold;
}
.status.hidden {
    display: none;
}
/* Music */
.music-section select {
    width: 100%;
    padding: 10px;
    background: var(--bg);
    color: var(--text);
    border: none;
    border-radius: var(--radius);
    margin-bottom: 10px;
}
.music-controls {
    display: flex;
    gap: 8px;
    align-items: center;
}
.music-controls button {
    background: var(--bg);
    color: var(--text);
    border: none;
    padding: 10px 16px;
    border-radius: var(--radius);
    cursor: pointer;
}
/* Volume slider takes the remaining row width */
.music-controls input[type="range"] {
    flex: 1;
}
/* Soundboard */
.soundboard {
    display: grid;
    grid-template-columns: repeat(3, 1fr);
    gap: 8px;
}
.sound-btn {
    background: var(--bg);
    color: var(--text);
    border: none;
    padding: 12px 8px;
    border-radius: var(--radius);
    cursor: pointer;
    font-size: 0.8rem;
    transition: all 0.1s;
}
.sound-btn:hover {
    background: var(--accent);
}
.sound-btn:active {
    transform: scale(0.95); /* small press feedback */
}
/* Modal: full-viewport dimmed overlay with a centered card */
.modal {
    position: fixed;
    inset: 0;
    background: rgba(0, 0, 0, 0.8);
    display: flex;
    align-items: center;
    justify-content: center;
    z-index: 100;
}
.modal.hidden {
    display: none;
}
.modal-content {
    background: var(--bg-light);
    padding: 24px;
    border-radius: var(--radius);
    width: 90%;
    max-width: 400px;
}
.modal-content h2 {
    margin-bottom: 16px;
}
/* Settings-group headings inside the modal */
.modal-content h3 {
    font-size: 0.9rem;
    color: var(--text-muted);
    margin: 16px 0 8px 0;
    border-bottom: 1px solid var(--bg);
    padding-bottom: 4px;
}
.settings-group {
    margin-bottom: 16px;
}
/* Device dropdown + channel number side by side */
.device-row {
    display: flex;
    gap: 8px;
    align-items: flex-end;
}
.device-row label:first-child {
    flex: 1;
}
.channel-row {
    display: flex;
    gap: 12px;
    margin-top: 8px;
}
.channel-row label {
    display: flex;
    align-items: center;
    gap: 4px;
    font-size: 0.85rem;
}
.channel-input {
    width: 50px !important;
    text-align: center;
}
.modal-content label {
    display: block;
    margin-bottom: 16px;
}
.modal-content label.checkbox {
    display: flex;
    align-items: center;
    gap: 8px;
}
.modal-content select,
.modal-content input[type="text"],
.modal-content textarea {
    width: 100%;
    padding: 10px;
    background: var(--bg);
    color: var(--text);
    border: none;
    border-radius: var(--radius);
    margin-top: 4px;
}
.modal-buttons {
    display: flex;
    gap: 10px;
    margin-top: 20px;
}
.modal-buttons button {
    flex: 1;
    padding: 12px;
    border: none;
    border-radius: var(--radius);
    cursor: pointer;
    font-weight: bold;
}
/* First button is the primary action (Save/Send) */
.modal-buttons button:first-child {
    background: var(--accent);
    color: white;
}
.modal-buttons button:last-child {
    background: var(--bg);
    color: var(--text);
}
.refresh-btn {
    background: var(--bg);
    color: var(--text-muted);
    border: 1px solid var(--bg-light);
    padding: 6px 12px;
    border-radius: var(--radius);
    cursor: pointer;
    font-size: 0.85rem;
    margin-top: 8px;
}
.refresh-btn:hover {
    background: var(--bg-light);
    color: var(--text);
}
.refresh-btn:disabled {
    opacity: 0.5;
    cursor: not-allowed;
}
/* Generic hide helper; !important so it wins over display rules above */
.hidden {
    display: none !important;
}
/* Server Log */
.log-section {
    grid-column: span 2; /* full width of the two-column grid */
}
@media (max-width: 700px) {
    .log-section {
        grid-column: span 1;
    }
}
.log-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 12px;
}
.log-header h2 {
    margin-bottom: 0;
}
.server-controls {
    display: flex;
    gap: 8px;
    align-items: center;
}
.server-btn {
    border: none;
    padding: 6px 12px;
    border-radius: var(--radius);
    cursor: pointer;
    font-size: 0.85rem;
    font-weight: bold;
}
.server-btn.restart {
    background: #2196F3;
    color: white;
}
.server-btn.restart:hover {
    background: #1976D2;
}
.server-btn.stop {
    background: #c0392b;
    color: white;
}
.server-btn.stop:hover {
    background: #a93226;
}
.auto-scroll-label {
    display: flex;
    align-items: center;
    gap: 4px;
    font-size: 0.8rem;
    color: var(--text-muted);
    cursor: pointer;
}
/* Terminal-style log viewport: dark bg, monospace, green default text */
.server-log {
    height: 200px;
    overflow-y: auto;
    background: #0d0d1a;
    border-radius: var(--radius);
    padding: 12px;
    font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
    font-size: 0.75rem;
    line-height: 1.5;
    color: #8f8;
}
.server-log .log-line {
    white-space: pre-wrap;
    word-break: break-all;
}
/* Per-line tinting; classes assigned by the frontend log renderer */
.server-log .log-line.error {
    color: #f88;
}
.server-log .log-line.warning {
    color: #ff8;
}
.server-log .log-line.tts {
    color: #8ff;
}
.server-log .log-line.chat {
    color: #f8f;
}

178
frontend/index.html Normal file
View File

@@ -0,0 +1,178 @@
<!DOCTYPE html>
<!-- AI Radio Show control panel. All audio runs server-side; this page only
     drives the backend JSON API (see /js/app.js). -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AI Radio Show</title>
    <link rel="stylesheet" href="/css/style.css">
</head>
<body>
    <div id="app">
        <header>
            <h1>AI Radio Show</h1>
            <div class="header-buttons">
                <button id="new-session-btn" class="new-session-btn">New Session</button>
                <button id="settings-btn">Settings</button>
            </div>
        </header>
        <main>
            <!-- Callers: grid of caller buttons is populated by loadCallers() -->
            <section class="callers-section">
                <h2>Callers <span id="session-id" class="session-id"></span></h2>
                <div id="callers" class="caller-grid"></div>
                <div id="call-status" class="call-status">No active call</div>
                <div id="caller-background" class="caller-background hidden"></div>
                <button id="hangup-btn" class="hangup-btn" disabled>Hang Up</button>
            </section>
            <!-- Chat: transcript plus push-to-talk / typed input -->
            <section class="chat-section">
                <div id="chat" class="chat-log"></div>
                <div class="talk-controls">
                    <button id="talk-btn" class="talk-btn">Hold to Talk</button>
                    <button id="type-btn" class="type-btn">Type</button>
                </div>
                <div id="status" class="status hidden"></div>
            </section>
            <!-- Music -->
            <section class="music-section">
                <h2>Music</h2>
                <select id="track-select"></select>
                <div class="music-controls">
                    <button id="play-btn">Play</button>
                    <button id="stop-btn">Stop</button>
                    <input type="range" id="volume" min="0" max="100" value="30">
                </div>
            </section>
            <!-- Sound Effects: buttons populated by loadSounds() -->
            <section class="sounds-section">
                <h2>Sounds</h2>
                <div id="soundboard" class="soundboard"></div>
            </section>
            <!-- Server Log: polled live via /api/logs -->
            <section class="log-section">
                <div class="log-header">
                    <h2>Server Log</h2>
                    <div class="server-controls">
                        <button id="restart-server-btn" class="server-btn restart">Restart</button>
                        <button id="stop-server-btn" class="server-btn stop">Stop</button>
                        <label class="auto-scroll-label">
                            <input type="checkbox" id="auto-scroll" checked> Auto-scroll
                        </label>
                    </div>
                </div>
                <div id="server-log" class="server-log"></div>
            </section>
        </main>
        <!-- Settings Modal -->
        <div id="settings-modal" class="modal hidden">
            <div class="modal-content">
                <h2>Settings</h2>
                <!-- Audio Devices: device selection plus 1-based channel routing -->
                <div class="settings-group">
                    <h3>Audio Routing</h3>
                    <div class="device-row">
                        <label>
                            Input Device
                            <select id="input-device"></select>
                        </label>
                        <label>
                            Ch
                            <input type="number" id="input-channel" value="1" min="1" max="16" class="channel-input">
                        </label>
                    </div>
                    <div class="device-row">
                        <label>
                            Output Device
                            <select id="output-device"></select>
                        </label>
                    </div>
                    <div class="channel-row">
                        <label>Caller Ch <input type="number" id="caller-channel" value="1" min="1" max="16" class="channel-input"></label>
                        <label>Music Ch <input type="number" id="music-channel" value="2" min="1" max="16" class="channel-input"></label>
                        <label>SFX Ch <input type="number" id="sfx-channel" value="3" min="1" max="16" class="channel-input"></label>
                    </div>
                </div>
                <!-- LLM Settings: provider switch toggles the two sub-panels -->
                <div class="settings-group">
                    <h3>LLM Provider</h3>
                    <label>
                        Provider
                        <select id="provider">
                            <option value="openrouter">OpenRouter</option>
                            <option value="ollama">Ollama</option>
                        </select>
                    </label>
                    <div id="openrouter-settings">
                        <label>
                            Model
                            <select id="openrouter-model"></select>
                        </label>
                    </div>
                    <div id="ollama-settings" class="hidden">
                        <label>
                            Model
                            <select id="ollama-model"></select>
                        </label>
                        <label>
                            Host
                            <input type="text" id="ollama-host" value="http://localhost:11434">
                        </label>
                        <button type="button" id="refresh-ollama" class="refresh-btn">Refresh Models</button>
                    </div>
                </div>
                <!-- TTS Settings -->
                <div class="settings-group">
                    <h3>TTS Provider</h3>
                    <label>
                        Provider
                        <select id="tts-provider">
                            <option value="inworld">Inworld (High quality, natural)</option>
                            <option value="f5tts">F5-TTS (Most natural local)</option>
                            <option value="elevenlabs">ElevenLabs (Best quality, paid)</option>
                            <option value="kokoro">Kokoro MLX (Fast, Apple Silicon)</option>
                            <option value="chattts">ChatTTS (Conversational)</option>
                            <option value="styletts2">StyleTTS2 (Voice cloning)</option>
                            <option value="vits">VITS (Fast local)</option>
                            <option value="bark">Bark (Expressive, supports [laughs])</option>
                        </select>
                    </label>
                    <label class="checkbox">
                        <input type="checkbox" id="phone-filter">
                        Phone filter on voices
                    </label>
                </div>
                <div class="modal-buttons">
                    <button id="save-settings">Save</button>
                    <button id="close-settings">Close</button>
                </div>
            </div>
        </div>
        <!-- Type Modal: keyboard alternative to push-to-talk -->
        <div id="type-modal" class="modal hidden">
            <div class="modal-content">
                <h2>Type Message</h2>
                <textarea id="type-input" rows="3" placeholder="Type what you want to say..."></textarea>
                <div class="modal-buttons">
                    <button id="send-type">Send</button>
                    <button id="close-type">Cancel</button>
                </div>
            </div>
        </div>
    </div>
    <!-- ?v=8 busts browser caching after script changes -->
    <script src="/js/app.js?v=8"></script>
</body>
</html>

782
frontend/js/app.js Normal file
View File

@@ -0,0 +1,782 @@
/**
 * AI Radio Show - Control Panel (Server-Side Audio)
 *
 * All capture and playback happens on the server; this page is a thin
 * control surface that drives the backend JSON API and polls the log.
 */
// --- State ---
let currentCaller = null;   // { key, name } of the caller on the line, or null
let isProcessing = false;   // true while a chat/TTS round-trip is in flight
let isRecording = false;    // true while the server is recording the host mic
let phoneFilter = false;    // apply telephone band-pass to caller voices
let autoScroll = true;      // keep the server-log panel pinned to the bottom
let logPollInterval = null; // setInterval handle for log polling
let lastLogCount = 0;       // change detector for the rendered server log
// Track lists (populated from /api/music and /api/sounds)
let tracks = [];
let sounds = [];
// --- Init ---
// Boot sequence: load devices/callers/media lists and saved settings,
// then wire up the static UI event handlers.
document.addEventListener('DOMContentLoaded', async () => {
    console.log('AI Radio Show initializing...');
    try {
        await loadAudioDevices();
        await loadCallers();
        await loadMusic();
        await loadSounds();
        await loadSettings();
        initEventListeners();
        log('Ready. Configure audio devices in Settings, then click a caller to start.');
        console.log('AI Radio Show ready');
    } catch (err) {
        console.error('Init error:', err);
        log('Error loading: ' + err.message);
    }
});
/**
 * Wire up every static UI control. Called once after the initial data
 * loads; dynamically created buttons (callers, soundboard) attach their
 * own handlers at creation time.
 */
function initEventListeners() {
    // Hangup
    document.getElementById('hangup-btn')?.addEventListener('click', hangup);
    // New Session
    document.getElementById('new-session-btn')?.addEventListener('click', newSession);
    // Server controls
    document.getElementById('restart-server-btn')?.addEventListener('click', restartServer);
    document.getElementById('stop-server-btn')?.addEventListener('click', stopServer);
    document.getElementById('auto-scroll')?.addEventListener('change', e => {
        autoScroll = e.target.checked;
    });
    // Start log polling
    startLogPolling();
    // Talk button - now triggers server-side recording.
    // mouseleave also stops so dragging off the button can't leave the mic
    // open; touch events mirror the mouse events for mobile.
    const talkBtn = document.getElementById('talk-btn');
    if (talkBtn) {
        talkBtn.addEventListener('mousedown', startRecording);
        talkBtn.addEventListener('mouseup', stopRecording);
        talkBtn.addEventListener('mouseleave', () => { if (isRecording) stopRecording(); });
        talkBtn.addEventListener('touchstart', e => { e.preventDefault(); startRecording(); });
        talkBtn.addEventListener('touchend', e => { e.preventDefault(); stopRecording(); });
    }
    // Type button
    document.getElementById('type-btn')?.addEventListener('click', () => {
        document.getElementById('type-modal')?.classList.remove('hidden');
        document.getElementById('type-input')?.focus();
    });
    document.getElementById('send-type')?.addEventListener('click', sendTypedMessage);
    document.getElementById('close-type')?.addEventListener('click', () => {
        document.getElementById('type-modal')?.classList.add('hidden');
    });
    // Enter sends the typed message; Shift+Enter keeps inserting a newline
    document.getElementById('type-input')?.addEventListener('keydown', e => {
        if (e.key === 'Enter' && !e.shiftKey) {
            e.preventDefault();
            sendTypedMessage();
        }
    });
    // Music - now server-side
    document.getElementById('play-btn')?.addEventListener('click', playMusic);
    document.getElementById('stop-btn')?.addEventListener('click', stopMusic);
    document.getElementById('volume')?.addEventListener('input', setMusicVolume);
    // Settings
    document.getElementById('settings-btn')?.addEventListener('click', async () => {
        document.getElementById('settings-modal')?.classList.remove('hidden');
        await loadSettings(); // Reload settings when modal opens
    });
    document.getElementById('close-settings')?.addEventListener('click', () => {
        document.getElementById('settings-modal')?.classList.add('hidden');
    });
    document.getElementById('save-settings')?.addEventListener('click', saveSettings);
    document.getElementById('provider')?.addEventListener('change', updateProviderUI);
    document.getElementById('phone-filter')?.addEventListener('change', e => {
        phoneFilter = e.target.checked;
    });
    document.getElementById('refresh-ollama')?.addEventListener('click', refreshOllamaModels);
}
/**
 * Re-query the backend for locally installed Ollama models and rebuild
 * the #ollama-model dropdown. The refresh button is disabled while the
 * request is in flight.
 */
async function refreshOllamaModels() {
    const refreshBtn = document.getElementById('refresh-ollama');
    const modelSelect = document.getElementById('ollama-model');
    if (!modelSelect) return;
    refreshBtn.textContent = 'Loading...';
    refreshBtn.disabled = true;
    try {
        const response = await fetch('/api/settings');
        const settings = await response.json();
        modelSelect.innerHTML = '';
        const models = settings.available_ollama_models || [];
        if (!models.length) {
            // Keep the dropdown usable with an explicit placeholder entry
            const placeholder = document.createElement('option');
            placeholder.value = '';
            placeholder.textContent = '(No models found)';
            modelSelect.appendChild(placeholder);
        } else {
            for (const name of models) {
                const option = document.createElement('option');
                option.value = name;
                option.textContent = name;
                modelSelect.appendChild(option);
            }
        }
    } catch (err) {
        console.error('Failed to refresh Ollama models:', err);
    }
    refreshBtn.textContent = 'Refresh Models';
    refreshBtn.disabled = false;
}
// --- Audio Devices ---
/**
 * Populate the input/output device dropdowns from the server's device
 * enumeration, then apply the saved routing settings (device ids,
 * channel numbers, phone filter) to the form controls.
 */
async function loadAudioDevices() {
    try {
        const res = await fetch('/api/audio/devices');
        const data = await res.json();
        const inputSelect = document.getElementById('input-device');
        const outputSelect = document.getElementById('output-device');
        if (!inputSelect || !outputSelect) return;
        // Clear selects
        inputSelect.innerHTML = '<option value="">-- Select --</option>';
        outputSelect.innerHTML = '<option value="">-- Select --</option>';
        // A device may appear in both lists if it has inputs and outputs
        data.devices.forEach(device => {
            // Input devices
            if (device.inputs > 0) {
                const opt = document.createElement('option');
                opt.value = device.id;
                opt.textContent = `${device.name} (${device.inputs} ch)`;
                inputSelect.appendChild(opt);
            }
            // Output devices
            if (device.outputs > 0) {
                const opt = document.createElement('option');
                opt.value = device.id;
                opt.textContent = `${device.name} (${device.outputs} ch)`;
                outputSelect.appendChild(opt);
            }
        });
        // Load current settings
        const settingsRes = await fetch('/api/audio/settings');
        const settings = await settingsRes.json();
        if (settings.input_device !== null)
            inputSelect.value = settings.input_device;
        if (settings.output_device !== null)
            outputSelect.value = settings.output_device;
        // Channel settings (1-based channel numbers on the audio interface)
        const inputCh = document.getElementById('input-channel');
        const callerCh = document.getElementById('caller-channel');
        const musicCh = document.getElementById('music-channel');
        const sfxCh = document.getElementById('sfx-channel');
        if (inputCh) inputCh.value = settings.input_channel || 1;
        if (callerCh) callerCh.value = settings.caller_channel || 1;
        if (musicCh) musicCh.value = settings.music_channel || 2;
        if (sfxCh) sfxCh.value = settings.sfx_channel || 3;
        // Phone filter setting (module-level flag mirrors the checkbox)
        const phoneFilterEl = document.getElementById('phone-filter');
        if (phoneFilterEl) {
            phoneFilterEl.checked = settings.phone_filter ?? false;
            phoneFilter = phoneFilterEl.checked;
        }
        console.log('Audio devices loaded');
    } catch (err) {
        console.error('loadAudioDevices error:', err);
    }
}
/**
 * Read the audio-routing controls from the settings modal and POST them
 * to the backend. Also syncs the module-level phoneFilter flag.
 */
async function saveAudioDevices() {
    const valueOf = id => document.getElementById(id)?.value;
    const asChannel = (raw, fallback) => (raw ? parseInt(raw) : fallback);
    const inputDevice = valueOf('input-device');
    const outputDevice = valueOf('output-device');
    const filterOn = document.getElementById('phone-filter')?.checked ?? false;
    const payload = {
        input_device: inputDevice ? parseInt(inputDevice) : null,
        input_channel: asChannel(valueOf('input-channel'), 1),
        output_device: outputDevice ? parseInt(outputDevice) : null,
        caller_channel: asChannel(valueOf('caller-channel'), 1),
        music_channel: asChannel(valueOf('music-channel'), 2),
        sfx_channel: asChannel(valueOf('sfx-channel'), 3),
        phone_filter: filterOn
    };
    await fetch('/api/audio/settings', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload)
    });
    // Keep the local flag in sync with what was just persisted
    phoneFilter = filterOn;
    log('Audio routing saved');
}
// --- Callers ---
/**
 * Load the available AI callers and build one button per caller in the
 * caller grid. Also displays the current session id next to the header.
 */
async function loadCallers() {
    try {
        const res = await fetch('/api/callers');
        const data = await res.json();
        const grid = document.getElementById('callers');
        if (!grid) return;
        grid.innerHTML = '';
        data.callers.forEach(caller => {
            const btn = document.createElement('button');
            btn.className = 'caller-btn';
            btn.textContent = caller.name;
            btn.dataset.key = caller.key; // used later to mark the active caller
            btn.addEventListener('click', () => startCall(caller.key, caller.name));
            grid.appendChild(btn);
        });
        // Show session ID
        const sessionEl = document.getElementById('session-id');
        if (sessionEl && data.session_id) {
            sessionEl.textContent = `(${data.session_id})`;
        }
        console.log('Loaded', data.callers.length, 'callers, session:', data.session_id);
    } catch (err) {
        console.error('loadCallers error:', err);
    }
}
/**
 * Connect to a caller: ask the backend to start the call, then update
 * the UI (status line, hangup button, background blurb, active button).
 * Previously a failed request threw straight out of the click handler
 * while the UI was left claiming the call connected; now the response
 * is checked and failures are reported without touching call state.
 * @param {string} key  Caller identifier used by the backend.
 * @param {string} name Human-readable caller name for the UI.
 */
async function startCall(key, name) {
    if (isProcessing) return;
    try {
        const res = await fetch(`/api/call/${key}`, { method: 'POST' });
        if (!res.ok) {
            log(`Could not connect to ${name} (HTTP ${res.status})`);
            return;
        }
        const data = await res.json();
        currentCaller = { key, name };
        document.getElementById('call-status').textContent = `On call: ${name}`;
        document.getElementById('hangup-btn').disabled = false;
        // Show the caller's generated background blurb, if any
        const bgEl = document.getElementById('caller-background');
        if (bgEl && data.background) {
            bgEl.textContent = data.background;
            bgEl.classList.remove('hidden');
        }
        // Mark only the selected caller button as active
        document.querySelectorAll('.caller-btn').forEach(btn => {
            btn.classList.toggle('active', btn.dataset.key === key);
        });
        log(`Connected to ${name}`);
        clearChat();
    } catch (err) {
        log(`Call failed: ${err.message}`);
    }
}
/**
 * Start a fresh session: hang up any active call, ask the backend to
 * reset, and reload the caller list (which carries the new session id).
 */
async function newSession() {
    // End the current call first so state is clean before resetting
    if (currentCaller) {
        await hangup();
    }
    await fetch('/api/session/reset', { method: 'POST' });
    // Hide the stale caller background from the previous session
    document.getElementById('caller-background')?.classList.add('hidden');
    await loadCallers();
    log('New session started - all callers have fresh backgrounds');
}
/**
 * End the current call: cut off any speech still playing, notify the
 * backend, and reset all call-related UI to its idle state.
 */
async function hangup() {
    if (!currentCaller) return;
    // Stop in-flight TTS before ending the call itself
    await fetch('/api/tts/stop', { method: 'POST' });
    await fetch('/api/hangup', { method: 'POST' });
    log(`Hung up on ${currentCaller.name}`);
    currentCaller = null;
    isProcessing = false;
    hideStatus();
    document.getElementById('call-status').textContent = 'No active call';
    document.getElementById('hangup-btn').disabled = true;
    for (const btn of document.querySelectorAll('.caller-btn')) {
        btn.classList.remove('active');
    }
    document.getElementById('caller-background')?.classList.add('hidden');
}
// --- Server-Side Recording ---
/**
 * Push-to-talk press: tell the server to start capturing the host mic.
 * Ignored when no caller is connected or a previous utterance is still
 * being processed.
 */
async function startRecording() {
    if (!currentCaller || isProcessing) return;
    try {
        const res = await fetch('/api/record/start', { method: 'POST' });
        if (!res.ok) {
            const err = await res.json();
            log('Record error: ' + (err.detail || 'Failed to start'));
            return;
        }
        // Only flip UI state once the server confirms it is recording
        isRecording = true;
        document.getElementById('talk-btn').classList.add('recording');
        document.getElementById('talk-btn').textContent = 'Recording...';
    } catch (err) {
        log('Record error: ' + err.message);
    }
}
/**
 * Push-to-talk release: stop the server-side recording, then run the
 * full round trip - transcription -> LLM chat reply -> server-side TTS.
 * Updates the chat log and status banner at each stage; isProcessing
 * blocks overlapping round trips until this one finishes.
 */
async function stopRecording() {
    if (!isRecording) return;
    document.getElementById('talk-btn').classList.remove('recording');
    document.getElementById('talk-btn').textContent = 'Hold to Talk';
    isRecording = false;
    isProcessing = true;
    showStatus('Processing...');
    try {
        // Stop recording and get transcription
        const res = await fetch('/api/record/stop', { method: 'POST' });
        const data = await res.json();
        if (!data.text) {
            log('(No speech detected)');
            isProcessing = false;
            hideStatus();
            return;
        }
        addMessage('You', data.text);
        // Chat: send the transcription to the active caller's persona
        showStatus(`${currentCaller.name} is thinking...`);
        const chatRes = await fetch('/api/chat', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text: data.text })
        });
        const chatData = await chatRes.json();
        addMessage(chatData.caller, chatData.text);
        // TTS (plays on server) - only if we have text
        if (chatData.text && chatData.text.trim()) {
            showStatus(`${currentCaller.name} is speaking...`);
            await fetch('/api/tts', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({
                    text: chatData.text,
                    voice_id: chatData.voice_id,
                    phone_filter: phoneFilter
                })
            });
        }
    } catch (err) {
        log('Error: ' + err.message);
    }
    isProcessing = false;
    hideStatus();
}
/**
 * Keyboard alternative to push-to-talk: take the text from the Type
 * modal and run the same chat -> TTS round trip as stopRecording().
 */
async function sendTypedMessage() {
    const input = document.getElementById('type-input');
    const text = input.value.trim();
    if (!text || !currentCaller || isProcessing) return;
    // Clear and close the modal immediately so the UI feels responsive
    input.value = '';
    document.getElementById('type-modal').classList.add('hidden');
    isProcessing = true;
    addMessage('You', text);
    try {
        showStatus(`${currentCaller.name} is thinking...`);
        const chatRes = await fetch('/api/chat', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text })
        });
        const chatData = await chatRes.json();
        addMessage(chatData.caller, chatData.text);
        // TTS (plays on server) - only if we have text
        if (chatData.text && chatData.text.trim()) {
            showStatus(`${currentCaller.name} is speaking...`);
            await fetch('/api/tts', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({
                    text: chatData.text,
                    voice_id: chatData.voice_id,
                    phone_filter: phoneFilter
                })
            });
        }
    } catch (err) {
        log('Error: ' + err.message);
    }
    isProcessing = false;
    hideStatus();
}
// --- Music (Server-Side) ---
/**
 * Load the music track list from the backend and populate the
 * #track-select dropdown. Stores the list in module-level `tracks`.
 * (Cleanup: the forEach callback previously declared an index
 * parameter it never used.)
 */
async function loadMusic() {
    try {
        const res = await fetch('/api/music');
        const data = await res.json();
        tracks = data.tracks || [];
        const select = document.getElementById('track-select');
        if (!select) return;
        select.innerHTML = '';
        tracks.forEach(track => {
            const option = document.createElement('option');
            option.value = track.file;
            option.textContent = track.name;
            select.appendChild(option);
        });
        console.log('Loaded', tracks.length, 'tracks');
    } catch (err) {
        console.error('loadMusic error:', err);
    }
}
/**
 * Ask the server to start playing the track currently selected in the
 * dropdown. Does nothing if no track is selected.
 */
async function playMusic() {
    const selected = document.getElementById('track-select')?.value;
    if (!selected) return;
    await fetch('/api/music/play', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ track: selected, action: 'play' })
    });
}
/** Ask the server to stop music playback. */
async function stopMusic() {
    await fetch('/api/music/stop', { method: 'POST' });
}

/**
 * Volume-slider handler: map the 0-100 slider value to the 0.0-1.0
 * range the API expects and send it to the music volume endpoint.
 * @param {Event} e input event from the range control.
 */
async function setMusicVolume(e) {
    const level = e.target.value / 100;
    await fetch('/api/music/volume', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ track: '', action: 'volume', volume: level })
    });
}
// --- Sound Effects (Server-Side) ---
/**
 * Fetch the soundboard entries and build one trigger button per sound
 * effect. Stores the list in module-level `sounds`.
 */
async function loadSounds() {
    try {
        const res = await fetch('/api/sounds');
        const data = await res.json();
        sounds = data.sounds || [];
        const board = document.getElementById('soundboard');
        if (!board) return;
        board.innerHTML = '';
        for (const sound of sounds) {
            const btn = document.createElement('button');
            btn.className = 'sound-btn';
            btn.textContent = sound.name;
            btn.addEventListener('click', () => playSFX(sound.file));
            board.appendChild(btn);
        }
        console.log('Loaded', sounds.length, 'sounds');
    } catch (err) {
        console.error('loadSounds error:', err);
    }
}
/**
 * Trigger a sound effect on the server's SFX output channel.
 * @param {string} soundFile File name as returned by /api/sounds.
 */
async function playSFX(soundFile) {
    const body = JSON.stringify({ sound: soundFile });
    await fetch('/api/sfx/play', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body
    });
}
// --- Settings ---
/**
 * Pull current settings from the backend and populate the settings
 * modal: LLM provider + model dropdowns (OpenRouter and Ollama) and the
 * TTS provider. Called at startup and every time the modal opens.
 */
async function loadSettings() {
    try {
        const res = await fetch('/api/settings');
        const data = await res.json();
        const providerEl = document.getElementById('provider');
        if (providerEl) providerEl.value = data.provider || 'openrouter';
        const modelSelect = document.getElementById('openrouter-model');
        if (modelSelect) {
            modelSelect.innerHTML = '';
            (data.available_openrouter_models || []).forEach(model => {
                const option = document.createElement('option');
                option.value = model;
                option.textContent = model;
                if (model === data.openrouter_model) option.selected = true;
                modelSelect.appendChild(option);
            });
        }
        const ollamaModel = document.getElementById('ollama-model');
        const ollamaHost = document.getElementById('ollama-host');
        if (ollamaHost) ollamaHost.value = data.ollama_host || 'http://localhost:11434';
        // Populate Ollama models dropdown
        if (ollamaModel) {
            ollamaModel.innerHTML = '';
            const ollamaModels = data.available_ollama_models || [];
            console.log('Ollama models from API:', ollamaModels.length, ollamaModels);
            if (ollamaModels.length === 0) {
                // Fall back to the configured model so the dropdown is never empty
                const option = document.createElement('option');
                option.value = data.ollama_model || 'llama3.2';
                option.textContent = data.ollama_model || 'llama3.2';
                ollamaModel.appendChild(option);
            } else {
                ollamaModels.forEach(model => {
                    const option = document.createElement('option');
                    option.value = model;
                    option.textContent = model;
                    if (model === data.ollama_model) option.selected = true;
                    ollamaModel.appendChild(option);
                });
            }
            console.log('Ollama dropdown options:', ollamaModel.options.length);
        } else {
            console.log('Ollama model element not found!');
        }
        // TTS provider
        const ttsProvider = document.getElementById('tts-provider');
        if (ttsProvider) ttsProvider.value = data.tts_provider || 'elevenlabs';
        // Reveal only the sub-panel for the active provider
        updateProviderUI();
        console.log('Settings loaded:', data.provider, 'TTS:', data.tts_provider);
    } catch (err) {
        console.error('loadSettings error:', err);
    }
}
/**
 * Show the settings sub-panel for the selected LLM provider and hide
 * the other one.
 */
function updateProviderUI() {
    const provider = document.getElementById('provider')?.value;
    const usingOllama = provider !== 'openrouter';
    document.getElementById('openrouter-settings')?.classList.toggle('hidden', usingOllama);
    document.getElementById('ollama-settings')?.classList.toggle('hidden', !usingOllama);
}
/**
 * Persist everything in the settings modal: audio routing first (its
 * own endpoint), then LLM provider/model and TTS provider. Closes the
 * modal when done.
 */
async function saveSettings() {
    // Save audio devices
    await saveAudioDevices();
    // Save LLM and TTS settings
    await fetch('/api/settings', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            provider: document.getElementById('provider')?.value,
            openrouter_model: document.getElementById('openrouter-model')?.value,
            ollama_model: document.getElementById('ollama-model')?.value,
            ollama_host: document.getElementById('ollama-host')?.value,
            tts_provider: document.getElementById('tts-provider')?.value
        })
    });
    document.getElementById('settings-modal')?.classList.add('hidden');
    log('Settings saved');
}
// --- UI Helpers ---
/**
 * Append a chat bubble to the chat log and keep it scrolled to the
 * bottom. Falls back to console logging when the chat element is
 * missing. Security fix: the message used to be assembled with a
 * template string assigned to innerHTML, so markup in transcribed or
 * LLM-generated text was injected into the page; text is now inserted
 * via DOM text nodes and cannot execute as HTML.
 * @param {string} sender Display name ("You", caller name, "System").
 * @param {string} text   Message body, treated strictly as plain text.
 */
function addMessage(sender, text) {
    const chat = document.getElementById('chat');
    if (!chat) {
        console.log(`[${sender}]: ${text}`);
        return;
    }
    const div = document.createElement('div');
    div.className = `message ${sender === 'You' ? 'host' : 'caller'}`;
    const label = document.createElement('strong');
    label.textContent = `${sender}:`;
    div.appendChild(label);
    div.appendChild(document.createTextNode(` ${text}`));
    chat.appendChild(div);
    chat.scrollTop = chat.scrollHeight;
}
/** Remove all messages from the chat log. */
function clearChat() {
    const chat = document.getElementById('chat');
    if (chat) {
        chat.innerHTML = '';
    }
}

/** Write a system-level note into the chat log. */
function log(text) {
    addMessage('System', text);
}
/**
 * Show the status banner with the given text (e.g. "... is thinking...").
 * @param {string} text
 */
function showStatus(text) {
    const banner = document.getElementById('status');
    if (!banner) return;
    banner.textContent = text;
    banner.classList.remove('hidden');
}

/** Hide the status banner. */
function hideStatus() {
    document.getElementById('status')?.classList.add('hidden');
}
// --- Server Control & Logging ---
/**
 * Begin polling the server log endpoint once per second, with an
 * immediate first fetch so the panel fills right away.
 */
function startLogPolling() {
    fetchLogs();
    logPollInterval = setInterval(fetchLogs, 1000);
}
/**
 * Fetch the latest server log lines and render them into #server-log,
 * colorizing lines by severity/tag markers.
 *
 * Bug fix: the previous implementation only re-rendered when the
 * *number* of lines changed. Once the server buffer reaches the
 * requested cap (200 lines) the count stays constant even as new lines
 * roll in, so the display froze. A signature of count + last line is
 * compared instead (kept in the module-level `lastLogCount`, which is
 * used only by this function).
 */
async function fetchLogs() {
    try {
        const res = await fetch('/api/logs?lines=200');
        const data = await res.json();
        const logEl = document.getElementById('server-log');
        if (!logEl) return;
        // Changes whenever a new line arrives, even at the line cap
        const signature = `${data.logs.length}:${data.logs[data.logs.length - 1] || ''}`;
        if (signature !== lastLogCount) {
            lastLogCount = signature;
            logEl.innerHTML = data.logs.map(line => {
                let className = 'log-line';
                if (line.includes('Error') || line.includes('error') || line.includes('ERROR')) {
                    className += ' error';
                } else if (line.includes('Warning') || line.includes('WARNING')) {
                    className += ' warning';
                } else if (line.includes('[TTS]')) {
                    className += ' tts';
                } else if (line.includes('[Chat]')) {
                    className += ' chat';
                }
                return `<div class="${className}">${escapeHtml(line)}</div>`;
            }).join('');
            if (autoScroll) {
                logEl.scrollTop = logEl.scrollHeight;
            }
        }
    } catch (err) {
        // Server might be down (e.g. during restart); retry on next poll
        console.log('Log fetch failed (server may be restarting)');
    }
}
/**
 * HTML-escape arbitrary text by round-tripping it through a detached
 * DOM node: setting the text content and reading back innerHTML lets
 * the browser perform the escaping.
 * @param {string} text
 * @returns {string} markup-safe version of the input
 */
function escapeHtml(text) {
    const scratch = document.createElement('div');
    scratch.appendChild(document.createTextNode(text));
    return scratch.innerHTML;
}
/**
 * Restart the backend process (after confirmation), then poll
 * /api/server/status once per second until it responds again — giving
 * up after ~30 attempts — and reload settings from the fresh process.
 */
async function restartServer() {
    if (!confirm('Restart the server? This will briefly disconnect you.')) return;
    try {
        await fetch('/api/server/restart', { method: 'POST' });
        log('Server restart requested...');
        // Clear the log and wait for server to come back
        document.getElementById('server-log').innerHTML = '<div class="log-line">Restarting server...</div>';
        // Poll until server is back
        let attempts = 0;
        const checkServer = setInterval(async () => {
            attempts++;
            try {
                const res = await fetch('/api/server/status');
                if (res.ok) {
                    clearInterval(checkServer);
                    log('Server restarted successfully');
                    await loadSettings();
                }
            } catch (e) {
                // fetch throws while the server is down; give up after ~30s
                if (attempts > 30) {
                    clearInterval(checkServer);
                    log('Server did not restart - check terminal');
                }
            }
        }, 1000);
    } catch (err) {
        log('Failed to restart server: ' + err.message);
    }
}
/**
 * Ask the backend process to shut itself down (after confirmation).
 * The server must then be restarted manually from the terminal.
 */
async function stopServer() {
    if (!confirm('Stop the server? You will need to restart it manually.')) return;
    try {
        await fetch('/api/server/stop', { method: 'POST' });
        log('Server stop requested...');
        const logEl = document.getElementById('server-log');
        logEl.innerHTML = '<div class="log-line">Server stopped. Run ./run.sh to restart.</div>';
    } catch (err) {
        log('Failed to stop server: ' + err.message);
    }
}

77
generate_callers.py Normal file
View File

@@ -0,0 +1,77 @@
import os

# Bug fix: Bark reads SUNO_USE_SMALL_MODELS with plain truthiness, and
# environment variables are strings — so the string "False" is truthy and
# would actually *enable* the small models, the opposite of the intent.
# Ensure the variable is absent so Bark falls back to its default
# (full-size models). TODO confirm against the installed bark version.
os.environ.pop("SUNO_USE_SMALL_MODELS", None)

from bark import generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
from scipy.signal import butter, filtfilt
import numpy as np
def phone_filter(audio, sample_rate=24000, low_hz=300.0, high_hz=3400.0):
    """Make audio sound like it came through a telephone line.

    Applies a 4th-order Butterworth band-pass (default 300-3400 Hz, the
    classic telephone voice band) with zero-phase filtering, then a soft
    tanh saturation that keeps peaks below 0.9.

    The cutoff frequencies are now parameters (previously hard-coded) so
    other "narrow band" effects can reuse this; the defaults preserve the
    original behavior exactly.

    Args:
        audio: 1-D array of audio samples.
        sample_rate: Sample rate of ``audio`` in Hz.
        low_hz: Low cutoff of the pass band in Hz.
        high_hz: High cutoff of the pass band in Hz (must be < Nyquist).

    Returns:
        Filtered samples as ``float32``, same shape as the input.
    """
    nyquist = sample_rate / 2
    b, a = butter(4, [low_hz / nyquist, high_hz / nyquist], btype='band')
    # filtfilt runs the filter forward and backward: zero phase distortion
    filtered = filtfilt(b, a, audio)
    # Soft compression for a slightly "hot" phone sound; bounds output to +/-0.9
    filtered = np.tanh(filtered * 1.5) * 0.9
    return filtered.astype(np.float32)
# Define your callers
CALLERS = [
{
"name": "caller1_mike",
"voice": "v2/en_speaker_6",
"text": """Hey, thanks for taking my call!
So I've been thinking about this a lot and...
I know it sounds crazy, but hear me out."""
},
{
"name": "caller2_sarah",
"voice": "v2/en_speaker_9",
"text": """Hi! Oh my gosh, I can't believe I got through.
Okay so... this is kind of a long story,
but basically I had this experience last week that blew my mind."""
},
{
"name": "caller3_dave",
"voice": "v2/en_speaker_1",
"text": """Yeah, hey. First time caller, long time listener.
Look, I gotta be honest with you here,
I think you're missing something important."""
},
{
"name": "caller4_jenny",
"voice": "v2/en_speaker_3",
"text": """Okay okay, so get this...
I was literally just talking about this with my friend yesterday!
And she said, and I quote, well, I can't say that on air."""
},
]
def main():
    """Synthesize a clean and a phone-filtered intro clip per caller."""
    print("Loading models...")
    preload_models()
    os.makedirs("output", exist_ok=True)

    for caller in CALLERS:
        name = caller["name"]
        print(f"\nGenerating: {name}")

        # Raw Bark synthesis using this caller's speaker preset
        audio = generate_audio(caller["text"], history_prompt=caller["voice"])

        clean_path = f"output/{name}_clean.wav"
        write_wav(clean_path, 24000, audio)

        # Telephone-bandwidth version for the on-air "caller" sound
        phone_path = f"output/{name}_phone.wav"
        write_wav(phone_path, 24000, phone_filter(audio))

        print(f"  Saved: {clean_path}")
        print(f"  Saved: {phone_path}")

    print("\nDone! Check the output/ folder.")


if __name__ == "__main__":
    main()

102
generate_sounds.py Normal file
View File

@@ -0,0 +1,102 @@
#!/usr/bin/env python3
"""
Generate sound effects using ElevenLabs Sound Effects API
"""
import os
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
# Directory where generated effects are written (created on first run).
SOUNDS_DIR = Path(__file__).parent / "sounds"
SOUNDS_DIR.mkdir(exist_ok=True)
# Sound effects to generate with descriptions
# Maps output filename -> text prompt sent to the ElevenLabs sound-effects API.
SOUND_EFFECTS = {
    'airhorn.wav': 'loud air horn blast, sports event',
    'boo.wav': 'crowd booing, disappointed audience',
    'crickets.wav': 'crickets chirping, awkward silence',
    'drumroll.wav': 'drum roll, building suspense',
    'buzzer.wav': 'game show wrong answer buzzer',
    'laugh.wav': 'audience laughing, sitcom laugh track',
    'rimshot.wav': 'ba dum tss, drum rimshot comedy',
    'sad_trombone.wav': 'sad trombone, wah wah wah failure sound',
    'phone_ring.wav': 'old telephone ringing',
    'cheer.wav': 'crowd cheering and applauding',
    'scratch.wav': 'vinyl record scratch',
    'wow.wav': 'crowd saying wow, impressed reaction',
    'fart.wav': 'comedic fart sound effect',
    'victory.wav': 'victory fanfare, triumphant horns',
    'uh_oh.wav': 'uh oh, something went wrong sound',
}
def generate_sound(name, description, duration_seconds=2.0):
    """Generate one sound effect via the ElevenLabs sound-effects API.

    Skips work if the target file already exists. The API returns MP3 data,
    which is converted to 24 kHz mono WAV with ffmpeg.

    Args:
        name: Output filename (e.g. 'airhorn.wav'), written under SOUNDS_DIR.
        description: Text prompt describing the desired sound.
        duration_seconds: Requested clip length in seconds (default 2.0,
            matching the previous hard-coded value).

    Returns:
        True on success (or if the file already existed), False on failure.
    """
    from elevenlabs.client import ElevenLabs

    output_path = SOUNDS_DIR / name
    if output_path.exists():
        print(f"{name} (already exists)")
        return True
    # Defined outside the try so the finally block can always clean it up;
    # previously a failed ffmpeg run left the temp MP3 behind.
    temp_mp3 = SOUNDS_DIR / f"temp_{name}.mp3"
    try:
        print(f"  Generating {name}: '{description}'...")
        client = ElevenLabs(api_key=os.getenv('ELEVENLABS_API_KEY'))
        # Generate sound effect (streaming response -> raw MP3 bytes)
        result = client.text_to_sound_effects.convert(
            text=description,
            duration_seconds=duration_seconds,
        )
        audio_data = b''.join(result)
        # Save as mp3 first, then convert to wav with ffmpeg
        temp_mp3.write_bytes(audio_data)
        import subprocess
        subprocess.run([
            'ffmpeg', '-y', '-i', str(temp_mp3),
            '-ar', '24000', '-ac', '1',
            str(output_path)
        ], capture_output=True, check=True)
        print(f"{name}")
        return True
    except Exception as e:
        print(f"{name} ({e})")
        return False
    finally:
        # Always remove the intermediate MP3, even when conversion fails.
        if temp_mp3.exists():
            temp_mp3.unlink()
def main():
    """Generate all configured sound effects, reporting a success count."""
    print("Generating sound effects with ElevenLabs...")
    print(f"Saving to: {SOUNDS_DIR}")
    print("(This uses your ElevenLabs credits)\n")
    # Check for ffmpeg before spending any API credits
    import subprocess
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        # Narrow catch: the previous bare `except:` also swallowed
        # KeyboardInterrupt/SystemExit.
        print("ERROR: ffmpeg required. Install with: brew install ffmpeg")
        return
    success = 0
    for name, description in SOUND_EFFECTS.items():
        if generate_sound(name, description):
            success += 1
    print(f"\nGenerated {success}/{len(SOUND_EFFECTS)} sounds.")


if __name__ == "__main__":
    main()

400
publish_episode.py Executable file
View File

@@ -0,0 +1,400 @@
#!/usr/bin/env python3
"""
Podcast Episode Publisher
Transcribes audio, generates metadata, and publishes to Castopod.
Usage:
python publish_episode.py /path/to/episode.mp3
python publish_episode.py /path/to/episode.mp3 --episode-number 3
python publish_episode.py /path/to/episode.mp3 --dry-run
"""
import argparse
import json
import os
import re
import subprocess
import sys
import base64
from pathlib import Path
import requests
from dotenv import load_dotenv
# Load environment variables
load_dotenv(Path(__file__).parent / ".env")
# Configuration
CASTOPOD_URL = "https://podcast.macneilmediagroup.com"
# SECURITY: credentials below default to their historical hard-coded values
# so existing deployments keep working, but they can now be overridden via
# environment variables / .env instead of living in source control.
CASTOPOD_USERNAME = os.getenv("CASTOPOD_USERNAME", "admin")
CASTOPOD_PASSWORD = os.getenv("CASTOPOD_PASSWORD", "podcast2026api")
PODCAST_ID = 1
PODCAST_HANDLE = "LukeAtTheRoost"
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
WHISPER_MODEL = "base"  # Options: tiny, base, small, medium, large
# NAS Configuration for chapters upload
NAS_HOST = "mmgnas-10g"
NAS_USER = "luke"
NAS_SSH_PORT = 8001
DOCKER_PATH = "/share/CACHEDEV1_DATA/.qpkg/container-station/bin/docker"
CASTOPOD_CONTAINER = "castopod-castopod-1"
MARIADB_CONTAINER = "castopod-mariadb-1"
DB_USER = os.getenv("CASTOPOD_DB_USER", "castopod")
DB_PASS = os.getenv("CASTOPOD_DB_PASS", "BYtbFfk3ndeVabb26xb0UyKU")
DB_NAME = "castopod"
def get_auth_header():
    """Build the HTTP Basic Auth header dict for the Castopod REST API."""
    raw = f"{CASTOPOD_USERNAME}:{CASTOPOD_PASSWORD}".encode()
    token = base64.b64encode(raw).decode()
    return {"Authorization": f"Basic {token}"}
def transcribe_audio(audio_path: str) -> dict:
    """Transcribe an audio file with faster-whisper.

    Returns a dict with per-segment timestamps ('segments': list of
    {'start', 'end', 'text'}), the joined transcript ('full_text'), and
    the duration in whole seconds ('duration').

    Exits the process if faster-whisper is not installed.
    """
    print(f"[1/5] Transcribing {audio_path}...")
    try:
        from faster_whisper import WhisperModel
    except ImportError:
        print("Error: faster-whisper not installed. Run: pip install faster-whisper")
        sys.exit(1)

    model = WhisperModel(WHISPER_MODEL, compute_type="int8")
    segments, info = model.transcribe(audio_path, word_timestamps=True)

    seg_records = []
    texts = []
    for seg in segments:
        cleaned = seg.text.strip()
        seg_records.append({
            "start": seg.start,
            "end": seg.end,
            "text": cleaned
        })
        texts.append(cleaned)

    print(f"  Transcribed {info.duration:.1f} seconds of audio")
    return {
        "segments": seg_records,
        "full_text": " ".join(texts),
        "duration": int(info.duration)
    }
def generate_metadata(transcript: dict, episode_number: int) -> dict:
    """Use LLM to generate title, description, and chapters from transcript.

    Args:
        transcript: Dict from transcribe_audio() with 'segments', each
            carrying 'start'/'end'/'text'.
        episode_number: Number to embed in the generated title.

    Returns:
        Dict with 'title' (str), 'description' (str), and 'chapters'
        (list of {'startTime': seconds, 'title': str}).

    Exits the process on a missing API key, an HTTP error, or an
    unparseable LLM response.
    """
    print("[2/5] Generating metadata with LLM...")
    if not OPENROUTER_API_KEY:
        print("Error: OPENROUTER_API_KEY not set in .env")
        sys.exit(1)
    # Prepare transcript with timestamps for chapter detection
    # (each line becomes "[MM:SS] text" so the model can place chapters)
    timestamped_text = ""
    for seg in transcript["segments"]:
        mins = int(seg["start"] // 60)
        secs = int(seg["start"] % 60)
        timestamped_text += f"[{mins:02d}:{secs:02d}] {seg['text']}\n"
    prompt = f"""Analyze this podcast transcript and generate metadata.
TRANSCRIPT:
{timestamped_text}
Generate a JSON response with:
1. "title": A catchy episode title (include "Episode {episode_number}:" prefix)
2. "description": A 2-4 sentence description summarizing the episode's content. Mention callers by name and their topics. End with something engaging.
3. "chapters": An array of chapter objects with "startTime" (in seconds) and "title". Include:
- "Intro" at 0 seconds
- A chapter for each caller/topic (use caller names if mentioned)
- "Outro" near the end
Respond with ONLY valid JSON, no markdown or explanation."""
    response = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json"
        },
        json={
            "model": "anthropic/claude-3-haiku",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.7
        }
    )
    if response.status_code != 200:
        print(f"Error from OpenRouter: {response.text}")
        sys.exit(1)
    result = response.json()
    content = result["choices"][0]["message"]["content"]
    # Parse JSON from response (handle markdown code blocks):
    # strip a ```json ... ``` fence if the model ignored the instruction.
    content = content.strip()
    if content.startswith("```"):
        content = re.sub(r"^```(?:json)?\n?", "", content)
        content = re.sub(r"\n?```$", "", content)
    try:
        metadata = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"Error parsing LLM response: {e}")
        print(f"Response was: {content}")
        sys.exit(1)
    print(f"  Title: {metadata['title']}")
    print(f"  Chapters: {len(metadata['chapters'])}")
    return metadata
def create_episode(audio_path: str, metadata: dict, duration: int) -> dict:
    """Upload the audio file and create a draft episode on Castopod.

    Note: `duration` is accepted for interface stability but is currently
    unused by this function.

    Exits the process if the API does not return 200/201.
    """
    print("[3/5] Creating episode on Castopod...")
    headers = get_auth_header()
    form_fields = {
        "title": metadata["title"],
        "description_markdown": metadata["description"],
        "parental_advisory": "explicit",
        "type": "full",
        "created_by": "1"
    }
    # Stream the audio file as a multipart upload alongside the form fields.
    with open(audio_path, "rb") as audio_file:
        upload = {
            "audio_file": (Path(audio_path).name, audio_file, "audio/mpeg")
        }
        response = requests.post(
            f"{CASTOPOD_URL}/api/rest/v1/podcasts/{PODCAST_ID}/episodes",
            headers=headers,
            files=upload,
            data=form_fields
        )
    if response.status_code not in (200, 201):
        print(f"Error creating episode: {response.text}")
        sys.exit(1)
    episode = response.json()
    print(f"  Created episode ID: {episode['id']}")
    print(f"  Slug: {episode['slug']}")
    return episode
def publish_episode(episode_id: int) -> dict:
    """Publish a previously created episode immediately.

    Exits the process if the API does not return 200.
    """
    print("[4/5] Publishing episode...")
    response = requests.post(
        f"{CASTOPOD_URL}/api/rest/v1/episodes/{episode_id}/publish",
        headers=get_auth_header(),
        data={
            "publication_method": "now",
            "created_by": "1"
        }
    )
    if response.status_code != 200:
        print(f"Error publishing: {response.text}")
        sys.exit(1)
    episode = response.json()
    # published_at may come back as a {'date': ...} object or a plain value;
    # handle both shapes.
    stamp = episode.get("published_at", {})
    if isinstance(stamp, dict):
        print(f"  Published at: {stamp.get('date', 'unknown')}")
    else:
        print(f"  Published at: {stamp}")
    return episode
def save_chapters(metadata: dict, output_path: str):
    """Write metadata['chapters'] to a chapters JSON file (format 1.2.0)."""
    payload = {
        "version": "1.2.0",
        "chapters": metadata["chapters"]
    }
    Path(output_path).write_text(json.dumps(payload, indent=2))
    print(f"  Chapters saved to: {output_path}")
def run_ssh_command(command: str) -> tuple[bool, str]:
    """Execute a command on the NAS over SSH.

    Returns (ok, text): ok is True iff the remote command exited 0; text is
    stripped stdout, falling back to stripped stderr when stdout is empty.
    Timeouts and local SSH failures yield (False, reason).
    """
    ssh_cmd = [
        "ssh", "-p", str(NAS_SSH_PORT),
        f"{NAS_USER}@{NAS_HOST}",
        command
    ]
    try:
        proc = subprocess.run(ssh_cmd, capture_output=True, text=True, timeout=30)
    except subprocess.TimeoutExpired:
        return False, "SSH command timed out"
    except Exception as e:
        return False, str(e)
    return proc.returncode == 0, proc.stdout.strip() or proc.stderr.strip()
def upload_chapters_to_castopod(episode_slug: str, episode_id: int, chapters_path: str) -> bool:
    """Upload chapters file to Castopod via SSH and link in database.

    Steps: copy the JSON into the Castopod container's media directory,
    register it in the cp_media table, point cp_episodes.chapters_id at the
    new row, then clear Castopod's cache. Any failure prints a warning and
    returns False; True only on full success.
    """
    print("[4.5/5] Uploading chapters to Castopod...")
    chapters_filename = f"{episode_slug}-chapters.json"
    remote_path = f"podcasts/{PODCAST_HANDLE}/{chapters_filename}"
    # Read local chapters file
    with open(chapters_path, "r") as f:
        chapters_content = f.read()
    # Base64 encode for safe transfer (avoids shell-quoting issues in the
    # remote command below)
    chapters_b64 = base64.b64encode(chapters_content.encode()).decode()
    # Upload file to container using base64 decode
    upload_cmd = f'echo "{chapters_b64}" | base64 -d | {DOCKER_PATH} exec -i {CASTOPOD_CONTAINER} tee /var/www/castopod/public/media/{remote_path} > /dev/null'
    success, output = run_ssh_command(upload_cmd)
    if not success:
        print(f"  Warning: Failed to upload chapters file: {output}")
        return False
    # Get file size
    # NOTE(review): len() of a str is the character count, not the byte
    # count -- these differ for non-ASCII chapter titles; confirm whether
    # Castopod cares about the exact byte size.
    file_size = len(chapters_content)
    # Insert into media table
    # NOTE(review): SQL is assembled with f-strings. The interpolated values
    # are generated internally (slug-derived path, int sizes/ids), but
    # parameterized queries would be safer if inputs ever become external.
    insert_sql = f"""INSERT INTO cp_media (file_key, file_size, file_mimetype, type, uploaded_by, updated_by, uploaded_at, updated_at)
VALUES ('{remote_path}', {file_size}, 'application/json', 'chapters', 1, 1, NOW(), NOW())"""
    db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{insert_sql}; SELECT LAST_INSERT_ID();"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f"  Warning: Failed to insert chapters in database: {output}")
        return False
    # Parse media ID from output (last line of mysql's tabular output)
    try:
        lines = output.strip().split('\n')
        media_id = int(lines[-1])
    except (ValueError, IndexError):
        print(f"  Warning: Could not parse media ID from: {output}")
        return False
    # Link chapters to episode
    update_sql = f"UPDATE cp_episodes SET chapters_id = {media_id} WHERE id = {episode_id}"
    db_cmd = f'{DOCKER_PATH} exec {MARIADB_CONTAINER} mysql -u {DB_USER} -p{DB_PASS} {DB_NAME} -e "{update_sql}"'
    success, output = run_ssh_command(db_cmd)
    if not success:
        print(f"  Warning: Failed to link chapters to episode: {output}")
        return False
    # Clear Castopod cache (best-effort; result intentionally ignored)
    cache_cmd = f'{DOCKER_PATH} exec {CASTOPOD_CONTAINER} php spark cache:clear'
    run_ssh_command(cache_cmd)
    print(f"  Chapters uploaded and linked (media_id: {media_id})")
    return True
def get_next_episode_number() -> int:
    """Return 1 + the highest existing episode number.

    Falls back to 1 when the API call fails or no episodes exist yet.
    """
    response = requests.get(
        f"{CASTOPOD_URL}/api/rest/v1/podcasts/{PODCAST_ID}/episodes",
        headers=get_auth_header()
    )
    if response.status_code != 200:
        return 1
    episodes = response.json()
    if not episodes:
        return 1
    return max(ep.get("number", 0) for ep in episodes) + 1
def main():
    """CLI entry point: transcribe -> metadata -> create -> publish -> chapters."""
    parser = argparse.ArgumentParser(description="Publish podcast episode to Castopod")
    parser.add_argument("audio_file", help="Path to the audio file (MP3)")
    parser.add_argument("--episode-number", "-n", type=int, help="Episode number (auto-detected if not provided)")
    parser.add_argument("--dry-run", "-d", action="store_true", help="Generate metadata but don't publish")
    parser.add_argument("--title", "-t", help="Override generated title")
    parser.add_argument("--description", help="Override generated description")
    args = parser.parse_args()
    audio_path = Path(args.audio_file).expanduser().resolve()
    if not audio_path.exists():
        print(f"Error: Audio file not found: {audio_path}")
        sys.exit(1)
    # Determine episode number (explicit flag wins; otherwise query Castopod)
    if args.episode_number:
        episode_number = args.episode_number
    else:
        episode_number = get_next_episode_number()
    print(f"Episode number: {episode_number}")
    # Step 1: Transcribe
    transcript = transcribe_audio(str(audio_path))
    # Step 2: Generate metadata
    metadata = generate_metadata(transcript, episode_number)
    # Apply overrides (CLI flags win over LLM output)
    if args.title:
        metadata["title"] = args.title
    if args.description:
        metadata["description"] = args.description
    # Save chapters file next to the audio (episode.mp3 -> episode.chapters.json)
    chapters_path = audio_path.with_suffix(".chapters.json")
    save_chapters(metadata, str(chapters_path))
    if args.dry_run:
        print("\n[DRY RUN] Would publish with:")
        print(f"  Title: {metadata['title']}")
        print(f"  Description: {metadata['description']}")
        print(f"  Chapters: {json.dumps(metadata['chapters'], indent=2)}")
        print("\nChapters file saved. Run without --dry-run to publish.")
        return
    # Step 3: Create episode
    episode = create_episode(str(audio_path), metadata, transcript["duration"])
    # Step 4: Publish
    episode = publish_episode(episode["id"])
    # Step 4.5: Upload chapters via SSH (best-effort; failure is non-fatal)
    chapters_uploaded = upload_chapters_to_castopod(
        episode["slug"],
        episode["id"],
        str(chapters_path)
    )
    # Step 5: Summary
    print("\n[5/5] Done!")
    print("=" * 50)
    print(f"Episode URL: {CASTOPOD_URL}/@{PODCAST_HANDLE}/episodes/{episode['slug']}")
    print(f"RSS Feed: {CASTOPOD_URL}/@{PODCAST_HANDLE}/feed.xml")
    print("=" * 50)
    if not chapters_uploaded:
        print("\nNote: Chapters upload failed. Add manually via Castopod admin UI")
        print(f"  Chapters file: {chapters_path}")


if __name__ == "__main__":
    main()

1553
radio_show.py Normal file

File diff suppressed because it is too large Load Diff

140
radio_simple.py Normal file
View File

@@ -0,0 +1,140 @@
#!/usr/bin/env python3
"""
Simplified Radio Show - for debugging
"""
import os
import sys
from pathlib import Path
import numpy as np
import sounddevice as sd
import soundfile as sf
from faster_whisper import WhisperModel
from scipy.signal import butter, filtfilt
from dotenv import load_dotenv
load_dotenv()
# All audio in this script is handled at 24 kHz.
SAMPLE_RATE = 24000
# Caller personas keyed by the keyboard shortcut used to select them.
# Each value is (display name, ElevenLabs voice_id, LLM system prompt).
CALLERS = {
    "1": ("Big Tony", "IKne3meq5aSn9XLyUdCD", "You are Big Tony, a loud Italian guy from Staten Island. Swear naturally, be opinionated. Keep it to 2 sentences."),
    "2": ("Drunk Diane", "FGY2WhTYpPnrIDTdsKH5", "You are Drunk Diane, tipsy woman at a bar. Ramble a bit, be funny. Keep it to 2 sentences."),
    "3": ("Stoner Phil", "bIHbv24MWmeRgasZH58o", "You are Stoner Phil, super chill stoner dude. Speak slow, be spacey but profound. Keep it to 2 sentences."),
}
def phone_filter(audio):
    """Band-limit audio to the 300-3400 Hz telephone range and soft-clip it.

    Returns a flattened float32 waveform scaled to at most 0.8 full scale.
    """
    nyq = SAMPLE_RATE / 2
    b, a = butter(4, [300 / nyq, 3400 / nyq], btype='band')
    shaped = filtfilt(b, a, audio.flatten())
    return (np.tanh(shaped * 1.5) * 0.8).astype(np.float32)
class SimpleRadio:
    """Minimal interactive radio loop: record -> transcribe -> LLM -> TTS.

    Wires together Whisper (speech-to-text), Ollama (caller persona replies),
    and ElevenLabs (text-to-speech) behind a simple keyboard-driven console.
    """

    def __init__(self):
        print("Loading Whisper...")
        self.whisper = WhisperModel("base", device="cpu", compute_type="int8")
        print("Connecting to ElevenLabs...")
        from elevenlabs.client import ElevenLabs
        self.tts = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
        print("Connecting to Ollama...")
        import ollama
        self.ollama = ollama
        # Active persona tuple (name, voice_id, system prompt); see CALLERS.
        self.caller = CALLERS["1"]
        # Rolling chat history; reset whenever the caller changes.
        self.history = []
        print("\nReady!\n")

    def record(self):
        """Capture microphone audio until the user presses Enter.

        Returns the captured samples stacked into one array, or None if
        nothing was captured.
        """
        print("  [Recording - press Enter to stop]")
        chunks = []
        recording = True

        def callback(indata, frames, time, status):
            if recording:
                chunks.append(indata.copy())

        with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=callback):
            input()  # Block until Enter
            # Stop buffering *before* the stream tears down; previously the
            # flag was cleared after the with-block, so it never took effect
            # and trailing frames could be appended while the stream drained.
            recording = False
        return np.vstack(chunks) if chunks else None

    def transcribe(self, audio):
        """Resample captured audio to 16 kHz and run Whisper on it."""
        import librosa
        audio_16k = librosa.resample(audio.flatten().astype(np.float32),
                                     orig_sr=SAMPLE_RATE, target_sr=16000)
        segments, _ = self.whisper.transcribe(audio_16k)
        return " ".join([s.text for s in segments]).strip()

    def respond(self, text):
        """Record the host's line, get the caller's reply from Ollama, and
        append both to the rolling history (last 6 turns are sent)."""
        self.history.append({"role": "user", "content": text})
        response = self.ollama.chat(
            model="llama3.2:latest",
            messages=[{"role": "system", "content": self.caller[2]}] + self.history[-6:],
            options={"temperature": 0.9}
        )
        reply = response["message"]["content"]
        self.history.append({"role": "assistant", "content": reply})
        return reply

    def speak(self, text):
        """Synthesize text with ElevenLabs, phone-filter it, and play it."""
        print("  [Generating voice...]")
        audio_gen = self.tts.text_to_speech.convert(
            voice_id=self.caller[1],
            text=text,
            model_id="eleven_turbo_v2_5",
            output_format="pcm_24000"
        )
        audio_bytes = b"".join(audio_gen)
        # PCM16 -> float32 in [-1, 1)
        audio = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        filtered = phone_filter(audio)
        print("  [Playing...]")
        sd.play(filtered, SAMPLE_RATE)
        sd.wait()

    def run(self):
        """Interactive command loop (blocks until 'q')."""
        print("=" * 50)
        print("  SIMPLE RADIO - Type commands:")
        print("  1/2/3 = switch caller")
        print("  r = record & respond")
        print("  t = type message (skip recording)")
        print("  q = quit")
        print("=" * 50)
        print(f"\nCaller: {self.caller[0]}\n")
        while True:
            cmd = input("> ").strip().lower()
            if cmd == 'q':
                break
            elif cmd in CALLERS:
                # BUGFIX: was `cmd in '123'`, which is substring matching --
                # true for '' (bare Enter) and for '12'/'23'/'123' -- and then
                # crashed with KeyError on CALLERS[cmd]. Dict membership
                # matches exactly the valid keys.
                self.caller = CALLERS[cmd]
                self.history = []
                print(f"\n📞 Switched to: {self.caller[0]}\n")
            elif cmd == 'r':
                audio = self.record()
                if audio is not None:
                    print("  [Transcribing...]")
                    text = self.transcribe(audio)
                    print(f"\n  YOU: {text}\n")
                    if text:
                        print("  [Thinking...]")
                        reply = self.respond(text)
                        print(f"\n  📞 {self.caller[0].upper()}: {reply}\n")
                        self.speak(reply)
            elif cmd == 't':
                text = input("  Type message: ")
                if text:
                    print("  [Thinking...]")
                    reply = self.respond(text)
                    print(f"\n  📞 {self.caller[0].upper()}: {reply}\n")
                    self.speak(reply)
            else:
                print("  Commands: r=record, t=type, 1/2/3=caller, q=quit")


if __name__ == "__main__":
    radio = SimpleRadio()
    radio.run()

16
requirements-web.txt Normal file
View File

@@ -0,0 +1,16 @@
# Web application requirements (in addition to existing radio_show.py deps)
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.6
websockets>=12.0
httpx>=0.26.0
pydantic-settings>=2.1.0
# Already installed for CLI (but listed for completeness):
# faster-whisper
# elevenlabs
# numpy
# scipy
# librosa
# soundfile
# python-dotenv

60
run.sh Executable file
View File

@@ -0,0 +1,60 @@
#!/bin/bash
# AI Radio Show - Server Runner with restart support
#
# Supervises the uvicorn server in a loop. Two flag files control it from
# outside (e.g. from the web UI or another shell):
#   touch $RESTART_FLAG  -> restart the server
#   touch $STOP_FLAG     -> stop the server and exit the runner

LOG_FILE="/tmp/ai-radio-show.log"
RESTART_FLAG="/tmp/ai-radio-show.restart"
STOP_FLAG="/tmp/ai-radio-show.stop"

cd "$(dirname "$0")"

# Activate virtual environment
source venv/bin/activate

# Cleanup old flags
rm -f "$RESTART_FLAG" "$STOP_FLAG"

echo "AI Radio Show Server Runner"
echo "Log file: $LOG_FILE"
echo "Press Ctrl+C to stop"
echo ""

while true; do
    echo "[$(date)] Starting server..." | tee -a "$LOG_FILE"

    # BUGFIX: log via process substitution instead of `python ... | tee &`.
    # With a backgrounded pipeline, $! is the PID of the *last* command
    # (tee), so the kill below stopped the logger while leaving uvicorn
    # running and holding port 8000 -- restarts then failed to bind.
    python -m uvicorn backend.main:app --host 0.0.0.0 --port 8000 > >(tee -a "$LOG_FILE") 2>&1 &
    SERVER_PID=$!

    # Wait for server to exit or restart signal
    while kill -0 $SERVER_PID 2>/dev/null; do
        if [ -f "$RESTART_FLAG" ]; then
            echo "[$(date)] Restart requested..." | tee -a "$LOG_FILE"
            rm -f "$RESTART_FLAG"
            kill $SERVER_PID 2>/dev/null
            wait $SERVER_PID 2>/dev/null
            sleep 1
            break
        fi
        if [ -f "$STOP_FLAG" ]; then
            echo "[$(date)] Stop requested..." | tee -a "$LOG_FILE"
            rm -f "$STOP_FLAG"
            kill $SERVER_PID 2>/dev/null
            wait $SERVER_PID 2>/dev/null
            echo "[$(date)] Server stopped." | tee -a "$LOG_FILE"
            exit 0
        fi
        sleep 1
    done

    # Check if we should restart or exit (server may have died on its own
    # right after a stop request)
    if [ -f "$STOP_FLAG" ]; then
        rm -f "$STOP_FLAG"
        echo "[$(date)] Server stopped." | tee -a "$LOG_FILE"
        exit 0
    fi

    echo "[$(date)] Restarting in 2 seconds..." | tee -a "$LOG_FILE"
    sleep 2
done

37
test.html Normal file
View File

@@ -0,0 +1,37 @@
<!DOCTYPE html>
<!-- Standalone smoke-test page: verifies the three frontend bundles load
     and that DOM event wiring works, without starting the full app. -->
<html>
<head>
    <title>Test JavaScript Loading</title>
</head>
<body>
    <h1>JavaScript Test</h1>
    <button id="test-btn">Test Button</button>
    <div id="output"></div>
    <script src="frontend/js/audio.js"></script>
    <script src="frontend/js/websocket.js"></script>
    <script src="frontend/js/app.js"></script>
    <script>
        // Test if the classes loaded
        document.addEventListener('DOMContentLoaded', function() {
            const output = document.getElementById('output');
            // AudioManager should be defined by frontend/js/audio.js
            if (typeof AudioManager !== 'undefined') {
                output.innerHTML += '<p>✓ AudioManager loaded</p>';
            } else {
                output.innerHTML += '<p>✗ AudioManager failed to load</p>';
            }
            // RadioShowApp should be defined by frontend/js/app.js
            if (typeof RadioShowApp !== 'undefined') {
                output.innerHTML += '<p>✓ RadioShowApp loaded</p>';
            } else {
                output.innerHTML += '<p>✗ RadioShowApp failed to load</p>';
            }
            // A working click handler proves event listeners attach correctly
            document.getElementById('test-btn').addEventListener('click', function() {
                output.innerHTML += '<p>Button click works!</p>';
            });
        });
    </script>
</body>
</html>