Devon gets own stem/track/channel, per-category LLM routing, settings UI cleanup

Audio:
- Devon gets own stem, Reaper track (Input 17), and configurable channel
- play_caller_audio accepts stem_name + channel_override params
- Reaper script checks 4 voice tracks (Host, Devon, Live Caller, AI Caller)
- postprod.py includes devon stem in gap detection

Cost optimization:
- Per-category model routing: Sonnet for caller dialog, Gemini Flash for everything else
- Estimated 65% cost reduction ($4.32 → ~$1.50/show)
- Category models configurable from settings UI

Frontend:
- Settings panel: clean routing grid for output channels, model routing grid for LLM categories
- Devon channel added to audio routing
- Share icon SVG fill fix (currentColor)
- Website homepage iterations

Publishing:
- Revert Castopod API workaround (API re-enabled)
- Fix container media path

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 17:05:19 -06:00
parent 0b091a1afd
commit 164cad456c
11 changed files with 201 additions and 73 deletions
@@ -29,10 +29,23 @@ class Settings(BaseSettings):

    # LLM Settings
    llm_provider: str = "openrouter"  # "openrouter" or "ollama"
-    openrouter_model: str = "anthropic/claude-sonnet-4-5"
+    openrouter_model: str = "anthropic/claude-sonnet-4-5"  # primary/default model
    ollama_model: str = "llama3.2"
    ollama_host: str = "http://localhost:11434"

+    # Per-category model routing — cheaper models for non-critical tasks.
+    # Routed categories: caller_dialog, devon_monitor, devon_ask, background_gen,
+    #   call_summary, news_summary, topic_gen. Any other category (e.g. "unknown") falls back to openrouter_model.
+    category_models: dict = {
+        "caller_dialog": "anthropic/claude-sonnet-4-5",       # quality matters — this IS the show
+        "devon_ask": "google/gemini-2.5-flash",               # Devon direct questions
+        "devon_monitor": "google/gemini-2.5-flash",           # Devon polling — biggest cost saver
+        "background_gen": "google/gemini-2.5-flash",          # JSON caller backgrounds
+        "call_summary": "google/gemini-2.5-flash",            # post-call summaries
+        "news_summary": "google/gemini-2.5-flash",            # news digests
+        "topic_gen": "google/gemini-2.5-flash",               # topic generation
+    }
+
    # TTS Settings
    tts_provider: str = "inworld"  # "kokoro", "elevenlabs", "inworld", "vits", or "bark"

@@ -7516,6 +7516,7 @@ class AudioDeviceSettings(BaseModel):
    input_channel: Optional[int] = None
    output_device: Optional[int] = None
    caller_channel: Optional[int] = None
+    devon_channel: Optional[int] = None
    live_caller_channel: Optional[int] = None
    music_channel: Optional[int] = None
    sfx_channel: Optional[int] = None
@@ -7556,6 +7557,7 @@ async def set_audio_settings(settings: AudioDeviceSettings):
        input_channel=settings.input_channel,
        output_device=settings.output_device,
        caller_channel=settings.caller_channel,
+        devon_channel=settings.devon_channel,
        live_caller_channel=settings.live_caller_channel,
        music_channel=settings.music_channel,
        sfx_channel=settings.sfx_channel,
@@ -8743,7 +8745,8 @@ async def update_settings(data: dict):
        openrouter_model=data.get("openrouter_model"),
        ollama_model=data.get("ollama_model"),
        ollama_host=data.get("ollama_host"),
-        tts_provider=data.get("tts_provider")
+        tts_provider=data.get("tts_provider"),
+        category_models=data.get("category_models")
    )
    # Re-randomize voices when TTS provider changes voice system
    new_tts = settings.tts_provider
@@ -9656,7 +9659,7 @@ async def intern_dismiss_suggestion():


 async def _play_intern_audio(text: str):
-    """Generate TTS for Devon and play on air (no phone filter)"""
+    """Generate TTS for Devon and play on air (no phone filter, own stem + channel)"""
    try:
        audio_bytes = await generate_speech(
            text, intern_service.voice, apply_filter=False
@@ -9664,6 +9667,7 @@ async def _play_intern_audio(text: str):
        thread = threading.Thread(
            target=audio_service.play_caller_audio,
            args=(audio_bytes, 24000),
+            kwargs={"stem_name": "devon", "channel_override": audio_service.devon_channel},
            daemon=True,
        )
        thread.start()
@@ -64,6 +64,7 @@ class AudioService:

        self.output_device: Optional[int] = 12  # Radio Voice Mic (loopback output)
        self.caller_channel: int = 3   # Channel for caller TTS
+        self.devon_channel: int = 17  # Channel for Devon (intern)
        self.live_caller_channel: int = 9  # Channel for live caller audio
        self.music_channel: int = 5    # Channel for music
        self.sfx_channel: int = 3      # Channel for SFX
@@ -164,6 +165,7 @@ class AudioService:
                self.input_channel = data.get("input_channel", 1)
                self.output_device = self._resolve_device(data, "output_device")
                self.caller_channel = data.get("caller_channel", 1)
+                self.devon_channel = data.get("devon_channel", 17)
                self.live_caller_channel = data.get("live_caller_channel", 4)
                self.music_channel = data.get("music_channel", 2)
                self.sfx_channel = data.get("sfx_channel", 3)
@@ -186,6 +188,7 @@ class AudioService:
                "output_device": self.output_device,
                "output_device_name": self._get_device_name(self.output_device),
                "caller_channel": self.caller_channel,
+                "devon_channel": self.devon_channel,
                "live_caller_channel": self.live_caller_channel,
                "music_channel": self.music_channel,
                "sfx_channel": self.sfx_channel,
@@ -222,6 +225,7 @@ class AudioService:
        input_channel: Optional[int] = None,
        output_device: Optional[int] = None,
        caller_channel: Optional[int] = None,
+        devon_channel: Optional[int] = None,
        live_caller_channel: Optional[int] = None,
        music_channel: Optional[int] = None,
        sfx_channel: Optional[int] = None,
@@ -240,6 +244,8 @@ class AudioService:
            self.output_device = output_device
        if caller_channel is not None:
            self.caller_channel = caller_channel
+        if devon_channel is not None:
+            self.devon_channel = devon_channel
        if live_caller_channel is not None:
            self.live_caller_channel = live_caller_channel
        if music_channel is not None:
@@ -267,6 +273,7 @@ class AudioService:
            "input_channel": self.input_channel,
            "output_device": self.output_device,
            "caller_channel": self.caller_channel,
+            "devon_channel": self.devon_channel,
            "live_caller_channel": self.live_caller_channel,
            "music_channel": self.music_channel,
            "sfx_channel": self.sfx_channel,
@@ -419,8 +426,8 @@ class AudioService:

        return audio

-    def play_caller_audio(self, audio_bytes: bytes, sample_rate: int = 24000):
-        """Play caller TTS audio to specific channel of output device (interruptible)"""
+    def play_caller_audio(self, audio_bytes: bytes, sample_rate: int = 24000, stem_name: str = "caller", channel_override: int | None = None):
+        """Play TTS audio to specific channel of output device (interruptible)"""
        import librosa

        # Stop any existing caller audio
@@ -442,7 +449,8 @@ class AudioService:
            device_info = sd.query_devices(self.output_device)
            num_channels = device_info['max_output_channels']
            device_sr = int(device_info['default_samplerate'])
-            channel_idx = min(self.caller_channel, num_channels) - 1
+            ch = channel_override if channel_override is not None else self.caller_channel
+            channel_idx = min(ch, num_channels) - 1

            # Resample if needed
            if sample_rate != device_sr:
@@ -455,7 +463,7 @@ class AudioService:
            multi_ch = np.zeros((len(audio), num_channels), dtype=np.float32)
            multi_ch[:, channel_idx] = audio

-            print(f"Playing caller audio to device {self.output_device} ch {self.caller_channel} @ {device_sr}Hz")
+            print(f"Playing {stem_name} audio to device {self.output_device} ch {ch} @ {device_sr}Hz")

            # Play in chunks so we can interrupt
            chunk_size = int(device_sr * 0.1)  # 100ms chunks
@@ -472,7 +480,7 @@ class AudioService:
                    stream.write(multi_ch[pos:end])
                    # Record each chunk as it plays so hangups cut the stem too
                    if self.stem_recorder:
-                        self.stem_recorder.write_sporadic("caller", audio[pos:end].copy(), device_sr)
+                        self.stem_recorder.write_sporadic(stem_name, audio[pos:end].copy(), device_sr)
                    pos = end

            if self._caller_stop_event.is_set():
@@ -15,7 +15,7 @@ from .news import news_service, SEARXNG_URL
 DATA_FILE = Path(__file__).parent.parent.parent / "data" / "intern.json"

 # Model override for the intern — None means use category-based routing instead of pinning the primary model
-INTERN_MODEL = "anthropic/claude-sonnet-4-5"
+INTERN_MODEL = None  # uses category-based routing from config

 INTERN_SYSTEM_PROMPT = """You are Devon, the 23-year-old intern on "Luke at the Roost," a late-night radio show. You are NOT Luke. Luke is the HOST — he talks to callers, runs the show, and is your boss. You work behind the scenes and occasionally get pulled into conversations.

@@ -59,7 +59,8 @@ class LLMService:
        openrouter_model: Optional[str] = None,
        ollama_model: Optional[str] = None,
        ollama_host: Optional[str] = None,
-        tts_provider: Optional[str] = None
+        tts_provider: Optional[str] = None,
+        category_models: Optional[dict] = None
    ):
        """Update LLM settings"""
        if provider:
@@ -73,6 +74,8 @@ class LLMService:
        if tts_provider:
            self.tts_provider = tts_provider
            settings.tts_provider = tts_provider
+        if category_models:
+            settings.category_models.update(category_models)

    async def get_ollama_models(self) -> list[str]:
        """Fetch available models from Ollama"""
@@ -94,6 +97,7 @@ class LLMService:
            "ollama_model": self.ollama_model,
            "ollama_host": self.ollama_host,
            "tts_provider": self.tts_provider,
+            "category_models": settings.category_models,
            "available_openrouter_models": OPENROUTER_MODELS,
            "available_ollama_models": []
        }
@@ -107,6 +111,7 @@ class LLMService:
            "ollama_model": self.ollama_model,
            "ollama_host": self.ollama_host,
            "tts_provider": self.tts_provider,
+            "category_models": settings.category_models,
            "available_openrouter_models": OPENROUTER_MODELS,
            "available_ollama_models": ollama_models
        }
@@ -155,7 +160,7 @@ class LLMService:
            (final_text, tool_calls_made) where tool_calls_made is a list of
            {"name": str, "arguments": dict, "result": str} dicts
        """
-        model = model or self.openrouter_model
+        model = model or self._get_model_for_category(category)
        msgs = list(messages)
        if system_prompt:
            msgs = [{"role": "system", "content": system_prompt}] + msgs
@@ -285,11 +290,16 @@ class LLMService:
            print(f"[LLM-Tools] Final call failed: {e}")
            return "", all_tool_calls

-    async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None, category: str = "unknown", caller_name: str = "") -> str:
-        """Try primary model, then fallback models. Always returns a response."""
+    def _get_model_for_category(self, category: str) -> str:
+        """Get the best model for a given category based on config routing."""
+        return settings.category_models.get(category, self.openrouter_model)

-        # Try primary model first
-        result = await self._call_openrouter_once(messages, self.openrouter_model, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name)
+    async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None, category: str = "unknown", caller_name: str = "") -> str:
+        """Try category-specific model, then fallback models. Always returns a response."""
+
+        # Use category-specific model if configured, otherwise primary
+        model = self._get_model_for_category(category)
+        result = await self._call_openrouter_once(messages, model, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name)
        if result is not None:
            return result

@@ -7,7 +7,7 @@ import soundfile as sf
 from pathlib import Path
 from collections import deque

-STEM_NAMES = ["host", "caller", "music", "sfx", "ads", "idents"]
+STEM_NAMES = ["host", "caller", "devon", "music", "sfx", "ads", "idents"]


 class StemRecorder: