diff --git a/backend/config.py b/backend/config.py index 98315a0..109ea4f 100644 --- a/backend/config.py +++ b/backend/config.py @@ -29,10 +29,23 @@ class Settings(BaseSettings): # LLM Settings llm_provider: str = "openrouter" # "openrouter" or "ollama" - openrouter_model: str = "anthropic/claude-sonnet-4-5" + openrouter_model: str = "anthropic/claude-sonnet-4-5" # primary/default model ollama_model: str = "llama3.2" ollama_host: str = "http://localhost:11434" + # Per-category model routing — cheaper models for non-critical tasks + # Categories: caller_dialog, devon_monitor, devon_ask, background_gen, + # call_summary, news_summary, topic_gen, unknown + category_models: dict = { + "caller_dialog": "anthropic/claude-sonnet-4-5", # quality matters — this IS the show + "devon_ask": "google/gemini-2.5-flash", # Devon direct questions + "devon_monitor": "google/gemini-2.5-flash", # Devon polling — biggest cost saver + "background_gen": "google/gemini-2.5-flash", # JSON caller backgrounds + "call_summary": "google/gemini-2.5-flash", # post-call summaries + "news_summary": "google/gemini-2.5-flash", # news digests + "topic_gen": "google/gemini-2.5-flash", # topic generation + } + # TTS Settings tts_provider: str = "inworld" # "kokoro", "elevenlabs", "inworld", "vits", or "bark" diff --git a/backend/main.py b/backend/main.py index 8ae00e9..d16a5d8 100644 --- a/backend/main.py +++ b/backend/main.py @@ -7516,6 +7516,7 @@ class AudioDeviceSettings(BaseModel): input_channel: Optional[int] = None output_device: Optional[int] = None caller_channel: Optional[int] = None + devon_channel: Optional[int] = None live_caller_channel: Optional[int] = None music_channel: Optional[int] = None sfx_channel: Optional[int] = None @@ -7556,6 +7557,7 @@ async def set_audio_settings(settings: AudioDeviceSettings): input_channel=settings.input_channel, output_device=settings.output_device, caller_channel=settings.caller_channel, + devon_channel=settings.devon_channel, live_caller_channel=settings.live_caller_channel, music_channel=settings.music_channel, sfx_channel=settings.sfx_channel, @@ -8743,7 +8745,8 @@ async def update_settings(data: dict): openrouter_model=data.get("openrouter_model"), ollama_model=data.get("ollama_model"), ollama_host=data.get("ollama_host"), - tts_provider=data.get("tts_provider") + tts_provider=data.get("tts_provider"), + category_models=data.get("category_models") ) # Re-randomize voices when TTS provider changes voice system new_tts = settings.tts_provider @@ -9656,7 +9659,7 @@ async def intern_dismiss_suggestion(): async def _play_intern_audio(text: str): - """Generate TTS for Devon and play on air (no phone filter)""" + """Generate TTS for Devon and play on air (no phone filter, own stem + channel)""" try: audio_bytes = await generate_speech( text, intern_service.voice, apply_filter=False @@ -9664,6 +9667,7 @@ async def _play_intern_audio(text: str): thread = threading.Thread( target=audio_service.play_caller_audio, args=(audio_bytes, 24000), + kwargs={"stem_name": "devon", "channel_override": audio_service.devon_channel}, daemon=True, ) thread.start() diff --git a/backend/services/audio.py b/backend/services/audio.py index 014e193..77b2bbd 100644 --- a/backend/services/audio.py +++ b/backend/services/audio.py @@ -64,6 +64,7 @@ class AudioService: self.output_device: Optional[int] = 12 # Radio Voice Mic (loopback output) self.caller_channel: int = 3 # Channel for caller TTS + self.devon_channel: int = 17 # Channel for Devon (intern) self.live_caller_channel: int = 9 # Channel for live caller audio self.music_channel: int = 5 # Channel for music self.sfx_channel: int = 3 # Channel for SFX @@ -164,6 +165,7 @@ class AudioService: self.input_channel = data.get("input_channel", 1) self.output_device = self._resolve_device(data, "output_device") self.caller_channel = data.get("caller_channel", 1) + self.devon_channel = data.get("devon_channel", 17) self.live_caller_channel = data.get("live_caller_channel", 4) self.music_channel = data.get("music_channel", 2) self.sfx_channel = data.get("sfx_channel", 3) @@ -186,6 +188,7 @@ class AudioService: "output_device": self.output_device, "output_device_name": self._get_device_name(self.output_device), "caller_channel": self.caller_channel, + "devon_channel": self.devon_channel, "live_caller_channel": self.live_caller_channel, "music_channel": self.music_channel, "sfx_channel": self.sfx_channel, @@ -222,6 +225,7 @@ class AudioService: input_channel: Optional[int] = None, output_device: Optional[int] = None, caller_channel: Optional[int] = None, + devon_channel: Optional[int] = None, live_caller_channel: Optional[int] = None, music_channel: Optional[int] = None, sfx_channel: Optional[int] = None, @@ -240,6 +244,8 @@ class AudioService: self.output_device = output_device if caller_channel is not None: self.caller_channel = caller_channel + if devon_channel is not None: + self.devon_channel = devon_channel if live_caller_channel is not None: self.live_caller_channel = live_caller_channel if music_channel is not None: @@ -267,6 +273,7 @@ class AudioService: "input_channel": self.input_channel, "output_device": self.output_device, "caller_channel": self.caller_channel, + "devon_channel": self.devon_channel, "live_caller_channel": self.live_caller_channel, "music_channel": self.music_channel, "sfx_channel": self.sfx_channel, @@ -419,8 +426,8 @@ class AudioService: return audio - def play_caller_audio(self, audio_bytes: bytes, sample_rate: int = 24000): - """Play caller TTS audio to specific channel of output device (interruptible)""" + def play_caller_audio(self, audio_bytes: bytes, sample_rate: int = 24000, stem_name: str = "caller", channel_override: int | None = None): + """Play TTS audio to specific channel of output device (interruptible)""" import librosa # Stop any existing caller audio @@ -442,7 +449,8 @@ class AudioService: device_info = sd.query_devices(self.output_device) num_channels = device_info['max_output_channels'] device_sr = int(device_info['default_samplerate']) - channel_idx = min(self.caller_channel, num_channels) - 1 + ch = channel_override if channel_override is not None else self.caller_channel + channel_idx = min(ch, num_channels) - 1 # Resample if needed if sample_rate != device_sr: @@ -455,7 +463,7 @@ class AudioService: multi_ch = np.zeros((len(audio), num_channels), dtype=np.float32) multi_ch[:, channel_idx] = audio - print(f"Playing caller audio to device {self.output_device} ch {self.caller_channel} @ {device_sr}Hz") + print(f"Playing {stem_name} audio to device {self.output_device} ch {ch} @ {device_sr}Hz") # Play in chunks so we can interrupt chunk_size = int(device_sr * 0.1) # 100ms chunks @@ -472,7 +480,7 @@ class AudioService: stream.write(multi_ch[pos:end]) # Record each chunk as it plays so hangups cut the stem too if self.stem_recorder: - self.stem_recorder.write_sporadic("caller", audio[pos:end].copy(), device_sr) + self.stem_recorder.write_sporadic(stem_name, audio[pos:end].copy(), device_sr) pos = end if self._caller_stop_event.is_set(): diff --git a/backend/services/intern.py b/backend/services/intern.py index 408146d..9652918 100644 --- a/backend/services/intern.py +++ b/backend/services/intern.py @@ -15,7 +15,7 @@ from .news import news_service, SEARXNG_URL DATA_FILE = Path(__file__).parent.parent.parent / "data" / "intern.json" # Model for intern — good at tool use, same as primary -INTERN_MODEL = "anthropic/claude-sonnet-4-5" +INTERN_MODEL = None # uses category-based routing from config INTERN_SYSTEM_PROMPT = """You are Devon, the 23-year-old intern on "Luke at the Roost," a late-night radio show. You are NOT Luke. Luke is the HOST — he talks to callers, runs the show, and is your boss. You work behind the scenes and occasionally get pulled into conversations. diff --git a/backend/services/llm.py b/backend/services/llm.py index a880b84..a19449f 100644 --- a/backend/services/llm.py +++ b/backend/services/llm.py @@ -59,7 +59,8 @@ class LLMService: openrouter_model: Optional[str] = None, ollama_model: Optional[str] = None, ollama_host: Optional[str] = None, - tts_provider: Optional[str] = None + tts_provider: Optional[str] = None, + category_models: Optional[dict] = None ): """Update LLM settings""" if provider: @@ -73,6 +74,8 @@ class LLMService: if tts_provider: self.tts_provider = tts_provider settings.tts_provider = tts_provider + if category_models: + settings.category_models.update(category_models) async def get_ollama_models(self) -> list[str]: """Fetch available models from Ollama""" @@ -94,6 +97,7 @@ class LLMService: "ollama_model": self.ollama_model, "ollama_host": self.ollama_host, "tts_provider": self.tts_provider, + "category_models": settings.category_models, "available_openrouter_models": OPENROUTER_MODELS, "available_ollama_models": [] } @@ -107,6 +111,7 @@ class LLMService: "ollama_model": self.ollama_model, "ollama_host": self.ollama_host, "tts_provider": self.tts_provider, + "category_models": settings.category_models, "available_openrouter_models": OPENROUTER_MODELS, "available_ollama_models": ollama_models } @@ -155,7 +160,7 @@ class LLMService: (final_text, tool_calls_made) where tool_calls_made is a list of {"name": str, "arguments": dict, "result": str} dicts """ - model = model or self.openrouter_model + model = model or self._get_model_for_category(category) msgs = list(messages) if system_prompt: msgs = [{"role": "system", "content": system_prompt}] + msgs @@ -285,11 +290,16 @@ class LLMService: print(f"[LLM-Tools] Final call failed: {e}") return "", all_tool_calls - async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None, category: str = "unknown", caller_name: str = "") -> str: - """Try primary model, then fallback models. Always returns a response.""" + def _get_model_for_category(self, category: str) -> str: + """Get the best model for a given category based on config routing.""" + return settings.category_models.get(category, self.openrouter_model) - # Try primary model first - result = await self._call_openrouter_once(messages, self.openrouter_model, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name) + async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None, category: str = "unknown", caller_name: str = "") -> str: + """Try category-specific model, then fallback models. Always returns a response.""" + + # Use category-specific model if configured, otherwise primary + model = self._get_model_for_category(category) + result = await self._call_openrouter_once(messages, model, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name) if result is not None: return result diff --git a/backend/services/stem_recorder.py b/backend/services/stem_recorder.py index 9a4beb9..0242cad 100644 --- a/backend/services/stem_recorder.py +++ b/backend/services/stem_recorder.py @@ -7,7 +7,7 @@ import soundfile as sf from pathlib import Path from collections import deque -STEM_NAMES = ["host", "caller", "music", "sfx", "ads", "idents"] +STEM_NAMES = ["host", "caller", "devon", "music", "sfx", "ads", "idents"] class StemRecorder: diff --git a/frontend/css/style.css b/frontend/css/style.css index cb70f21..1e8db65 100644 --- a/frontend/css/style.css +++ b/frontend/css/style.css @@ -802,22 +802,63 @@ section h2 { flex: 1; } -.channel-row { - display: flex; - gap: 12px; - margin-top: 8px; +.routing-grid { + display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 6px; } -.channel-row label { +.routing-item { display: flex; align-items: center; + justify-content: space-between; gap: 4px; - font-size: 0.85rem; + background: rgba(255, 255, 255, 0.05); + border-radius: 6px; + padding: 5px 8px; +} + +.routing-item label { + margin-bottom: 0 !important; +} + +.routing-label { + font-size: 0.75rem; + color: var(--text-muted, #9a8b78); + white-space: nowrap; } .channel-input { - width: 50px !important; + width: 40px !important; text-align: center; + font-size: 0.8rem; + padding: 3px !important; +} + +.model-routing-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 8px; +} + +.model-routing-item { + display: flex; + flex-direction: column; + gap: 3px; +} + +.model-routing-item label { + margin-bottom: 0 !important; +} + +.model-routing-label { + font-size: 0.75rem; + color: var(--text-muted, #9a8b78); +} + +.model-select { + font-size: 0.8rem !important; + padding: 4px 6px !important; } .modal-content label { diff --git a/frontend/index.html b/frontend/index.html index 9383360..542d8cc 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -195,7 +195,7 @@
-

Audio Routing

+

Audio Devices

@@ -212,44 +212,70 @@
-
- - - - - - +
+ +
+

Output Routing

+
+
+ AI Caller + +
+
+ Devon + +
+
+ Live Caller + +
+
+ Music + +
+
+ SFX + +
+
+ Ads + +
+
+ Idents + +
-

LLM Provider

- - -
- -
- - diff --git a/frontend/js/app.js b/frontend/js/app.js index f134206..0c10884 100644 --- a/frontend/js/app.js +++ b/frontend/js/app.js @@ -493,6 +493,7 @@ async function loadAudioDevices() { // Channel settings const inputCh = document.getElementById('input-channel'); const callerCh = document.getElementById('caller-channel'); + const devonCh = document.getElementById('devon-channel'); const liveCallerCh = document.getElementById('live-caller-channel'); const musicCh = document.getElementById('music-channel'); const sfxCh = document.getElementById('sfx-channel'); @@ -501,6 +502,7 @@ async function loadAudioDevices() { if (inputCh) inputCh.value = settings.input_channel || 1; if (callerCh) callerCh.value = settings.caller_channel || 1; + if (devonCh) devonCh.value = settings.devon_channel || 17; if (liveCallerCh) liveCallerCh.value = settings.live_caller_channel || 9; if (musicCh) musicCh.value = settings.music_channel || 2; if (sfxCh) sfxCh.value = settings.sfx_channel || 3; @@ -526,6 +528,7 @@ async function saveAudioDevices() { const outputDevice = document.getElementById('output-device')?.value; const inputChannel = document.getElementById('input-channel')?.value; const callerChannel = document.getElementById('caller-channel')?.value; + const devonChannel = document.getElementById('devon-channel')?.value; const liveCallerChannel = document.getElementById('live-caller-channel')?.value; const musicChannel = document.getElementById('music-channel')?.value; const sfxChannel = document.getElementById('sfx-channel')?.value; @@ -541,6 +544,7 @@ async function saveAudioDevices() { input_channel: inputChannel ? parseInt(inputChannel) : 1, output_device: outputDevice ? parseInt(outputDevice) : null, caller_channel: callerChannel ? parseInt(callerChannel) : 1, + devon_channel: devonChannel ? parseInt(devonChannel) : 17, live_caller_channel: liveCallerChannel ? parseInt(liveCallerChannel) : 9, music_channel: musicChannel ? parseInt(musicChannel) : 2, sfx_channel: sfxChannel ? parseInt(sfxChannel) : 3, @@ -1208,6 +1212,23 @@ async function loadSettings() { const ttsProvider = document.getElementById('tts-provider'); if (ttsProvider) ttsProvider.value = data.tts_provider || 'elevenlabs'; + // Category model routing + const models = data.available_openrouter_models || []; + const categoryModels = data.category_models || {}; + const categories = ['caller_dialog', 'devon_monitor', 'devon_ask', 'background_gen', 'call_summary', 'news_summary']; + for (const cat of categories) { + const sel = document.getElementById(`model-${cat}`); + if (!sel) continue; + sel.innerHTML = ''; + for (const m of models) { + const opt = document.createElement('option'); + opt.value = m; + opt.textContent = m.split('/').pop(); + if (m === (categoryModels[cat] || data.openrouter_model)) opt.selected = true; + sel.appendChild(opt); + } + } + updateProviderUI(); console.log('Settings loaded:', data.provider, 'TTS:', data.tts_provider); } catch (err) { @@ -1217,9 +1238,7 @@ async function loadSettings() { function updateProviderUI() { - const isOpenRouter = document.getElementById('provider')?.value === 'openrouter'; - document.getElementById('openrouter-settings')?.classList.toggle('hidden', !isOpenRouter); - document.getElementById('ollama-settings')?.classList.toggle('hidden', isOpenRouter); + // Kept for compatibility — provider toggle removed from UI } @@ -1227,16 +1246,23 @@ async function saveSettings() { // Save audio devices await saveAudioDevices(); - // Save LLM and TTS settings + // Collect category model routing + const categoryModels = {}; + const categories = ['caller_dialog', 'devon_monitor', 'devon_ask', 'background_gen', 'call_summary', 'news_summary']; + for (const cat of categories) { + const sel = document.getElementById(`model-${cat}`); + if (sel) categoryModels[cat] = sel.value; + } + + // Save LLM, TTS, and model routing settings await fetch('/api/settings', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ - provider: document.getElementById('provider')?.value, - openrouter_model: document.getElementById('openrouter-model')?.value, - ollama_model: document.getElementById('ollama-model')?.value, - ollama_host: document.getElementById('ollama-host')?.value, - tts_provider: document.getElementById('tts-provider')?.value + provider: 'openrouter', + openrouter_model: categoryModels.caller_dialog || document.getElementById('model-caller_dialog')?.value, + tts_provider: document.getElementById('tts-provider')?.value, + category_models: categoryModels }) }); diff --git a/postprod.py b/postprod.py index 2cb5266..7f63be8 100644 --- a/postprod.py +++ b/postprod.py @@ -17,7 +17,7 @@ from pathlib import Path import numpy as np import soundfile as sf -STEM_NAMES = ["host", "caller", "music", "sfx", "ads", "idents"] +STEM_NAMES = ["host", "caller", "devon", "music", "sfx", "ads", "idents"] def load_stems(stems_dir: Path) -> tuple[dict[str, np.ndarray], int]: @@ -69,7 +69,7 @@ def remove_gaps(stems: dict[str, np.ndarray], sr: int, # Detect gaps in everything except music (which always plays). # This catches TTS latency gaps while protecting ad breaks and SFX transitions. - content = stems["host"] + stems["caller"] + stems["sfx"] + stems["ads"] + stems["idents"] + content = stems["host"] + stems["caller"] + stems["devon"] + stems["sfx"] + stems["ads"] + stems["idents"] rms = compute_rms(content, window_samples) # Threshold: percentile-based to sit above the mic noise floor diff --git a/reaper/strip_silence_dialog.lua b/reaper/strip_silence_dialog.lua index 87d28f0..d8eac8e 100644 --- a/reaper/strip_silence_dialog.lua +++ b/reaper/strip_silence_dialog.lua @@ -14,10 +14,10 @@ local MIN_VOICE_SEC = 0.3 -- ignore non-silent bursts shorter than this (fi local KEEP_PAD_SEC = 0.5 -- leave this much silence on each side of a cut local BLOCK_SEC = 0.1 -- analysis block size (100ms) local SAMPLE_RATE = 48000 -local CHECK_TRACKS = {1, 2, 3} -- 1-indexed: Host, Live Caller, AI Caller -local IDENTS_TRACK = 5 -- 1-indexed: Idents track -local ADS_TRACK = 6 -- 1-indexed: Ads track -local MUSIC_TRACK = 7 -- 1-indexed: Music track +local CHECK_TRACKS = {1, 2, 3, 4} -- 1-indexed: Host, Devon, Live Caller, AI Caller +local IDENTS_TRACK = 6 -- 1-indexed: Idents track +local ADS_TRACK = 7 -- 1-indexed: Ads track +local MUSIC_TRACK = 8 -- 1-indexed: Music track local MUSIC_FADE_SEC = 2.0 -- fade duration for music in/out around ads/idents local YIELD_INTERVAL = 200 -- yield to REAPER every N blocks (~20s of audio) ---------------------------------------------------------------------------