Initial commit: AI Radio Show web application
- FastAPI backend with multiple TTS providers (Inworld, ElevenLabs, Kokoro, F5-TTS, etc.) - Web frontend with caller management, music, and soundboard - Whisper transcription integration - OpenRouter/Ollama LLM support - Castopod podcast publishing script Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
175
backend/services/llm.py
Normal file
175
backend/services/llm.py
Normal file
@@ -0,0 +1,175 @@
|
||||
"""LLM service with OpenRouter and Ollama support"""
|
||||
|
||||
import httpx
|
||||
from typing import Optional
|
||||
from ..config import settings
|
||||
|
||||
|
||||
# Available OpenRouter models
# Curated list of model IDs surfaced in the settings UI; entries must match
# OpenRouter's catalog IDs exactly ("vendor/model-name").
OPENROUTER_MODELS = [
    "anthropic/claude-3-haiku",
    "anthropic/claude-3.5-sonnet",
    "openai/gpt-4o-mini",
    "openai/gpt-4o",
    "google/gemini-flash-1.5",
    "google/gemini-pro-1.5",
    "meta-llama/llama-3.1-8b-instruct",
    "mistralai/mistral-7b-instruct",
]
|
||||
|
||||
|
||||
class LLMService:
    """Abstraction layer over the OpenRouter and Ollama chat-completion APIs.

    Reads its initial configuration from the global ``settings`` object and
    exposes :meth:`generate`, which routes each request to whichever provider
    is currently selected. Provider failures never raise; short in-character
    fallback lines are returned instead so the radio show keeps talking.
    """

    def __init__(self):
        # Snapshot configured defaults; update_settings() can change them
        # at runtime without restarting the app.
        self.provider = settings.llm_provider
        self.openrouter_model = settings.openrouter_model
        self.ollama_model = settings.ollama_model
        self.ollama_host = settings.ollama_host
        self.tts_provider = settings.tts_provider

    def update_settings(
        self,
        provider: Optional[str] = None,
        openrouter_model: Optional[str] = None,
        ollama_model: Optional[str] = None,
        ollama_host: Optional[str] = None,
        tts_provider: Optional[str] = None
    ) -> None:
        """Update LLM settings.

        Only truthy arguments are applied, so passing ``None`` (or an empty
        string) leaves the corresponding setting untouched.
        """
        if provider:
            self.provider = provider
        if openrouter_model:
            self.openrouter_model = openrouter_model
        if ollama_model:
            self.ollama_model = ollama_model
        if ollama_host:
            self.ollama_host = ollama_host
        if tts_provider:
            self.tts_provider = tts_provider
            # Also update the global settings so the TTS service picks it up
            settings.tts_provider = tts_provider

    async def get_ollama_models(self) -> list[str]:
        """Fetch available model names from the Ollama server.

        Returns:
            Model names reported by ``/api/tags``, or an empty list on any
            failure (server down, bad response) so callers can always iterate.
        """
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(f"{self.ollama_host}/api/tags")
                response.raise_for_status()
                data = response.json()
                return [model["name"] for model in data.get("models", [])]
        except Exception as e:
            print(f"Failed to fetch Ollama models: {e}")
            return []

    def get_settings(self) -> dict:
        """Get current settings (sync version without Ollama models)"""
        return {
            "provider": self.provider,
            "openrouter_model": self.openrouter_model,
            "ollama_model": self.ollama_model,
            "ollama_host": self.ollama_host,
            "tts_provider": self.tts_provider,
            "available_openrouter_models": OPENROUTER_MODELS,
            "available_ollama_models": []  # Fetched separately
        }

    async def get_settings_async(self) -> dict:
        """Get current settings with Ollama models"""
        # Reuse the sync payload and overlay the async-only field so the
        # two dicts cannot drift apart as settings keys are added.
        result = self.get_settings()
        result["available_ollama_models"] = await self.get_ollama_models()
        return result

    async def generate(
        self,
        messages: list[dict],
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Generate a response from the LLM.

        Args:
            messages: List of message dicts with 'role' and 'content'
            system_prompt: Optional system prompt to prepend

        Returns:
            Generated text response (a canned fallback line on provider error)
        """
        if system_prompt:
            # Build a new list rather than mutating the caller's list.
            messages = [{"role": "system", "content": system_prompt}] + messages

        if self.provider == "openrouter":
            return await self._call_openrouter(messages)
        return await self._call_ollama(messages)

    async def _call_openrouter(self, messages: list[dict]) -> str:
        """Call OpenRouter API, retrying once on timeout."""
        for attempt in range(2):  # Try twice
            try:
                async with httpx.AsyncClient(timeout=30.0) as client:
                    response = await client.post(
                        "https://openrouter.ai/api/v1/chat/completions",
                        headers={
                            "Authorization": f"Bearer {settings.openrouter_api_key}",
                            "Content-Type": "application/json",
                        },
                        json={
                            "model": self.openrouter_model,
                            "messages": messages,
                            "max_tokens": 100,
                        },
                    )
                    response.raise_for_status()
                    data = response.json()
                    return data["choices"][0]["message"]["content"]
            except httpx.TimeoutException:
                # httpx.ReadTimeout subclasses TimeoutException, so this one
                # handler covers both (the original listed both redundantly).
                print(f"OpenRouter timeout (attempt {attempt + 1})")
                if attempt == 0:
                    continue  # Retry once
                return "Uh, sorry, I lost you there for a second. What was that?"
            except Exception as e:
                print(f"OpenRouter error: {e}")
                return "Yeah... I don't know, man."
        return "Uh, hold on a sec..."  # Safety fallback; loop always returns

    async def _call_ollama(self, messages: list[dict]) -> str:
        """Call the local Ollama chat API (non-streaming)."""
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    f"{self.ollama_host}/api/chat",
                    json={
                        "model": self.ollama_model,
                        "messages": messages,
                        "stream": False,
                        "options": {
                            "num_predict": 100,  # Allow complete thoughts
                            "temperature": 0.8,  # Balanced creativity/coherence
                            "top_p": 0.9,  # Focused word choices
                            "repeat_penalty": 1.3,  # Avoid repetition
                            "top_k": 50,  # Reasonable token variety
                        },
                    },
                    timeout=30.0
                )
                response.raise_for_status()
                data = response.json()
                return data["message"]["content"]
        except httpx.TimeoutException:
            print("Ollama timeout")
            return "Uh, sorry, I lost you there for a second. What was that?"
        except Exception as e:
            print(f"Ollama error: {e}")
            return "Yeah... I don't know, man."
|
||||
|
||||
|
||||
# Global instance
# Module-level singleton constructed at import time from the current settings.
llm_service = LLMService()
|
||||
Reference in New Issue
Block a user