Initial commit: AI Radio Show web application
- FastAPI backend with multiple TTS providers (Inworld, ElevenLabs, Kokoro, F5-TTS, etc.) - Web frontend with caller management, music, and soundboard - Whisper transcription integration - OpenRouter/Ollama LLM support - Castopod podcast publishing script Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
175
backend/services/llm.py
Normal file
175
backend/services/llm.py
Normal file
@@ -0,0 +1,175 @@
|
||||
"""LLM service with OpenRouter and Ollama support"""
|
||||
|
||||
import httpx
|
||||
from typing import Optional
|
||||
from ..config import settings
|
||||
|
||||
|
||||
# Available OpenRouter models
# Curated list of model IDs surfaced in the settings UI; entries must match
# OpenRouter's catalog IDs exactly ("vendor/model-name").
OPENROUTER_MODELS = [
    "anthropic/claude-3-haiku",
    "anthropic/claude-3.5-sonnet",
    "openai/gpt-4o-mini",
    "openai/gpt-4o",
    "google/gemini-flash-1.5",
    "google/gemini-pro-1.5",
    "meta-llama/llama-3.1-8b-instruct",
    "mistralai/mistral-7b-instruct",
]
|
||||
|
||||
|
||||
class LLMService:
    """Abstraction layer over the OpenRouter and Ollama chat-completion APIs.

    Reads its initial configuration from the global ``settings`` object and
    exposes :meth:`generate`, which routes each request to whichever provider
    is currently selected. Provider failures never raise; short in-character
    fallback lines are returned instead so the radio show keeps talking.
    """

    def __init__(self):
        # Snapshot configured defaults; update_settings() can change them
        # at runtime without restarting the app.
        self.provider = settings.llm_provider
        self.openrouter_model = settings.openrouter_model
        self.ollama_model = settings.ollama_model
        self.ollama_host = settings.ollama_host
        self.tts_provider = settings.tts_provider

    def update_settings(
        self,
        provider: Optional[str] = None,
        openrouter_model: Optional[str] = None,
        ollama_model: Optional[str] = None,
        ollama_host: Optional[str] = None,
        tts_provider: Optional[str] = None
    ) -> None:
        """Update LLM settings.

        Only truthy arguments are applied, so passing ``None`` (or an empty
        string) leaves the corresponding setting untouched.
        """
        if provider:
            self.provider = provider
        if openrouter_model:
            self.openrouter_model = openrouter_model
        if ollama_model:
            self.ollama_model = ollama_model
        if ollama_host:
            self.ollama_host = ollama_host
        if tts_provider:
            self.tts_provider = tts_provider
            # Also update the global settings so the TTS service picks it up
            settings.tts_provider = tts_provider

    async def get_ollama_models(self) -> list[str]:
        """Fetch available model names from the Ollama server.

        Returns:
            Model names reported by ``/api/tags``, or an empty list on any
            failure (server down, bad response) so callers can always iterate.
        """
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(f"{self.ollama_host}/api/tags")
                response.raise_for_status()
                data = response.json()
                return [model["name"] for model in data.get("models", [])]
        except Exception as e:
            print(f"Failed to fetch Ollama models: {e}")
            return []

    def get_settings(self) -> dict:
        """Get current settings (sync version without Ollama models)"""
        return {
            "provider": self.provider,
            "openrouter_model": self.openrouter_model,
            "ollama_model": self.ollama_model,
            "ollama_host": self.ollama_host,
            "tts_provider": self.tts_provider,
            "available_openrouter_models": OPENROUTER_MODELS,
            "available_ollama_models": []  # Fetched separately
        }

    async def get_settings_async(self) -> dict:
        """Get current settings with Ollama models"""
        # Reuse the sync payload and overlay the async-only field so the
        # two dicts cannot drift apart as settings keys are added.
        result = self.get_settings()
        result["available_ollama_models"] = await self.get_ollama_models()
        return result

    async def generate(
        self,
        messages: list[dict],
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Generate a response from the LLM.

        Args:
            messages: List of message dicts with 'role' and 'content'
            system_prompt: Optional system prompt to prepend

        Returns:
            Generated text response (a canned fallback line on provider error)
        """
        if system_prompt:
            # Build a new list rather than mutating the caller's list.
            messages = [{"role": "system", "content": system_prompt}] + messages

        if self.provider == "openrouter":
            return await self._call_openrouter(messages)
        return await self._call_ollama(messages)

    async def _call_openrouter(self, messages: list[dict]) -> str:
        """Call OpenRouter API, retrying once on timeout."""
        for attempt in range(2):  # Try twice
            try:
                async with httpx.AsyncClient(timeout=30.0) as client:
                    response = await client.post(
                        "https://openrouter.ai/api/v1/chat/completions",
                        headers={
                            "Authorization": f"Bearer {settings.openrouter_api_key}",
                            "Content-Type": "application/json",
                        },
                        json={
                            "model": self.openrouter_model,
                            "messages": messages,
                            "max_tokens": 100,
                        },
                    )
                    response.raise_for_status()
                    data = response.json()
                    return data["choices"][0]["message"]["content"]
            except httpx.TimeoutException:
                # httpx.ReadTimeout subclasses TimeoutException, so this one
                # handler covers both (the original listed both redundantly).
                print(f"OpenRouter timeout (attempt {attempt + 1})")
                if attempt == 0:
                    continue  # Retry once
                return "Uh, sorry, I lost you there for a second. What was that?"
            except Exception as e:
                print(f"OpenRouter error: {e}")
                return "Yeah... I don't know, man."
        return "Uh, hold on a sec..."  # Safety fallback; loop always returns

    async def _call_ollama(self, messages: list[dict]) -> str:
        """Call the local Ollama chat API (non-streaming)."""
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    f"{self.ollama_host}/api/chat",
                    json={
                        "model": self.ollama_model,
                        "messages": messages,
                        "stream": False,
                        "options": {
                            "num_predict": 100,  # Allow complete thoughts
                            "temperature": 0.8,  # Balanced creativity/coherence
                            "top_p": 0.9,  # Focused word choices
                            "repeat_penalty": 1.3,  # Avoid repetition
                            "top_k": 50,  # Reasonable token variety
                        },
                    },
                    timeout=30.0
                )
                response.raise_for_status()
                data = response.json()
                return data["message"]["content"]
        except httpx.TimeoutException:
            print("Ollama timeout")
            return "Uh, sorry, I lost you there for a second. What was that?"
        except Exception as e:
            print(f"Ollama error: {e}")
            return "Yeah... I don't know, man."
|
||||
|
||||
|
||||
# Global instance
# Module-level singleton constructed at import time from the current settings.
llm_service = LLMService()
|
||||
Reference in New Issue
Block a user