- Remove gpt-4o-realtime (WebSocket-only) from OpenRouter models - Increase OpenRouter timeout to 60s and max_tokens to 150 - Handle empty LLM responses - Fix publish_episode.py for current Castopod API fields - Add port conflict check and graceful shutdown to run.sh Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
180 lines
6.6 KiB
Python
180 lines
6.6 KiB
Python
"""LLM service with OpenRouter and Ollama support"""
|
|
|
|
import httpx
|
|
from typing import Optional
|
|
from ..config import settings
|
|
|
|
|
|
# Model identifiers that can be selected when the provider is OpenRouter.
# Entries are grouped by vendor; the order is the order reported to callers.
OPENROUTER_MODELS: list[str] = [
    # OpenAI
    "openai/gpt-4o-mini",
    "openai/gpt-4o",
    # Anthropic
    "anthropic/claude-3-haiku",
    "anthropic/claude-3.5-sonnet",
    # Google
    "google/gemini-flash-1.5",
    "google/gemini-pro-1.5",
    # Open-weight models
    "meta-llama/llama-3.1-8b-instruct",
    "mistralai/mistral-7b-instruct",
]
|
|
|
|
|
|
class LLMService:
    """Abstraction layer for LLM providers.

    Holds the currently selected provider and model names and routes chat
    requests either to the OpenRouter HTTP API or to an Ollama server.
    Exceptions raised while talking to a provider are caught inside the
    ``_call_*`` methods, printed, and replaced by a short canned reply, so
    ``generate`` always returns a string.
    """

    # Canned replies returned instead of propagating a provider failure.
    _TIMEOUT_REPLY = "Uh, sorry, I lost you there for a second. What was that?"
    _ERROR_REPLY = "Yeah... I don't know, man."
    _RETRY_EXHAUSTED_REPLY = "Uh, hold on a sec..."

    _OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
    # Total number of tries for one OpenRouter request (one retry on timeout).
    _OPENROUTER_ATTEMPTS = 2

    def __init__(self):
        """Initialise the service from the values currently held in ``settings``."""
        self.provider = settings.llm_provider
        self.openrouter_model = settings.openrouter_model
        self.ollama_model = settings.ollama_model
        self.ollama_host = settings.ollama_host
        self.tts_provider = settings.tts_provider

    def update_settings(
        self,
        provider: Optional[str] = None,
        openrouter_model: Optional[str] = None,
        ollama_model: Optional[str] = None,
        ollama_host: Optional[str] = None,
        tts_provider: Optional[str] = None
    ):
        """Update LLM settings.

        Only arguments with a truthy value are applied; ``None`` and empty
        strings leave the corresponding setting unchanged.

        Args:
            provider: Provider name; ``"openrouter"`` selects OpenRouter,
                anything else is routed to Ollama by ``generate``.
            openrouter_model: Model identifier sent to OpenRouter.
            ollama_model: Model name sent to Ollama.
            ollama_host: Base URL of the Ollama server.
            tts_provider: TTS provider name; also written to the global
                ``settings`` object.
        """
        if provider:
            self.provider = provider
        if openrouter_model:
            self.openrouter_model = openrouter_model
        if ollama_model:
            self.ollama_model = ollama_model
        if ollama_host:
            self.ollama_host = ollama_host
        if tts_provider:
            self.tts_provider = tts_provider
            # Also update the global settings so TTS service picks it up
            settings.tts_provider = tts_provider

    def _ollama_url(self, path: str) -> str:
        """Join the configured Ollama host with an API path.

        A trailing slash on the host is dropped so the result never contains
        ``//`` between host and path.
        """
        return f"{self.ollama_host.rstrip('/')}{path}"

    @staticmethod
    def _clean_reply(content, source: str) -> str:
        """Return ``content``, or ``""`` when it is missing or whitespace-only.

        Args:
            content: Text extracted from the provider response (may be None).
            source: Provider label used in the printed diagnostic.
        """
        if not content or not content.strip():
            print(f"{source} returned empty response")
            return ""
        return content

    async def get_ollama_models(self) -> list[str]:
        """Fetch available models from Ollama.

        Returns:
            The ``name`` of every entry under ``models`` in the response of
            ``GET <ollama_host>/api/tags``, or an empty list if the request
            or the parsing fails for any reason (the error is printed).
        """
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(self._ollama_url("/api/tags"))
                response.raise_for_status()
                data = response.json()
                return [model["name"] for model in data.get("models", [])]
        except Exception as e:
            # Best effort: the model list is informational only.
            print(f"Failed to fetch Ollama models: {e}")
            return []

    def get_settings(self) -> dict:
        """Get current settings (sync version without Ollama models).

        Returns:
            A new dict with the current selections plus the list of
            selectable OpenRouter models. ``available_ollama_models`` is
            always an empty list here; use ``get_settings_async`` to have it
            populated.
        """
        return {
            "provider": self.provider,
            "openrouter_model": self.openrouter_model,
            "ollama_model": self.ollama_model,
            "ollama_host": self.ollama_host,
            "tts_provider": self.tts_provider,
            # Copy so callers cannot mutate the module-level list.
            "available_openrouter_models": list(OPENROUTER_MODELS),
            "available_ollama_models": []  # Fetched separately
        }

    async def get_settings_async(self) -> dict:
        """Get current settings with Ollama models.

        Returns:
            The same dict as ``get_settings`` with
            ``available_ollama_models`` filled from ``get_ollama_models``.
        """
        # Query Ollama first so the snapshot reflects the settings as they
        # are once the network call has finished.
        ollama_models = await self.get_ollama_models()
        current = self.get_settings()
        current["available_ollama_models"] = ollama_models
        return current

    async def generate(
        self,
        messages: list[dict],
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Generate a response from the LLM.

        Args:
            messages: List of message dicts with 'role' and 'content'
            system_prompt: Optional system prompt to prepend

        Returns:
            Generated text response. On provider failure this is one of the
            canned fallback replies; on an empty completion it is ``""``.
        """
        if system_prompt:
            # Build a new list; the caller's list is left untouched.
            messages = [{"role": "system", "content": system_prompt}] + messages

        if self.provider == "openrouter":
            return await self._call_openrouter(messages)
        # Any provider value other than "openrouter" is handled by Ollama.
        return await self._call_ollama(messages)

    async def _call_openrouter(self, messages: list[dict]) -> str:
        """Call OpenRouter API with retry.

        A timeout is retried once; any other exception ends the call
        immediately. Failures are printed and turned into a canned reply.

        Args:
            messages: Chat messages forwarded unchanged in the request body.

        Returns:
            The text at ``choices[0].message.content`` of the response,
            ``""`` if that text is empty, or a canned fallback reply.
        """
        for attempt in range(self._OPENROUTER_ATTEMPTS):
            try:
                async with httpx.AsyncClient(timeout=60.0) as client:
                    response = await client.post(
                        self._OPENROUTER_URL,
                        headers={
                            "Authorization": f"Bearer {settings.openrouter_api_key}",
                            "Content-Type": "application/json",
                        },
                        json={
                            "model": self.openrouter_model,
                            "messages": messages,
                            "max_tokens": 150,
                        },
                    )
                    response.raise_for_status()
                    data = response.json()
                    content = data["choices"][0]["message"]["content"]
                    return self._clean_reply(content, "OpenRouter")
            except httpx.TimeoutException:
                # httpx.ReadTimeout is a TimeoutException subclass, so this
                # single clause covers it as well.
                print(f"OpenRouter timeout (attempt {attempt + 1})")
                if attempt + 1 < self._OPENROUTER_ATTEMPTS:
                    continue  # Retry once
                return self._TIMEOUT_REPLY
            except Exception as e:
                print(f"OpenRouter error: {e}")
                return self._ERROR_REPLY
        # Every loop iteration returns or continues into a final attempt, so
        # this is only a defensive fallback.
        return self._RETRY_EXHAUSTED_REPLY

    async def _call_ollama(self, messages: list[dict]) -> str:
        """Call Ollama API.

        Sends one non-streaming request to ``<ollama_host>/api/chat``.
        Failures are printed and turned into a canned reply.

        Args:
            messages: Chat messages forwarded unchanged in the request body.

        Returns:
            The text at ``message.content`` of the response, ``""`` if that
            text is empty, or a canned fallback reply.
        """
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    self._ollama_url("/api/chat"),
                    json={
                        "model": self.ollama_model,
                        "messages": messages,
                        "stream": False,
                        "options": {
                            "num_predict": 100,  # Allow complete thoughts
                            "temperature": 0.8,  # Balanced creativity/coherence
                            "top_p": 0.9,  # Focused word choices
                            "repeat_penalty": 1.3,  # Avoid repetition
                            "top_k": 50,  # Reasonable token variety
                        },
                    },
                    timeout=30.0
                )
                response.raise_for_status()
                data = response.json()
                # Same empty-reply handling as the OpenRouter path.
                return self._clean_reply(data["message"]["content"], "Ollama")
        except httpx.TimeoutException:
            print("Ollama timeout")
            return self._TIMEOUT_REPLY
        except Exception as e:
            print(f"Ollama error: {e}")
            return self._ERROR_REPLY
|
|
|
|
|
|
# Global instance
|
|
# Constructed when this module is imported: LLMService.__init__ copies the
# provider, model, host and TTS values out of `settings` at that moment.
llm_service = LLMService()
|