Files
ai-podcast/backend/services/llm.py
tcpsyn b3fb3b1127 Fix AI caller hanging on 'thinking...' indefinitely
- Add 30s timeout to all frontend fetch calls (safeFetch)
- Add 20s asyncio.timeout around lock+LLM in chat, ai-respond, auto-respond
- Reduce OpenRouter timeout from 60s to 25s
- Reduce Inworld TTS timeout from 60s to 25s
- Return graceful fallback responses on timeout instead of hanging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 21:16:15 -07:00

180 lines
6.6 KiB
Python

"""LLM service with OpenRouter and Ollama support"""
import httpx
from typing import Optional
from ..config import settings
# OpenRouter model identifiers ("<vendor>/<model>") that this module reports
# under "available_openrouter_models" in its settings payload.
OPENROUTER_MODELS = [
    # OpenAI
    "openai/gpt-4o-mini",
    "openai/gpt-4o",
    # Anthropic
    "anthropic/claude-3-haiku",
    "anthropic/claude-3.5-sonnet",
    # Google
    "google/gemini-flash-1.5",
    "google/gemini-pro-1.5",
    # Open-weight models
    "meta-llama/llama-3.1-8b-instruct",
    "mistralai/mistral-7b-instruct",
]
class LLMService:
    """Abstraction layer for LLM providers (OpenRouter and Ollama).

    Holds the currently selected provider/model settings, exposes them as a
    plain dict, and routes chat requests to the active provider. Provider
    failures (any ``Exception``, including timeouts) are reported with
    ``print`` and replaced by a short fallback string instead of being
    raised to the caller of :meth:`generate`.
    """

    # OpenRouter request parameters.
    OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
    OPENROUTER_TIMEOUT = 25.0  # seconds, applied to each attempt
    OPENROUTER_ATTEMPTS = 2  # one initial try plus one retry on timeout
    OPENROUTER_MAX_TOKENS = 150

    # Ollama request parameters.
    OLLAMA_TIMEOUT = 30.0  # seconds, chat request
    OLLAMA_TAGS_TIMEOUT = 5.0  # seconds, model listing

    # Replies returned in place of a real completion when a provider fails.
    TIMEOUT_FALLBACK = "Uh, sorry, I lost you there for a second. What was that?"
    ERROR_FALLBACK = "Yeah... I don't know, man."

    def __init__(self):
        """Seed the runtime-adjustable settings from the global ``settings``."""
        self.provider = settings.llm_provider
        self.openrouter_model = settings.openrouter_model
        self.ollama_model = settings.ollama_model
        self.ollama_host = settings.ollama_host
        self.tts_provider = settings.tts_provider

    def update_settings(
        self,
        provider: Optional[str] = None,
        openrouter_model: Optional[str] = None,
        ollama_model: Optional[str] = None,
        ollama_host: Optional[str] = None,
        tts_provider: Optional[str] = None
    ):
        """Update LLM settings.

        Only truthy arguments are applied; ``None`` or an empty string leaves
        the corresponding setting unchanged.

        Args:
            provider: Provider name; ``"openrouter"`` selects OpenRouter,
                any other value makes :meth:`generate` use Ollama.
            openrouter_model: Model identifier sent to OpenRouter.
            ollama_model: Model name sent to Ollama.
            ollama_host: Base URL of the Ollama server.
            tts_provider: TTS provider name; also written to the global
                ``settings`` object.
        """
        if provider:
            self.provider = provider
        if openrouter_model:
            self.openrouter_model = openrouter_model
        if ollama_model:
            self.ollama_model = ollama_model
        if ollama_host:
            self.ollama_host = ollama_host
        if tts_provider:
            self.tts_provider = tts_provider
            # Also update the global settings so TTS service picks it up
            settings.tts_provider = tts_provider

    def _ollama_url(self, path: str) -> str:
        """Join ``self.ollama_host`` and *path* with exactly one slash.

        A host configured with a trailing slash would otherwise produce
        ``//api/...``. The stored ``ollama_host`` itself is left untouched.
        """
        return f"{self.ollama_host.rstrip('/')}/{path.lstrip('/')}"

    @staticmethod
    def _clean_content(content: Optional[str]) -> str:
        """Return *content* unchanged, or ``""`` if it is ``None`` or blank."""
        if not content or not content.strip():
            return ""
        return content

    async def get_ollama_models(self) -> list[str]:
        """Fetch available models from Ollama.

        Returns:
            The ``name`` of each entry under ``models`` in the ``/api/tags``
            response, or an empty list if the request fails for any reason.
        """
        try:
            url = self._ollama_url("/api/tags")
            async with httpx.AsyncClient(timeout=self.OLLAMA_TAGS_TIMEOUT) as client:
                response = await client.get(url)
                response.raise_for_status()
                data = response.json()
            return [model["name"] for model in data.get("models", [])]
        except Exception as e:
            # Best effort: the model list is optional, so report and move on.
            print(f"Failed to fetch Ollama models: {e}")
            return []

    def get_settings(self) -> dict:
        """Get current settings (sync version without Ollama models).

        Returns:
            A new dict with the current provider/model settings and the
            static OpenRouter model list; ``available_ollama_models`` is
            always an empty list here.
        """
        return {
            "provider": self.provider,
            "openrouter_model": self.openrouter_model,
            "ollama_model": self.ollama_model,
            "ollama_host": self.ollama_host,
            "tts_provider": self.tts_provider,
            "available_openrouter_models": OPENROUTER_MODELS,
            "available_ollama_models": [],  # Fetched separately
        }

    async def get_settings_async(self) -> dict:
        """Get current settings with Ollama models.

        Returns:
            The same dict as :meth:`get_settings`, with
            ``available_ollama_models`` filled from :meth:`get_ollama_models`.
        """
        # Fetch first so the snapshot reflects the settings after the await.
        ollama_models = await self.get_ollama_models()
        return {**self.get_settings(), "available_ollama_models": ollama_models}

    async def generate(
        self,
        messages: list[dict],
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Generate a response from the LLM.

        Args:
            messages: List of message dicts with 'role' and 'content'.
                The list passed in is not modified.
            system_prompt: Optional system prompt to prepend

        Returns:
            Generated text response, an empty string if the provider
            returned blank content, or a fallback phrase on failure.
        """
        if system_prompt:
            # Build a new list rather than mutating the caller's.
            messages = [{"role": "system", "content": system_prompt}] + messages

        if self.provider == "openrouter":
            return await self._call_openrouter(messages)
        # Any provider other than "openrouter" is served by Ollama.
        return await self._call_ollama(messages)

    async def _call_openrouter(self, messages: list[dict]) -> str:
        """Call the OpenRouter chat-completions API, retrying once on timeout.

        Only timeouts are retried; any other error returns the error
        fallback immediately.
        """
        for attempt in range(self.OPENROUTER_ATTEMPTS):
            try:
                async with httpx.AsyncClient(timeout=self.OPENROUTER_TIMEOUT) as client:
                    response = await client.post(
                        self.OPENROUTER_URL,
                        headers={
                            "Authorization": f"Bearer {settings.openrouter_api_key}",
                            "Content-Type": "application/json",
                        },
                        json={
                            "model": self.openrouter_model,
                            "messages": messages,
                            "max_tokens": self.OPENROUTER_MAX_TOKENS,
                        },
                    )
                    response.raise_for_status()
                    data = response.json()
                content = self._clean_content(
                    data["choices"][0]["message"]["content"]
                )
                if not content:
                    print("OpenRouter returned empty response")
                return content
            except httpx.TimeoutException:
                # httpx.ReadTimeout is a subclass, so this covers it as well.
                print(f"OpenRouter timeout (attempt {attempt + 1})")
                if attempt + 1 < self.OPENROUTER_ATTEMPTS:
                    continue  # Retry
                return self.TIMEOUT_FALLBACK
            except Exception as e:
                print(f"OpenRouter error: {e}")
                return self.ERROR_FALLBACK
        # Only reached if the loop body never runs (zero attempts configured).
        return "Uh, hold on a sec..."

    async def _call_ollama(self, messages: list[dict]) -> str:
        """Call the Ollama chat API (single attempt, non-streaming)."""
        try:
            url = self._ollama_url("/api/chat")
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    url,
                    json={
                        "model": self.ollama_model,
                        "messages": messages,
                        "stream": False,
                        "options": {
                            "num_predict": 100,  # Allow complete thoughts
                            "temperature": 0.8,  # Balanced creativity/coherence
                            "top_p": 0.9,  # Focused word choices
                            "repeat_penalty": 1.3,  # Avoid repetition
                            "top_k": 50,  # Reasonable token variety
                        },
                    },
                    timeout=self.OLLAMA_TIMEOUT,
                )
                response.raise_for_status()
                data = response.json()
            # Match the OpenRouter path: blank content becomes "".
            content = self._clean_content(data["message"]["content"])
            if not content:
                print("Ollama returned empty response")
            return content
        except httpx.TimeoutException:
            print("Ollama timeout")
            return self.TIMEOUT_FALLBACK
        except Exception as e:
            print(f"Ollama error: {e}")
            return self.ERROR_FALLBACK
# Module-level LLMService instance, constructed once when this module is
# imported; its initial values are read from ``settings`` in ``__init__``.
llm_service = LLMService()