Caller generation overhaul, Devon intern, frontend redesign
Caller system: structured JSON backgrounds, voice-personality matching (68 profiles), thematic inter-caller awareness, adaptive call shapes, show pacing, returning caller memory with relationships/arcs, post-call quality signals, 95 comedy writer entries. Devon the Intern: persistent show character with tool-calling LLM (web search, Wikipedia, headlines, webpage fetch), auto-monitoring, 6 API endpoints, full frontend UI. Frontend: wrap-up nudge button, caller info panel with shape/energy/emotion badges, keyboard shortcuts (1-0/H/W/M/D), pinned SFX, visual polish, Devon panel. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
"""LLM service with OpenRouter and Ollama support"""
|
||||
|
||||
import json
|
||||
import httpx
|
||||
from typing import Optional
|
||||
from typing import Optional, Callable, Awaitable
|
||||
from ..config import settings
|
||||
|
||||
|
||||
@@ -112,25 +113,156 @@ class LLMService:
|
||||
self,
|
||||
messages: list[dict],
|
||||
system_prompt: Optional[str] = None,
|
||||
max_tokens: Optional[int] = None
|
||||
max_tokens: Optional[int] = None,
|
||||
response_format: Optional[dict] = None
|
||||
) -> str:
|
||||
if system_prompt:
|
||||
messages = [{"role": "system", "content": system_prompt}] + messages
|
||||
|
||||
if self.provider == "openrouter":
|
||||
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens)
|
||||
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens, response_format=response_format)
|
||||
else:
|
||||
return await self._call_ollama(messages, max_tokens=max_tokens)
|
||||
|
||||
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None) -> str:
|
||||
async def generate_with_tools(
    self,
    messages: list[dict],
    tools: list[dict],
    tool_executor: Callable[[str, dict], Awaitable[str]],
    system_prompt: Optional[str] = None,
    model: Optional[str] = None,
    max_tokens: int = 500,
    max_tool_rounds: int = 3,
) -> tuple[str, list[dict]]:
    """Generate a response with OpenRouter function calling.

    Runs up to max_tool_rounds rounds in which the model may request tool
    calls; each requested tool is executed via tool_executor and its result
    appended back into the conversation. If the model replies with plain
    text at any point, that text is returned immediately. Once the rounds
    are exhausted (or an HTTP error/timeout occurs), one final call is made
    without tools to force a text answer.

    Args:
        messages: Conversation messages
        tools: Tool definitions in OpenAI function-calling format
        tool_executor: async function(tool_name, arguments) -> result string
        system_prompt: Optional system prompt
        model: Model to use (defaults to primary openrouter_model)
        max_tokens: Max tokens for response
        max_tool_rounds: Max tool call rounds to prevent loops

    Returns:
        (final_text, tool_calls_made) where tool_calls_made is a list of
        {"name": str, "arguments": dict, "result": str} dicts
    """
    model = model or self.openrouter_model
    msgs = list(messages)  # copy so the caller's list is never mutated
    if system_prompt:
        msgs = [{"role": "system", "content": system_prompt}] + msgs

    all_tool_calls = []

    # FIX: was range(max_tool_rounds + 1), which allowed one extra round of
    # tool execution beyond the documented cap before the forced final call.
    for round_num in range(max_tool_rounds):
        payload = {
            "model": model,
            "messages": msgs,
            "max_tokens": max_tokens,
            "temperature": 0.65,
            "tools": tools,
            "tool_choice": "auto",
        }

        try:
            response = await self.client.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json=payload,
                timeout=15.0,
            )
            response.raise_for_status()
            data = response.json()
        except httpx.TimeoutException:
            print(f"[LLM-Tools] {model} timed out (round {round_num})")
            break  # fall through to the no-tools final call below
        except Exception as e:
            print(f"[LLM-Tools] {model} error (round {round_num}): {e}")
            break

        choice = data["choices"][0]
        msg = choice["message"]

        # Check for tool calls
        tool_calls = msg.get("tool_calls")
        if not tool_calls:
            # No tool calls — LLM returned a final text response
            content = msg.get("content", "")
            return content or "", all_tool_calls

        # Append assistant message with tool calls to conversation
        msgs.append(msg)

        # Execute each tool call
        for tc in tool_calls:
            func = tc["function"]
            tool_name = func["name"]
            try:
                arguments = json.loads(func["arguments"])
            except (json.JSONDecodeError, TypeError):
                # Malformed or missing arguments — call the tool with none
                arguments = {}

            print(f"[LLM-Tools] Round {round_num}: calling {tool_name}({arguments})")

            try:
                result = await tool_executor(tool_name, arguments)
            except Exception as e:
                # Surface the failure to the model instead of aborting the call
                result = f"Error: {e}"
                print(f"[LLM-Tools] Tool {tool_name} failed: {e}")

            all_tool_calls.append({
                "name": tool_name,
                "arguments": arguments,
                "result": result[:500],  # truncate: callers only need a preview
            })

            # Append tool result to conversation
            msgs.append({
                "role": "tool",
                "tool_call_id": tc["id"],
                "content": result,
            })

    # Exhausted tool rounds or hit an error — do one final call without tools
    print(f"[LLM-Tools] Finishing after {len(all_tool_calls)} tool calls")
    try:
        final_payload = {
            "model": model,
            "messages": msgs,
            "max_tokens": max_tokens,
            "temperature": 0.65,
        }
        response = await self.client.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {settings.openrouter_api_key}",
                "Content-Type": "application/json",
            },
            json=final_payload,
            timeout=15.0,
        )
        response.raise_for_status()
        data = response.json()
        content = data["choices"][0]["message"].get("content", "")
        return content or "", all_tool_calls
    except Exception as e:
        print(f"[LLM-Tools] Final call failed: {e}")
        return "", all_tool_calls
|
||||
|
||||
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None) -> str:
|
||||
"""Try primary model, then fallback models. Always returns a response."""
|
||||
|
||||
# Try primary model first
|
||||
result = await self._call_openrouter_once(messages, self.openrouter_model, max_tokens=max_tokens)
|
||||
result = await self._call_openrouter_once(messages, self.openrouter_model, max_tokens=max_tokens, response_format=response_format)
|
||||
if result is not None:
|
||||
return result
|
||||
|
||||
# Try fallback models
|
||||
# Try fallback models (drop response_format for fallbacks — not all models support it)
|
||||
for model in FALLBACK_MODELS:
|
||||
if model == self.openrouter_model:
|
||||
continue # Already tried
|
||||
@@ -143,24 +275,27 @@ class LLMService:
|
||||
print("[LLM] All models failed, using canned response")
|
||||
return "Sorry, I totally blanked out for a second. What were you saying?"
|
||||
|
||||
async def _call_openrouter_once(self, messages: list[dict], model: str, timeout: float = 10.0, max_tokens: Optional[int] = None) -> str | None:
|
||||
async def _call_openrouter_once(self, messages: list[dict], model: str, timeout: float = 10.0, max_tokens: Optional[int] = None, response_format: Optional[dict] = None) -> str | None:
|
||||
"""Single attempt to call OpenRouter. Returns None on failure (not a fallback string)."""
|
||||
try:
|
||||
payload = {
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens or 500,
|
||||
"temperature": 0.65,
|
||||
"top_p": 0.9,
|
||||
"frequency_penalty": 0.3,
|
||||
"presence_penalty": 0.15,
|
||||
}
|
||||
if response_format:
|
||||
payload["response_format"] = response_format
|
||||
response = await self.client.post(
|
||||
"https://openrouter.ai/api/v1/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {settings.openrouter_api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens or 500,
|
||||
"temperature": 0.65,
|
||||
"top_p": 0.9,
|
||||
"frequency_penalty": 0.3,
|
||||
"presence_penalty": 0.15,
|
||||
},
|
||||
json=payload,
|
||||
timeout=timeout,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
Reference in New Issue
Block a user