Caller generation overhaul, Devon intern, frontend redesign

Caller system: structured JSON backgrounds, voice-personality matching (68 profiles),
thematic inter-caller awareness, adaptive call shapes, show pacing, returning caller
memory with relationships/arcs, post-call quality signals, 95 comedy writer entries.

Devon the Intern: persistent show character with tool-calling LLM (web search, Wikipedia,
headlines, webpage fetch), auto-monitoring, 6 API endpoints, full frontend UI.

Frontend: wrap-up nudge button, caller info panel with shape/energy/emotion badges,
keyboard shortcuts (1-0/H/W/M/D), pinned SFX, visual polish, Devon panel.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-14 01:54:08 -06:00
parent d3490e1521
commit 6d4e490283
10 changed files with 2776 additions and 179 deletions

View File

@@ -1,7 +1,8 @@
"""LLM service with OpenRouter and Ollama support"""
import json
import httpx
from typing import Optional
from typing import Optional, Callable, Awaitable
from ..config import settings
@@ -112,25 +113,156 @@ class LLMService:
self,
messages: list[dict],
system_prompt: Optional[str] = None,
max_tokens: Optional[int] = None
max_tokens: Optional[int] = None,
response_format: Optional[dict] = None
) -> str:
if system_prompt:
messages = [{"role": "system", "content": system_prompt}] + messages
if self.provider == "openrouter":
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens)
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens, response_format=response_format)
else:
return await self._call_ollama(messages, max_tokens=max_tokens)
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None) -> str:
async def generate_with_tools(
    self,
    messages: list[dict],
    tools: list[dict],
    tool_executor: Callable[[str, dict], Awaitable[str]],
    system_prompt: Optional[str] = None,
    model: Optional[str] = None,
    max_tokens: int = 500,
    max_tool_rounds: int = 3,
) -> tuple[str, list[dict]]:
    """Generate a response with OpenRouter function calling.

    Runs an agentic loop: ask the model, execute any tool calls it
    requests via *tool_executor*, append the results to the
    conversation, and repeat until the model answers in plain text or
    *max_tool_rounds* is exhausted.

    Args:
        messages: Conversation messages
        tools: Tool definitions in OpenAI function-calling format
        tool_executor: async function(tool_name, arguments) -> result string
        system_prompt: Optional system prompt
        model: Model to use (defaults to primary openrouter_model)
        max_tokens: Max tokens for response
        max_tool_rounds: Max tool call rounds to prevent loops

    Returns:
        (final_text, tool_calls_made) where tool_calls_made is a list of
        {"name": str, "arguments": dict, "result": str} dicts.
        final_text is "" when every request attempt failed.
    """
    model = model or self.openrouter_model
    # Copy so the caller's message list is never mutated.
    msgs = list(messages)
    if system_prompt:
        msgs = [{"role": "system", "content": system_prompt}] + msgs
    all_tool_calls = []
    # +1 gives the model one extra turn to answer after the last
    # round of tool results.
    for round_num in range(max_tool_rounds + 1):
        payload = {
            "model": model,
            "messages": msgs,
            "max_tokens": max_tokens,
            "temperature": 0.65,
            "tools": tools,
            "tool_choice": "auto",
        }
        try:
            response = await self.client.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json=payload,
                timeout=15.0,
            )
            response.raise_for_status()
            data = response.json()
        except httpx.TimeoutException:
            # Fall through to the no-tools final call below rather than
            # failing the whole generation.
            print(f"[LLM-Tools] {model} timed out (round {round_num})")
            break
        except Exception as e:
            print(f"[LLM-Tools] {model} error (round {round_num}): {e}")
            break
        choice = data["choices"][0]
        msg = choice["message"]
        # Check for tool calls
        tool_calls = msg.get("tool_calls")
        if not tool_calls:
            # No tool calls — LLM returned a final text response
            content = msg.get("content", "")
            # `content` may be None in the API response; normalize to "".
            return content or "", all_tool_calls
        # Append assistant message with tool calls to conversation
        msgs.append(msg)
        # Execute each tool call
        for tc in tool_calls:
            func = tc["function"]
            tool_name = func["name"]
            try:
                # Arguments arrive as a JSON-encoded string per the
                # OpenAI function-calling format.
                arguments = json.loads(func["arguments"])
            except (json.JSONDecodeError, TypeError):
                # Malformed or missing arguments from the model — call
                # the tool with no arguments instead of aborting.
                arguments = {}
            print(f"[LLM-Tools] Round {round_num}: calling {tool_name}({arguments})")
            try:
                result = await tool_executor(tool_name, arguments)
            except Exception as e:
                # Feed the failure back to the model as the tool result
                # so it can recover in its next turn.
                result = f"Error: {e}"
                print(f"[LLM-Tools] Tool {tool_name} failed: {e}")
            all_tool_calls.append({
                "name": tool_name,
                "arguments": arguments,
                # Truncated copy for the caller's audit log; the full
                # result still goes back to the model below.
                "result": result[:500],
            })
            # Append tool result to conversation
            msgs.append({
                "role": "tool",
                "tool_call_id": tc["id"],
                "content": result,
            })
    # Exhausted tool rounds or hit an error — do one final call without tools
    print(f"[LLM-Tools] Finishing after {len(all_tool_calls)} tool calls")
    try:
        # No "tools" key here: forces a plain-text answer from the
        # accumulated conversation (including any tool results).
        final_payload = {
            "model": model,
            "messages": msgs,
            "max_tokens": max_tokens,
            "temperature": 0.65,
        }
        response = await self.client.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {settings.openrouter_api_key}",
                "Content-Type": "application/json",
            },
            json=final_payload,
            timeout=15.0,
        )
        response.raise_for_status()
        data = response.json()
        content = data["choices"][0]["message"].get("content", "")
        return content or "", all_tool_calls
    except Exception as e:
        print(f"[LLM-Tools] Final call failed: {e}")
        # Empty text signals failure; the tool-call log is still returned.
        return "", all_tool_calls
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None) -> str:
"""Try primary model, then fallback models. Always returns a response."""
# Try primary model first
result = await self._call_openrouter_once(messages, self.openrouter_model, max_tokens=max_tokens)
result = await self._call_openrouter_once(messages, self.openrouter_model, max_tokens=max_tokens, response_format=response_format)
if result is not None:
return result
# Try fallback models
# Try fallback models (drop response_format for fallbacks — not all models support it)
for model in FALLBACK_MODELS:
if model == self.openrouter_model:
continue # Already tried
@@ -143,24 +275,27 @@ class LLMService:
print("[LLM] All models failed, using canned response")
return "Sorry, I totally blanked out for a second. What were you saying?"
async def _call_openrouter_once(self, messages: list[dict], model: str, timeout: float = 10.0, max_tokens: Optional[int] = None) -> str | None:
async def _call_openrouter_once(self, messages: list[dict], model: str, timeout: float = 10.0, max_tokens: Optional[int] = None, response_format: Optional[dict] = None) -> str | None:
"""Single attempt to call OpenRouter. Returns None on failure (not a fallback string)."""
try:
payload = {
"model": model,
"messages": messages,
"max_tokens": max_tokens or 500,
"temperature": 0.65,
"top_p": 0.9,
"frequency_penalty": 0.3,
"presence_penalty": 0.15,
}
if response_format:
payload["response_format"] = response_format
response = await self.client.post(
"https://openrouter.ai/api/v1/chat/completions",
headers={
"Authorization": f"Bearer {settings.openrouter_api_key}",
"Content-Type": "application/json",
},
json={
"model": model,
"messages": messages,
"max_tokens": max_tokens or 500,
"temperature": 0.65,
"top_p": 0.9,
"frequency_penalty": 0.3,
"presence_penalty": 0.15,
},
json=payload,
timeout=timeout,
)
response.raise_for_status()