Caller generation overhaul, Devon intern, frontend redesign
Caller system: structured JSON backgrounds, voice-personality matching (68 profiles), thematic inter-caller awareness, adaptive call shapes, show pacing, returning caller memory with relationships/arcs, post-call quality signals, 95 comedy writer entries. Devon the Intern: persistent show character with tool-calling LLM (web search, Wikipedia, headlines, webpage fetch), auto-monitoring, 6 API endpoints, full frontend UI. Frontend: wrap-up nudge button, caller info panel with shape/energy/emotion badges, keyboard shortcuts (1-0/H/W/M/D), pinned SFX, visual polish, Devon panel. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
"""LLM service with OpenRouter and Ollama support"""
|
||||
|
||||
import json
|
||||
import httpx
|
||||
from typing import Optional
|
||||
from typing import Optional, Callable, Awaitable
|
||||
from ..config import settings
|
||||
|
||||
|
||||
@@ -112,25 +113,156 @@ class LLMService:
|
||||
self,
|
||||
messages: list[dict],
|
||||
system_prompt: Optional[str] = None,
|
||||
max_tokens: Optional[int] = None
|
||||
max_tokens: Optional[int] = None,
|
||||
response_format: Optional[dict] = None
|
||||
) -> str:
|
||||
if system_prompt:
|
||||
messages = [{"role": "system", "content": system_prompt}] + messages
|
||||
|
||||
if self.provider == "openrouter":
|
||||
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens)
|
||||
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens, response_format=response_format)
|
||||
else:
|
||||
return await self._call_ollama(messages, max_tokens=max_tokens)
|
||||
|
||||
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None) -> str:
|
||||
async def generate_with_tools(
    self,
    messages: list[dict],
    tools: list[dict],
    tool_executor: Callable[[str, dict], Awaitable[str]],
    system_prompt: Optional[str] = None,
    model: Optional[str] = None,
    max_tokens: int = 500,
    max_tool_rounds: int = 3,
) -> tuple[str, list[dict]]:
    """Generate a response with OpenRouter function calling.

    Runs up to max_tool_rounds rounds in which the model may request tool
    calls; each requested tool is executed via tool_executor and its result
    appended back into the conversation. If the model replies with plain
    text at any point, that text is returned immediately. Once the rounds
    are exhausted (or an HTTP error/timeout occurs), one final call is made
    without tools to force a text answer.

    Args:
        messages: Conversation messages
        tools: Tool definitions in OpenAI function-calling format
        tool_executor: async function(tool_name, arguments) -> result string
        system_prompt: Optional system prompt
        model: Model to use (defaults to primary openrouter_model)
        max_tokens: Max tokens for response
        max_tool_rounds: Max tool call rounds to prevent loops

    Returns:
        (final_text, tool_calls_made) where tool_calls_made is a list of
        {"name": str, "arguments": dict, "result": str} dicts
    """
    model = model or self.openrouter_model
    msgs = list(messages)  # copy so the caller's list is never mutated
    if system_prompt:
        msgs = [{"role": "system", "content": system_prompt}] + msgs

    all_tool_calls = []

    # FIX: was range(max_tool_rounds + 1), which allowed one extra round of
    # tool execution beyond the documented cap before the forced final call.
    for round_num in range(max_tool_rounds):
        payload = {
            "model": model,
            "messages": msgs,
            "max_tokens": max_tokens,
            "temperature": 0.65,
            "tools": tools,
            "tool_choice": "auto",
        }

        try:
            response = await self.client.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json=payload,
                timeout=15.0,
            )
            response.raise_for_status()
            data = response.json()
        except httpx.TimeoutException:
            print(f"[LLM-Tools] {model} timed out (round {round_num})")
            break  # fall through to the no-tools final call below
        except Exception as e:
            print(f"[LLM-Tools] {model} error (round {round_num}): {e}")
            break

        choice = data["choices"][0]
        msg = choice["message"]

        # Check for tool calls
        tool_calls = msg.get("tool_calls")
        if not tool_calls:
            # No tool calls — LLM returned a final text response
            content = msg.get("content", "")
            return content or "", all_tool_calls

        # Append assistant message with tool calls to conversation
        msgs.append(msg)

        # Execute each tool call
        for tc in tool_calls:
            func = tc["function"]
            tool_name = func["name"]
            try:
                arguments = json.loads(func["arguments"])
            except (json.JSONDecodeError, TypeError):
                # Malformed or missing arguments — call the tool with none
                arguments = {}

            print(f"[LLM-Tools] Round {round_num}: calling {tool_name}({arguments})")

            try:
                result = await tool_executor(tool_name, arguments)
            except Exception as e:
                # Surface the failure to the model instead of aborting the call
                result = f"Error: {e}"
                print(f"[LLM-Tools] Tool {tool_name} failed: {e}")

            all_tool_calls.append({
                "name": tool_name,
                "arguments": arguments,
                "result": result[:500],  # truncate: callers only need a preview
            })

            # Append tool result to conversation
            msgs.append({
                "role": "tool",
                "tool_call_id": tc["id"],
                "content": result,
            })

    # Exhausted tool rounds or hit an error — do one final call without tools
    print(f"[LLM-Tools] Finishing after {len(all_tool_calls)} tool calls")
    try:
        final_payload = {
            "model": model,
            "messages": msgs,
            "max_tokens": max_tokens,
            "temperature": 0.65,
        }
        response = await self.client.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {settings.openrouter_api_key}",
                "Content-Type": "application/json",
            },
            json=final_payload,
            timeout=15.0,
        )
        response.raise_for_status()
        data = response.json()
        content = data["choices"][0]["message"].get("content", "")
        return content or "", all_tool_calls
    except Exception as e:
        print(f"[LLM-Tools] Final call failed: {e}")
        return "", all_tool_calls
|
||||
|
||||
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None) -> str:
|
||||
"""Try primary model, then fallback models. Always returns a response."""
|
||||
|
||||
# Try primary model first
|
||||
result = await self._call_openrouter_once(messages, self.openrouter_model, max_tokens=max_tokens)
|
||||
result = await self._call_openrouter_once(messages, self.openrouter_model, max_tokens=max_tokens, response_format=response_format)
|
||||
if result is not None:
|
||||
return result
|
||||
|
||||
# Try fallback models
|
||||
# Try fallback models (drop response_format for fallbacks — not all models support it)
|
||||
for model in FALLBACK_MODELS:
|
||||
if model == self.openrouter_model:
|
||||
continue # Already tried
|
||||
@@ -143,24 +275,27 @@ class LLMService:
|
||||
print("[LLM] All models failed, using canned response")
|
||||
return "Sorry, I totally blanked out for a second. What were you saying?"
|
||||
|
||||
async def _call_openrouter_once(self, messages: list[dict], model: str, timeout: float = 10.0, max_tokens: Optional[int] = None) -> str | None:
|
||||
async def _call_openrouter_once(self, messages: list[dict], model: str, timeout: float = 10.0, max_tokens: Optional[int] = None, response_format: Optional[dict] = None) -> str | None:
|
||||
"""Single attempt to call OpenRouter. Returns None on failure (not a fallback string)."""
|
||||
try:
|
||||
payload = {
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens or 500,
|
||||
"temperature": 0.65,
|
||||
"top_p": 0.9,
|
||||
"frequency_penalty": 0.3,
|
||||
"presence_penalty": 0.15,
|
||||
}
|
||||
if response_format:
|
||||
payload["response_format"] = response_format
|
||||
response = await self.client.post(
|
||||
"https://openrouter.ai/api/v1/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {settings.openrouter_api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens or 500,
|
||||
"temperature": 0.65,
|
||||
"top_p": 0.9,
|
||||
"frequency_penalty": 0.3,
|
||||
"presence_penalty": 0.15,
|
||||
},
|
||||
json=payload,
|
||||
timeout=timeout,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
Reference in New Issue
Block a user