From b3fb3b11275c4263ff755e5625cee272c76995d0 Mon Sep 17 00:00:00 2001
From: tcpsyn <tcpsyn@gmail.com>
Date: Fri, 6 Feb 2026 21:16:15 -0700
Subject: [PATCH] Fix AI caller hanging on 'thinking...' indefinitely

- Add 30s default timeout (AbortController) to frontend fetch calls made via safeFetch
- Add 20s asyncio.timeout around lock acquisition + LLM generation in chat, ai-respond, auto-respond
- Reduce OpenRouter timeout from 60s to 25s
- Reduce Inworld TTS timeout from 60s to 25s
- On timeout, return a graceful fallback response (chat, ai-respond) or broadcast ai_done (auto-respond) instead of hanging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/main.py         | 111 +++++++++++++++++++++++-----------------
 backend/services/llm.py |   2 +-
 backend/services/tts.py |   2 +-
 frontend/js/app.js      |  27 ++++++----
 4 files changed, 83 insertions(+), 59 deletions(-)

diff --git a/backend/main.py b/backend/main.py
index 0385de4..fb88fbd 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1075,25 +1075,30 @@ async def chat(request: ChatRequest):
     session.add_message("user", request.text)
     session._research_task = asyncio.create_task(_background_research(request.text))
 
-    async with _ai_response_lock:
-        if _session_epoch != epoch:
-            raise HTTPException(409, "Call ended while waiting")
+    try:
+        async with asyncio.timeout(20):
+            async with _ai_response_lock:
+                if _session_epoch != epoch:
+                    raise HTTPException(409, "Call ended while waiting")
 
-        # Stop any playing caller audio so responses don't overlap
-        audio_service.stop_caller_audio()
+                # Stop any playing caller audio so responses don't overlap
+                audio_service.stop_caller_audio()
 
-        # Include conversation summary and show history for context
-        conversation_summary = session.get_conversation_summary()
-        show_history = session.get_show_history()
-        news_ctx, research_ctx = _build_news_context()
-        system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
-                                          news_ctx, research_ctx)
+                # Include conversation summary and show history for context
+                conversation_summary = session.get_conversation_summary()
+                show_history = session.get_show_history()
+                news_ctx, research_ctx = _build_news_context()
+                system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
+                                                  news_ctx, research_ctx)
 
-        messages = _normalize_messages_for_llm(session.conversation[-10:])
-        response = await llm_service.generate(
-            messages=messages,
-            system_prompt=system_prompt
-        )
+                messages = _normalize_messages_for_llm(session.conversation[-10:])
+                response = await llm_service.generate(
+                    messages=messages,
+                    system_prompt=system_prompt
+                )
+    except TimeoutError:
+        caller_name = session.caller["name"] if session.caller else "Caller"
+        return {"text": "Uh... hold on, I lost my train of thought.", "caller": caller_name, "voice_id": session.caller["voice"] if session.caller else ""}
 
     # Discard if call changed while we were generating
     if _session_epoch != epoch:
@@ -1600,26 +1605,32 @@ async def _trigger_ai_auto_respond(accumulated_text: str):
 
     ai_name = session.caller["name"]
 
-    async with _ai_response_lock:
-        if _session_epoch != epoch:
-            return  # Call changed while waiting for lock
+    try:
+        async with asyncio.timeout(20):
+            async with _ai_response_lock:
+                if _session_epoch != epoch:
+                    return  # Call changed while waiting for lock
 
-        print(f"[Auto-Respond] {ai_name} is jumping in...")
-        session._last_ai_auto_respond = time.time()
-        audio_service.stop_caller_audio()
-        broadcast_event("ai_status", {"text": f"{ai_name} is thinking..."})
+                print(f"[Auto-Respond] {ai_name} is jumping in...")
+                session._last_ai_auto_respond = time.time()
+                audio_service.stop_caller_audio()
+                broadcast_event("ai_status", {"text": f"{ai_name} is thinking..."})
 
-        conversation_summary = session.get_conversation_summary()
-        show_history = session.get_show_history()
-        news_ctx, research_ctx = _build_news_context()
-        system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
-                                          news_ctx, research_ctx)
+                conversation_summary = session.get_conversation_summary()
+                show_history = session.get_show_history()
+                news_ctx, research_ctx = _build_news_context()
+                system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
+                                                  news_ctx, research_ctx)
 
-        messages = _normalize_messages_for_llm(session.conversation[-10:])
-        response = await llm_service.generate(
-            messages=messages,
-            system_prompt=system_prompt,
-        )
+                messages = _normalize_messages_for_llm(session.conversation[-10:])
+                response = await llm_service.generate(
+                    messages=messages,
+                    system_prompt=system_prompt,
+                )
+    except TimeoutError:
+        print(f"[Auto-Respond] Timed out for {ai_name}")
+        broadcast_event("ai_done")
+        return
 
     # Discard if call changed during generation
     if _session_epoch != epoch:
@@ -1677,23 +1688,27 @@ async def ai_respond():
 
     epoch = _session_epoch
 
-    async with _ai_response_lock:
-        if _session_epoch != epoch:
-            raise HTTPException(409, "Call ended while waiting")
+    try:
+        async with asyncio.timeout(20):
+            async with _ai_response_lock:
+                if _session_epoch != epoch:
+                    raise HTTPException(409, "Call ended while waiting")
 
-        audio_service.stop_caller_audio()
+                audio_service.stop_caller_audio()
 
-        conversation_summary = session.get_conversation_summary()
-        show_history = session.get_show_history()
-        news_ctx, research_ctx = _build_news_context()
-        system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
-                                          news_ctx, research_ctx)
+                conversation_summary = session.get_conversation_summary()
+                show_history = session.get_show_history()
+                news_ctx, research_ctx = _build_news_context()
+                system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
+                                                  news_ctx, research_ctx)
 
-        messages = _normalize_messages_for_llm(session.conversation[-10:])
-        response = await llm_service.generate(
-            messages=messages,
-            system_prompt=system_prompt
-        )
+                messages = _normalize_messages_for_llm(session.conversation[-10:])
+                response = await llm_service.generate(
+                    messages=messages,
+                    system_prompt=system_prompt
+                )
+    except TimeoutError:
+        return {"text": "Uh... sorry, I spaced out for a second there.", "caller": session.caller["name"], "voice_id": session.caller["voice"]}
 
     if _session_epoch != epoch:
         raise HTTPException(409, "Call changed during response")
@@ -1707,7 +1722,7 @@ async def ai_respond():
     ai_name = session.caller["name"]
     session.add_message(f"ai_caller:{ai_name}", response)
 
-    # TTS
+    # TTS — outside the lock so other requests aren't blocked
     audio_bytes = await generate_speech(response, session.caller["voice"], "none")
 
     if _session_epoch != epoch:
diff --git a/backend/services/llm.py b/backend/services/llm.py
index 8982ee3..2776fd4 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -114,7 +114,7 @@ class LLMService:
         """Call OpenRouter API with retry"""
         for attempt in range(2):  # Try twice
             try:
-                async with httpx.AsyncClient(timeout=60.0) as client:
+                async with httpx.AsyncClient(timeout=25.0) as client:
                     response = await client.post(
                         "https://openrouter.ai/api/v1/chat/completions",
                         headers={
diff --git a/backend/services/tts.py b/backend/services/tts.py
index 6ba3708..1375feb 100644
--- a/backend/services/tts.py
+++ b/backend/services/tts.py
@@ -600,7 +600,7 @@ async def generate_speech_inworld(text: str, voice_id: str) -> tuple[np.ndarray,
         },
     }
 
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    async with httpx.AsyncClient(timeout=25.0) as client:
         response = await client.post(url, json=payload, headers=headers)
         response.raise_for_status()
         data = response.json()
diff --git a/frontend/js/app.js b/frontend/js/app.js
index d1e9f9f..aab3713 100644
--- a/frontend/js/app.js
+++ b/frontend/js/app.js
@@ -17,17 +17,26 @@ let sounds = [];
 
 
 // --- Safe JSON parsing ---
-async function safeFetch(url, options = {}) {
-    const res = await fetch(url, options);
-    if (!res.ok) {
+async function safeFetch(url, options = {}, timeoutMs = 30000) {
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), timeoutMs);
+    try {
+        const res = await fetch(url, { ...options, signal: controller.signal });
+        clearTimeout(timer);
+        if (!res.ok) {
+            const text = await res.text();
+            let detail = text;
+            try { detail = JSON.parse(text).detail || text; } catch {}
+            throw new Error(detail);
+        }
         const text = await res.text();
-        let detail = text;
-        try { detail = JSON.parse(text).detail || text; } catch {}
-        throw new Error(detail);
+        if (!text) return {};
+        return JSON.parse(text);
+    } catch (err) {
+        clearTimeout(timer);
+        if (err.name === 'AbortError') throw new Error('Request timed out');
+        throw err;
     }
-    const text = await res.text();
-    if (!text) return {};
-    return JSON.parse(text);
 }