From b3fb3b11275c4263ff755e5625cee272c76995d0 Mon Sep 17 00:00:00 2001
From: tcpsyn
Date: Fri, 6 Feb 2026 21:16:15 -0700
Subject: [PATCH] Fix AI caller hanging on 'thinking...' indefinitely

- Add 30s timeout to all frontend fetch calls (safeFetch)
- Add 20s asyncio.timeout around lock+LLM in chat, ai-respond, auto-respond
- Reduce OpenRouter timeout from 60s to 25s
- Reduce Inworld TTS timeout from 60s to 25s
- Return graceful fallback responses on timeout instead of hanging

Co-Authored-By: Claude Opus 4.6
---
Review notes (text between the marker above and the diffstat is dropped by git am):
- This copy was restored from a whitespace-collapsed paste. Line breaks follow
  the hunk counts, but indentation is inferred; if a hunk is rejected, retry
  with `git apply --ignore-whitespace`. The blob ids on the `index` lines
  predate the two changes below.
- safeFetch: the abort timer now stays armed until the response body has been
  read and is cleared in `finally`. fetch() settles as soon as headers arrive,
  so a stalled body could previously hang past the 30s limit.
- ai_respond: the timeout fallback no longer assumes session.caller is set,
  matching the fallback in the chat handler.
- asyncio.timeout() requires Python 3.11 or newer.

 backend/main.py         | 111 +++++++++++++++++++++++-----------------
 backend/services/llm.py |   2 +-
 backend/services/tts.py |   2 +-
 frontend/js/app.js      |  27 ++++++----
 4 files changed, 83 insertions(+), 59 deletions(-)

diff --git a/backend/main.py b/backend/main.py
index 0385de4..fb88fbd 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1075,25 +1075,30 @@ async def chat(request: ChatRequest):
     session.add_message("user", request.text)
     session._research_task = asyncio.create_task(_background_research(request.text))
 
-    async with _ai_response_lock:
-        if _session_epoch != epoch:
-            raise HTTPException(409, "Call ended while waiting")
+    try:
+        async with asyncio.timeout(20):
+            async with _ai_response_lock:
+                if _session_epoch != epoch:
+                    raise HTTPException(409, "Call ended while waiting")
 
-        # Stop any playing caller audio so responses don't overlap
-        audio_service.stop_caller_audio()
+                # Stop any playing caller audio so responses don't overlap
+                audio_service.stop_caller_audio()
 
-        # Include conversation summary and show history for context
-        conversation_summary = session.get_conversation_summary()
-        show_history = session.get_show_history()
-        news_ctx, research_ctx = _build_news_context()
-        system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
-                                          news_ctx, research_ctx)
+                # Include conversation summary and show history for context
+                conversation_summary = session.get_conversation_summary()
+                show_history = session.get_show_history()
+                news_ctx, research_ctx = _build_news_context()
+                system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
+                                                  news_ctx, research_ctx)
 
-        messages = _normalize_messages_for_llm(session.conversation[-10:])
-        response = await llm_service.generate(
-            messages=messages,
-            system_prompt=system_prompt
-        )
+                messages = _normalize_messages_for_llm(session.conversation[-10:])
+                response = await llm_service.generate(
+                    messages=messages,
+                    system_prompt=system_prompt
+                )
+    except TimeoutError:
+        caller_name = session.caller["name"] if session.caller else "Caller"
+        return {"text": "Uh... hold on, I lost my train of thought.", "caller": caller_name, "voice_id": session.caller["voice"] if session.caller else ""}
 
     # Discard if call changed while we were generating
     if _session_epoch != epoch:
@@ -1600,26 +1605,32 @@ async def _trigger_ai_auto_respond(accumulated_text: str):
 
     ai_name = session.caller["name"]
 
-    async with _ai_response_lock:
-        if _session_epoch != epoch:
-            return  # Call changed while waiting for lock
+    try:
+        async with asyncio.timeout(20):
+            async with _ai_response_lock:
+                if _session_epoch != epoch:
+                    return  # Call changed while waiting for lock
 
-        print(f"[Auto-Respond] {ai_name} is jumping in...")
-        session._last_ai_auto_respond = time.time()
-        audio_service.stop_caller_audio()
-        broadcast_event("ai_status", {"text": f"{ai_name} is thinking..."})
+                print(f"[Auto-Respond] {ai_name} is jumping in...")
+                session._last_ai_auto_respond = time.time()
+                audio_service.stop_caller_audio()
+                broadcast_event("ai_status", {"text": f"{ai_name} is thinking..."})
 
-        conversation_summary = session.get_conversation_summary()
-        show_history = session.get_show_history()
-        news_ctx, research_ctx = _build_news_context()
-        system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
-                                          news_ctx, research_ctx)
+                conversation_summary = session.get_conversation_summary()
+                show_history = session.get_show_history()
+                news_ctx, research_ctx = _build_news_context()
+                system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
+                                                  news_ctx, research_ctx)
 
-        messages = _normalize_messages_for_llm(session.conversation[-10:])
-        response = await llm_service.generate(
-            messages=messages,
-            system_prompt=system_prompt,
-        )
+                messages = _normalize_messages_for_llm(session.conversation[-10:])
+                response = await llm_service.generate(
+                    messages=messages,
+                    system_prompt=system_prompt,
+                )
+    except TimeoutError:
+        print(f"[Auto-Respond] Timed out for {ai_name}")
+        broadcast_event("ai_done")
+        return
 
     # Discard if call changed during generation
     if _session_epoch != epoch:
@@ -1677,23 +1688,27 @@ async def ai_respond():
 
     epoch = _session_epoch
 
-    async with _ai_response_lock:
-        if _session_epoch != epoch:
-            raise HTTPException(409, "Call ended while waiting")
+    try:
+        async with asyncio.timeout(20):
+            async with _ai_response_lock:
+                if _session_epoch != epoch:
+                    raise HTTPException(409, "Call ended while waiting")
 
-        audio_service.stop_caller_audio()
+                audio_service.stop_caller_audio()
 
-        conversation_summary = session.get_conversation_summary()
-        show_history = session.get_show_history()
-        news_ctx, research_ctx = _build_news_context()
-        system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
-                                          news_ctx, research_ctx)
+                conversation_summary = session.get_conversation_summary()
+                show_history = session.get_show_history()
+                news_ctx, research_ctx = _build_news_context()
+                system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
+                                                  news_ctx, research_ctx)
 
-        messages = _normalize_messages_for_llm(session.conversation[-10:])
-        response = await llm_service.generate(
-            messages=messages,
-            system_prompt=system_prompt
-        )
+                messages = _normalize_messages_for_llm(session.conversation[-10:])
+                response = await llm_service.generate(
+                    messages=messages,
+                    system_prompt=system_prompt
+                )
+    except TimeoutError:
+        return {"text": "Uh... sorry, I spaced out for a second there.", "caller": session.caller["name"] if session.caller else "Caller", "voice_id": session.caller["voice"] if session.caller else ""}
 
     if _session_epoch != epoch:
         raise HTTPException(409, "Call changed during response")
@@ -1707,7 +1722,7 @@ async def ai_respond():
     ai_name = session.caller["name"]
     session.add_message(f"ai_caller:{ai_name}", response)
 
-    # TTS
+    # TTS — outside the lock so other requests aren't blocked
     audio_bytes = await generate_speech(response, session.caller["voice"], "none")
 
     if _session_epoch != epoch:
diff --git a/backend/services/llm.py b/backend/services/llm.py
index 8982ee3..2776fd4 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -114,7 +114,7 @@ class LLMService:
         """Call OpenRouter API with retry"""
         for attempt in range(2):  # Try twice
             try:
-                async with httpx.AsyncClient(timeout=60.0) as client:
+                async with httpx.AsyncClient(timeout=25.0) as client:
                     response = await client.post(
                         "https://openrouter.ai/api/v1/chat/completions",
                         headers={
diff --git a/backend/services/tts.py b/backend/services/tts.py
index 6ba3708..1375feb 100644
--- a/backend/services/tts.py
+++ b/backend/services/tts.py
@@ -600,7 +600,7 @@ async def generate_speech_inworld(text: str, voice_id: str) -> tuple[np.ndarray,
         },
     }
 
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    async with httpx.AsyncClient(timeout=25.0) as client:
         response = await client.post(url, json=payload, headers=headers)
         response.raise_for_status()
         data = response.json()
diff --git a/frontend/js/app.js b/frontend/js/app.js
index d1e9f9f..aab3713 100644
--- a/frontend/js/app.js
+++ b/frontend/js/app.js
@@ -17,17 +17,26 @@ let sounds = [];
 
 
 // --- Safe JSON parsing ---
-async function safeFetch(url, options = {}) {
-    const res = await fetch(url, options);
-    if (!res.ok) {
+async function safeFetch(url, options = {}, timeoutMs = 30000) {
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), timeoutMs);
+    try {
+        const res = await fetch(url, { ...options, signal: controller.signal });
+        // fetch() settles on headers; keep the timer armed while the body is read
         const text = await res.text();
-        let detail = text;
-        try { detail = JSON.parse(text).detail || text; } catch {}
-        throw new Error(detail);
+        if (!res.ok) {
+            let detail = text;
+            try { detail = JSON.parse(text).detail || text; } catch {}
+            throw new Error(detail);
+        }
+        if (!text) return {};
+        return JSON.parse(text);
+    } catch (err) {
+        if (err.name === 'AbortError') throw new Error('Request timed out');
+        throw err;
+    } finally {
+        clearTimeout(timer);
     }
-    const text = await res.text();
-    if (!text) return {};
-    return JSON.parse(text);
 }
 
 