Fix AI caller hanging on 'thinking...' indefinitely

- Add a 30s default timeout (AbortController-based, overridable per call) to all frontend fetch calls made through safeFetch
- Add a 20s asyncio.timeout around AI response lock acquisition + LLM generation in chat, ai-respond, and auto-respond
- Reduce OpenRouter timeout from 60s to 25s
- Reduce Inworld TTS timeout from 60s to 25s
- Return graceful fallback responses on timeout instead of hanging (auto-respond logs the timeout and broadcasts ai_done instead)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-06 21:16:15 -07:00
parent cac80a4b52
commit b3fb3b1127
4 changed files with 83 additions and 59 deletions

View File

@@ -1075,25 +1075,30 @@ async def chat(request: ChatRequest):
session.add_message("user", request.text) session.add_message("user", request.text)
session._research_task = asyncio.create_task(_background_research(request.text)) session._research_task = asyncio.create_task(_background_research(request.text))
async with _ai_response_lock: try:
if _session_epoch != epoch: async with asyncio.timeout(20):
raise HTTPException(409, "Call ended while waiting") async with _ai_response_lock:
if _session_epoch != epoch:
raise HTTPException(409, "Call ended while waiting")
# Stop any playing caller audio so responses don't overlap # Stop any playing caller audio so responses don't overlap
audio_service.stop_caller_audio() audio_service.stop_caller_audio()
# Include conversation summary and show history for context # Include conversation summary and show history for context
conversation_summary = session.get_conversation_summary() conversation_summary = session.get_conversation_summary()
show_history = session.get_show_history() show_history = session.get_show_history()
news_ctx, research_ctx = _build_news_context() news_ctx, research_ctx = _build_news_context()
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history, system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
news_ctx, research_ctx) news_ctx, research_ctx)
messages = _normalize_messages_for_llm(session.conversation[-10:]) messages = _normalize_messages_for_llm(session.conversation[-10:])
response = await llm_service.generate( response = await llm_service.generate(
messages=messages, messages=messages,
system_prompt=system_prompt system_prompt=system_prompt
) )
except TimeoutError:
caller_name = session.caller["name"] if session.caller else "Caller"
return {"text": "Uh... hold on, I lost my train of thought.", "caller": caller_name, "voice_id": session.caller["voice"] if session.caller else ""}
# Discard if call changed while we were generating # Discard if call changed while we were generating
if _session_epoch != epoch: if _session_epoch != epoch:
@@ -1600,26 +1605,32 @@ async def _trigger_ai_auto_respond(accumulated_text: str):
ai_name = session.caller["name"] ai_name = session.caller["name"]
async with _ai_response_lock: try:
if _session_epoch != epoch: async with asyncio.timeout(20):
return # Call changed while waiting for lock async with _ai_response_lock:
if _session_epoch != epoch:
return # Call changed while waiting for lock
print(f"[Auto-Respond] {ai_name} is jumping in...") print(f"[Auto-Respond] {ai_name} is jumping in...")
session._last_ai_auto_respond = time.time() session._last_ai_auto_respond = time.time()
audio_service.stop_caller_audio() audio_service.stop_caller_audio()
broadcast_event("ai_status", {"text": f"{ai_name} is thinking..."}) broadcast_event("ai_status", {"text": f"{ai_name} is thinking..."})
conversation_summary = session.get_conversation_summary() conversation_summary = session.get_conversation_summary()
show_history = session.get_show_history() show_history = session.get_show_history()
news_ctx, research_ctx = _build_news_context() news_ctx, research_ctx = _build_news_context()
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history, system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
news_ctx, research_ctx) news_ctx, research_ctx)
messages = _normalize_messages_for_llm(session.conversation[-10:]) messages = _normalize_messages_for_llm(session.conversation[-10:])
response = await llm_service.generate( response = await llm_service.generate(
messages=messages, messages=messages,
system_prompt=system_prompt, system_prompt=system_prompt,
) )
except TimeoutError:
print(f"[Auto-Respond] Timed out for {ai_name}")
broadcast_event("ai_done")
return
# Discard if call changed during generation # Discard if call changed during generation
if _session_epoch != epoch: if _session_epoch != epoch:
@@ -1677,23 +1688,27 @@ async def ai_respond():
epoch = _session_epoch epoch = _session_epoch
async with _ai_response_lock: try:
if _session_epoch != epoch: async with asyncio.timeout(20):
raise HTTPException(409, "Call ended while waiting") async with _ai_response_lock:
if _session_epoch != epoch:
raise HTTPException(409, "Call ended while waiting")
audio_service.stop_caller_audio() audio_service.stop_caller_audio()
conversation_summary = session.get_conversation_summary() conversation_summary = session.get_conversation_summary()
show_history = session.get_show_history() show_history = session.get_show_history()
news_ctx, research_ctx = _build_news_context() news_ctx, research_ctx = _build_news_context()
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history, system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
news_ctx, research_ctx) news_ctx, research_ctx)
messages = _normalize_messages_for_llm(session.conversation[-10:]) messages = _normalize_messages_for_llm(session.conversation[-10:])
response = await llm_service.generate( response = await llm_service.generate(
messages=messages, messages=messages,
system_prompt=system_prompt system_prompt=system_prompt
) )
except TimeoutError:
return {"text": "Uh... sorry, I spaced out for a second there.", "caller": session.caller["name"], "voice_id": session.caller["voice"]}
if _session_epoch != epoch: if _session_epoch != epoch:
raise HTTPException(409, "Call changed during response") raise HTTPException(409, "Call changed during response")
@@ -1707,7 +1722,7 @@ async def ai_respond():
ai_name = session.caller["name"] ai_name = session.caller["name"]
session.add_message(f"ai_caller:{ai_name}", response) session.add_message(f"ai_caller:{ai_name}", response)
# TTS # TTS — outside the lock so other requests aren't blocked
audio_bytes = await generate_speech(response, session.caller["voice"], "none") audio_bytes = await generate_speech(response, session.caller["voice"], "none")
if _session_epoch != epoch: if _session_epoch != epoch:

View File

@@ -114,7 +114,7 @@ class LLMService:
"""Call OpenRouter API with retry""" """Call OpenRouter API with retry"""
for attempt in range(2): # Try twice for attempt in range(2): # Try twice
try: try:
async with httpx.AsyncClient(timeout=60.0) as client: async with httpx.AsyncClient(timeout=25.0) as client:
response = await client.post( response = await client.post(
"https://openrouter.ai/api/v1/chat/completions", "https://openrouter.ai/api/v1/chat/completions",
headers={ headers={

View File

@@ -600,7 +600,7 @@ async def generate_speech_inworld(text: str, voice_id: str) -> tuple[np.ndarray,
}, },
} }
async with httpx.AsyncClient(timeout=60.0) as client: async with httpx.AsyncClient(timeout=25.0) as client:
response = await client.post(url, json=payload, headers=headers) response = await client.post(url, json=payload, headers=headers)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()

View File

@@ -17,17 +17,26 @@ let sounds = [];
// --- Safe JSON parsing --- // --- Safe JSON parsing ---
async function safeFetch(url, options = {}) { async function safeFetch(url, options = {}, timeoutMs = 30000) {
const res = await fetch(url, options); const controller = new AbortController();
if (!res.ok) { const timer = setTimeout(() => controller.abort(), timeoutMs);
try {
const res = await fetch(url, { ...options, signal: controller.signal });
clearTimeout(timer);
if (!res.ok) {
const text = await res.text();
let detail = text;
try { detail = JSON.parse(text).detail || text; } catch {}
throw new Error(detail);
}
const text = await res.text(); const text = await res.text();
let detail = text; if (!text) return {};
try { detail = JSON.parse(text).detail || text; } catch {} return JSON.parse(text);
throw new Error(detail); } catch (err) {
clearTimeout(timer);
if (err.name === 'AbortError') throw new Error('Request timed out');
throw err;
} }
const text = await res.text();
if (!text) return {};
return JSON.parse(text);
} }