Fix AI caller hanging on 'thinking...' indefinitely
- Add 30s timeout to all frontend fetch calls (safeFetch)
- Add 20s asyncio.timeout around lock+LLM in chat, ai-respond, auto-respond
- Reduce OpenRouter timeout from 60s to 25s
- Reduce Inworld TTS timeout from 60s to 25s
- Return graceful fallback responses on timeout instead of hanging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
111
backend/main.py
111
backend/main.py
@@ -1075,25 +1075,30 @@ async def chat(request: ChatRequest):
|
|||||||
session.add_message("user", request.text)
|
session.add_message("user", request.text)
|
||||||
session._research_task = asyncio.create_task(_background_research(request.text))
|
session._research_task = asyncio.create_task(_background_research(request.text))
|
||||||
|
|
||||||
async with _ai_response_lock:
|
try:
|
||||||
if _session_epoch != epoch:
|
async with asyncio.timeout(20):
|
||||||
raise HTTPException(409, "Call ended while waiting")
|
async with _ai_response_lock:
|
||||||
|
if _session_epoch != epoch:
|
||||||
|
raise HTTPException(409, "Call ended while waiting")
|
||||||
|
|
||||||
# Stop any playing caller audio so responses don't overlap
|
# Stop any playing caller audio so responses don't overlap
|
||||||
audio_service.stop_caller_audio()
|
audio_service.stop_caller_audio()
|
||||||
|
|
||||||
# Include conversation summary and show history for context
|
# Include conversation summary and show history for context
|
||||||
conversation_summary = session.get_conversation_summary()
|
conversation_summary = session.get_conversation_summary()
|
||||||
show_history = session.get_show_history()
|
show_history = session.get_show_history()
|
||||||
news_ctx, research_ctx = _build_news_context()
|
news_ctx, research_ctx = _build_news_context()
|
||||||
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
|
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
|
||||||
news_ctx, research_ctx)
|
news_ctx, research_ctx)
|
||||||
|
|
||||||
messages = _normalize_messages_for_llm(session.conversation[-10:])
|
messages = _normalize_messages_for_llm(session.conversation[-10:])
|
||||||
response = await llm_service.generate(
|
response = await llm_service.generate(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
system_prompt=system_prompt
|
system_prompt=system_prompt
|
||||||
)
|
)
|
||||||
|
except TimeoutError:
|
||||||
|
caller_name = session.caller["name"] if session.caller else "Caller"
|
||||||
|
return {"text": "Uh... hold on, I lost my train of thought.", "caller": caller_name, "voice_id": session.caller["voice"] if session.caller else ""}
|
||||||
|
|
||||||
# Discard if call changed while we were generating
|
# Discard if call changed while we were generating
|
||||||
if _session_epoch != epoch:
|
if _session_epoch != epoch:
|
||||||
@@ -1600,26 +1605,32 @@ async def _trigger_ai_auto_respond(accumulated_text: str):
|
|||||||
|
|
||||||
ai_name = session.caller["name"]
|
ai_name = session.caller["name"]
|
||||||
|
|
||||||
async with _ai_response_lock:
|
try:
|
||||||
if _session_epoch != epoch:
|
async with asyncio.timeout(20):
|
||||||
return # Call changed while waiting for lock
|
async with _ai_response_lock:
|
||||||
|
if _session_epoch != epoch:
|
||||||
|
return # Call changed while waiting for lock
|
||||||
|
|
||||||
print(f"[Auto-Respond] {ai_name} is jumping in...")
|
print(f"[Auto-Respond] {ai_name} is jumping in...")
|
||||||
session._last_ai_auto_respond = time.time()
|
session._last_ai_auto_respond = time.time()
|
||||||
audio_service.stop_caller_audio()
|
audio_service.stop_caller_audio()
|
||||||
broadcast_event("ai_status", {"text": f"{ai_name} is thinking..."})
|
broadcast_event("ai_status", {"text": f"{ai_name} is thinking..."})
|
||||||
|
|
||||||
conversation_summary = session.get_conversation_summary()
|
conversation_summary = session.get_conversation_summary()
|
||||||
show_history = session.get_show_history()
|
show_history = session.get_show_history()
|
||||||
news_ctx, research_ctx = _build_news_context()
|
news_ctx, research_ctx = _build_news_context()
|
||||||
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
|
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
|
||||||
news_ctx, research_ctx)
|
news_ctx, research_ctx)
|
||||||
|
|
||||||
messages = _normalize_messages_for_llm(session.conversation[-10:])
|
messages = _normalize_messages_for_llm(session.conversation[-10:])
|
||||||
response = await llm_service.generate(
|
response = await llm_service.generate(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
)
|
)
|
||||||
|
except TimeoutError:
|
||||||
|
print(f"[Auto-Respond] Timed out for {ai_name}")
|
||||||
|
broadcast_event("ai_done")
|
||||||
|
return
|
||||||
|
|
||||||
# Discard if call changed during generation
|
# Discard if call changed during generation
|
||||||
if _session_epoch != epoch:
|
if _session_epoch != epoch:
|
||||||
@@ -1677,23 +1688,27 @@ async def ai_respond():
|
|||||||
|
|
||||||
epoch = _session_epoch
|
epoch = _session_epoch
|
||||||
|
|
||||||
async with _ai_response_lock:
|
try:
|
||||||
if _session_epoch != epoch:
|
async with asyncio.timeout(20):
|
||||||
raise HTTPException(409, "Call ended while waiting")
|
async with _ai_response_lock:
|
||||||
|
if _session_epoch != epoch:
|
||||||
|
raise HTTPException(409, "Call ended while waiting")
|
||||||
|
|
||||||
audio_service.stop_caller_audio()
|
audio_service.stop_caller_audio()
|
||||||
|
|
||||||
conversation_summary = session.get_conversation_summary()
|
conversation_summary = session.get_conversation_summary()
|
||||||
show_history = session.get_show_history()
|
show_history = session.get_show_history()
|
||||||
news_ctx, research_ctx = _build_news_context()
|
news_ctx, research_ctx = _build_news_context()
|
||||||
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
|
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history,
|
||||||
news_ctx, research_ctx)
|
news_ctx, research_ctx)
|
||||||
|
|
||||||
messages = _normalize_messages_for_llm(session.conversation[-10:])
|
messages = _normalize_messages_for_llm(session.conversation[-10:])
|
||||||
response = await llm_service.generate(
|
response = await llm_service.generate(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
system_prompt=system_prompt
|
system_prompt=system_prompt
|
||||||
)
|
)
|
||||||
|
except TimeoutError:
|
||||||
|
return {"text": "Uh... sorry, I spaced out for a second there.", "caller": session.caller["name"], "voice_id": session.caller["voice"]}
|
||||||
|
|
||||||
if _session_epoch != epoch:
|
if _session_epoch != epoch:
|
||||||
raise HTTPException(409, "Call changed during response")
|
raise HTTPException(409, "Call changed during response")
|
||||||
@@ -1707,7 +1722,7 @@ async def ai_respond():
|
|||||||
ai_name = session.caller["name"]
|
ai_name = session.caller["name"]
|
||||||
session.add_message(f"ai_caller:{ai_name}", response)
|
session.add_message(f"ai_caller:{ai_name}", response)
|
||||||
|
|
||||||
# TTS
|
# TTS — outside the lock so other requests aren't blocked
|
||||||
audio_bytes = await generate_speech(response, session.caller["voice"], "none")
|
audio_bytes = await generate_speech(response, session.caller["voice"], "none")
|
||||||
|
|
||||||
if _session_epoch != epoch:
|
if _session_epoch != epoch:
|
||||||
|
|||||||
@@ -114,7 +114,7 @@ class LLMService:
|
|||||||
"""Call OpenRouter API with retry"""
|
"""Call OpenRouter API with retry"""
|
||||||
for attempt in range(2): # Try twice
|
for attempt in range(2): # Try twice
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
async with httpx.AsyncClient(timeout=25.0) as client:
|
||||||
response = await client.post(
|
response = await client.post(
|
||||||
"https://openrouter.ai/api/v1/chat/completions",
|
"https://openrouter.ai/api/v1/chat/completions",
|
||||||
headers={
|
headers={
|
||||||
|
|||||||
@@ -600,7 +600,7 @@ async def generate_speech_inworld(text: str, voice_id: str) -> tuple[np.ndarray,
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
async with httpx.AsyncClient(timeout=25.0) as client:
|
||||||
response = await client.post(url, json=payload, headers=headers)
|
response = await client.post(url, json=payload, headers=headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|||||||
@@ -17,17 +17,26 @@ let sounds = [];
|
|||||||
|
|
||||||
|
|
||||||
// --- Safe JSON parsing ---
|
// --- Safe JSON parsing ---
|
||||||
async function safeFetch(url, options = {}) {
|
async function safeFetch(url, options = {}, timeoutMs = 30000) {
|
||||||
const res = await fetch(url, options);
|
const controller = new AbortController();
|
||||||
if (!res.ok) {
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
||||||
|
try {
|
||||||
|
const res = await fetch(url, { ...options, signal: controller.signal });
|
||||||
|
clearTimeout(timer);
|
||||||
|
if (!res.ok) {
|
||||||
|
const text = await res.text();
|
||||||
|
let detail = text;
|
||||||
|
try { detail = JSON.parse(text).detail || text; } catch {}
|
||||||
|
throw new Error(detail);
|
||||||
|
}
|
||||||
const text = await res.text();
|
const text = await res.text();
|
||||||
let detail = text;
|
if (!text) return {};
|
||||||
try { detail = JSON.parse(text).detail || text; } catch {}
|
return JSON.parse(text);
|
||||||
throw new Error(detail);
|
} catch (err) {
|
||||||
|
clearTimeout(timer);
|
||||||
|
if (err.name === 'AbortError') throw new Error('Request timed out');
|
||||||
|
throw err;
|
||||||
}
|
}
|
||||||
const text = await res.text();
|
|
||||||
if (!text) return {};
|
|
||||||
return JSON.parse(text);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user