Fix research hanging: add timeouts, fix keyword extraction, cache failures

- Google News RSS returns 302: add follow_redirects and User-Agent header
- Cache failed headline fetches for 5min so they don't retry every call
- Add 8s timeout on background research tasks
- Fix keyword extraction: skip short texts, require 2+ proper nouns (excluding
  personal names), increase min word length to 6, add radio show filler to stop words
- Stops garbage searches like "Megan welcome" and "sounds thats youre"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-06 21:25:31 -07:00
parent b3fb3b1127
commit 69b7078142
2 changed files with 49 additions and 20 deletions

View File

@@ -661,15 +661,19 @@ async def _background_research(text: str):
if query.lower() in session.research_notes:
return
try:
results = await news_service.search_topic(query)
if results:
session.research_notes[query.lower()] = results
print(f"[Research] Found {len(results)} results for '{query}'")
async with asyncio.timeout(8):
results = await news_service.search_topic(query)
if results:
session.research_notes[query.lower()] = results
print(f"[Research] Found {len(results)} results for '{query}'")
except TimeoutError:
print(f"[Research] Timed out for '{query}'")
except Exception as e:
print(f"[Research] Error: {e}")
def _build_news_context() -> tuple[str, str]:
"""Build context from cached news/research only — never does network calls."""
news_context = ""
if session.news_headlines:
news_context = news_service.format_headlines_for_prompt(session.news_headlines[:6])