Fix research hanging: add timeouts, fix keyword extraction, cache failures

- Google News RSS returns 302: add follow_redirects and User-Agent header
- Cache failed headline fetches for 5min so they don't retry every call
- Add 8s timeout on background research tasks
- Fix keyword extraction: skip short texts, require 2+ proper nouns (excluding
  personal names), increase min word length to 6, add radio show filler to stop words
- Stops garbage searches like "Megan welcome" and "sounds thats youre"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-06 21:25:31 -07:00
parent b3fb3b1127
commit 69b7078142
2 changed files with 49 additions and 20 deletions

View File

@@ -661,15 +661,19 @@ async def _background_research(text: str):
if query.lower() in session.research_notes:
return
try:
results = await news_service.search_topic(query)
if results:
session.research_notes[query.lower()] = results
print(f"[Research] Found {len(results)} results for '{query}'")
async with asyncio.timeout(8):
results = await news_service.search_topic(query)
if results:
session.research_notes[query.lower()] = results
print(f"[Research] Found {len(results)} results for '{query}'")
except TimeoutError:
print(f"[Research] Timed out for '{query}'")
except Exception as e:
print(f"[Research] Error: {e}")
def _build_news_context() -> tuple[str, str]:
"""Build context from cached news/research only — never does network calls."""
news_context = ""
if session.news_headlines:
news_context = news_service.format_headlines_for_prompt(session.news_headlines[:6])