Switch news service from Google News to local SearXNG

- Use local SearXNG at localhost:8888 instead of Google News RSS
- No more 302 redirects or blocked requests — local is fast and reliable
- 5s timeout on all SearXNG requests
- Removed async locks (no need to serialize requests to a local service)
- Re-enabled research and headlines

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
commit e45ba2617a
parent c03f46ea96
date   2026-02-06 21:34:45 -07:00

2 changed files with 52 additions and 68 deletions
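The new code path depends on SearXNG's JSON API being reachable at localhost:8888. Below is a minimal smoke test for that endpoint (hypothetical, not part of this commit; it mirrors the URL and params the diff uses). One caveat worth knowing: SearXNG only honors format=json when "json" is listed under search.formats in its settings.yml; otherwise the request is rejected (typically with a 403).

import httpx

SEARXNG_URL = "http://localhost:8888"

# Same request shape as the new get_headlines()/search_topic() code.
resp = httpx.get(
    f"{SEARXNG_URL}/search",
    params={"q": "news", "format": "json", "categories": "news"},
    timeout=5.0,
)
resp.raise_for_status()
for result in resp.json().get("results", [])[:3]:
    print(result.get("title"), result.get("engines"))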


@@ -921,9 +921,8 @@ async def start_call(caller_key: str):
     session.start_call(caller_key)
     caller = session.caller  # This generates the background if needed
 
-    # Headlines fetch disabled — Google News RSS blocks automated requests
-    # if not session.news_headlines:
-    #     asyncio.create_task(_fetch_session_headlines())
+    if not session.news_headlines:
+        asyncio.create_task(_fetch_session_headlines())
 
     return {
         "status": "connected",
@@ -1078,8 +1077,7 @@ async def chat(request: ChatRequest):
     epoch = _session_epoch
     session.add_message("user", request.text)
 
-    # Research disabled — was causing hangs and producing garbage searches
-    # session._research_task = asyncio.create_task(_background_research(request.text))
+    session._research_task = asyncio.create_task(_background_research(request.text))
 
     try:
         async with asyncio.timeout(20):
@@ -1676,8 +1674,7 @@ async def _trigger_ai_auto_respond(accumulated_text: str):
     broadcast_event("ai_done")
 
-    # Research disabled — was causing hangs
-    # session._research_task = asyncio.create_task(_background_research(accumulated_text))
+    session._research_task = asyncio.create_task(_background_research(accumulated_text))
 
     # Also stream to active real caller so they hear the AI
     if session.active_real_caller:
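Note that both re-enabled call sites assign the task to session._research_task instead of calling asyncio.create_task() bare. That matters beyond cancellation bookkeeping: the event loop keeps only weak references to tasks, so a fire-and-forget task with no strong reference can be garbage-collected before it finishes. A minimal sketch of the pattern (Session and _background_research here are hypothetical stand-ins):

import asyncio

async def _background_research(text: str) -> None:
    await asyncio.sleep(0)  # stand-in for the real research work

class Session:
    _research_task: asyncio.Task | None = None

session = Session()

async def handle(text: str) -> None:
    # Keeping a strong reference prevents the task from being
    # garbage-collected mid-flight (asyncio holds only weak refs).
    session._research_task = asyncio.create_task(_background_research(text))
    await asyncio.sleep(0.01)  # let the background task run in this demo

asyncio.run(handle("example caller text"))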


@@ -1,14 +1,14 @@
"""News service for current events awareness in AI callers""" """News service using local SearXNG for current events awareness in AI callers"""
import asyncio import asyncio
import time import time
import re import re
from dataclasses import dataclass from dataclasses import dataclass
from urllib.parse import quote_plus
from xml.etree import ElementTree
import httpx import httpx
SEARXNG_URL = "http://localhost:8888"
@dataclass @dataclass
class NewsItem: class NewsItem:
@@ -22,42 +22,37 @@ class NewsService:
         self._client: httpx.AsyncClient | None = None
         self._headlines_cache: list[NewsItem] = []
         self._headlines_ts: float = 0
-        self._headlines_lock = asyncio.Lock()
         self._search_cache: dict[str, tuple[float, list[NewsItem]]] = {}
-        self._search_lock = asyncio.Lock()
 
     @property
     def client(self) -> httpx.AsyncClient:
         if self._client is None or self._client.is_closed:
-            self._client = httpx.AsyncClient(timeout=10.0)
+            self._client = httpx.AsyncClient(timeout=5.0)
         return self._client
 
     async def get_headlines(self) -> list[NewsItem]:
-        async with self._headlines_lock:
-            # Cache for 30min on success, 5min on failure (avoid hammering)
-            if time.time() - self._headlines_ts < (1800 if self._headlines_cache else 300):
-                return self._headlines_cache
-            try:
-                resp = await self.client.get(
-                    "https://news.google.com/rss",
-                    follow_redirects=True,
-                    headers={"User-Agent": "Mozilla/5.0"}
-                )
-                resp.raise_for_status()
-                items = self._parse_rss(resp.text, max_items=10)
-                self._headlines_cache = items
-                self._headlines_ts = time.time()
-                return items
-            except Exception as e:
-                print(f"[News] Headlines fetch failed: {e}")
-                self._headlines_ts = time.time()  # Don't retry immediately
-                return self._headlines_cache
+        # Cache for 30min
+        if self._headlines_cache and time.time() - self._headlines_ts < 1800:
+            return self._headlines_cache
+        try:
+            resp = await self.client.get(
+                f"{SEARXNG_URL}/search",
+                params={"q": "news", "format": "json", "categories": "news"},
+            )
+            resp.raise_for_status()
+            items = self._parse_searxng(resp.json(), max_items=10)
+            self._headlines_cache = items
+            self._headlines_ts = time.time()
+            return items
+        except Exception as e:
+            print(f"[News] Headlines fetch failed: {e}")
+            self._headlines_ts = time.time()
+            return self._headlines_cache
 
     async def search_topic(self, query: str) -> list[NewsItem]:
         cache_key = query.lower()
-        async with self._search_lock:
-            if cache_key in self._search_cache:
-                ts, items = self._search_cache[cache_key]
-                if time.time() - ts < 600:
+        if cache_key in self._search_cache:
+            ts, items = self._search_cache[cache_key]
+            if time.time() - ts < 600:
@@ -69,38 +64,31 @@ class NewsService:
                 del self._search_cache[oldest_key]
 
         try:
-            encoded = quote_plus(query)
-            url = f"https://news.google.com/rss/search?q={encoded}&hl=en-US&gl=US&ceid=US:en"
-            resp = await self.client.get(url, follow_redirects=True, headers={"User-Agent": "Mozilla/5.0"})
+            resp = await self.client.get(
+                f"{SEARXNG_URL}/search",
+                params={"q": query, "format": "json", "categories": "news"},
+            )
             resp.raise_for_status()
-            items = self._parse_rss(resp.text, max_items=5)
-            async with self._search_lock:
-                self._search_cache[cache_key] = (time.time(), items)
+            items = self._parse_searxng(resp.json(), max_items=5)
+            self._search_cache[cache_key] = (time.time(), items)
             return items
         except Exception as e:
             print(f"[News] Search failed for '{query}': {e}")
-            async with self._search_lock:
-                if cache_key in self._search_cache:
-                    return self._search_cache[cache_key][1]
+            if cache_key in self._search_cache:
+                return self._search_cache[cache_key][1]
             return []
 
-    def _parse_rss(self, xml_text: str, max_items: int = 10) -> list[NewsItem]:
+    def _parse_searxng(self, data: dict, max_items: int = 10) -> list[NewsItem]:
         items = []
-        try:
-            root = ElementTree.fromstring(xml_text)
-            for item_el in root.iter("item"):
-                if len(items) >= max_items:
-                    break
-                title = item_el.findtext("title", "").strip()
-                source_el = item_el.find("source")
-                source = source_el.text.strip() if source_el is not None and source_el.text else ""
-                published = item_el.findtext("pubDate", "").strip()
-                if title:
-                    items.append(NewsItem(title=title, source=source, published=published))
-        except ElementTree.ParseError as e:
-            print(f"[News] RSS parse error: {e}")
+        for result in data.get("results", [])[:max_items]:
+            title = result.get("title", "").strip()
+            if not title:
+                continue
+            # Extract source from engines list or metadata
+            engines = result.get("engines", [])
+            source = engines[0] if engines else ""
+            published = result.get("publishedDate", "")
+            items.append(NewsItem(title=title, source=source, published=published))
         return items
 
     def format_headlines_for_prompt(self, items: list[NewsItem]) -> str:
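The new _parse_searxng assumes the usual shape of a SearXNG JSON response: a top-level "results" list whose entries carry "title", "engines", and, for news results, "publishedDate". An illustrative round trip through the same field logic (the payload below is a fabricated sample, not real output):

sample = {
    "results": [
        {"title": "Example headline", "engines": ["bing news"],
         "publishedDate": "2026-02-06T12:00:00"},
        {"title": "", "engines": []},  # empty title: skipped by the parser
    ]
}

for result in sample.get("results", [])[:10]:
    title = result.get("title", "").strip()
    if not title:
        continue
    engines = result.get("engines", [])
    source = engines[0] if engines else ""
    published = result.get("publishedDate", "")
    print(f"{title} ({source}) {published}")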
@@ -150,7 +138,7 @@ STOP_WORDS = {
     # Radio show filler
     "welcome", "thanks", "thank", "show", "roost", "luke", "whats",
     "youre", "thats", "heres", "theyre", "ive", "youve", "weve",
-    "sounds", "sounds", "listen", "hear", "heard", "happen", "happened",
+    "sounds", "listen", "hear", "heard", "happen", "happened",
     "happening", "absolutely", "definitely", "exactly", "totally",
     "pretty", "little", "whole", "every", "point", "sense", "real",
     "great", "cool", "awesome", "amazing", "crazy", "weird", "funny",
@@ -170,7 +158,6 @@ def extract_keywords(text: str, max_keywords: int = 3) -> list[str]:
     keywords = []
 
     # Only look for proper nouns that are likely real topics (not caller names)
-    # Skip first few words (usually greetings) and single proper nouns (usually names)
     proper_nouns = []
     for i, word in enumerate(words):
         clean = re.sub(r'[^\w]', '', word)
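For context, the heuristic this function builds on: capitalized words that appear mid-sentence and survive the STOP_WORDS filter are treated as topic candidates. A simplified standalone sketch (hypothetical; the real extract_keywords applies more rules, including the caller-name filtering noted in the comment above):

import re

STOP_WORDS = {"welcome", "thanks", "show", "sounds", "listen"}  # small excerpt

def sketch_keywords(text: str, max_keywords: int = 3) -> list[str]:
    words = text.split()
    keywords = []
    for i, word in enumerate(words):
        clean = re.sub(r'[^\w]', '', word)
        # Sentence-initial words are skipped (often greetings); remaining
        # capitalized words that are not stop words become candidates.
        if i > 0 and clean.istitle() and clean.lower() not in STOP_WORDS:
            keywords.append(clean.lower())
        if len(keywords) >= max_keywords:
            break
    return keywords

print(sketch_keywords("Thanks for calling about the Artemis launch from Florida"))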