Switch news service from Google News to local SearXNG
- Use local SearXNG at localhost:8888 instead of Google News RSS
- No more 302 redirects or blocked requests — local is fast and reliable
- 5s timeout on all SearXNG requests
- Removed async locks (no contention needed for local service)
- Re-enabled research and headlines

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -921,9 +921,8 @@ async def start_call(caller_key: str):
|
|||||||
session.start_call(caller_key)
|
session.start_call(caller_key)
|
||||||
caller = session.caller # This generates the background if needed
|
caller = session.caller # This generates the background if needed
|
||||||
|
|
||||||
# Headlines fetch disabled — Google News RSS blocks automated requests
|
if not session.news_headlines:
|
||||||
# if not session.news_headlines:
|
asyncio.create_task(_fetch_session_headlines())
|
||||||
# asyncio.create_task(_fetch_session_headlines())
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": "connected",
|
"status": "connected",
|
||||||
@@ -1078,8 +1077,7 @@ async def chat(request: ChatRequest):
|
|||||||
|
|
||||||
epoch = _session_epoch
|
epoch = _session_epoch
|
||||||
session.add_message("user", request.text)
|
session.add_message("user", request.text)
|
||||||
# Research disabled — was causing hangs and producing garbage searches
|
session._research_task = asyncio.create_task(_background_research(request.text))
|
||||||
# session._research_task = asyncio.create_task(_background_research(request.text))
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with asyncio.timeout(20):
|
async with asyncio.timeout(20):
|
||||||
@@ -1676,8 +1674,7 @@ async def _trigger_ai_auto_respond(accumulated_text: str):
|
|||||||
|
|
||||||
broadcast_event("ai_done")
|
broadcast_event("ai_done")
|
||||||
|
|
||||||
# Research disabled — was causing hangs
|
session._research_task = asyncio.create_task(_background_research(accumulated_text))
|
||||||
# session._research_task = asyncio.create_task(_background_research(accumulated_text))
|
|
||||||
|
|
||||||
# Also stream to active real caller so they hear the AI
|
# Also stream to active real caller so they hear the AI
|
||||||
if session.active_real_caller:
|
if session.active_real_caller:
|
||||||
|
|||||||
@@ -1,14 +1,14 @@
|
|||||||
"""News service for current events awareness in AI callers"""
|
"""News service using local SearXNG for current events awareness in AI callers"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from urllib.parse import quote_plus
|
|
||||||
from xml.etree import ElementTree
|
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
SEARXNG_URL = "http://localhost:8888"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class NewsItem:
|
class NewsItem:
|
||||||
@@ -22,85 +22,73 @@ class NewsService:
|
|||||||
self._client: httpx.AsyncClient | None = None
|
self._client: httpx.AsyncClient | None = None
|
||||||
self._headlines_cache: list[NewsItem] = []
|
self._headlines_cache: list[NewsItem] = []
|
||||||
self._headlines_ts: float = 0
|
self._headlines_ts: float = 0
|
||||||
self._headlines_lock = asyncio.Lock()
|
|
||||||
self._search_cache: dict[str, tuple[float, list[NewsItem]]] = {}
|
self._search_cache: dict[str, tuple[float, list[NewsItem]]] = {}
|
||||||
self._search_lock = asyncio.Lock()
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def client(self) -> httpx.AsyncClient:
|
def client(self) -> httpx.AsyncClient:
|
||||||
if self._client is None or self._client.is_closed:
|
if self._client is None or self._client.is_closed:
|
||||||
self._client = httpx.AsyncClient(timeout=10.0)
|
self._client = httpx.AsyncClient(timeout=5.0)
|
||||||
return self._client
|
return self._client
|
||||||
|
|
||||||
async def get_headlines(self) -> list[NewsItem]:
|
async def get_headlines(self) -> list[NewsItem]:
|
||||||
async with self._headlines_lock:
|
# Cache for 30min
|
||||||
# Cache for 30min on success, 5min on failure (avoid hammering)
|
if self._headlines_cache and time.time() - self._headlines_ts < 1800:
|
||||||
if time.time() - self._headlines_ts < (1800 if self._headlines_cache else 300):
|
return self._headlines_cache
|
||||||
return self._headlines_cache
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resp = await self.client.get(
|
resp = await self.client.get(
|
||||||
"https://news.google.com/rss",
|
f"{SEARXNG_URL}/search",
|
||||||
follow_redirects=True,
|
params={"q": "news", "format": "json", "categories": "news"},
|
||||||
headers={"User-Agent": "Mozilla/5.0"}
|
)
|
||||||
)
|
resp.raise_for_status()
|
||||||
resp.raise_for_status()
|
items = self._parse_searxng(resp.json(), max_items=10)
|
||||||
items = self._parse_rss(resp.text, max_items=10)
|
self._headlines_cache = items
|
||||||
self._headlines_cache = items
|
self._headlines_ts = time.time()
|
||||||
self._headlines_ts = time.time()
|
return items
|
||||||
return items
|
except Exception as e:
|
||||||
except Exception as e:
|
print(f"[News] Headlines fetch failed: {e}")
|
||||||
print(f"[News] Headlines fetch failed: {e}")
|
self._headlines_ts = time.time()
|
||||||
self._headlines_ts = time.time() # Don't retry immediately
|
return self._headlines_cache
|
||||||
return self._headlines_cache
|
|
||||||
|
|
||||||
async def search_topic(self, query: str) -> list[NewsItem]:
|
async def search_topic(self, query: str) -> list[NewsItem]:
|
||||||
cache_key = query.lower()
|
cache_key = query.lower()
|
||||||
|
|
||||||
async with self._search_lock:
|
if cache_key in self._search_cache:
|
||||||
if cache_key in self._search_cache:
|
ts, items = self._search_cache[cache_key]
|
||||||
ts, items = self._search_cache[cache_key]
|
if time.time() - ts < 600:
|
||||||
if time.time() - ts < 600:
|
return items
|
||||||
return items
|
|
||||||
|
|
||||||
# Evict oldest when cache too large
|
# Evict oldest when cache too large
|
||||||
if len(self._search_cache) > 50:
|
if len(self._search_cache) > 50:
|
||||||
oldest_key = min(self._search_cache, key=lambda k: self._search_cache[k][0])
|
oldest_key = min(self._search_cache, key=lambda k: self._search_cache[k][0])
|
||||||
del self._search_cache[oldest_key]
|
del self._search_cache[oldest_key]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
encoded = quote_plus(query)
|
resp = await self.client.get(
|
||||||
url = f"https://news.google.com/rss/search?q={encoded}&hl=en-US&gl=US&ceid=US:en"
|
f"{SEARXNG_URL}/search",
|
||||||
resp = await self.client.get(url, follow_redirects=True, headers={"User-Agent": "Mozilla/5.0"})
|
params={"q": query, "format": "json", "categories": "news"},
|
||||||
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
items = self._parse_rss(resp.text, max_items=5)
|
items = self._parse_searxng(resp.json(), max_items=5)
|
||||||
|
self._search_cache[cache_key] = (time.time(), items)
|
||||||
async with self._search_lock:
|
|
||||||
self._search_cache[cache_key] = (time.time(), items)
|
|
||||||
|
|
||||||
return items
|
return items
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[News] Search failed for '{query}': {e}")
|
print(f"[News] Search failed for '{query}': {e}")
|
||||||
async with self._search_lock:
|
if cache_key in self._search_cache:
|
||||||
if cache_key in self._search_cache:
|
return self._search_cache[cache_key][1]
|
||||||
return self._search_cache[cache_key][1]
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _parse_rss(self, xml_text: str, max_items: int = 10) -> list[NewsItem]:
|
def _parse_searxng(self, data: dict, max_items: int = 10) -> list[NewsItem]:
|
||||||
items = []
|
items = []
|
||||||
try:
|
for result in data.get("results", [])[:max_items]:
|
||||||
root = ElementTree.fromstring(xml_text)
|
title = result.get("title", "").strip()
|
||||||
for item_el in root.iter("item"):
|
if not title:
|
||||||
if len(items) >= max_items:
|
continue
|
||||||
break
|
# Extract source from engines list or metadata
|
||||||
title = item_el.findtext("title", "").strip()
|
engines = result.get("engines", [])
|
||||||
source_el = item_el.find("source")
|
source = engines[0] if engines else ""
|
||||||
source = source_el.text.strip() if source_el is not None and source_el.text else ""
|
published = result.get("publishedDate", "")
|
||||||
published = item_el.findtext("pubDate", "").strip()
|
items.append(NewsItem(title=title, source=source, published=published))
|
||||||
if title:
|
|
||||||
items.append(NewsItem(title=title, source=source, published=published))
|
|
||||||
except ElementTree.ParseError as e:
|
|
||||||
print(f"[News] RSS parse error: {e}")
|
|
||||||
return items
|
return items
|
||||||
|
|
||||||
def format_headlines_for_prompt(self, items: list[NewsItem]) -> str:
|
def format_headlines_for_prompt(self, items: list[NewsItem]) -> str:
|
||||||
@@ -150,7 +138,7 @@ STOP_WORDS = {
|
|||||||
# Radio show filler
|
# Radio show filler
|
||||||
"welcome", "thanks", "thank", "show", "roost", "luke", "whats",
|
"welcome", "thanks", "thank", "show", "roost", "luke", "whats",
|
||||||
"youre", "thats", "heres", "theyre", "ive", "youve", "weve",
|
"youre", "thats", "heres", "theyre", "ive", "youve", "weve",
|
||||||
"sounds", "sounds", "listen", "hear", "heard", "happen", "happened",
|
"sounds", "listen", "hear", "heard", "happen", "happened",
|
||||||
"happening", "absolutely", "definitely", "exactly", "totally",
|
"happening", "absolutely", "definitely", "exactly", "totally",
|
||||||
"pretty", "little", "whole", "every", "point", "sense", "real",
|
"pretty", "little", "whole", "every", "point", "sense", "real",
|
||||||
"great", "cool", "awesome", "amazing", "crazy", "weird", "funny",
|
"great", "cool", "awesome", "amazing", "crazy", "weird", "funny",
|
||||||
@@ -170,7 +158,6 @@ def extract_keywords(text: str, max_keywords: int = 3) -> list[str]:
|
|||||||
keywords = []
|
keywords = []
|
||||||
|
|
||||||
# Only look for proper nouns that are likely real topics (not caller names)
|
# Only look for proper nouns that are likely real topics (not caller names)
|
||||||
# Skip first few words (usually greetings) and single proper nouns (usually names)
|
|
||||||
proper_nouns = []
|
proper_nouns = []
|
||||||
for i, word in enumerate(words):
|
for i, word in enumerate(words):
|
||||||
clean = re.sub(r'[^\w]', '', word)
|
clean = re.sub(r'[^\w]', '', word)
|
||||||
|
|||||||
Reference in New Issue
Block a user