# Reconstructed from git format-patch (provenance preserved):
#   From e28579f90935fa7e118d7c8d903f4fda616c0247 Mon Sep 17 00:00:00 2001
#   From: tcpsyn
#   Date: Fri, 6 Feb 2026 00:18:40 -0700
#   Subject: [PATCH] Add NewsService for current events awareness
#   Co-Authored-By: Claude Opus 4.6
#   backend/services/news.py | 178 insertions(+) (new file)
"""News service for current events awareness in AI callers"""

import asyncio
import logging
import re
import time
from dataclasses import dataclass
from urllib.parse import quote_plus
from xml.etree import ElementTree

import httpx

logger = logging.getLogger(__name__)

# Cache freshness windows, in seconds.
_HEADLINES_TTL = 1800  # 30 minutes for the front-page feed
_SEARCH_TTL = 600      # 10 minutes per search query
_SEARCH_CACHE_MAX = 50  # evict oldest entry beyond this many queries

# Strips every non-word character from a token; compiled once because
# extract_keywords applies it to every word of the input text.
_NON_WORD_RE = re.compile(r"[^\w]")


@dataclass
class NewsItem:
    """One headline parsed out of an RSS feed."""
    # Headline text (always non-empty; titleless items are skipped).
    title: str
    # Publisher name from <source>; "" when the feed omits it.
    source: str
    # Raw RSS <pubDate> string; not parsed into a datetime.
    published: str


class NewsService:
    """Fetches and caches Google News RSS headlines and topic searches.

    All network/parse failures are caught and logged; callers always get
    a list back (possibly stale, possibly empty) and never an exception.
    """

    def __init__(self) -> None:
        self._client: httpx.AsyncClient | None = None
        self._headlines_cache: list[NewsItem] = []
        self._headlines_ts: float = 0.0
        self._headlines_lock = asyncio.Lock()
        # query (lowercased) -> (fetch timestamp, items)
        self._search_cache: dict[str, tuple[float, list[NewsItem]]] = {}
        self._search_lock = asyncio.Lock()

    @property
    def client(self) -> httpx.AsyncClient:
        """Lazily create (or re-create after close) the shared HTTP client."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(timeout=10.0)
        return self._client

    async def get_headlines(self) -> list[NewsItem]:
        """Return up to 10 top headlines, cached for 30 minutes.

        The lock is held across the fetch deliberately so concurrent
        callers wait for one request instead of issuing duplicates.
        On failure the stale cache (if any) is returned, else [].
        """
        async with self._headlines_lock:
            if self._headlines_cache and time.time() - self._headlines_ts < _HEADLINES_TTL:
                return self._headlines_cache

            try:
                resp = await self.client.get("https://news.google.com/rss")
                resp.raise_for_status()
                items = self._parse_rss(resp.text, max_items=10)
                self._headlines_cache = items
                self._headlines_ts = time.time()
                return items
            except Exception as e:
                logger.warning("[News] Headlines fetch failed: %s", e)
                # Degrade to stale data rather than nothing.
                if self._headlines_cache:
                    return self._headlines_cache
                return []

    async def search_topic(self, query: str) -> list[NewsItem]:
        """Search Google News for *query*; results cached 10 minutes per query.

        On failure, returns the stale cached result for the query if one
        exists, otherwise []. Unlike get_headlines, the lock is NOT held
        across the fetch, so duplicate concurrent searches are possible
        but callers never serialize on the network.
        """
        cache_key = query.lower()

        async with self._search_lock:
            cached = self._search_cache.get(cache_key)
            if cached is not None:
                ts, items = cached
                if time.time() - ts < _SEARCH_TTL:
                    return items

            # Evict oldest when cache too large.
            if len(self._search_cache) > _SEARCH_CACHE_MAX:
                oldest_key = min(self._search_cache, key=lambda k: self._search_cache[k][0])
                del self._search_cache[oldest_key]

        try:
            encoded = quote_plus(query)
            url = f"https://news.google.com/rss/search?q={encoded}&hl=en-US&gl=US&ceid=US:en"
            resp = await self.client.get(url)
            resp.raise_for_status()
            items = self._parse_rss(resp.text, max_items=5)

            async with self._search_lock:
                self._search_cache[cache_key] = (time.time(), items)

            return items
        except Exception as e:
            logger.warning("[News] Search failed for '%s': %s", query, e)
            # Serve a stale entry (past its TTL) in preference to nothing.
            async with self._search_lock:
                if cache_key in self._search_cache:
                    return self._search_cache[cache_key][1]
            return []

    def _parse_rss(self, xml_text: str, max_items: int = 10) -> list[NewsItem]:
        """Parse RSS XML into at most *max_items* NewsItems.

        Items with no <title> are skipped. A malformed document returns
        whatever was collected before the error (i.e. []).
        """
        items: list[NewsItem] = []
        try:
            root = ElementTree.fromstring(xml_text)
            for item_el in root.iter("item"):
                if len(items) >= max_items:
                    break
                title = item_el.findtext("title", "").strip()
                source_el = item_el.find("source")
                source = source_el.text.strip() if source_el is not None and source_el.text else ""
                published = item_el.findtext("pubDate", "").strip()
                if title:
                    items.append(NewsItem(title=title, source=source, published=published))
        except ElementTree.ParseError as e:
            logger.warning("[News] RSS parse error: %s", e)
        return items

    def format_headlines_for_prompt(self, items: list[NewsItem]) -> str:
        """Render items as a bulleted list suitable for an LLM prompt."""
        lines = []
        for item in items:
            if item.source:
                lines.append(f"- {item.title} ({item.source})")
            else:
                lines.append(f"- {item.title}")
        return "\n".join(lines)

    async def close(self) -> None:
        """Close the shared HTTP client if one is open."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()


# Words too common (or too conversational) to be useful news-search terms.
# NOTE: duplicate "been"/"being" entries from the original listing removed;
# membership behavior is unchanged since this is a set.
STOP_WORDS = {
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "shall", "can", "need", "dare", "ought",
    "used", "to", "of", "in", "for", "on", "with", "at", "by", "from",
    "as", "into", "through", "during", "before", "after", "above", "below",
    "between", "out", "off", "over", "under", "again", "further", "then",
    "once", "here", "there", "when", "where", "why", "how", "all", "both",
    "each", "few", "more", "most", "other", "some", "such", "no", "nor",
    "not", "only", "own", "same", "so", "than", "too", "very", "just",
    "but", "and", "or", "if", "while", "because", "until", "about",
    "that", "this", "these", "those", "what", "which", "who", "whom",
    "it", "its", "he", "him", "his", "she", "her", "they", "them",
    "their", "we", "us", "our", "you", "your", "me", "my", "i",
    # Casual speech fillers
    "yeah", "well", "like", "man", "dude", "okay", "right", "know",
    "think", "mean", "really", "actually", "honestly", "basically",
    "literally", "stuff", "thing", "things", "something", "anything",
    "nothing", "everything", "someone", "anyone", "everyone", "nobody",
    "gonna", "wanna", "gotta", "kinda", "sorta", "dunno",
    "look", "see", "say", "said", "tell", "told", "talk", "talking",
    "feel", "felt", "guess", "sure", "maybe", "probably", "never",
    "always", "still", "even", "much", "many", "also", "got", "get",
    "getting", "going", "come", "came", "make", "made", "take", "took",
    "give", "gave", "want", "keep", "kept", "let", "put", "went",
    "doing", "having", "call", "called", "calling",
    "tonight", "today", "night", "time", "long", "good", "bad",
    "first", "last", "back", "down", "ever", "away", "cant", "dont",
    "didnt", "doesnt", "isnt", "wasnt", "wont", "wouldnt", "couldnt",
    "shouldnt", "aint", "stop", "start", "started", "help",
}


def extract_keywords(text: str, max_keywords: int = 3) -> list[str]:
    """Pick up to *max_keywords* news-searchable keywords from free text.

    Pass 1 prefers capitalized words (likely proper nouns) that do not
    start a sentence; pass 2 falls back to uncommon lowercase words
    longer than 4 characters. Stop words and fillers are excluded.
    Pass-1 results keep their original capitalization; pass-2 results
    are lowercased.
    """
    words = text.split()
    keywords: list[str] = []

    # Pass 1: capitalized words (proper nouns) not at sentence start
    for i, word in enumerate(words):
        clean = _NON_WORD_RE.sub("", word)
        if not clean:
            continue
        # First word, or previous word ends with sentence punctuation.
        # ([-1:] stays safe on the off chance of an empty previous token.)
        is_sentence_start = i == 0 or words[i - 1].rstrip()[-1:] in ".!?"
        if clean[0].isupper() and not is_sentence_start and clean.lower() not in STOP_WORDS:
            if clean not in keywords:
                keywords.append(clean)
            if len(keywords) >= max_keywords:
                return keywords

    # Pass 2: uncommon words (>4 chars, not in stop words).
    # Maintain a lowercase index instead of rebuilding
    # [k.lower() for k in keywords] every iteration; the original's extra
    # "clean.capitalize() not in keywords" test is subsumed by this check.
    seen_lower = {k.lower() for k in keywords}
    for word in words:
        clean = _NON_WORD_RE.sub("", word).lower()
        if len(clean) > 4 and clean not in STOP_WORDS and clean not in seen_lower:
            keywords.append(clean)
            seen_lower.add(clean)
            if len(keywords) >= max_keywords:
                return keywords

    return keywords


# Module-level singleton shared by the application.
news_service = NewsService()