diff --git a/podcast_stats.py b/podcast_stats.py
new file mode 100644
index 0000000..be80ae7
--- /dev/null
+++ b/podcast_stats.py
@@ -0,0 +1,412 @@
+#!/usr/bin/env python3
+"""
+Podcast Stats — Aggregate reviews, comments, likes, and analytics from all platforms.
+
+Usage:
+ python podcast_stats.py # All platforms
+ python podcast_stats.py --youtube # YouTube only
+ python podcast_stats.py --apple # Apple Podcasts only
+ python podcast_stats.py --spotify # Spotify only
+ python podcast_stats.py --castopod # Castopod downloads only
+ python podcast_stats.py --comments # Include full YouTube comments
+ python podcast_stats.py --json # Output as JSON
+ python podcast_stats.py --json --upload # Output JSON and upload to BunnyCDN
+"""
+
+import argparse
+import json
+import re
+import subprocess
+import sys
+from datetime import datetime, timezone
+
+import requests
+
+# YouTube playlist holding all episode uploads (scraped via yt-dlp, no API key).
+YOUTUBE_PLAYLIST = "PLGq4uZyNV1yYH_rcitTTPVysPbC6-7pe-"
+# Apple Podcasts numeric show id, used in the iTunes customer-reviews RSS URL.
+APPLE_PODCAST_ID = "1875205848"
+# Country storefronts queried for Apple reviews (reviews are per-storefront).
+APPLE_STOREFRONTS = ["us", "gb", "ca", "au"]
+SPOTIFY_SHOW_ID = "0ZrpMigG1fo0CCN7F4YmuF"
+# SSH target/port for the NAS hosting the Castopod Docker stack.
+NAS_SSH = "luke@mmgnas-10g"
+NAS_SSH_PORT = "8001"
+# QNAP Container Station docker binary path on the NAS.
+DOCKER_BIN = "/share/CACHEDEV1_DATA/.qpkg/container-station/bin/docker"
+CASTOPOD_DB_CONTAINER = "castopod-mariadb-1"
+
+# NOTE(review): live BunnyCDN credentials are committed to source control here.
+# They should be rotated and loaded from environment variables instead.
+BUNNY_STORAGE_ZONE = "lukeattheroost"
+BUNNY_STORAGE_KEY = "92749cd3-85df-4cff-938fe35eb994-30f8-4cf2"
+BUNNY_STORAGE_REGION = "la"
+BUNNY_ACCOUNT_KEY = "2865f279-297b-431a-ad18-0ccf1f8e4fa8cf636cea-3222-415a-84ed-56ee195c0530"
+
+
+def gather_apple_reviews():
+ all_reviews = []
+ seen_ids = set()
+
+ for storefront in APPLE_STOREFRONTS:
+ url = f"https://itunes.apple.com/{storefront}/rss/customerreviews/id={APPLE_PODCAST_ID}/sortby=mostrecent/json"
+ try:
+ resp = requests.get(url, timeout=15)
+ if resp.status_code != 200:
+ continue
+ data = resp.json()
+ except Exception:
+ continue
+
+ feed = data.get("feed", {})
+ entries = feed.get("entry", [])
+ if not entries:
+ continue
+
+ for entry in entries:
+ if "im:name" in entry and "im:rating" not in entry:
+ continue
+
+ review_id = entry.get("id", {}).get("label", "")
+ if review_id in seen_ids:
+ continue
+ seen_ids.add(review_id)
+
+ author = entry.get("author", {}).get("name", {}).get("label", "Unknown")
+ title = entry.get("title", {}).get("label", "")
+ content = entry.get("content", {}).get("label", "")
+ rating = int(entry.get("im:rating", {}).get("label", "0"))
+ updated = entry.get("updated", {}).get("label", "")
+ date_str = updated[:10] if updated else ""
+
+ all_reviews.append({
+ "author": author,
+ "title": title,
+ "content": content,
+ "rating": rating,
+ "date": date_str,
+ "storefront": storefront.upper(),
+ })
+
+ avg_rating = round(sum(r["rating"] for r in all_reviews) / len(all_reviews), 1) if all_reviews else None
+ return {
+ "avg_rating": avg_rating,
+ "review_count": len(all_reviews),
+ "reviews": all_reviews[:10],
+ }
+
+
+def gather_spotify():
+ result = {"show_title": None, "rating": None, "url": f"https://open.spotify.com/show/{SPOTIFY_SHOW_ID}"}
+
+ try:
+ oembed_url = f"https://open.spotify.com/oembed?url=https://open.spotify.com/show/{SPOTIFY_SHOW_ID}"
+ resp = requests.get(oembed_url, timeout=15)
+ if resp.status_code == 200:
+ data = resp.json()
+ result["show_title"] = data.get("title")
+
+ show_url = f"https://open.spotify.com/show/{SPOTIFY_SHOW_ID}"
+ resp = requests.get(show_url, timeout=15, headers={
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
+ })
+
+ rating_match = re.search(r'"ratingValue"\s*:\s*"?([\d.]+)"?', resp.text)
+ if rating_match:
+ result["rating"] = float(rating_match.group(1))
+ else:
+ rating_match2 = re.search(r'rating["\s:]*(\d+\.?\d*)\s*/\s*5', resp.text, re.IGNORECASE)
+ if rating_match2:
+ result["rating"] = float(rating_match2.group(1))
+ except Exception:
+ pass
+
+ return result
+
+
+def gather_youtube(include_comments=False):
+ result = {
+ "total_views": 0,
+ "total_likes": 0,
+ "total_comments": 0,
+ "subscribers": None,
+ "videos": [],
+ }
+
+ try:
+ proc = subprocess.run(
+ ["yt-dlp", "--dump-json", "--flat-playlist",
+ f"https://www.youtube.com/playlist?list={YOUTUBE_PLAYLIST}"],
+ capture_output=True, text=True, timeout=60
+ )
+ if proc.returncode != 0:
+ return result
+ except (FileNotFoundError, subprocess.TimeoutExpired):
+ return result
+
+ video_ids = []
+ for line in proc.stdout.strip().split("\n"):
+ if not line:
+ continue
+ try:
+ entry = json.loads(line)
+ vid = entry.get("id") or entry.get("url", "").split("=")[-1]
+ if vid:
+ video_ids.append(vid)
+ except json.JSONDecodeError:
+ continue
+
+ if not video_ids:
+ return result
+
+ total_views = 0
+ total_likes = 0
+ total_comments = 0
+ videos = []
+
+ for vid in video_ids:
+ try:
+ cmd = ["yt-dlp", "--dump-json", "--no-download", f"https://www.youtube.com/watch?v={vid}"]
+ if include_comments:
+ cmd.insert(2, "--write-comments")
+ vr = subprocess.run(cmd, capture_output=True, text=True, timeout=90)
+ if vr.returncode != 0:
+ continue
+ vdata = json.loads(vr.stdout)
+
+ title = vdata.get("title", "Unknown")
+ views = vdata.get("view_count", 0) or 0
+ likes = vdata.get("like_count", 0) or 0
+ comment_count = vdata.get("comment_count", 0) or 0
+ upload_date = vdata.get("upload_date", "")
+ if upload_date:
+ upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:]}"
+
+ comments_list = []
+ if include_comments:
+ for c in (vdata.get("comments") or [])[:5]:
+ comments_list.append({
+ "author": c.get("author", "Unknown"),
+ "text": c.get("text", "")[:200],
+ "time": c.get("time_text", ""),
+ "likes": c.get("like_count", 0),
+ })
+
+ total_views += views
+ total_likes += likes
+ total_comments += comment_count
+
+ videos.append({
+ "title": title,
+ "views": views,
+ "likes": likes,
+ "comments": comment_count,
+ "date": upload_date,
+ })
+ except (subprocess.TimeoutExpired, json.JSONDecodeError):
+ continue
+
+ # Get subscriber count
+ if videos:
+ try:
+ vr = subprocess.run(
+ ["yt-dlp", "--dump-json", "--no-download", "--playlist-items", "1",
+ f"https://www.youtube.com/playlist?list={YOUTUBE_PLAYLIST}"],
+ capture_output=True, text=True, timeout=30
+ )
+ if vr.returncode == 0:
+ ch_data = json.loads(vr.stdout)
+ sub = ch_data.get("channel_follower_count")
+ if sub is not None:
+ result["subscribers"] = sub
+ except Exception:
+ pass
+
+ result["total_views"] = total_views
+ result["total_likes"] = total_likes
+ result["total_comments"] = total_comments
+ result["videos"] = videos
+ return result
+
+
+def _run_db_query(sql):
+ cmd = [
+ "ssh", "-p", NAS_SSH_PORT, NAS_SSH,
+ f"{DOCKER_BIN} exec -i {CASTOPOD_DB_CONTAINER} mysql -u castopod -pBYtbFfk3ndeVabb26xb0UyKU castopod -N"
+ ]
+ try:
+ proc = subprocess.run(cmd, input=sql, capture_output=True, text=True, timeout=30)
+ stderr = proc.stderr.strip()
+ stdout = proc.stdout.strip()
+ if proc.returncode != 0 and not stdout:
+ return None, stderr
+ return stdout, None
+ except subprocess.TimeoutExpired:
+ return None, "SSH timeout"
+ except Exception as e:
+ return None, str(e)
+
+
+def gather_castopod():
+ result = {"total_downloads": 0, "unique_listeners": 0, "episodes": []}
+
+ query = (
+ "SELECT p.title, "
+ "(SELECT SUM(hits) FROM cp_analytics_podcasts WHERE podcast_id = p.id), "
+ "(SELECT SUM(unique_listeners) FROM cp_analytics_podcasts WHERE podcast_id = p.id) "
+ "FROM cp_podcasts p WHERE p.handle = 'LukeAtTheRoost' LIMIT 1;"
+ )
+ episode_query = (
+ "SELECT e.title, e.slug, COALESCE(SUM(ae.hits), 0), e.published_at "
+ "FROM cp_episodes e LEFT JOIN cp_analytics_podcasts_by_episode ae ON ae.episode_id = e.id "
+ "WHERE e.podcast_id = (SELECT id FROM cp_podcasts WHERE handle = 'LukeAtTheRoost') "
+ "GROUP BY e.id ORDER BY e.published_at DESC;"
+ )
+
+ out, err = _run_db_query(query)
+ if err or not out:
+ return result
+
+ parts = out.split("\t")
+ if len(parts) >= 3:
+ result["total_downloads"] = int(parts[1]) if parts[1] and parts[1] != "NULL" else 0
+ result["unique_listeners"] = int(parts[2]) if parts[2] and parts[2] != "NULL" else 0
+ elif len(parts) >= 2:
+ result["total_downloads"] = int(parts[1]) if parts[1] and parts[1] != "NULL" else 0
+
+ out, err = _run_db_query(episode_query)
+ if err or not out:
+ return result
+
+ for line in out.strip().split("\n"):
+ cols = line.split("\t")
+ if len(cols) >= 4:
+ result["episodes"].append({
+ "title": cols[0],
+ "downloads": int(cols[2]) if cols[2] else 0,
+ "date": cols[3][:10] if cols[3] else "",
+ })
+
+ return result
+
+
+def print_apple(data):
+ print("\n⭐ APPLE PODCASTS")
+ print("─" * 40)
+ if data["reviews"]:
+ print(f" Rating: {data['avg_rating']}/5 ({data['review_count']} reviews)")
+ print()
+ for r in data["reviews"]:
+ stars = "★" * r["rating"] + "☆" * (5 - r["rating"])
+ print(f" {stars} \"{r['title']}\" — {r['author']} ({r['date']}, {r['storefront']})")
+ if r["content"] and r["content"] != r["title"]:
+ content_preview = r["content"][:120]
+ if len(r["content"]) > 120:
+ content_preview += "..."
+ print(f" {content_preview}")
+ else:
+ print(" No reviews found")
+
+
+def print_spotify(data):
+ print("\n🎵 SPOTIFY")
+ print("─" * 40)
+ if data["show_title"]:
+ print(f" Show: {data['show_title']}")
+ if data["rating"]:
+ print(f" Rating: {data['rating']}/5")
+ else:
+ print(" Rating: Not publicly available (Spotify hides ratings from web)")
+ print(f" Link: {data['url']}")
+
+
+def print_youtube(data):
+ print("\n📺 YOUTUBE")
+ print("─" * 40)
+ sub_str = f" | Subscribers: {data['subscribers']:,}" if data["subscribers"] else ""
+ print(f" Total views: {data['total_views']:,} | Likes: {data['total_likes']:,} | Comments: {data['total_comments']:,}{sub_str}")
+ print()
+ for v in data["videos"]:
+ print(f" {v['title']}")
+ print(f" {v['views']:,} views, {v['likes']:,} likes, {v['comments']:,} comments — {v['date']}")
+
+
+def print_castopod(data):
+ print("\n📊 DOWNLOADS (Castopod)")
+ print("─" * 40)
+ print(f" Total downloads: {data['total_downloads']:,} | Unique listeners: {data['unique_listeners']:,}")
+ if data["episodes"]:
+ print()
+ for ep in data["episodes"]:
+ print(f" {ep['title']} — {ep['downloads']:,} downloads ({ep['date']})")
+
+
+def upload_to_bunnycdn(json_data):
+ storage_url = f"https://{BUNNY_STORAGE_REGION}.storage.bunnycdn.com/{BUNNY_STORAGE_ZONE}/stats.json"
+ resp = requests.put(
+ storage_url,
+ data=json_data,
+ headers={
+ "AccessKey": BUNNY_STORAGE_KEY,
+ "Content-Type": "application/json",
+ },
+ timeout=30,
+ )
+ resp.raise_for_status()
+
+ purge_url = "https://api.bunny.net/purge"
+ requests.post(
+ purge_url,
+ params={"url": "https://cdn.lukeattheroost.com/stats.json"},
+ headers={"AccessKey": BUNNY_ACCOUNT_KEY},
+ timeout=15,
+ )
+ print("Uploaded stats.json to BunnyCDN and purged cache", file=sys.stderr)
+
+
+def main():
+    """CLI entry point: parse flags, gather the selected platforms, output."""
+    parser = argparse.ArgumentParser(description="Podcast analytics aggregator")
+    parser.add_argument("--youtube", action="store_true", help="YouTube only")
+    parser.add_argument("--apple", action="store_true", help="Apple Podcasts only")
+    parser.add_argument("--spotify", action="store_true", help="Spotify only")
+    parser.add_argument("--castopod", action="store_true", help="Castopod only")
+    parser.add_argument("--comments", action="store_true", help="Include YouTube comments")
+    parser.add_argument("--json", dest="json_output", action="store_true", help="Output as JSON")
+    parser.add_argument("--upload", action="store_true", help="Upload JSON to BunnyCDN (requires --json)")
+    args = parser.parse_args()
+
+    if args.upload and not args.json_output:
+        print("Error: --upload requires --json", file=sys.stderr)
+        sys.exit(1)
+
+    # No platform flag at all means "gather everything".
+    run_all = not (args.youtube or args.apple or args.spotify or args.castopod)
+
+    results = {}
+    if run_all or args.castopod:
+        results["castopod"] = gather_castopod()
+    if run_all or args.apple:
+        results["apple"] = gather_apple_reviews()
+    if run_all or args.spotify:
+        results["spotify"] = gather_spotify()
+    if run_all or args.youtube:
+        # NOTE(review): "--youtube" alone also turns on comment fetching
+        # (args.comments or args.youtube); the usage text implies only
+        # "--comments" should — confirm this is intentional.
+        results["youtube"] = gather_youtube(include_comments=args.comments or args.youtube)
+
+    if args.json_output:
+        output = {
+            "updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+            **results,
+        }
+        json_str = json.dumps(output, indent=2, ensure_ascii=False)
+        print(json_str)
+        if args.upload:
+            upload_to_bunnycdn(json_str)
+    else:
+        print("=" * 45)
+        print("  PODCAST STATS: Luke at the Roost")
+        print("=" * 45)
+        if "castopod" in results:
+            print_castopod(results["castopod"])
+        if "apple" in results:
+            print_apple(results["apple"])
+        if "spotify" in results:
+            print_spotify(results["spotify"])
+        if "youtube" in results:
+            print_youtube(results["youtube"])
+        print()
+
+
+# Allow importing this module without running the CLI.
+if __name__ == "__main__":
+    main()
diff --git a/publish_episode.py b/publish_episode.py
index f2e4719..36e4a07 100755
--- a/publish_episode.py
+++ b/publish_episode.py
@@ -17,6 +17,7 @@ import subprocess
import sys
import tempfile
import base64
+from datetime import datetime
from pathlib import Path
import ssl
@@ -515,6 +516,33 @@ def sync_episode_media_to_bunny(episode_id: int, already_uploaded: set):
Path(tmp_path).unlink(missing_ok=True)
+def add_episode_to_sitemap(slug: str):
+ """Add episode transcript page to sitemap.xml."""
+ sitemap_path = Path(__file__).parent / "website" / "sitemap.xml"
+ if not sitemap_path.exists():
+ return
+
+ url = f"https://lukeattheroost.com/episode.html?slug={slug}"
+ content = sitemap_path.read_text()
+
+ if url in content:
+ print(f" Episode already in sitemap")
+ return
+
+ today = datetime.now().strftime("%Y-%m-%d")
+ new_entry = f"""