Add clip generator, hourly stats cron, and transcription improvements

- make_clips.py: Extract best moments from episodes as short-form clips
  (9:16 vertical MP4 with captions for TikTok/Shorts/Reels)
- deploy_stats_cron.sh: Deploy podcast_stats.py to NAS as Docker container
  running hourly with auto-restart
- podcast_stats.py: Add _find_ytdlp() for Docker compatibility, auto-detect
  local Docker for Castopod DB queries
- publish_episode.py: Upgrade Whisper model from base to large-v3

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 01:30:33 -07:00
parent 28af0723c7
commit b02616bc44
4 changed files with 1193 additions and 9 deletions

73
deploy_stats_cron.sh Executable file
View File

@@ -0,0 +1,73 @@
#!/bin/bash
# Deploy podcast_stats.py to NAS as a long-running Docker container that updates hourly.
#
# Usage: ./deploy_stats_cron.sh
#
# -e: abort on any command failure; -u: unset variables are errors;
# -o pipefail: a failure anywhere in a pipeline fails the pipeline.
set -euo pipefail

NAS_HOST="mmgnas-10g"
NAS_USER="luke"
NAS_PORT="8001"
DOCKER_BIN="/share/CACHEDEV1_DATA/.qpkg/container-station/bin/docker"
DEPLOY_DIR="/share/CACHEDEV1_DATA/podcast-stats"
CONTAINER_NAME="podcast-stats"

echo "Deploying podcast stats to NAS..."

# Create deploy dir and copy files
ssh -p "$NAS_PORT" "$NAS_USER@$NAS_HOST" "mkdir -p $DEPLOY_DIR"
scp -P "$NAS_PORT" podcast_stats.py "$NAS_USER@$NAS_HOST:$DEPLOY_DIR/podcast_stats.py"

# Build auxiliary files locally, then copy them over (NAS /tmp is tiny).
# The trap guarantees the temp file is removed even when a later scp/ssh
# step fails — under `set -e` the script would otherwise exit early and
# leak the mktemp file.
TMPFILE=$(mktemp)
trap 'rm -f "$TMPFILE"' EXIT

# Dockerfile: python base image + docker CLI (static binary, so the
# container can `docker exec` into the Castopod DB via the host socket).
cat > "$TMPFILE" << 'DOCKERFILE'
FROM python:3.11-slim
RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/* \
&& curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-27.5.1.tgz | tar xz --strip-components=1 -C /usr/local/bin docker/docker \
&& apt-get purge -y curl && apt-get autoremove -y
RUN pip install --no-cache-dir requests yt-dlp
COPY podcast_stats.py /app/podcast_stats.py
COPY run_loop.sh /app/run_loop.sh
RUN chmod +x /app/run_loop.sh
WORKDIR /app
CMD ["/app/run_loop.sh"]
DOCKERFILE
scp -P "$NAS_PORT" "$TMPFILE" "$NAS_USER@$NAS_HOST:$DEPLOY_DIR/Dockerfile"

# The hourly loop script (container entrypoint). Reuses the same temp
# file; the previous contents were already copied to the NAS.
cat > "$TMPFILE" << 'LOOPSCRIPT'
#!/bin/sh
echo "podcast-stats: starting hourly loop"
while true; do
echo "$(date -u '+%Y-%m-%dT%H:%M:%SZ') Running stats update..."
python podcast_stats.py --json --upload 2>&1 || echo " ...failed, will retry next hour"
echo "Sleeping 1 hour..."
sleep 3600
done
LOOPSCRIPT
scp -P "$NAS_PORT" "$TMPFILE" "$NAS_USER@$NAS_HOST:$DEPLOY_DIR/run_loop.sh"

echo "Building Docker image on NAS..."
# TMPDIR override: docker build stages context in TMPDIR, and the NAS
# default /tmp is too small.
ssh -p "$NAS_PORT" "$NAS_USER@$NAS_HOST" \
    "TMPDIR=$DEPLOY_DIR $DOCKER_BIN build -t $CONTAINER_NAME $DEPLOY_DIR"

# Stop old container if running (|| true: absent container is fine)
ssh -p "$NAS_PORT" "$NAS_USER@$NAS_HOST" \
    "$DOCKER_BIN rm -f $CONTAINER_NAME 2>/dev/null || true"

# Run as a daemon with auto-restart (survives reboots). The docker.sock
# mount lets podcast_stats.py query the Castopod DB container directly.
echo "Starting container..."
ssh -p "$NAS_PORT" "$NAS_USER@$NAS_HOST" \
    "$DOCKER_BIN run -d --name $CONTAINER_NAME --restart unless-stopped --network host -v /var/run/docker.sock:/var/run/docker.sock $CONTAINER_NAME"

echo "Verifying..."
sleep 3
ssh -p "$NAS_PORT" "$NAS_USER@$NAS_HOST" \
    "$DOCKER_BIN logs $CONTAINER_NAME 2>&1 | tail -5"

echo ""
echo "Done! Container runs hourly in a loop with --restart unless-stopped."
echo " Logs: ssh -p $NAS_PORT $NAS_USER@$NAS_HOST '$DOCKER_BIN logs -f $CONTAINER_NAME'"

1088
make_clips.py Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -38,6 +38,18 @@ BUNNY_STORAGE_REGION = "la"
BUNNY_ACCOUNT_KEY = "2865f279-297b-431a-ad18-0ccf1f8e4fa8cf636cea-3222-415a-84ed-56ee195c0530" BUNNY_ACCOUNT_KEY = "2865f279-297b-431a-ad18-0ccf1f8e4fa8cf636cea-3222-415a-84ed-56ee195c0530"
def _find_ytdlp():
"""Find yt-dlp: check local venv first, then fall back to PATH."""
import shutil
venv_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "venv", "bin", "yt-dlp")
if os.path.exists(venv_path):
return venv_path
path_bin = shutil.which("yt-dlp")
if path_bin:
return path_bin
return "yt-dlp"
def gather_apple_reviews(): def gather_apple_reviews():
all_reviews = [] all_reviews = []
seen_ids = set() seen_ids = set()
@@ -129,7 +141,7 @@ def gather_youtube(include_comments=False):
try: try:
proc = subprocess.run( proc = subprocess.run(
[os.path.join(os.path.dirname(os.path.abspath(__file__)), "venv", "bin", "yt-dlp"), "--dump-json", "--flat-playlist", [_find_ytdlp(), "--dump-json", "--flat-playlist",
f"https://www.youtube.com/playlist?list={YOUTUBE_PLAYLIST}"], f"https://www.youtube.com/playlist?list={YOUTUBE_PLAYLIST}"],
capture_output=True, text=True, timeout=60 capture_output=True, text=True, timeout=60
) )
@@ -160,7 +172,7 @@ def gather_youtube(include_comments=False):
for vid in video_ids: for vid in video_ids:
try: try:
cmd = [os.path.join(os.path.dirname(os.path.abspath(__file__)), "venv", "bin", "yt-dlp"), "--dump-json", "--no-download", f"https://www.youtube.com/watch?v={vid}"] cmd = [_find_ytdlp(), "--dump-json", "--no-download", f"https://www.youtube.com/watch?v={vid}"]
if include_comments: if include_comments:
cmd.insert(2, "--write-comments") cmd.insert(2, "--write-comments")
vr = subprocess.run(cmd, capture_output=True, text=True, timeout=90) vr = subprocess.run(cmd, capture_output=True, text=True, timeout=90)
@@ -204,7 +216,7 @@ def gather_youtube(include_comments=False):
if videos: if videos:
try: try:
vr = subprocess.run( vr = subprocess.run(
[os.path.join(os.path.dirname(os.path.abspath(__file__)), "venv", "bin", "yt-dlp"), "--dump-json", "--no-download", "--playlist-items", "1", [_find_ytdlp(), "--dump-json", "--no-download", "--playlist-items", "1",
f"https://www.youtube.com/playlist?list={YOUTUBE_PLAYLIST}"], f"https://www.youtube.com/playlist?list={YOUTUBE_PLAYLIST}"],
capture_output=True, text=True, timeout=30 capture_output=True, text=True, timeout=30
) )
@@ -224,10 +236,21 @@ def gather_youtube(include_comments=False):
def _run_db_query(sql): def _run_db_query(sql):
cmd = [ # If running on NAS (docker socket available), exec directly
"ssh", "-p", NAS_SSH_PORT, NAS_SSH, docker_bin = None
f"{DOCKER_BIN} exec -i {CASTOPOD_DB_CONTAINER} mysql -u castopod -pBYtbFfk3ndeVabb26xb0UyKU castopod -N" for path in [DOCKER_BIN, "/usr/bin/docker", "/usr/local/bin/docker"]:
] if os.path.exists(path):
docker_bin = path
break
if docker_bin:
cmd = [docker_bin, "exec", "-i", CASTOPOD_DB_CONTAINER,
"mysql", "-u", "castopod", "-pBYtbFfk3ndeVabb26xb0UyKU", "castopod", "-N"]
else:
cmd = [
"ssh", "-p", NAS_SSH_PORT, NAS_SSH,
f"{DOCKER_BIN} exec -i {CASTOPOD_DB_CONTAINER} mysql -u castopod -pBYtbFfk3ndeVabb26xb0UyKU castopod -N"
]
try: try:
proc = subprocess.run(cmd, input=sql, capture_output=True, text=True, timeout=30) proc = subprocess.run(cmd, input=sql, capture_output=True, text=True, timeout=30)
stderr = proc.stderr.strip() stderr = proc.stderr.strip()
@@ -236,7 +259,7 @@ def _run_db_query(sql):
return None, stderr return None, stderr
return stdout, None return stdout, None
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
return None, "SSH timeout" return None, "timeout"
except Exception as e: except Exception as e:
return None, str(e) return None, str(e)

View File

@@ -60,7 +60,7 @@ PODCAST_ID = 1
PODCAST_HANDLE = "LukeAtTheRoost" PODCAST_HANDLE = "LukeAtTheRoost"
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
WHISPER_MODEL = "base" # Options: tiny, base, small, medium, large WHISPER_MODEL = "large-v3"
# Postiz (social media posting) # Postiz (social media posting)
POSTIZ_URL = "https://social.lukeattheroost.com" POSTIZ_URL = "https://social.lukeattheroost.com"