Ep13 publish, MLX whisper, voicemail system, hero redesign, massive topic expansion

- Switch whisper transcription from faster-whisper (CPU) to lightning-whisper-mlx (GPU)
- Fix word_timestamps hanging, use ffprobe for accurate duration
- Add Cloudflare Pages Worker for SignalWire voicemail fallback when server offline
- Add voicemail sync on startup, delete tracking, save feature
- Add /feed RSS proxy to _worker.js (was broken by worker taking over routing)
- Redesign website hero section: ghost buttons, compact phone, plain text links
- Rewrite caller prompts for faster point-getting and host-following
- Expand TOPIC_CALLIN from ~250 to 547 entries across 34 categories
- Add new categories: biology, psychology, engineering, math, geology, animals, work, money, books, movies, relationships, health, language, true crime, drunk/high/unhinged callers
- Remove bad Inworld voices (Pixie, Dominus), reduce repeat caller frequency
- Add audio monitor device routing, uvicorn --reload-dir fix
- Publish episode 13

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 01:56:47 -07:00
parent 8d3d67a177
commit 3164a70e48
23 changed files with 2944 additions and 512 deletions
@@ -19,15 +19,17 @@ class AudioService:

    def __init__(self):
        # Device configuration
-        self.input_device: Optional[int] = None
+        self.input_device: Optional[int] = 13   # Radio Voice Mic (loopback input)
        self.input_channel: int = 1  # 1-indexed channel

-        self.output_device: Optional[int] = None  # Single output device (multi-channel)
-        self.caller_channel: int = 1   # Channel for caller TTS
+        self.output_device: Optional[int] = 12  # Radio Voice Mic (loopback output)
+        self.caller_channel: int = 3   # Channel for caller TTS
        self.live_caller_channel: int = 9  # Channel for live caller audio
-        self.music_channel: int = 2    # Channel for music
+        self.music_channel: int = 5    # Channel for music
        self.sfx_channel: int = 3      # Channel for SFX
        self.ad_channel: int = 11      # Channel for ads
+        self.monitor_device: Optional[int] = 14  # Babyface Pro (headphone monitoring)
+        self.monitor_channel: int = 1  # Channel for mic monitoring on monitor device
        self.phone_filter: bool = False  # Phone filter on caller voices

        # Ad playback state
@@ -78,6 +80,10 @@ class AudioService:
        self.input_sample_rate = 16000  # For Whisper
        self.output_sample_rate = 24000  # For TTS

+        # Mic monitor (input → monitor device passthrough)
+        self._monitor_stream: Optional[sd.OutputStream] = None
+        self._monitor_write: Optional[Callable] = None
+
        # Stem recording (opt-in, attached via API)
        self.stem_recorder = None
        self._stem_mic_stream: Optional[sd.InputStream] = None
@@ -99,8 +105,10 @@ class AudioService:
                self.music_channel = data.get("music_channel", 2)
                self.sfx_channel = data.get("sfx_channel", 3)
                self.ad_channel = data.get("ad_channel", 11)
+                self.monitor_device = data.get("monitor_device")
+                self.monitor_channel = data.get("monitor_channel", 1)
                self.phone_filter = data.get("phone_filter", False)
-                print(f"Loaded audio settings: output={self.output_device}, channels={self.caller_channel}/{self.live_caller_channel}/{self.music_channel}/{self.sfx_channel}/ad:{self.ad_channel}, phone_filter={self.phone_filter}")
+                print(f"Loaded audio settings: input={self.input_device}, output={self.output_device}, monitor={self.monitor_device}, phone_filter={self.phone_filter}")
            except Exception as e:
                print(f"Failed to load audio settings: {e}")

@@ -116,6 +124,8 @@ class AudioService:
                "music_channel": self.music_channel,
                "sfx_channel": self.sfx_channel,
                "ad_channel": self.ad_channel,
+                "monitor_device": self.monitor_device,
+                "monitor_channel": self.monitor_channel,
                "phone_filter": self.phone_filter,
            }
            with open(SETTINGS_FILE, "w") as f:
@@ -148,6 +158,8 @@ class AudioService:
        music_channel: Optional[int] = None,
        sfx_channel: Optional[int] = None,
        ad_channel: Optional[int] = None,
+        monitor_device: Optional[int] = None,
+        monitor_channel: Optional[int] = None,
        phone_filter: Optional[bool] = None
    ):
        """Configure audio devices and channels"""
@@ -167,6 +179,10 @@ class AudioService:
            self.sfx_channel = sfx_channel
        if ad_channel is not None:
            self.ad_channel = ad_channel
+        if monitor_device is not None:
+            self.monitor_device = monitor_device
+        if monitor_channel is not None:
+            self.monitor_channel = monitor_channel
        if phone_filter is not None:
            self.phone_filter = phone_filter

@@ -184,6 +200,8 @@ class AudioService:
            "music_channel": self.music_channel,
            "sfx_channel": self.sfx_channel,
            "ad_channel": self.ad_channel,
+            "monitor_device": self.monitor_device,
+            "monitor_channel": self.monitor_channel,
            "phone_filter": self.phone_filter,
        }

@@ -542,6 +560,9 @@ class AudioService:
            host_accum_samples = [0]
            send_threshold = 1600  # 100ms at 16kHz

+            # Start mic monitor if monitor device is configured
+            self._start_monitor(device_sr)
+
            def callback(indata, frames, time_info, status):
                # Capture for push-to-talk recording if active
                if self._recording and self._recorded_audio is not None:
@@ -551,6 +572,10 @@ class AudioService:
                if self.stem_recorder:
                    self.stem_recorder.write("host", indata[:, record_channel].copy(), device_sr)

+                # Mic monitor: send to headphone device
+                if self._monitor_write:
+                    self._monitor_write(indata[:, record_channel].copy())
+
                if not self._host_send_callback:
                    return
                mono = indata[:, record_channel]
@@ -591,8 +616,84 @@ class AudioService:
            self._host_stream = None
            self._host_send_callback = None
            print("[Audio] Host mic streaming stopped")
+        self._stop_monitor()
        self._stop_live_caller_stream()

+    # --- Mic Monitor (input → headphone device) ---
+
+    def _start_monitor(self, input_sr: int):
+        """Start mic monitor stream that routes input to monitor device.
+
+        Opens an output stream on ``self.monitor_device`` and installs
+        ``self._monitor_write``, which input callbacks call with mono
+        samples captured at ``input_sr``. Samples are resampled
+        (nearest-sample) to the monitor device's default rate and handed to
+        the output callback through a ring buffer.
+
+        Does nothing if a monitor stream already exists or no monitor device
+        is configured. Monitoring is optional: if the device cannot be
+        queried or opened, the failure is logged and the monitor stays
+        disabled instead of aborting the caller's input-stream setup.
+
+        Args:
+            input_sr: Sample rate (Hz) of the audio passed to the writer.
+        """
+        import threading  # local import: only the monitor path needs it
+
+        if self._monitor_stream is not None:
+            return
+        if self.monitor_device is None:
+            return
+
+        try:
+            device_info = sd.query_devices(self.monitor_device)
+            num_channels = int(device_info['max_output_channels'])
+            device_sr = int(device_info['default_samplerate'])
+            if num_channels < 1:
+                raise ValueError("device has no output channels")
+        except Exception as e:
+            print(f"[Audio] Mic monitor unavailable (device {self.monitor_device}): {e}")
+            return
+
+        # monitor_channel is 1-indexed; clamp into [1, num_channels] so a
+        # zero/negative setting cannot wrap around to the last channel.
+        channel_idx = max(1, min(self.monitor_channel, num_channels)) - 1
+
+        # Ring buffer for cross-device routing (2 seconds at the monitor rate)
+        ring_size = int(device_sr * 2)
+        ring = np.zeros(ring_size, dtype=np.float32)
+        state = {"write_pos": 0, "read_pos": 0, "avail": 0}
+        # The writer runs in the input stream's callback and the reader in
+        # the monitor stream's callback, so guard the shared positions.
+        lock = threading.Lock()
+
+        # Precompute resample ratio (input device sr → monitor device sr)
+        resample_ratio = device_sr / input_sr
+
+        def write_audio(data):
+            # Resample if sample rates differ
+            if abs(resample_ratio - 1.0) > 0.01:
+                n_out = int(len(data) * resample_ratio)
+                indices = np.linspace(0, len(data) - 1, n_out).astype(int)
+                data = data[indices]
+            # A chunk longer than the ring can only keep its newest samples
+            if len(data) > ring_size:
+                data = data[-ring_size:]
+            n = len(data)
+            if n == 0:
+                return
+            with lock:
+                wp = state["write_pos"]
+                if wp + n <= ring_size:
+                    ring[wp:wp + n] = data
+                else:
+                    first = ring_size - wp
+                    ring[wp:] = data[:first]
+                    ring[:n - first] = data[first:]
+                state["write_pos"] = (wp + n) % ring_size
+                # If the reader fell behind, drop the oldest samples rather
+                # than letting "avail" exceed what the ring actually holds.
+                overflow = state["avail"] + n - ring_size
+                if overflow > 0:
+                    state["read_pos"] = (state["read_pos"] + overflow) % ring_size
+                    state["avail"] = ring_size
+                else:
+                    state["avail"] += n
+
+        def callback(outdata, frames, time_info, status):
+            outdata.fill(0)
+            with lock:
+                # Not enough buffered audio yet: output silence this block
+                if state["avail"] < frames:
+                    return
+                rp = state["read_pos"]
+                if rp + frames <= ring_size:
+                    outdata[:frames, channel_idx] = ring[rp:rp + frames]
+                else:
+                    first = ring_size - rp
+                    outdata[:first, channel_idx] = ring[rp:]
+                    outdata[first:frames, channel_idx] = ring[:frames - first]
+                state["read_pos"] = (rp + frames) % ring_size
+                state["avail"] -= frames
+
+        stream = None
+        try:
+            stream = sd.OutputStream(
+                device=self.monitor_device,
+                samplerate=device_sr,
+                channels=num_channels,
+                dtype=np.float32,
+                blocksize=1024,
+                callback=callback,
+            )
+            stream.start()
+        except Exception as e:
+            if stream is not None:
+                stream.close()
+            print(f"[Audio] Failed to start mic monitor on device {self.monitor_device}: {e}")
+            return
+
+        # Publish the writer only once the stream is running, so input
+        # callbacks never feed a ring buffer that nothing drains.
+        self._monitor_stream = stream
+        self._monitor_write = write_audio
+        print(f"[Audio] Mic monitor started (device {self.monitor_device} ch {self.monitor_channel} @ {device_sr}Hz)")
+
+    def _stop_monitor(self):
+        """Stop mic monitor stream.
+
+        Detaches ``self._monitor_write`` first so input callbacks stop
+        feeding the monitor, then stops and closes the output stream. The
+        stream and writer references are cleared even if stopping raises,
+        so a later ``_start_monitor`` call is not blocked by a stale stream.
+        Does nothing beyond clearing the writer when no stream is open.
+        """
+        stream = self._monitor_stream
+        self._monitor_stream = None
+        self._monitor_write = None
+        if stream is None:
+            return
+        try:
+            stream.stop()
+        finally:
+            # Always release the device, even if stop() failed
+            stream.close()
+        print("[Audio] Mic monitor stopped")
+
    # --- Music Playback ---

    def load_music(self, file_path: str) -> bool:
@@ -981,9 +1082,13 @@ class AudioService:
        device_sr = int(device_info['default_samplerate'])
        record_channel = min(self.input_channel, max_channels) - 1

+        self._start_monitor(device_sr)
+
        def callback(indata, frames, time_info, status):
            if self.stem_recorder:
                self.stem_recorder.write("host", indata[:, record_channel].copy(), device_sr)
+            if self._monitor_write:
+                self._monitor_write(indata[:, record_channel].copy())

        self._stem_mic_stream = sd.InputStream(
            device=self.input_device,
@@ -1003,6 +1108,7 @@ class AudioService:
            self._stem_mic_stream.close()
            self._stem_mic_stream = None
            print("[StemRecorder] Host mic capture stopped")
+        self._stop_monitor()


 # Global instance
@@ -13,10 +13,8 @@ def get_whisper_model() -> WhisperModel:
    """Get or create Whisper model instance"""
    global _whisper_model
    if _whisper_model is None:
-        print("Loading Whisper tiny model for fast transcription...")
-        # Use tiny model for speed - about 3-4x faster than base
-        # beam_size=1 and best_of=1 for fastest inference
-        _whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
+        print("Loading Whisper base model...")
+        _whisper_model = WhisperModel("base", device="cpu", compute_type="int8")
        print("Whisper model loaded")
    return _whisper_model

@@ -100,13 +98,13 @@ async def transcribe_audio(audio_data: bytes, source_sample_rate: int = None) ->
    else:
        audio_16k = audio

-    # Transcribe with speed optimizations
+    # Transcribe
    segments, info = model.transcribe(
        audio_16k,
-        beam_size=1,  # Faster, slightly less accurate
-        best_of=1,
-        language="en",  # Skip language detection
-        vad_filter=True,  # Skip silence
+        beam_size=3,
+        language="en",
+        vad_filter=True,
+        initial_prompt="Luke at the Roost, a late-night radio talk show. The host Luke talks to callers about life, relationships, sports, politics, and pop culture.",
    )
    segments_list = list(segments)
    text = " ".join([s.text for s in segments_list]).strip()