Initial commit: AI Radio Show web application

- FastAPI backend with multiple TTS providers (Inworld, ElevenLabs, Kokoro, F5-TTS, etc.)
- Web frontend with caller management, music, and soundboard
- Whisper transcription integration
- OpenRouter/Ollama LLM support
- Castopod podcast publishing script

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 23:11:20 -07:00
commit 029ce6d689
25 changed files with 6817 additions and 0 deletions
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""
+Simplified Radio Show - for debugging
+"""
+
+import os
+import sys
+from pathlib import Path
+import numpy as np
+import sounddevice as sd
+import soundfile as sf
+from faster_whisper import WhisperModel
+from scipy.signal import butter, filtfilt
+from dotenv import load_dotenv
+
+load_dotenv()
+
+SAMPLE_RATE = 24000
+
+CALLERS = {
+    "1": ("Big Tony", "IKne3meq5aSn9XLyUdCD", "You are Big Tony, a loud Italian guy from Staten Island. Swear naturally, be opinionated. Keep it to 2 sentences."),
+    "2": ("Drunk Diane", "FGY2WhTYpPnrIDTdsKH5", "You are Drunk Diane, tipsy woman at a bar. Ramble a bit, be funny. Keep it to 2 sentences."),
+    "3": ("Stoner Phil", "bIHbv24MWmeRgasZH58o", "You are Stoner Phil, super chill stoner dude. Speak slow, be spacey but profound. Keep it to 2 sentences."),
+}
+
+def phone_filter(audio):
+    b, a = butter(4, [300/(SAMPLE_RATE/2), 3400/(SAMPLE_RATE/2)], btype='band')
+    return (np.tanh(filtfilt(b, a, audio.flatten()) * 1.5) * 0.8).astype(np.float32)
+
+class SimpleRadio:
+    def __init__(self):
+        print("Loading Whisper...")
+        self.whisper = WhisperModel("base", device="cpu", compute_type="int8")
+
+        print("Connecting to ElevenLabs...")
+        from elevenlabs.client import ElevenLabs
+        self.tts = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
+
+        print("Connecting to Ollama...")
+        import ollama
+        self.ollama = ollama
+
+        self.caller = CALLERS["1"]
+        self.history = []
+        print("\nReady!\n")
+
+    def record(self):
+        print("  [Recording - press Enter to stop]")
+        chunks = []
+        recording = True
+
+        def callback(indata, frames, time, status):
+            if recording:
+                chunks.append(indata.copy())
+
+        with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=callback):
+            input()  # Wait for Enter
+
+        recording = False
+        return np.vstack(chunks) if chunks else None
+
+    def transcribe(self, audio):
+        import librosa
+        audio_16k = librosa.resample(audio.flatten().astype(np.float32), orig_sr=SAMPLE_RATE, target_sr=16000)
+        segments, _ = self.whisper.transcribe(audio_16k)
+        return " ".join([s.text for s in segments]).strip()
+
+    def respond(self, text):
+        self.history.append({"role": "user", "content": text})
+
+        response = self.ollama.chat(
+            model="llama3.2:latest",
+            messages=[{"role": "system", "content": self.caller[2]}] + self.history[-6:],
+            options={"temperature": 0.9}
+        )
+
+        reply = response["message"]["content"]
+        self.history.append({"role": "assistant", "content": reply})
+        return reply
+
+    def speak(self, text):
+        print("  [Generating voice...]")
+        audio_gen = self.tts.text_to_speech.convert(
+            voice_id=self.caller[1],
+            text=text,
+            model_id="eleven_turbo_v2_5",
+            output_format="pcm_24000"
+        )
+
+        audio_bytes = b"".join(audio_gen)
+        audio = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
+        filtered = phone_filter(audio)
+
+        print("  [Playing...]")
+        sd.play(filtered, SAMPLE_RATE)
+        sd.wait()
+
+    def run(self):
+        print("=" * 50)
+        print("  SIMPLE RADIO - Type commands:")
+        print("  1/2/3 = switch caller")
+        print("  r     = record & respond")
+        print("  t     = type message (skip recording)")
+        print("  q     = quit")
+        print("=" * 50)
+        print(f"\nCaller: {self.caller[0]}\n")
+
+        while True:
+            cmd = input("> ").strip().lower()
+
+            if cmd == 'q':
+                break
+            elif cmd in '123':
+                self.caller = CALLERS[cmd]
+                self.history = []
+                print(f"\n📞 Switched to: {self.caller[0]}\n")
+            elif cmd == 'r':
+                audio = self.record()
+                if audio is not None:
+                    print("  [Transcribing...]")
+                    text = self.transcribe(audio)
+                    print(f"\n  YOU: {text}\n")
+                    if text:
+                        print("  [Thinking...]")
+                        reply = self.respond(text)
+                        print(f"\n  📞 {self.caller[0].upper()}: {reply}\n")
+                        self.speak(reply)
+            elif cmd == 't':
+                text = input("  Type message: ")
+                if text:
+                    print("  [Thinking...]")
+                    reply = self.respond(text)
+                    print(f"\n  📞 {self.caller[0].upper()}: {reply}\n")
+                    self.speak(reply)
+            else:
+                print("  Commands: r=record, t=type, 1/2/3=caller, q=quit")
+
+if __name__ == "__main__":
+    radio = SimpleRadio()
+    radio.run()