Initial commit: AI Radio Show web application
- FastAPI backend with multiple TTS providers (Inworld, ElevenLabs, Kokoro, F5-TTS, etc.)
- Web frontend with caller management, music, and soundboard
- Whisper transcription integration
- OpenRouter/Ollama LLM support
- Castopod podcast publishing script

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
140
radio_simple.py
Normal file
140
radio_simple.py
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simplified Radio Show - for debugging
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import sounddevice as sd
|
||||
import soundfile as sf
|
||||
from faster_whisper import WhisperModel
|
||||
from scipy.signal import butter, filtfilt
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Sample rate in Hz used for microphone capture, the TTS PCM stream, the
# phone filter and playback.
SAMPLE_RATE = 24000

# Caller roster keyed by the digit typed at the prompt.
# Each value is a 3-tuple: (display name, ElevenLabs voice id, system prompt).
CALLERS = {
    "1": (
        "Big Tony",
        "IKne3meq5aSn9XLyUdCD",
        "You are Big Tony, a loud Italian guy from Staten Island. "
        "Swear naturally, be opinionated. Keep it to 2 sentences.",
    ),
    "2": (
        "Drunk Diane",
        "FGY2WhTYpPnrIDTdsKH5",
        "You are Drunk Diane, tipsy woman at a bar. "
        "Ramble a bit, be funny. Keep it to 2 sentences.",
    ),
    "3": (
        "Stoner Phil",
        "bIHbv24MWmeRgasZH58o",
        "You are Stoner Phil, super chill stoner dude. "
        "Speak slow, be spacey but profound. Keep it to 2 sentences.",
    ),
}
|
||||
|
||||
def phone_filter(audio, sample_rate=None):
    """Make audio sound like a telephone line.

    The signal is flattened to one dimension, band-limited to 300-3400 Hz
    with a zero-phase Butterworth band-pass, then soft-clipped with ``tanh``
    and scaled to a peak of 0.8.

    Args:
        audio: Array-like of samples; any shape, it is flattened first.
        sample_rate: Sample rate of ``audio`` in Hz. Defaults to the
            module-level ``SAMPLE_RATE`` when omitted.

    Returns:
        A 1-D ``float32`` array with the same number of samples as ``audio``.
    """
    rate = SAMPLE_RATE if sample_rate is None else sample_rate
    samples = np.asarray(audio).flatten()

    # filtfilt cannot process an empty signal; there is nothing to filter.
    if samples.size == 0:
        return samples.astype(np.float32)

    nyquist = rate / 2
    b, a = butter(4, [300 / nyquist, 3400 / nyquist], btype='band')

    # filtfilt raises ValueError unless the signal is longer than its edge
    # padding. Its default padding is 3 * max(len(a), len(b)); clamp it so
    # very short clips are still filtered instead of crashing.
    padlen = min(3 * max(len(a), len(b)), samples.size - 1)
    band_limited = filtfilt(b, a, samples, padlen=padlen)

    return (np.tanh(band_limited * 1.5) * 0.8).astype(np.float32)
|
||||
|
||||
class SimpleRadio:
    """Minimal interactive call-in loop for debugging the radio pipeline.

    A turn takes host input (recorded from the microphone and transcribed
    with Whisper, or typed), asks an Ollama chat model to answer using the
    selected caller's system prompt, then synthesises the reply with
    ElevenLabs and plays it through ``phone_filter``.

    Attributes set by ``__init__``:
        whisper: ``WhisperModel`` used by ``transcribe``.
        tts: ElevenLabs client used by ``speak``.
        ollama: the ``ollama`` module, used by ``respond``.
        caller: the active ``CALLERS`` entry (name, voice id, system prompt).
        history: list of ``{"role", "content"}`` chat messages for the
            active caller; reset whenever the caller is switched.
    """

    def __init__(self):
        """Load the Whisper model and set up the TTS and LLM clients."""
        print("Loading Whisper...")
        self.whisper = WhisperModel("base", device="cpu", compute_type="int8")

        print("Connecting to ElevenLabs...")
        # Imported here rather than at module level, as in the original.
        from elevenlabs.client import ElevenLabs
        self.tts = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))

        print("Connecting to Ollama...")
        import ollama
        self.ollama = ollama

        self.caller = CALLERS["1"]
        self.history = []
        print("\nReady!\n")

    def record(self):
        """Record mono audio from the default input until Enter is pressed.

        Returns:
            The captured frames stacked into one array, or ``None`` when the
            stream delivered no data.
        """
        print(" [Recording - press Enter to stop]")
        chunks = []

        def callback(indata, _frames, _time, _status):
            # The buffer handed to the callback is reused, so keep a copy.
            chunks.append(indata.copy())

        # Leaving the ``with`` block stops and closes the stream, which ends
        # the callbacks; no separate "recording" flag is needed.
        with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=callback):
            input()  # Wait for Enter

        return np.vstack(chunks) if chunks else None

    def transcribe(self, audio):
        """Transcribe recorded audio to text with Whisper.

        Args:
            audio: Samples captured at ``SAMPLE_RATE``; flattened and
                resampled to 16 kHz before transcription.

        Returns:
            The segment texts joined with spaces and stripped; an empty
            string when nothing was recognised.
        """
        import librosa

        mono = audio.flatten().astype(np.float32)
        audio_16k = librosa.resample(mono, orig_sr=SAMPLE_RATE, target_sr=16000)
        segments, _ = self.whisper.transcribe(audio_16k)
        return " ".join(segment.text for segment in segments).strip()

    def respond(self, text):
        """Ask the LLM for the active caller's reply to ``text``.

        The request carries the caller's system prompt followed by the last
        six history messages (including this one). On success both the user
        message and the reply are kept in ``self.history``.

        Args:
            text: What the host said.

        Returns:
            The reply text produced by the model.

        Raises:
            Whatever the Ollama call raises. In that case the user message
            is removed again so ``history`` never holds an unanswered turn.
        """
        self.history.append({"role": "user", "content": text})
        try:
            response = self.ollama.chat(
                model="llama3.2:latest",
                messages=[{"role": "system", "content": self.caller[2]}] + self.history[-6:],
                options={"temperature": 0.9},
            )
            reply = response["message"]["content"]
        except Exception:
            # Roll back the unanswered turn, then let the error propagate.
            self.history.pop()
            raise

        self.history.append({"role": "assistant", "content": reply})
        return reply

    def speak(self, text):
        """Synthesise ``text`` in the active caller's voice and play it.

        The audio is requested as 24 kHz 16-bit PCM, converted to floats in
        [-1, 1), passed through ``phone_filter`` and played; the call blocks
        until playback finishes.
        """
        print(" [Generating voice...]")
        audio_gen = self.tts.text_to_speech.convert(
            voice_id=self.caller[1],
            text=text,
            model_id="eleven_turbo_v2_5",
            output_format="pcm_24000",
        )

        audio_bytes = b"".join(audio_gen)
        if not audio_bytes:
            # Nothing to filter or play; an empty signal would make the
            # filter step fail.
            print(" [No audio returned]")
            return

        audio = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        filtered = phone_filter(audio)

        print(" [Playing...]")
        sd.play(filtered, SAMPLE_RATE)
        sd.wait()

    def _converse(self, text):
        """Run one LLM + speech turn for ``text``; do nothing if it is empty."""
        if not text:
            return
        print(" [Thinking...]")
        reply = self.respond(text)
        print(f"\n 📞 {self.caller[0].upper()}: {reply}\n")
        self.speak(reply)

    def run(self):
        """Run the interactive prompt until ``q`` or end-of-input.

        Commands: a ``CALLERS`` key switches caller and clears the history,
        ``r`` records and responds, ``t`` responds to a typed message, ``q``
        quits. Anything else, including an empty line, prints the help line.
        """
        print("=" * 50)
        print(" SIMPLE RADIO - Type commands:")
        print(" 1/2/3 = switch caller")
        print(" r = record & respond")
        print(" t = type message (skip recording)")
        print(" q = quit")
        print("=" * 50)
        print(f"\nCaller: {self.caller[0]}\n")

        while True:
            try:
                cmd = input("> ").strip().lower()
            except EOFError:
                # Closed stdin (e.g. Ctrl-D) is treated as quit.
                break

            if cmd == 'q':
                break
            elif cmd in CALLERS:
                # Key lookup, not ``cmd in '123'``: that substring test also
                # accepted '', '12', '23' and '123' and then raised KeyError.
                self.caller = CALLERS[cmd]
                self.history = []
                print(f"\n📞 Switched to: {self.caller[0]}\n")
            elif cmd == 'r':
                audio = self.record()
                if audio is not None:
                    print(" [Transcribing...]")
                    text = self.transcribe(audio)
                    print(f"\n YOU: {text}\n")
                    self._converse(text)
            elif cmd == 't':
                self._converse(input(" Type message: "))
            else:
                print(" Commands: r=record, t=type, 1/2/3=caller, q=quit")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: build the radio and enter its prompt loop.
    SimpleRadio().run()
|
||||
Reference in New Issue
Block a user