#!/usr/bin/env python3
"""
Simplified Radio Show - for debugging
"""

import os
import sys
from pathlib import Path
from typing import NamedTuple, Optional

import numpy as np
import sounddevice as sd
import soundfile as sf
from faster_whisper import WhisperModel
from scipy.signal import butter, filtfilt
from dotenv import load_dotenv

load_dotenv()

# Sample rate (Hz) used for recording, for the TTS PCM output and for playback.
SAMPLE_RATE = 24000

# Written as an escape so the icon survives any re-encoding of this file.
PHONE_ICON = "\N{TELEPHONE RECEIVER}"


class Caller(NamedTuple):
    """One caller persona; still indexable as a plain (name, voice_id, prompt) tuple."""

    name: str       # display name shown in the console
    voice_id: str   # passed as ``voice_id`` to the ElevenLabs text-to-speech call
    prompt: str     # sent as the system message of every chat request


# Keyed by the console command that selects the caller.
CALLERS = {
    "1": Caller(
        "Big Tony",
        "IKne3meq5aSn9XLyUdCD",
        "You are Big Tony, a loud Italian guy from Staten Island. Swear naturally, be opinionated. Keep it to 2 sentences.",
    ),
    "2": Caller(
        "Drunk Diane",
        "FGY2WhTYpPnrIDTdsKH5",
        "You are Drunk Diane, tipsy woman at a bar. Ramble a bit, be funny. Keep it to 2 sentences.",
    ),
    "3": Caller(
        "Stoner Phil",
        "bIHbv24MWmeRgasZH58o",
        "You are Stoner Phil, super chill stoner dude. Speak slow, be spacey but profound. Keep it to 2 sentences.",
    ),
}


def phone_filter(audio):
    """Make audio sound like a phone line.

    Applies a 4th-order 300-3400 Hz Butterworth band-pass (zero-phase, via
    ``filtfilt``) followed by tanh soft-clipping.

    Args:
        audio: Array of samples at ``SAMPLE_RATE``; it is flattened to 1-D.

    Returns:
        A 1-D ``float32`` array with the same number of samples.
    """
    nyquist = SAMPLE_RATE / 2
    b, a = butter(4, [300 / nyquist, 3400 / nyquist], btype="band")
    samples = np.asarray(audio).flatten()

    # filtfilt pads each edge with 3 * max(len(a), len(b)) samples by default
    # and raises ValueError when the input is not longer than that. Such a
    # clip (including an empty one) skips the band-pass and is only saturated.
    if samples.size > 3 * max(len(a), len(b)):
        samples = filtfilt(b, a, samples)

    return (np.tanh(samples * 1.5) * 0.8).astype(np.float32)


class SimpleRadio:
    """Console loop: record or type a message, get an LLM reply, speak it as a caller."""

    def __init__(self):
        """Load the Whisper model and set up the ElevenLabs and Ollama clients."""
        print("Loading Whisper...")
        self.whisper = WhisperModel("base", device="cpu", compute_type="int8")

        print("Connecting to ElevenLabs...")
        from elevenlabs.client import ElevenLabs
        self.tts = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))

        print("Connecting to Ollama...")
        import ollama
        self.ollama = ollama

        self.caller = CALLERS["1"]
        self.history = []  # alternating user/assistant chat messages for the current caller
        print("\nReady!\n")

    def record(self) -> Optional[np.ndarray]:
        """Record mono audio from the default input until Enter is pressed.

        Returns:
            The captured chunks stacked into one array, or ``None`` when
            nothing was captured.
        """
        print(" [Recording - press Enter to stop]")
        chunks = []
        recording = True

        def callback(indata, frames, time, status):
            if status:
                # Surface over/underflow flags instead of silently dropping them.
                print(f" [Audio input warning: {status}]", file=sys.stderr)
            if recording:
                chunks.append(indata.copy())

        with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=callback):
            input()  # Wait for Enter
            recording = False  # ignore any callback that fires while the stream closes

        return np.vstack(chunks) if chunks else None

    def transcribe(self, audio) -> str:
        """Transcribe recorded audio to text with Whisper.

        Args:
            audio: Samples captured at ``SAMPLE_RATE``.

        Returns:
            The segment texts joined with spaces and stripped.
        """
        import librosa

        mono = audio.flatten().astype(np.float32)
        # Resample from the recording rate to 16 kHz before handing to Whisper.
        audio_16k = librosa.resample(mono, orig_sr=SAMPLE_RATE, target_sr=16000)
        segments, _ = self.whisper.transcribe(audio_16k)
        return " ".join(s.text for s in segments).strip()

    def respond(self, text) -> str:
        """Ask the LLM for the current caller's reply to ``text``.

        The request carries the caller's system prompt plus the last six
        messages (including this one). Both the user message and the reply are
        appended to ``self.history``, but only after the chat call succeeds,
        so a failed call leaves the history unchanged.

        Args:
            text: What the host said.

        Returns:
            The caller's reply text.
        """
        user_message = {"role": "user", "content": text}
        recent = (self.history + [user_message])[-6:]
        response = self.ollama.chat(
            model="llama3.2:latest",
            messages=[{"role": "system", "content": self.caller.prompt}] + recent,
            options={"temperature": 0.9},
        )
        reply = response["message"]["content"]
        self.history.append(user_message)
        self.history.append({"role": "assistant", "content": reply})
        return reply

    def speak(self, text):
        """Synthesize ``text`` in the caller's voice, phone-filter it and play it.

        Blocks until playback has finished. Nothing is played when the TTS
        stream yields no audio.
        """
        print(" [Generating voice...]")
        audio_gen = self.tts.text_to_speech.convert(
            voice_id=self.caller.voice_id,
            text=text,
            model_id="eleven_turbo_v2_5",
            output_format="pcm_24000",
        )
        audio_bytes = b"".join(audio_gen)

        # The stream is decoded as 16-bit samples; np.frombuffer raises on a
        # buffer that is not a multiple of 2 bytes, so drop a stray last byte.
        audio_bytes = audio_bytes[: len(audio_bytes) - len(audio_bytes) % 2]
        if not audio_bytes:
            print(" [No audio returned]")
            return

        audio = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        filtered = phone_filter(audio)

        print(" [Playing...]")
        sd.play(filtered, SAMPLE_RATE)
        sd.wait()

    def _reply_and_speak(self, text):
        """Get the caller's reply to ``text``, print it and speak it."""
        print(" [Thinking...]")
        reply = self.respond(text)
        print(f"\n {PHONE_ICON} {self.caller.name.upper()}: {reply}\n")
        self.speak(reply)

    def run(self):
        """Run the interactive command loop until 'q', EOF or Ctrl-C at the prompt."""
        print("=" * 50)
        print(" SIMPLE RADIO - Type commands:")
        print(" 1/2/3 = switch caller")
        print(" r = record & respond")
        print(" t = type message (skip recording)")
        print(" q = quit")
        print("=" * 50)
        print(f"\nCaller: {self.caller.name}\n")

        while True:
            try:
                cmd = input("> ").strip().lower()
            except (EOFError, KeyboardInterrupt):
                print()
                break

            if cmd == "q":
                break
            elif cmd in CALLERS:
                # Exact key match: a substring test against "123" would also
                # accept "", "12", "23" and "123" and then fail the lookup.
                self.caller = CALLERS[cmd]
                self.history = []
                print(f"\n{PHONE_ICON} Switched to: {self.caller.name}\n")
            elif cmd == "r":
                audio = self.record()
                if audio is None:
                    continue
                print(" [Transcribing...]")
                text = self.transcribe(audio)
                print(f"\n YOU: {text}\n")
                if text:
                    self._reply_and_speak(text)
            elif cmd == "t":
                text = input(" Type message: ")
                if text:
                    self._reply_and_speak(text)
            else:
                print(" Commands: r=record, t=type, 1/2/3=caller, q=quit")


if __name__ == "__main__":
    radio = SimpleRadio()
    radio.run()