diff --git a/audio_settings.json b/audio_settings.json index 8e8fc3a..612a442 100644 --- a/audio_settings.json +++ b/audio_settings.json @@ -3,6 +3,7 @@ "input_channel": 1, "output_device": 13, "caller_channel": 3, + "live_caller_channel": 9, "music_channel": 5, "sfx_channel": 7, "phone_filter": false diff --git a/backend/config.py b/backend/config.py index 4e630ff..da30d58 100644 --- a/backend/config.py +++ b/backend/config.py @@ -20,6 +20,7 @@ class Settings(BaseSettings): signalwire_space: str = os.getenv("SIGNALWIRE_SPACE", "") signalwire_token: str = os.getenv("SIGNALWIRE_TOKEN", "") signalwire_phone: str = os.getenv("SIGNALWIRE_PHONE", "") + signalwire_stream_url: str = os.getenv("SIGNALWIRE_STREAM_URL", "") # LLM Settings llm_provider: str = "openrouter" # "openrouter" or "ollama" @@ -28,7 +29,7 @@ class Settings(BaseSettings): ollama_host: str = "http://localhost:11434" # TTS Settings - tts_provider: str = "kokoro" # "kokoro", "elevenlabs", "vits", or "bark" + tts_provider: str = "inworld" # "kokoro", "elevenlabs", "inworld", "vits", or "bark" # Audio Settings sample_rate: int = 24000 diff --git a/backend/main.py b/backend/main.py index 1548017..eed813d 100644 --- a/backend/main.py +++ b/backend/main.py @@ -40,19 +40,47 @@ app.add_middleware( # Base caller info (name, voice) - backgrounds generated dynamically per session import random +MALE_NAMES = [ + "Tony", "Rick", "Dennis", "Earl", "Marcus", "Keith", "Darnell", "Wayne", + "Greg", "Andre", "Ray", "Jerome", "Hector", "Travis", "Vince", "Leon", + "Dale", "Frank", "Terrence", "Bobby", "Cliff", "Nate", "Reggie", "Carl", +] + +FEMALE_NAMES = [ + "Jasmine", "Megan", "Tanya", "Carla", "Brenda", "Sheila", "Denise", "Tamika", + "Lorraine", "Crystal", "Angie", "Renee", "Monique", "Gina", "Patrice", "Deb", + "Shonda", "Marlene", "Yolanda", "Stacy", "Jackie", "Carmen", "Rita", "Val", +] + CALLER_BASES = { - "1": {"name": "Tony", "voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)}, - "2": {"name": "Jasmine", "voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)}, - "3": {"name": "Rick", "voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)}, - "4": {"name": "Megan", "voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)}, - "5": {"name": "Dennis", "voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)}, - "6": {"name": "Tanya", "voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)}, - "7": {"name": "Earl", "voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)}, - "8": {"name": "Carla", "voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)}, - "9": {"name": "Marcus", "voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)}, - "0": {"name": "Brenda", "voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)}, + "1": {"voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)}, + "2": {"voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)}, + "3": {"voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)}, + "4": {"voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)}, + "5": {"voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)}, + "6": {"voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)}, + "7": {"voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)}, + "8": {"voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)}, + "9": {"voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)}, + "0": {"voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)}, } + +def _randomize_caller_names(): + """Assign random names to callers, unique per gender.""" + males = random.sample(MALE_NAMES, sum(1 for c in CALLER_BASES.values() if c["gender"] == "male")) + females = random.sample(FEMALE_NAMES, sum(1 for c in CALLER_BASES.values() if c["gender"] == "female")) + mi, fi = 0, 0 + for base in CALLER_BASES.values(): + if base["gender"] == "male": + base["name"] = males[mi] + mi += 1 + else: + base["name"] = females[fi] + fi += 1 + +_randomize_caller_names() # Initial assignment + # Background components for dynamic generation JOBS_MALE = [ "runs a small HVAC business", "works as a long-haul trucker", "is a high school football coach", @@ -431,8 +459,10 @@ class Session: if self._research_task and not self._research_task.done(): self._research_task.cancel() self._research_task = None + _randomize_caller_names() self.id = str(uuid.uuid4())[:8] - print(f"[Session] Reset - new session ID: {self.id}") + names = [CALLER_BASES[k]["name"] for k in sorted(CALLER_BASES.keys())] + print(f"[Session] Reset - new session ID: {self.id}, callers: {', '.join(names)}") session = Session() diff --git a/backend/services/caller_service.py b/backend/services/caller_service.py index 836bd71..12870a0 100644 --- a/backend/services/caller_service.py +++ b/backend/services/caller_service.py @@ -1,6 +1,8 @@ """Phone caller queue and audio stream service""" import asyncio +import base64 +import json import time import threading import numpy as np @@ -20,7 +22,20 @@ class CallerService: self._lock = threading.Lock() self._websockets: dict[str, any] = {} # caller_id -> WebSocket self._call_sids: dict[str, str] = {} # caller_id -> SignalWire callSid - self.streaming_tts: bool = False # True while TTS audio is being streamed + self._stream_sids: dict[str, str] = {} # caller_id -> SignalWire streamSid + self._send_locks: dict[str, asyncio.Lock] = {} # per-caller send lock + self._streaming_tts: set[str] = set() # caller_ids currently receiving TTS + + def _get_send_lock(self, caller_id: str) -> asyncio.Lock: + if caller_id not in self._send_locks: + self._send_locks[caller_id] = asyncio.Lock() + return self._send_locks[caller_id] + + def is_streaming_tts(self, caller_id: str) -> bool: + return caller_id in self._streaming_tts + + def is_streaming_tts_any(self) -> bool: + return len(self._streaming_tts) > 0 def add_to_queue(self, caller_id: str, phone: str): with self._lock: @@ -94,6 +109,8 @@ class CallerService: print(f"[Caller] {call_info['phone']} hung up — channel {call_info['channel']} released") self._websockets.pop(caller_id, None) self._call_sids.pop(caller_id, None) + self._stream_sids.pop(caller_id, None) + self._send_locks.pop(caller_id, None) def reset(self): with self._lock: @@ -105,6 +122,9 @@ class CallerService: self._caller_counter = 0 self._websockets.clear() self._call_sids.clear() + self._stream_sids.clear() + self._send_locks.clear() + self._streaming_tts.clear() print("[Caller] Service reset") def register_websocket(self, caller_id: str, websocket): @@ -119,29 +139,34 @@ class CallerService: """Send small audio chunk to caller via SignalWire WebSocket. Encodes L16 PCM as base64 JSON per SignalWire protocol. """ + if caller_id in self._streaming_tts: + return # Don't send host audio during TTS streaming + ws = self._websockets.get(caller_id) if not ws: return - try: - import base64 - if sample_rate != 16000: - audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0 - ratio = 16000 / sample_rate - out_len = int(len(audio) * ratio) - indices = (np.arange(out_len) / ratio).astype(int) - indices = np.clip(indices, 0, len(audio) - 1) - audio = audio[indices] - pcm_data = (audio * 32767).astype(np.int16).tobytes() + lock = self._get_send_lock(caller_id) + async with lock: + try: + if sample_rate != 16000: + audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0 + ratio = 16000 / sample_rate + out_len = int(len(audio) * ratio) + indices = (np.arange(out_len) / ratio).astype(int) + indices = np.clip(indices, 0, len(audio) - 1) + audio = audio[indices] + pcm_data = (audio * 32767).astype(np.int16).tobytes() - payload = base64.b64encode(pcm_data).decode('ascii') - import json - await ws.send_text(json.dumps({ - "event": "media", - "media": {"payload": payload} - })) - except Exception as e: - print(f"[Caller] Failed to send audio: {e}") + payload = base64.b64encode(pcm_data).decode('ascii') + stream_sid = self._stream_sids.get(caller_id, "") + await ws.send_text(json.dumps({ + "event": "media", + "streamSid": stream_sid, + "media": {"payload": payload} + })) + except Exception as e: + print(f"[Caller] Failed to send audio: {e}") async def stream_audio_to_caller(self, caller_id: str, pcm_data: bytes, sample_rate: int): """Stream large audio (TTS) to caller in real-time chunks via SignalWire WebSocket.""" @@ -149,10 +174,10 @@ class CallerService: if not ws: return - self.streaming_tts = True + lock = self._get_send_lock(caller_id) + self._streaming_tts.add(caller_id) + chunks_sent = 0 try: - import base64 - import json audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0 if sample_rate != 16000: ratio = 16000 / sample_rate @@ -161,23 +186,40 @@ class CallerService: indices = np.clip(indices, 0, len(audio) - 1) audio = audio[indices] + total_chunks = (len(audio) + 959) // 960 + duration_s = len(audio) / 16000 + print(f"[Caller] TTS stream starting: {duration_s:.1f}s audio, {total_chunks} chunks") + chunk_samples = 960 + chunk_duration = chunk_samples / 16000 # 60ms per chunk + for i in range(0, len(audio), chunk_samples): if caller_id not in self._websockets: + print(f"[Caller] TTS stream aborted: caller {caller_id} disconnected at chunk {chunks_sent}/{total_chunks}") break + t0 = time.time() chunk = audio[i:i + chunk_samples] pcm_chunk = (chunk * 32767).astype(np.int16).tobytes() payload = base64.b64encode(pcm_chunk).decode('ascii') - await ws.send_text(json.dumps({ - "event": "media", - "media": {"payload": payload} - })) - await asyncio.sleep(0.055) + stream_sid = self._stream_sids.get(caller_id, "") + async with lock: + await ws.send_text(json.dumps({ + "event": "media", + "streamSid": stream_sid, + "media": {"payload": payload} + })) + chunks_sent += 1 + # Sleep to match real-time playback rate + elapsed = time.time() - t0 + sleep_time = max(0, chunk_duration - elapsed) + await asyncio.sleep(sleep_time) + + print(f"[Caller] TTS stream finished: {chunks_sent}/{total_chunks} chunks sent") except Exception as e: - print(f"[Caller] Failed to stream audio: {e}") + print(f"[Caller] TTS stream failed at chunk {chunks_sent}: {e}") finally: - self.streaming_tts = False + self._streaming_tts.discard(caller_id) def register_call_sid(self, caller_id: str, call_sid: str): """Track SignalWire callSid for a caller""" @@ -190,3 +232,11 @@ class CallerService: def unregister_call_sid(self, caller_id: str): """Remove callSid tracking""" self._call_sids.pop(caller_id, None) + + def register_stream_sid(self, caller_id: str, stream_sid: str): + """Track SignalWire streamSid for a caller""" + self._stream_sids[caller_id] = stream_sid + + def unregister_stream_sid(self, caller_id: str): + """Remove streamSid tracking""" + self._stream_sids.pop(caller_id, None) diff --git a/frontend/index.html b/frontend/index.html index 64277bd..6531719 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -3,13 +3,13 @@ - AI Radio Show + Luke at The Roost
-

AI Radio Show

+

Luke at The Roost

@@ -53,7 +53,7 @@
-

Incoming Calls

+

Incoming Calls (208) 439-5853

No callers waiting
@@ -207,6 +207,6 @@
- +