Improve SignalWire streaming, randomize caller names, update frontend

- Add streamSid tracking and per-caller send locks for SignalWire
- Improve TTS streaming with real-time pacing and detailed logging
- Block host audio to caller during TTS playback
- Randomize caller names between sessions from name pools
- Update page title and show phone number in UI

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-06 01:56:05 -07:00
parent b0643d6082
commit a94fc92647
5 changed files with 127 additions and 45 deletions

View File

@@ -3,6 +3,7 @@
"input_channel": 1, "input_channel": 1,
"output_device": 13, "output_device": 13,
"caller_channel": 3, "caller_channel": 3,
"live_caller_channel": 9,
"music_channel": 5, "music_channel": 5,
"sfx_channel": 7, "sfx_channel": 7,
"phone_filter": false "phone_filter": false

View File

@@ -20,6 +20,7 @@ class Settings(BaseSettings):
signalwire_space: str = os.getenv("SIGNALWIRE_SPACE", "") signalwire_space: str = os.getenv("SIGNALWIRE_SPACE", "")
signalwire_token: str = os.getenv("SIGNALWIRE_TOKEN", "") signalwire_token: str = os.getenv("SIGNALWIRE_TOKEN", "")
signalwire_phone: str = os.getenv("SIGNALWIRE_PHONE", "") signalwire_phone: str = os.getenv("SIGNALWIRE_PHONE", "")
signalwire_stream_url: str = os.getenv("SIGNALWIRE_STREAM_URL", "")
# LLM Settings # LLM Settings
llm_provider: str = "openrouter" # "openrouter" or "ollama" llm_provider: str = "openrouter" # "openrouter" or "ollama"
@@ -28,7 +29,7 @@ class Settings(BaseSettings):
ollama_host: str = "http://localhost:11434" ollama_host: str = "http://localhost:11434"
# TTS Settings # TTS Settings
tts_provider: str = "kokoro" # "kokoro", "elevenlabs", "vits", or "bark" tts_provider: str = "inworld" # "kokoro", "elevenlabs", "inworld", "vits", or "bark"
# Audio Settings # Audio Settings
sample_rate: int = 24000 sample_rate: int = 24000

View File

@@ -40,19 +40,47 @@ app.add_middleware(
# Base caller info (name, voice) - backgrounds generated dynamically per session # Base caller info (name, voice) - backgrounds generated dynamically per session
import random import random
# Pool of first names for callers whose gender is "male".
# _randomize_caller_names() draws from this list with random.sample, so it
# must hold at least as many names as there are male entries in CALLER_BASES.
MALE_NAMES = [
"Tony", "Rick", "Dennis", "Earl", "Marcus", "Keith", "Darnell", "Wayne",
"Greg", "Andre", "Ray", "Jerome", "Hector", "Travis", "Vince", "Leon",
"Dale", "Frank", "Terrence", "Bobby", "Cliff", "Nate", "Reggie", "Carl",
]
# Pool of first names for callers whose gender is "female".
# Sampled the same way as MALE_NAMES, so the same minimum-size rule applies.
FEMALE_NAMES = [
"Jasmine", "Megan", "Tanya", "Carla", "Brenda", "Sheila", "Denise", "Tamika",
"Lorraine", "Crystal", "Angie", "Renee", "Monique", "Gina", "Patrice", "Deb",
"Shonda", "Marlene", "Yolanda", "Stacy", "Jackie", "Carmen", "Rita", "Val",
]
CALLER_BASES = { CALLER_BASES = {
"1": {"name": "Tony", "voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)}, "1": {"voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)},
"2": {"name": "Jasmine", "voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)}, "2": {"voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)},
"3": {"name": "Rick", "voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)}, "3": {"voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)},
"4": {"name": "Megan", "voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)}, "4": {"voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)},
"5": {"name": "Dennis", "voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)}, "5": {"voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)},
"6": {"name": "Tanya", "voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)}, "6": {"voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)},
"7": {"name": "Earl", "voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)}, "7": {"voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)},
"8": {"name": "Carla", "voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)}, "8": {"voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)},
"9": {"name": "Marcus", "voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)}, "9": {"voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)},
"0": {"name": "Brenda", "voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)}, "0": {"voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)},
} }
def _randomize_caller_names():
    """Give every entry in CALLER_BASES a freshly drawn random name.

    Names are sampled without replacement from MALE_NAMES / FEMALE_NAMES,
    so two callers of the same gender never share a name. The entries of
    CALLER_BASES are mutated in place (their "name" key is set).
    """
    male_count = sum(1 for entry in CALLER_BASES.values() if entry["gender"] == "male")
    # Males are sampled before females, matching the RNG consumption order
    # callers would observe under a fixed seed.
    male_pool = random.sample(MALE_NAMES, male_count)
    female_count = sum(1 for entry in CALLER_BASES.values() if entry["gender"] == "female")
    female_pool = random.sample(FEMALE_NAMES, female_count)

    for entry in CALLER_BASES.values():
        # Any gender other than "male" draws from the female pool.
        pool = male_pool if entry["gender"] == "male" else female_pool
        # pop(0) hands names out in sampled order.
        entry["name"] = pool.pop(0)
_randomize_caller_names() # Initial assignment
# Background components for dynamic generation # Background components for dynamic generation
JOBS_MALE = [ JOBS_MALE = [
"runs a small HVAC business", "works as a long-haul trucker", "is a high school football coach", "runs a small HVAC business", "works as a long-haul trucker", "is a high school football coach",
@@ -431,8 +459,10 @@ class Session:
if self._research_task and not self._research_task.done(): if self._research_task and not self._research_task.done():
self._research_task.cancel() self._research_task.cancel()
self._research_task = None self._research_task = None
_randomize_caller_names()
self.id = str(uuid.uuid4())[:8] self.id = str(uuid.uuid4())[:8]
print(f"[Session] Reset - new session ID: {self.id}") names = [CALLER_BASES[k]["name"] for k in sorted(CALLER_BASES.keys())]
print(f"[Session] Reset - new session ID: {self.id}, callers: {', '.join(names)}")
session = Session() session = Session()

View File

@@ -1,6 +1,8 @@
"""Phone caller queue and audio stream service""" """Phone caller queue and audio stream service"""
import asyncio import asyncio
import base64
import json
import time import time
import threading import threading
import numpy as np import numpy as np
@@ -20,7 +22,20 @@ class CallerService:
self._lock = threading.Lock() self._lock = threading.Lock()
self._websockets: dict[str, any] = {} # caller_id -> WebSocket self._websockets: dict[str, any] = {} # caller_id -> WebSocket
self._call_sids: dict[str, str] = {} # caller_id -> SignalWire callSid self._call_sids: dict[str, str] = {} # caller_id -> SignalWire callSid
self.streaming_tts: bool = False # True while TTS audio is being streamed self._stream_sids: dict[str, str] = {} # caller_id -> SignalWire streamSid
self._send_locks: dict[str, asyncio.Lock] = {} # per-caller send lock
self._streaming_tts: set[str] = set() # caller_ids currently receiving TTS
def _get_send_lock(self, caller_id: str) -> asyncio.Lock:
    """Return the send lock for ``caller_id``, creating it on first use.

    The lock is stored in ``self._send_locks`` so later calls for the same
    caller get the same ``asyncio.Lock`` instance back.
    """
    existing = self._send_locks.get(caller_id)
    if existing is not None:
        return existing
    # First request for this caller: create and remember a new lock.
    created = asyncio.Lock()
    self._send_locks[caller_id] = created
    return created
def is_streaming_tts(self, caller_id: str) -> bool:
    """Report whether ``caller_id`` is in the set of active TTS streams."""
    active_streams = self._streaming_tts
    return caller_id in active_streams
def is_streaming_tts_any(self) -> bool:
    """Report whether at least one caller is in the active TTS stream set.

    Returns:
        True if ``self._streaming_tts`` is non-empty, False otherwise.
    """
    # Container truthiness is the idiomatic emptiness check (vs len(x) > 0).
    return bool(self._streaming_tts)
def add_to_queue(self, caller_id: str, phone: str): def add_to_queue(self, caller_id: str, phone: str):
with self._lock: with self._lock:
@@ -94,6 +109,8 @@ class CallerService:
print(f"[Caller] {call_info['phone']} hung up — channel {call_info['channel']} released") print(f"[Caller] {call_info['phone']} hung up — channel {call_info['channel']} released")
self._websockets.pop(caller_id, None) self._websockets.pop(caller_id, None)
self._call_sids.pop(caller_id, None) self._call_sids.pop(caller_id, None)
self._stream_sids.pop(caller_id, None)
self._send_locks.pop(caller_id, None)
def reset(self): def reset(self):
with self._lock: with self._lock:
@@ -105,6 +122,9 @@ class CallerService:
self._caller_counter = 0 self._caller_counter = 0
self._websockets.clear() self._websockets.clear()
self._call_sids.clear() self._call_sids.clear()
self._stream_sids.clear()
self._send_locks.clear()
self._streaming_tts.clear()
print("[Caller] Service reset") print("[Caller] Service reset")
def register_websocket(self, caller_id: str, websocket): def register_websocket(self, caller_id: str, websocket):
@@ -119,29 +139,34 @@ class CallerService:
"""Send small audio chunk to caller via SignalWire WebSocket. """Send small audio chunk to caller via SignalWire WebSocket.
Encodes L16 PCM as base64 JSON per SignalWire protocol. Encodes L16 PCM as base64 JSON per SignalWire protocol.
""" """
if caller_id in self._streaming_tts:
return # Don't send host audio during TTS streaming
ws = self._websockets.get(caller_id) ws = self._websockets.get(caller_id)
if not ws: if not ws:
return return
try: lock = self._get_send_lock(caller_id)
import base64 async with lock:
if sample_rate != 16000: try:
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0 if sample_rate != 16000:
ratio = 16000 / sample_rate audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
out_len = int(len(audio) * ratio) ratio = 16000 / sample_rate
indices = (np.arange(out_len) / ratio).astype(int) out_len = int(len(audio) * ratio)
indices = np.clip(indices, 0, len(audio) - 1) indices = (np.arange(out_len) / ratio).astype(int)
audio = audio[indices] indices = np.clip(indices, 0, len(audio) - 1)
pcm_data = (audio * 32767).astype(np.int16).tobytes() audio = audio[indices]
pcm_data = (audio * 32767).astype(np.int16).tobytes()
payload = base64.b64encode(pcm_data).decode('ascii') payload = base64.b64encode(pcm_data).decode('ascii')
import json stream_sid = self._stream_sids.get(caller_id, "")
await ws.send_text(json.dumps({ await ws.send_text(json.dumps({
"event": "media", "event": "media",
"media": {"payload": payload} "streamSid": stream_sid,
})) "media": {"payload": payload}
except Exception as e: }))
print(f"[Caller] Failed to send audio: {e}") except Exception as e:
print(f"[Caller] Failed to send audio: {e}")
async def stream_audio_to_caller(self, caller_id: str, pcm_data: bytes, sample_rate: int): async def stream_audio_to_caller(self, caller_id: str, pcm_data: bytes, sample_rate: int):
"""Stream large audio (TTS) to caller in real-time chunks via SignalWire WebSocket.""" """Stream large audio (TTS) to caller in real-time chunks via SignalWire WebSocket."""
@@ -149,10 +174,10 @@ class CallerService:
if not ws: if not ws:
return return
self.streaming_tts = True lock = self._get_send_lock(caller_id)
self._streaming_tts.add(caller_id)
chunks_sent = 0
try: try:
import base64
import json
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0 audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
if sample_rate != 16000: if sample_rate != 16000:
ratio = 16000 / sample_rate ratio = 16000 / sample_rate
@@ -161,23 +186,40 @@ class CallerService:
indices = np.clip(indices, 0, len(audio) - 1) indices = np.clip(indices, 0, len(audio) - 1)
audio = audio[indices] audio = audio[indices]
total_chunks = (len(audio) + 959) // 960
duration_s = len(audio) / 16000
print(f"[Caller] TTS stream starting: {duration_s:.1f}s audio, {total_chunks} chunks")
chunk_samples = 960 chunk_samples = 960
chunk_duration = chunk_samples / 16000 # 60ms per chunk
for i in range(0, len(audio), chunk_samples): for i in range(0, len(audio), chunk_samples):
if caller_id not in self._websockets: if caller_id not in self._websockets:
print(f"[Caller] TTS stream aborted: caller {caller_id} disconnected at chunk {chunks_sent}/{total_chunks}")
break break
t0 = time.time()
chunk = audio[i:i + chunk_samples] chunk = audio[i:i + chunk_samples]
pcm_chunk = (chunk * 32767).astype(np.int16).tobytes() pcm_chunk = (chunk * 32767).astype(np.int16).tobytes()
payload = base64.b64encode(pcm_chunk).decode('ascii') payload = base64.b64encode(pcm_chunk).decode('ascii')
await ws.send_text(json.dumps({ stream_sid = self._stream_sids.get(caller_id, "")
"event": "media", async with lock:
"media": {"payload": payload} await ws.send_text(json.dumps({
})) "event": "media",
await asyncio.sleep(0.055) "streamSid": stream_sid,
"media": {"payload": payload}
}))
chunks_sent += 1
# Sleep to match real-time playback rate
elapsed = time.time() - t0
sleep_time = max(0, chunk_duration - elapsed)
await asyncio.sleep(sleep_time)
print(f"[Caller] TTS stream finished: {chunks_sent}/{total_chunks} chunks sent")
except Exception as e: except Exception as e:
print(f"[Caller] Failed to stream audio: {e}") print(f"[Caller] TTS stream failed at chunk {chunks_sent}: {e}")
finally: finally:
self.streaming_tts = False self._streaming_tts.discard(caller_id)
def register_call_sid(self, caller_id: str, call_sid: str): def register_call_sid(self, caller_id: str, call_sid: str):
"""Track SignalWire callSid for a caller""" """Track SignalWire callSid for a caller"""
@@ -190,3 +232,11 @@ class CallerService:
def unregister_call_sid(self, caller_id: str): def unregister_call_sid(self, caller_id: str):
"""Remove callSid tracking""" """Remove callSid tracking"""
self._call_sids.pop(caller_id, None) self._call_sids.pop(caller_id, None)
def register_stream_sid(self, caller_id: str, stream_sid: str):
    """Record the SignalWire streamSid for a caller.

    Any streamSid previously stored for ``caller_id`` is overwritten.
    """
    self._stream_sids.update({caller_id: stream_sid})
def unregister_stream_sid(self, caller_id: str):
    """Forget the streamSid stored for a caller.

    Does nothing when no streamSid is recorded for ``caller_id``.
    """
    if caller_id in self._stream_sids:
        del self._stream_sids[caller_id]

View File

@@ -3,13 +3,13 @@
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Radio Show</title> <title>Luke at The Roost</title>
<link rel="stylesheet" href="/css/style.css"> <link rel="stylesheet" href="/css/style.css">
</head> </head>
<body> <body>
<div id="app"> <div id="app">
<header> <header>
<h1>AI Radio Show</h1> <h1>Luke at The Roost</h1>
<div class="header-buttons"> <div class="header-buttons">
<button id="new-session-btn" class="new-session-btn">New Session</button> <button id="new-session-btn" class="new-session-btn">New Session</button>
<button id="settings-btn">Settings</button> <button id="settings-btn">Settings</button>
@@ -53,7 +53,7 @@
<!-- Call Queue --> <!-- Call Queue -->
<section class="queue-section"> <section class="queue-section">
<h2>Incoming Calls</h2> <h2>Incoming Calls <span style="font-size:0.6em;font-weight:normal;color:var(--text-muted);">(208) 439-5853</span></h2>
<div id="call-queue" class="call-queue"> <div id="call-queue" class="call-queue">
<div class="queue-empty">No callers waiting</div> <div class="queue-empty">No callers waiting</div>
</div> </div>
@@ -207,6 +207,6 @@
</div> </div>
</div> </div>
<script src="/js/app.js?v=13"></script> <script src="/js/app.js?v=15"></script>
</body> </body>
</html> </html>