Improve SignalWire streaming, randomize caller names, update frontend

- Add streamSid tracking and per-caller send locks for SignalWire
- Improve TTS streaming with real-time pacing and detailed logging
- Block host audio to caller during TTS playback
- Randomize caller names between sessions from name pools
- Update page title and show phone number in UI

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-06 01:56:05 -07:00
parent b0643d6082
commit a94fc92647
5 changed files with 127 additions and 45 deletions

View File

@@ -3,6 +3,7 @@
"input_channel": 1,
"output_device": 13,
"caller_channel": 3,
"live_caller_channel": 9,
"music_channel": 5,
"sfx_channel": 7,
"phone_filter": false

View File

@@ -20,6 +20,7 @@ class Settings(BaseSettings):
signalwire_space: str = os.getenv("SIGNALWIRE_SPACE", "")
signalwire_token: str = os.getenv("SIGNALWIRE_TOKEN", "")
signalwire_phone: str = os.getenv("SIGNALWIRE_PHONE", "")
signalwire_stream_url: str = os.getenv("SIGNALWIRE_STREAM_URL", "")
# LLM Settings
llm_provider: str = "openrouter" # "openrouter" or "ollama"
@@ -28,7 +29,7 @@ class Settings(BaseSettings):
ollama_host: str = "http://localhost:11434"
# TTS Settings
tts_provider: str = "kokoro" # "kokoro", "elevenlabs", "vits", or "bark"
tts_provider: str = "inworld" # "kokoro", "elevenlabs", "inworld", "vits", or "bark"
# Audio Settings
sample_rate: int = 24000

View File

@@ -40,19 +40,47 @@ app.add_middleware(
# Base caller info (name, voice) - backgrounds generated dynamically per session
import random
# Name pools for caller names. _randomize_caller_names() samples from these
# without replacement, so each pool (24 names) must hold at least as many
# names as there are callers of that gender in CALLER_BASES.
MALE_NAMES = [
"Tony", "Rick", "Dennis", "Earl", "Marcus", "Keith", "Darnell", "Wayne",
"Greg", "Andre", "Ray", "Jerome", "Hector", "Travis", "Vince", "Leon",
"Dale", "Frank", "Terrence", "Bobby", "Cliff", "Nate", "Reggie", "Carl",
]
# Counterpart pool for callers whose "gender" is not "male".
FEMALE_NAMES = [
"Jasmine", "Megan", "Tanya", "Carla", "Brenda", "Sheila", "Denise", "Tamika",
"Lorraine", "Crystal", "Angie", "Renee", "Monique", "Gina", "Patrice", "Deb",
"Shonda", "Marlene", "Yolanda", "Stacy", "Jackie", "Carmen", "Rita", "Val",
]
CALLER_BASES = {
"1": {"name": "Tony", "voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)},
"2": {"name": "Jasmine", "voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)},
"3": {"name": "Rick", "voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)},
"4": {"name": "Megan", "voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)},
"5": {"name": "Dennis", "voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)},
"6": {"name": "Tanya", "voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)},
"7": {"name": "Earl", "voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)},
"8": {"name": "Carla", "voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)},
"9": {"name": "Marcus", "voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)},
"0": {"name": "Brenda", "voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)},
"1": {"voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)},
"2": {"voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)},
"3": {"voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)},
"4": {"voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)},
"5": {"voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)},
"6": {"voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)},
"7": {"voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)},
"8": {"voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)},
"9": {"voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)},
"0": {"voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)},
}
def _randomize_caller_names():
    """Give every caller in CALLER_BASES a freshly drawn random name.

    Names are sampled without replacement from MALE_NAMES and FEMALE_NAMES,
    so no two callers of the same gender share a name. Each entry's "name"
    key is overwritten in place; nothing is returned.
    """
    genders = [info["gender"] for info in CALLER_BASES.values()]
    # Males are sampled first, then females (dict values evaluate in order).
    remaining = {
        True: random.sample(MALE_NAMES, genders.count("male")),
        False: random.sample(FEMALE_NAMES, genders.count("female")),
    }
    for info in CALLER_BASES.values():
        # Any gender other than "male" draws from the female pool.
        info["name"] = remaining[info["gender"] == "male"].pop(0)
_randomize_caller_names() # Initial assignment
# Background components for dynamic generation
JOBS_MALE = [
"runs a small HVAC business", "works as a long-haul trucker", "is a high school football coach",
@@ -431,8 +459,10 @@ class Session:
if self._research_task and not self._research_task.done():
self._research_task.cancel()
self._research_task = None
_randomize_caller_names()
self.id = str(uuid.uuid4())[:8]
print(f"[Session] Reset - new session ID: {self.id}")
names = [CALLER_BASES[k]["name"] for k in sorted(CALLER_BASES.keys())]
print(f"[Session] Reset - new session ID: {self.id}, callers: {', '.join(names)}")
session = Session()

View File

@@ -1,6 +1,8 @@
"""Phone caller queue and audio stream service"""
import asyncio
import base64
import json
import time
import threading
import numpy as np
@@ -20,7 +22,20 @@ class CallerService:
self._lock = threading.Lock()
self._websockets: dict[str, any] = {} # caller_id -> WebSocket
self._call_sids: dict[str, str] = {} # caller_id -> SignalWire callSid
self.streaming_tts: bool = False # True while TTS audio is being streamed
self._stream_sids: dict[str, str] = {} # caller_id -> SignalWire streamSid
self._send_locks: dict[str, asyncio.Lock] = {} # per-caller send lock
self._streaming_tts: set[str] = set() # caller_ids currently receiving TTS
def _get_send_lock(self, caller_id: str) -> asyncio.Lock:
    """Return the asyncio.Lock used to serialize sends for ``caller_id``.

    The lock is created on first request and cached in ``self._send_locks``,
    so repeated calls for the same caller return the same object.
    """
    try:
        return self._send_locks[caller_id]
    except KeyError:
        # First request for this caller: create and remember its lock.
        lock = self._send_locks[caller_id] = asyncio.Lock()
        return lock
def is_streaming_tts(self, caller_id: str) -> bool:
    """Report whether ``caller_id`` is in the set of callers receiving TTS."""
    active_streams = self._streaming_tts
    return caller_id in active_streams
def is_streaming_tts_any(self) -> bool:
    """Report whether at least one caller is currently receiving TTS.

    Returns:
        True when ``self._streaming_tts`` holds any caller id, else False.
    """
    # An empty set is falsy, so truthiness replaces the explicit len() check.
    return bool(self._streaming_tts)
def add_to_queue(self, caller_id: str, phone: str):
with self._lock:
@@ -94,6 +109,8 @@ class CallerService:
print(f"[Caller] {call_info['phone']} hung up — channel {call_info['channel']} released")
self._websockets.pop(caller_id, None)
self._call_sids.pop(caller_id, None)
self._stream_sids.pop(caller_id, None)
self._send_locks.pop(caller_id, None)
def reset(self):
with self._lock:
@@ -105,6 +122,9 @@ class CallerService:
self._caller_counter = 0
self._websockets.clear()
self._call_sids.clear()
self._stream_sids.clear()
self._send_locks.clear()
self._streaming_tts.clear()
print("[Caller] Service reset")
def register_websocket(self, caller_id: str, websocket):
@@ -119,29 +139,34 @@ class CallerService:
"""Send small audio chunk to caller via SignalWire WebSocket.
Encodes L16 PCM as base64 JSON per SignalWire protocol.
"""
if caller_id in self._streaming_tts:
return # Don't send host audio during TTS streaming
ws = self._websockets.get(caller_id)
if not ws:
return
try:
import base64
if sample_rate != 16000:
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
ratio = 16000 / sample_rate
out_len = int(len(audio) * ratio)
indices = (np.arange(out_len) / ratio).astype(int)
indices = np.clip(indices, 0, len(audio) - 1)
audio = audio[indices]
pcm_data = (audio * 32767).astype(np.int16).tobytes()
lock = self._get_send_lock(caller_id)
async with lock:
try:
if sample_rate != 16000:
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
ratio = 16000 / sample_rate
out_len = int(len(audio) * ratio)
indices = (np.arange(out_len) / ratio).astype(int)
indices = np.clip(indices, 0, len(audio) - 1)
audio = audio[indices]
pcm_data = (audio * 32767).astype(np.int16).tobytes()
payload = base64.b64encode(pcm_data).decode('ascii')
import json
await ws.send_text(json.dumps({
"event": "media",
"media": {"payload": payload}
}))
except Exception as e:
print(f"[Caller] Failed to send audio: {e}")
payload = base64.b64encode(pcm_data).decode('ascii')
stream_sid = self._stream_sids.get(caller_id, "")
await ws.send_text(json.dumps({
"event": "media",
"streamSid": stream_sid,
"media": {"payload": payload}
}))
except Exception as e:
print(f"[Caller] Failed to send audio: {e}")
async def stream_audio_to_caller(self, caller_id: str, pcm_data: bytes, sample_rate: int):
"""Stream large audio (TTS) to caller in real-time chunks via SignalWire WebSocket."""
@@ -149,10 +174,10 @@ class CallerService:
if not ws:
return
self.streaming_tts = True
lock = self._get_send_lock(caller_id)
self._streaming_tts.add(caller_id)
chunks_sent = 0
try:
import base64
import json
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
if sample_rate != 16000:
ratio = 16000 / sample_rate
@@ -161,23 +186,40 @@ class CallerService:
indices = np.clip(indices, 0, len(audio) - 1)
audio = audio[indices]
total_chunks = (len(audio) + 959) // 960
duration_s = len(audio) / 16000
print(f"[Caller] TTS stream starting: {duration_s:.1f}s audio, {total_chunks} chunks")
chunk_samples = 960
chunk_duration = chunk_samples / 16000 # 60ms per chunk
for i in range(0, len(audio), chunk_samples):
if caller_id not in self._websockets:
print(f"[Caller] TTS stream aborted: caller {caller_id} disconnected at chunk {chunks_sent}/{total_chunks}")
break
t0 = time.time()
chunk = audio[i:i + chunk_samples]
pcm_chunk = (chunk * 32767).astype(np.int16).tobytes()
payload = base64.b64encode(pcm_chunk).decode('ascii')
await ws.send_text(json.dumps({
"event": "media",
"media": {"payload": payload}
}))
await asyncio.sleep(0.055)
stream_sid = self._stream_sids.get(caller_id, "")
async with lock:
await ws.send_text(json.dumps({
"event": "media",
"streamSid": stream_sid,
"media": {"payload": payload}
}))
chunks_sent += 1
# Sleep to match real-time playback rate
elapsed = time.time() - t0
sleep_time = max(0, chunk_duration - elapsed)
await asyncio.sleep(sleep_time)
print(f"[Caller] TTS stream finished: {chunks_sent}/{total_chunks} chunks sent")
except Exception as e:
print(f"[Caller] Failed to stream audio: {e}")
print(f"[Caller] TTS stream failed at chunk {chunks_sent}: {e}")
finally:
self.streaming_tts = False
self._streaming_tts.discard(caller_id)
def register_call_sid(self, caller_id: str, call_sid: str):
"""Track SignalWire callSid for a caller"""
@@ -190,3 +232,11 @@ class CallerService:
def unregister_call_sid(self, caller_id: str):
    """Forget the callSid stored for ``caller_id``; a no-op when none is stored."""
    if caller_id in self._call_sids:
        del self._call_sids[caller_id]
def register_stream_sid(self, caller_id: str, stream_sid: str):
    """Remember the SignalWire streamSid for ``caller_id``, replacing any earlier one."""
    self._stream_sids.update({caller_id: stream_sid})
def unregister_stream_sid(self, caller_id: str):
    """Drop the streamSid stored for ``caller_id``; a no-op when none is stored."""
    try:
        del self._stream_sids[caller_id]
    except KeyError:
        # Nothing was registered for this caller.
        pass

View File

@@ -3,13 +3,13 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Radio Show</title>
<title>Luke at The Roost</title>
<link rel="stylesheet" href="/css/style.css">
</head>
<body>
<div id="app">
<header>
<h1>AI Radio Show</h1>
<h1>Luke at The Roost</h1>
<div class="header-buttons">
<button id="new-session-btn" class="new-session-btn">New Session</button>
<button id="settings-btn">Settings</button>
@@ -53,7 +53,7 @@
<!-- Call Queue -->
<section class="queue-section">
<h2>Incoming Calls</h2>
<h2>Incoming Calls <span style="font-size:0.6em;font-weight:normal;color:var(--text-muted);">(208) 439-5853</span></h2>
<div id="call-queue" class="call-queue">
<div class="queue-empty">No callers waiting</div>
</div>
@@ -207,6 +207,6 @@
</div>
</div>
<script src="/js/app.js?v=13"></script>
<script src="/js/app.js?v=15"></script>
</body>
</html>