Improve SignalWire streaming, randomize caller names, update frontend
- Add streamSid tracking and per-caller send locks for SignalWire
- Improve TTS streaming with real-time pacing and detailed logging
- Block host audio to caller during TTS playback
- Randomize caller names between sessions from name pools
- Update page title and show phone number in UI

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@
|
|||||||
"input_channel": 1,
|
"input_channel": 1,
|
||||||
"output_device": 13,
|
"output_device": 13,
|
||||||
"caller_channel": 3,
|
"caller_channel": 3,
|
||||||
|
"live_caller_channel": 9,
|
||||||
"music_channel": 5,
|
"music_channel": 5,
|
||||||
"sfx_channel": 7,
|
"sfx_channel": 7,
|
||||||
"phone_filter": false
|
"phone_filter": false
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ class Settings(BaseSettings):
|
|||||||
signalwire_space: str = os.getenv("SIGNALWIRE_SPACE", "")
|
signalwire_space: str = os.getenv("SIGNALWIRE_SPACE", "")
|
||||||
signalwire_token: str = os.getenv("SIGNALWIRE_TOKEN", "")
|
signalwire_token: str = os.getenv("SIGNALWIRE_TOKEN", "")
|
||||||
signalwire_phone: str = os.getenv("SIGNALWIRE_PHONE", "")
|
signalwire_phone: str = os.getenv("SIGNALWIRE_PHONE", "")
|
||||||
|
signalwire_stream_url: str = os.getenv("SIGNALWIRE_STREAM_URL", "")
|
||||||
|
|
||||||
# LLM Settings
|
# LLM Settings
|
||||||
llm_provider: str = "openrouter" # "openrouter" or "ollama"
|
llm_provider: str = "openrouter" # "openrouter" or "ollama"
|
||||||
@@ -28,7 +29,7 @@ class Settings(BaseSettings):
|
|||||||
ollama_host: str = "http://localhost:11434"
|
ollama_host: str = "http://localhost:11434"
|
||||||
|
|
||||||
# TTS Settings
|
# TTS Settings
|
||||||
tts_provider: str = "kokoro" # "kokoro", "elevenlabs", "vits", or "bark"
|
tts_provider: str = "inworld" # "kokoro", "elevenlabs", "inworld", "vits", or "bark"
|
||||||
|
|
||||||
# Audio Settings
|
# Audio Settings
|
||||||
sample_rate: int = 24000
|
sample_rate: int = 24000
|
||||||
|
|||||||
@@ -40,19 +40,47 @@ app.add_middleware(
|
|||||||
# Base caller info (name, voice) - backgrounds generated dynamically per session
|
# Base caller info (name, voice) - backgrounds generated dynamically per session
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
MALE_NAMES = [
|
||||||
|
"Tony", "Rick", "Dennis", "Earl", "Marcus", "Keith", "Darnell", "Wayne",
|
||||||
|
"Greg", "Andre", "Ray", "Jerome", "Hector", "Travis", "Vince", "Leon",
|
||||||
|
"Dale", "Frank", "Terrence", "Bobby", "Cliff", "Nate", "Reggie", "Carl",
|
||||||
|
]
|
||||||
|
|
||||||
|
FEMALE_NAMES = [
|
||||||
|
"Jasmine", "Megan", "Tanya", "Carla", "Brenda", "Sheila", "Denise", "Tamika",
|
||||||
|
"Lorraine", "Crystal", "Angie", "Renee", "Monique", "Gina", "Patrice", "Deb",
|
||||||
|
"Shonda", "Marlene", "Yolanda", "Stacy", "Jackie", "Carmen", "Rita", "Val",
|
||||||
|
]
|
||||||
|
|
||||||
CALLER_BASES = {
|
CALLER_BASES = {
|
||||||
"1": {"name": "Tony", "voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)},
|
"1": {"voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)},
|
||||||
"2": {"name": "Jasmine", "voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)},
|
"2": {"voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)},
|
||||||
"3": {"name": "Rick", "voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)},
|
"3": {"voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)},
|
||||||
"4": {"name": "Megan", "voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)},
|
"4": {"voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)},
|
||||||
"5": {"name": "Dennis", "voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)},
|
"5": {"voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)},
|
||||||
"6": {"name": "Tanya", "voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)},
|
"6": {"voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)},
|
||||||
"7": {"name": "Earl", "voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)},
|
"7": {"voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)},
|
||||||
"8": {"name": "Carla", "voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)},
|
"8": {"voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)},
|
||||||
"9": {"name": "Marcus", "voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)},
|
"9": {"voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)},
|
||||||
"0": {"name": "Brenda", "voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)},
|
"0": {"voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _randomize_caller_names():
|
||||||
|
"""Assign random names to callers, unique per gender."""
|
||||||
|
males = random.sample(MALE_NAMES, sum(1 for c in CALLER_BASES.values() if c["gender"] == "male"))
|
||||||
|
females = random.sample(FEMALE_NAMES, sum(1 for c in CALLER_BASES.values() if c["gender"] == "female"))
|
||||||
|
mi, fi = 0, 0
|
||||||
|
for base in CALLER_BASES.values():
|
||||||
|
if base["gender"] == "male":
|
||||||
|
base["name"] = males[mi]
|
||||||
|
mi += 1
|
||||||
|
else:
|
||||||
|
base["name"] = females[fi]
|
||||||
|
fi += 1
|
||||||
|
|
||||||
|
_randomize_caller_names() # Initial assignment
|
||||||
|
|
||||||
# Background components for dynamic generation
|
# Background components for dynamic generation
|
||||||
JOBS_MALE = [
|
JOBS_MALE = [
|
||||||
"runs a small HVAC business", "works as a long-haul trucker", "is a high school football coach",
|
"runs a small HVAC business", "works as a long-haul trucker", "is a high school football coach",
|
||||||
@@ -431,8 +459,10 @@ class Session:
|
|||||||
if self._research_task and not self._research_task.done():
|
if self._research_task and not self._research_task.done():
|
||||||
self._research_task.cancel()
|
self._research_task.cancel()
|
||||||
self._research_task = None
|
self._research_task = None
|
||||||
|
_randomize_caller_names()
|
||||||
self.id = str(uuid.uuid4())[:8]
|
self.id = str(uuid.uuid4())[:8]
|
||||||
print(f"[Session] Reset - new session ID: {self.id}")
|
names = [CALLER_BASES[k]["name"] for k in sorted(CALLER_BASES.keys())]
|
||||||
|
print(f"[Session] Reset - new session ID: {self.id}, callers: {', '.join(names)}")
|
||||||
|
|
||||||
|
|
||||||
session = Session()
|
session = Session()
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
"""Phone caller queue and audio stream service"""
|
"""Phone caller queue and audio stream service"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
import time
|
import time
|
||||||
import threading
|
import threading
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -20,7 +22,20 @@ class CallerService:
|
|||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
self._websockets: dict[str, any] = {} # caller_id -> WebSocket
|
self._websockets: dict[str, any] = {} # caller_id -> WebSocket
|
||||||
self._call_sids: dict[str, str] = {} # caller_id -> SignalWire callSid
|
self._call_sids: dict[str, str] = {} # caller_id -> SignalWire callSid
|
||||||
self.streaming_tts: bool = False # True while TTS audio is being streamed
|
self._stream_sids: dict[str, str] = {} # caller_id -> SignalWire streamSid
|
||||||
|
self._send_locks: dict[str, asyncio.Lock] = {} # per-caller send lock
|
||||||
|
self._streaming_tts: set[str] = set() # caller_ids currently receiving TTS
|
||||||
|
|
||||||
|
def _get_send_lock(self, caller_id: str) -> asyncio.Lock:
|
||||||
|
if caller_id not in self._send_locks:
|
||||||
|
self._send_locks[caller_id] = asyncio.Lock()
|
||||||
|
return self._send_locks[caller_id]
|
||||||
|
|
||||||
|
def is_streaming_tts(self, caller_id: str) -> bool:
|
||||||
|
return caller_id in self._streaming_tts
|
||||||
|
|
||||||
|
def is_streaming_tts_any(self) -> bool:
|
||||||
|
return len(self._streaming_tts) > 0
|
||||||
|
|
||||||
def add_to_queue(self, caller_id: str, phone: str):
|
def add_to_queue(self, caller_id: str, phone: str):
|
||||||
with self._lock:
|
with self._lock:
|
||||||
@@ -94,6 +109,8 @@ class CallerService:
|
|||||||
print(f"[Caller] {call_info['phone']} hung up — channel {call_info['channel']} released")
|
print(f"[Caller] {call_info['phone']} hung up — channel {call_info['channel']} released")
|
||||||
self._websockets.pop(caller_id, None)
|
self._websockets.pop(caller_id, None)
|
||||||
self._call_sids.pop(caller_id, None)
|
self._call_sids.pop(caller_id, None)
|
||||||
|
self._stream_sids.pop(caller_id, None)
|
||||||
|
self._send_locks.pop(caller_id, None)
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
with self._lock:
|
with self._lock:
|
||||||
@@ -105,6 +122,9 @@ class CallerService:
|
|||||||
self._caller_counter = 0
|
self._caller_counter = 0
|
||||||
self._websockets.clear()
|
self._websockets.clear()
|
||||||
self._call_sids.clear()
|
self._call_sids.clear()
|
||||||
|
self._stream_sids.clear()
|
||||||
|
self._send_locks.clear()
|
||||||
|
self._streaming_tts.clear()
|
||||||
print("[Caller] Service reset")
|
print("[Caller] Service reset")
|
||||||
|
|
||||||
def register_websocket(self, caller_id: str, websocket):
|
def register_websocket(self, caller_id: str, websocket):
|
||||||
@@ -119,29 +139,34 @@ class CallerService:
|
|||||||
"""Send small audio chunk to caller via SignalWire WebSocket.
|
"""Send small audio chunk to caller via SignalWire WebSocket.
|
||||||
Encodes L16 PCM as base64 JSON per SignalWire protocol.
|
Encodes L16 PCM as base64 JSON per SignalWire protocol.
|
||||||
"""
|
"""
|
||||||
|
if caller_id in self._streaming_tts:
|
||||||
|
return # Don't send host audio during TTS streaming
|
||||||
|
|
||||||
ws = self._websockets.get(caller_id)
|
ws = self._websockets.get(caller_id)
|
||||||
if not ws:
|
if not ws:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
lock = self._get_send_lock(caller_id)
|
||||||
import base64
|
async with lock:
|
||||||
if sample_rate != 16000:
|
try:
|
||||||
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
|
if sample_rate != 16000:
|
||||||
ratio = 16000 / sample_rate
|
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
|
||||||
out_len = int(len(audio) * ratio)
|
ratio = 16000 / sample_rate
|
||||||
indices = (np.arange(out_len) / ratio).astype(int)
|
out_len = int(len(audio) * ratio)
|
||||||
indices = np.clip(indices, 0, len(audio) - 1)
|
indices = (np.arange(out_len) / ratio).astype(int)
|
||||||
audio = audio[indices]
|
indices = np.clip(indices, 0, len(audio) - 1)
|
||||||
pcm_data = (audio * 32767).astype(np.int16).tobytes()
|
audio = audio[indices]
|
||||||
|
pcm_data = (audio * 32767).astype(np.int16).tobytes()
|
||||||
|
|
||||||
payload = base64.b64encode(pcm_data).decode('ascii')
|
payload = base64.b64encode(pcm_data).decode('ascii')
|
||||||
import json
|
stream_sid = self._stream_sids.get(caller_id, "")
|
||||||
await ws.send_text(json.dumps({
|
await ws.send_text(json.dumps({
|
||||||
"event": "media",
|
"event": "media",
|
||||||
"media": {"payload": payload}
|
"streamSid": stream_sid,
|
||||||
}))
|
"media": {"payload": payload}
|
||||||
except Exception as e:
|
}))
|
||||||
print(f"[Caller] Failed to send audio: {e}")
|
except Exception as e:
|
||||||
|
print(f"[Caller] Failed to send audio: {e}")
|
||||||
|
|
||||||
async def stream_audio_to_caller(self, caller_id: str, pcm_data: bytes, sample_rate: int):
|
async def stream_audio_to_caller(self, caller_id: str, pcm_data: bytes, sample_rate: int):
|
||||||
"""Stream large audio (TTS) to caller in real-time chunks via SignalWire WebSocket."""
|
"""Stream large audio (TTS) to caller in real-time chunks via SignalWire WebSocket."""
|
||||||
@@ -149,10 +174,10 @@ class CallerService:
|
|||||||
if not ws:
|
if not ws:
|
||||||
return
|
return
|
||||||
|
|
||||||
self.streaming_tts = True
|
lock = self._get_send_lock(caller_id)
|
||||||
|
self._streaming_tts.add(caller_id)
|
||||||
|
chunks_sent = 0
|
||||||
try:
|
try:
|
||||||
import base64
|
|
||||||
import json
|
|
||||||
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
|
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
|
||||||
if sample_rate != 16000:
|
if sample_rate != 16000:
|
||||||
ratio = 16000 / sample_rate
|
ratio = 16000 / sample_rate
|
||||||
@@ -161,23 +186,40 @@ class CallerService:
|
|||||||
indices = np.clip(indices, 0, len(audio) - 1)
|
indices = np.clip(indices, 0, len(audio) - 1)
|
||||||
audio = audio[indices]
|
audio = audio[indices]
|
||||||
|
|
||||||
|
total_chunks = (len(audio) + 959) // 960
|
||||||
|
duration_s = len(audio) / 16000
|
||||||
|
print(f"[Caller] TTS stream starting: {duration_s:.1f}s audio, {total_chunks} chunks")
|
||||||
|
|
||||||
chunk_samples = 960
|
chunk_samples = 960
|
||||||
|
chunk_duration = chunk_samples / 16000 # 60ms per chunk
|
||||||
|
|
||||||
for i in range(0, len(audio), chunk_samples):
|
for i in range(0, len(audio), chunk_samples):
|
||||||
if caller_id not in self._websockets:
|
if caller_id not in self._websockets:
|
||||||
|
print(f"[Caller] TTS stream aborted: caller {caller_id} disconnected at chunk {chunks_sent}/{total_chunks}")
|
||||||
break
|
break
|
||||||
|
t0 = time.time()
|
||||||
chunk = audio[i:i + chunk_samples]
|
chunk = audio[i:i + chunk_samples]
|
||||||
pcm_chunk = (chunk * 32767).astype(np.int16).tobytes()
|
pcm_chunk = (chunk * 32767).astype(np.int16).tobytes()
|
||||||
payload = base64.b64encode(pcm_chunk).decode('ascii')
|
payload = base64.b64encode(pcm_chunk).decode('ascii')
|
||||||
await ws.send_text(json.dumps({
|
stream_sid = self._stream_sids.get(caller_id, "")
|
||||||
"event": "media",
|
async with lock:
|
||||||
"media": {"payload": payload}
|
await ws.send_text(json.dumps({
|
||||||
}))
|
"event": "media",
|
||||||
await asyncio.sleep(0.055)
|
"streamSid": stream_sid,
|
||||||
|
"media": {"payload": payload}
|
||||||
|
}))
|
||||||
|
chunks_sent += 1
|
||||||
|
# Sleep to match real-time playback rate
|
||||||
|
elapsed = time.time() - t0
|
||||||
|
sleep_time = max(0, chunk_duration - elapsed)
|
||||||
|
await asyncio.sleep(sleep_time)
|
||||||
|
|
||||||
|
print(f"[Caller] TTS stream finished: {chunks_sent}/{total_chunks} chunks sent")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[Caller] Failed to stream audio: {e}")
|
print(f"[Caller] TTS stream failed at chunk {chunks_sent}: {e}")
|
||||||
finally:
|
finally:
|
||||||
self.streaming_tts = False
|
self._streaming_tts.discard(caller_id)
|
||||||
|
|
||||||
def register_call_sid(self, caller_id: str, call_sid: str):
|
def register_call_sid(self, caller_id: str, call_sid: str):
|
||||||
"""Track SignalWire callSid for a caller"""
|
"""Track SignalWire callSid for a caller"""
|
||||||
@@ -190,3 +232,11 @@ class CallerService:
|
|||||||
def unregister_call_sid(self, caller_id: str):
|
def unregister_call_sid(self, caller_id: str):
|
||||||
"""Remove callSid tracking"""
|
"""Remove callSid tracking"""
|
||||||
self._call_sids.pop(caller_id, None)
|
self._call_sids.pop(caller_id, None)
|
||||||
|
|
||||||
|
def register_stream_sid(self, caller_id: str, stream_sid: str):
|
||||||
|
"""Track SignalWire streamSid for a caller"""
|
||||||
|
self._stream_sids[caller_id] = stream_sid
|
||||||
|
|
||||||
|
def unregister_stream_sid(self, caller_id: str):
|
||||||
|
"""Remove streamSid tracking"""
|
||||||
|
self._stream_sids.pop(caller_id, None)
|
||||||
|
|||||||
@@ -3,13 +3,13 @@
|
|||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<title>AI Radio Show</title>
|
<title>Luke at The Roost</title>
|
||||||
<link rel="stylesheet" href="/css/style.css">
|
<link rel="stylesheet" href="/css/style.css">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="app">
|
<div id="app">
|
||||||
<header>
|
<header>
|
||||||
<h1>AI Radio Show</h1>
|
<h1>Luke at The Roost</h1>
|
||||||
<div class="header-buttons">
|
<div class="header-buttons">
|
||||||
<button id="new-session-btn" class="new-session-btn">New Session</button>
|
<button id="new-session-btn" class="new-session-btn">New Session</button>
|
||||||
<button id="settings-btn">Settings</button>
|
<button id="settings-btn">Settings</button>
|
||||||
@@ -53,7 +53,7 @@
|
|||||||
|
|
||||||
<!-- Call Queue -->
|
<!-- Call Queue -->
|
||||||
<section class="queue-section">
|
<section class="queue-section">
|
||||||
<h2>Incoming Calls</h2>
|
<h2>Incoming Calls <span style="font-size:0.6em;font-weight:normal;color:var(--text-muted);">(208) 439-5853</span></h2>
|
||||||
<div id="call-queue" class="call-queue">
|
<div id="call-queue" class="call-queue">
|
||||||
<div class="queue-empty">No callers waiting</div>
|
<div class="queue-empty">No callers waiting</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -207,6 +207,6 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script src="/js/app.js?v=13"></script>
|
<script src="/js/app.js?v=15"></script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
Reference in New Issue
Block a user