Improve SignalWire streaming, randomize caller names, update frontend

- Add streamSid tracking and per-caller send locks for SignalWire
- Improve TTS streaming with real-time pacing and detailed logging
- Block host audio to caller during TTS playback
- Randomize caller names between sessions from name pools
- Update page title and show phone number in UI

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 01:56:05 -07:00
parent b0643d6082
commit a94fc92647
5 changed files with 127 additions and 45 deletions
@@ -3,6 +3,7 @@
  "input_channel": 1,
  "output_device": 13,
  "caller_channel": 3,
+  "live_caller_channel": 9,
  "music_channel": 5,
  "sfx_channel": 7,
  "phone_filter": false
@@ -20,6 +20,7 @@ class Settings(BaseSettings):
    signalwire_space: str = os.getenv("SIGNALWIRE_SPACE", "")
    signalwire_token: str = os.getenv("SIGNALWIRE_TOKEN", "")
    signalwire_phone: str = os.getenv("SIGNALWIRE_PHONE", "")
+    signalwire_stream_url: str = os.getenv("SIGNALWIRE_STREAM_URL", "")

    # LLM Settings
    llm_provider: str = "openrouter"  # "openrouter" or "ollama"
@@ -28,7 +29,7 @@ class Settings(BaseSettings):
    ollama_host: str = "http://localhost:11434"

    # TTS Settings
-    tts_provider: str = "kokoro"  # "kokoro", "elevenlabs", "vits", or "bark"
+    tts_provider: str = "inworld"  # "kokoro", "elevenlabs", "inworld", "vits", or "bark"

    # Audio Settings
    sample_rate: int = 24000
@@ -40,19 +40,47 @@ app.add_middleware(
 # Base caller info (name, voice) - backgrounds generated dynamically per session
 import random

+MALE_NAMES = [
+    "Tony", "Rick", "Dennis", "Earl", "Marcus", "Keith", "Darnell", "Wayne",
+    "Greg", "Andre", "Ray", "Jerome", "Hector", "Travis", "Vince", "Leon",
+    "Dale", "Frank", "Terrence", "Bobby", "Cliff", "Nate", "Reggie", "Carl",
+]
+
+FEMALE_NAMES = [
+    "Jasmine", "Megan", "Tanya", "Carla", "Brenda", "Sheila", "Denise", "Tamika",
+    "Lorraine", "Crystal", "Angie", "Renee", "Monique", "Gina", "Patrice", "Deb",
+    "Shonda", "Marlene", "Yolanda", "Stacy", "Jackie", "Carmen", "Rita", "Val",
+]
+
 CALLER_BASES = {
-    "1": {"name": "Tony", "voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)},
-    "2": {"name": "Jasmine", "voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)},
-    "3": {"name": "Rick", "voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)},
-    "4": {"name": "Megan", "voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)},
-    "5": {"name": "Dennis", "voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)},
-    "6": {"name": "Tanya", "voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)},
-    "7": {"name": "Earl", "voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)},
-    "8": {"name": "Carla", "voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)},
-    "9": {"name": "Marcus", "voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)},
-    "0": {"name": "Brenda", "voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)},
+    "1": {"voice": "VR6AewLTigWG4xSOukaG", "gender": "male", "age_range": (35, 55)},
+    "2": {"voice": "jBpfuIE2acCO8z3wKNLl", "gender": "female", "age_range": (25, 38)},
+    "3": {"voice": "TxGEqnHWrfWFTfGW9XjX", "gender": "male", "age_range": (40, 58)},
+    "4": {"voice": "EXAVITQu4vr4xnSDxMaL", "gender": "female", "age_range": (24, 35)},
+    "5": {"voice": "pNInz6obpgDQGcFmaJgB", "gender": "male", "age_range": (32, 48)},
+    "6": {"voice": "21m00Tcm4TlvDq8ikWAM", "gender": "female", "age_range": (30, 45)},
+    "7": {"voice": "ODq5zmih8GrVes37Dizd", "gender": "male", "age_range": (58, 72)},
+    "8": {"voice": "XB0fDUnXU5powFXDhCwa", "gender": "female", "age_range": (38, 52)},
+    "9": {"voice": "IKne3meq5aSn9XLyUdCD", "gender": "male", "age_range": (24, 34)},
+    "0": {"voice": "pFZP5JQG7iQjIQuC4Bku", "gender": "female", "age_range": (45, 60)},
 }

+
+def _randomize_caller_names():
+    """Assign random names to callers, unique per gender."""
+    males = random.sample(MALE_NAMES, sum(1 for c in CALLER_BASES.values() if c["gender"] == "male"))
+    females = random.sample(FEMALE_NAMES, sum(1 for c in CALLER_BASES.values() if c["gender"] == "female"))
+    mi, fi = 0, 0
+    for base in CALLER_BASES.values():
+        if base["gender"] == "male":
+            base["name"] = males[mi]
+            mi += 1
+        else:
+            base["name"] = females[fi]
+            fi += 1
+
+_randomize_caller_names()  # Initial assignment
+
 # Background components for dynamic generation
 JOBS_MALE = [
    "runs a small HVAC business", "works as a long-haul trucker", "is a high school football coach",
@@ -431,8 +459,10 @@ class Session:
        if self._research_task and not self._research_task.done():
            self._research_task.cancel()
        self._research_task = None
+        _randomize_caller_names()
        self.id = str(uuid.uuid4())[:8]
-        print(f"[Session] Reset - new session ID: {self.id}")
+        names = [CALLER_BASES[k]["name"] for k in sorted(CALLER_BASES.keys())]
+        print(f"[Session] Reset - new session ID: {self.id}, callers: {', '.join(names)}")


 session = Session()
@@ -1,6 +1,8 @@
 """Phone caller queue and audio stream service"""

 import asyncio
+import base64
+import json
 import time
 import threading
 import numpy as np
@@ -20,7 +22,20 @@ class CallerService:
        self._lock = threading.Lock()
        self._websockets: dict[str, any] = {}  # caller_id -> WebSocket
        self._call_sids: dict[str, str] = {}  # caller_id -> SignalWire callSid
-        self.streaming_tts: bool = False  # True while TTS audio is being streamed
+        self._stream_sids: dict[str, str] = {}  # caller_id -> SignalWire streamSid
+        self._send_locks: dict[str, asyncio.Lock] = {}  # per-caller send lock
+        self._streaming_tts: set[str] = set()  # caller_ids currently receiving TTS
+
+    def _get_send_lock(self, caller_id: str) -> asyncio.Lock:
+        if caller_id not in self._send_locks:
+            self._send_locks[caller_id] = asyncio.Lock()
+        return self._send_locks[caller_id]
+
+    def is_streaming_tts(self, caller_id: str) -> bool:
+        return caller_id in self._streaming_tts
+
+    def is_streaming_tts_any(self) -> bool:
+        return len(self._streaming_tts) > 0

    def add_to_queue(self, caller_id: str, phone: str):
        with self._lock:
@@ -94,6 +109,8 @@ class CallerService:
            print(f"[Caller] {call_info['phone']} hung up — channel {call_info['channel']} released")
        self._websockets.pop(caller_id, None)
        self._call_sids.pop(caller_id, None)
+        self._stream_sids.pop(caller_id, None)
+        self._send_locks.pop(caller_id, None)

    def reset(self):
        with self._lock:
@@ -105,6 +122,9 @@ class CallerService:
            self._caller_counter = 0
            self._websockets.clear()
            self._call_sids.clear()
+            self._stream_sids.clear()
+            self._send_locks.clear()
+            self._streaming_tts.clear()
        print("[Caller] Service reset")

    def register_websocket(self, caller_id: str, websocket):
@@ -119,29 +139,34 @@ class CallerService:
        """Send small audio chunk to caller via SignalWire WebSocket.
        Encodes L16 PCM as base64 JSON per SignalWire protocol.
        """
+        if caller_id in self._streaming_tts:
+            return  # Don't send host audio during TTS streaming
+
        ws = self._websockets.get(caller_id)
        if not ws:
            return

-        try:
-            import base64
-            if sample_rate != 16000:
-                audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
-                ratio = 16000 / sample_rate
-                out_len = int(len(audio) * ratio)
-                indices = (np.arange(out_len) / ratio).astype(int)
-                indices = np.clip(indices, 0, len(audio) - 1)
-                audio = audio[indices]
-                pcm_data = (audio * 32767).astype(np.int16).tobytes()
+        lock = self._get_send_lock(caller_id)
+        async with lock:
+            try:
+                if sample_rate != 16000:
+                    audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
+                    ratio = 16000 / sample_rate
+                    out_len = int(len(audio) * ratio)
+                    indices = (np.arange(out_len) / ratio).astype(int)
+                    indices = np.clip(indices, 0, len(audio) - 1)
+                    audio = audio[indices]
+                    pcm_data = (audio * 32767).astype(np.int16).tobytes()

-            payload = base64.b64encode(pcm_data).decode('ascii')
-            import json
-            await ws.send_text(json.dumps({
-                "event": "media",
-                "media": {"payload": payload}
-            }))
-        except Exception as e:
-            print(f"[Caller] Failed to send audio: {e}")
+                payload = base64.b64encode(pcm_data).decode('ascii')
+                stream_sid = self._stream_sids.get(caller_id, "")
+                await ws.send_text(json.dumps({
+                    "event": "media",
+                    "streamSid": stream_sid,
+                    "media": {"payload": payload}
+                }))
+            except Exception as e:
+                print(f"[Caller] Failed to send audio: {e}")

    async def stream_audio_to_caller(self, caller_id: str, pcm_data: bytes, sample_rate: int):
        """Stream large audio (TTS) to caller in real-time chunks via SignalWire WebSocket."""
@@ -149,10 +174,10 @@ class CallerService:
        if not ws:
            return

-        self.streaming_tts = True
+        lock = self._get_send_lock(caller_id)
+        self._streaming_tts.add(caller_id)
+        chunks_sent = 0
        try:
-            import base64
-            import json
            audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
            if sample_rate != 16000:
                ratio = 16000 / sample_rate
@@ -161,23 +186,40 @@ class CallerService:
                indices = np.clip(indices, 0, len(audio) - 1)
                audio = audio[indices]

+            total_chunks = (len(audio) + 959) // 960
+            duration_s = len(audio) / 16000
+            print(f"[Caller] TTS stream starting: {duration_s:.1f}s audio, {total_chunks} chunks")
+
            chunk_samples = 960
+            chunk_duration = chunk_samples / 16000  # 60ms per chunk
+
            for i in range(0, len(audio), chunk_samples):
                if caller_id not in self._websockets:
+                    print(f"[Caller] TTS stream aborted: caller {caller_id} disconnected at chunk {chunks_sent}/{total_chunks}")
                    break
+                t0 = time.time()
                chunk = audio[i:i + chunk_samples]
                pcm_chunk = (chunk * 32767).astype(np.int16).tobytes()
                payload = base64.b64encode(pcm_chunk).decode('ascii')
-                await ws.send_text(json.dumps({
-                    "event": "media",
-                    "media": {"payload": payload}
-                }))
-                await asyncio.sleep(0.055)
+                stream_sid = self._stream_sids.get(caller_id, "")
+                async with lock:
+                    await ws.send_text(json.dumps({
+                        "event": "media",
+                        "streamSid": stream_sid,
+                        "media": {"payload": payload}
+                    }))
+                chunks_sent += 1
+                # Sleep to match real-time playback rate
+                elapsed = time.time() - t0
+                sleep_time = max(0, chunk_duration - elapsed)
+                await asyncio.sleep(sleep_time)
+
+            print(f"[Caller] TTS stream finished: {chunks_sent}/{total_chunks} chunks sent")

        except Exception as e:
-            print(f"[Caller] Failed to stream audio: {e}")
+            print(f"[Caller] TTS stream failed at chunk {chunks_sent}: {e}")
        finally:
-            self.streaming_tts = False
+            self._streaming_tts.discard(caller_id)

    def register_call_sid(self, caller_id: str, call_sid: str):
        """Track SignalWire callSid for a caller"""
@@ -190,3 +232,11 @@ class CallerService:
    def unregister_call_sid(self, caller_id: str):
        """Remove callSid tracking"""
        self._call_sids.pop(caller_id, None)
+
+    def register_stream_sid(self, caller_id: str, stream_sid: str):
+        """Track SignalWire streamSid for a caller"""
+        self._stream_sids[caller_id] = stream_sid
+
+    def unregister_stream_sid(self, caller_id: str):
+        """Remove streamSid tracking"""
+        self._stream_sids.pop(caller_id, None)
@@ -3,13 +3,13 @@
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>AI Radio Show</title>
+    <title>Luke at The Roost</title>
    <link rel="stylesheet" href="/css/style.css">
 </head>
 <body>
    <div id="app">
        <header>
-            <h1>AI Radio Show</h1>
+            <h1>Luke at The Roost</h1>
            <div class="header-buttons">
                <button id="new-session-btn" class="new-session-btn">New Session</button>
                <button id="settings-btn">Settings</button>
@@ -53,7 +53,7 @@

            <!-- Call Queue -->
            <section class="queue-section">
-                <h2>Incoming Calls</h2>
+                <h2>Incoming Calls <span style="font-size:0.6em;font-weight:normal;color:var(--text-muted);">(208) 439-5853</span></h2>
                <div id="call-queue" class="call-queue">
                    <div class="queue-empty">No callers waiting</div>
                </div>
@@ -207,6 +207,6 @@
        </div>
    </div>

-    <script src="/js/app.js?v=13"></script>
+    <script src="/js/app.js?v=15"></script>
 </body>
 </html>