Files
ai-podcast/backend/services/caller_service.py
tcpsyn 356bf145b8 Add show improvement features: crossfade, emotions, returning callers, transcripts, screening
- Music crossfade: smooth 3-second blend between tracks instead of hard stop/start
- Emotional detection: analyze host mood from recent messages so callers adapt tone
- AI caller summaries: generate call summaries with timestamps for show history
- Returning callers: persist regular callers across sessions with call history
- Session export: generate transcripts with speaker labels and chapter markers
- Caller screening: AI pre-screens phone callers to get name and topic while queued

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-07 02:43:01 -07:00

297 lines
12 KiB
Python

"""Phone caller queue and audio stream service"""
import asyncio
import base64
import json
import time
import threading
import numpy as np
from typing import Optional
class CallerService:
"""Manages phone caller queue, channel allocation, and WebSocket streams"""
FIRST_REAL_CHANNEL = 3
def __init__(self):
self._queue: list[dict] = []
self.active_calls: dict[str, dict] = {}
self._allocated_channels: set[int] = set()
self._caller_counter: int = 0
self._lock = threading.Lock()
self._websockets: dict[str, any] = {} # caller_id -> WebSocket
self._call_sids: dict[str, str] = {} # caller_id -> SignalWire callSid
self._stream_sids: dict[str, str] = {} # caller_id -> SignalWire streamSid
self._send_locks: dict[str, asyncio.Lock] = {} # per-caller send lock
self._streaming_tts: set[str] = set() # caller_ids currently receiving TTS
self._screening_state: dict[str, dict] = {} # caller_id -> screening conversation
def _get_send_lock(self, caller_id: str) -> asyncio.Lock:
if caller_id not in self._send_locks:
self._send_locks[caller_id] = asyncio.Lock()
return self._send_locks[caller_id]
def is_streaming_tts(self, caller_id: str) -> bool:
return caller_id in self._streaming_tts
def is_streaming_tts_any(self) -> bool:
return len(self._streaming_tts) > 0
def add_to_queue(self, caller_id: str, phone: str):
with self._lock:
self._queue.append({
"caller_id": caller_id,
"phone": phone,
"queued_at": time.time(),
})
print(f"[Caller] {phone} added to queue (ID: {caller_id})")
def remove_from_queue(self, caller_id: str):
with self._lock:
self._queue = [c for c in self._queue if c["caller_id"] != caller_id]
print(f"[Caller] {caller_id} removed from queue")
def allocate_channel(self) -> int:
with self._lock:
ch = self.FIRST_REAL_CHANNEL
while ch in self._allocated_channels:
ch += 1
self._allocated_channels.add(ch)
return ch
def release_channel(self, channel: int):
with self._lock:
self._allocated_channels.discard(channel)
def take_call(self, caller_id: str) -> dict:
caller = None
with self._lock:
for c in self._queue:
if c["caller_id"] == caller_id:
caller = c
break
if caller:
self._queue = [c for c in self._queue if c["caller_id"] != caller_id]
if not caller:
raise ValueError(f"Caller {caller_id} not in queue")
channel = self.allocate_channel()
self._caller_counter += 1
phone = caller["phone"]
call_info = {
"caller_id": caller_id,
"phone": phone,
"channel": channel,
"started_at": time.time(),
}
self.active_calls[caller_id] = call_info
print(f"[Caller] {phone} taken on air — channel {channel}")
return call_info
def hangup(self, caller_id: str):
call_info = self.active_calls.pop(caller_id, None)
if call_info:
self.release_channel(call_info["channel"])
print(f"[Caller] {call_info['phone']} hung up — channel {call_info['channel']} released")
self._websockets.pop(caller_id, None)
self._call_sids.pop(caller_id, None)
self._stream_sids.pop(caller_id, None)
self._send_locks.pop(caller_id, None)
self._screening_state.pop(caller_id, None)
def reset(self):
with self._lock:
for call_info in self.active_calls.values():
self._allocated_channels.discard(call_info["channel"])
self._queue.clear()
self.active_calls.clear()
self._allocated_channels.clear()
self._caller_counter = 0
self._websockets.clear()
self._call_sids.clear()
self._stream_sids.clear()
self._send_locks.clear()
self._streaming_tts.clear()
self._screening_state.clear()
print("[Caller] Service reset")
# --- Screening ---
def start_screening(self, caller_id: str):
"""Initialize screening state for a queued caller"""
self._screening_state[caller_id] = {
"conversation": [],
"caller_name": None,
"topic": None,
"status": "screening", # screening, complete
"response_count": 0,
}
print(f"[Screening] Started for {caller_id}")
def get_screening_state(self, caller_id: str) -> Optional[dict]:
return self._screening_state.get(caller_id)
def update_screening(self, caller_id: str, caller_text: str = None,
screener_text: str = None, caller_name: str = None,
topic: str = None):
"""Update screening conversation and extracted info"""
state = self._screening_state.get(caller_id)
if not state:
return
if caller_text:
state["conversation"].append({"role": "caller", "content": caller_text})
state["response_count"] += 1
if screener_text:
state["conversation"].append({"role": "screener", "content": screener_text})
if caller_name:
state["caller_name"] = caller_name
if topic:
state["topic"] = topic
def end_screening(self, caller_id: str):
"""Mark screening as complete"""
state = self._screening_state.get(caller_id)
if state:
state["status"] = "complete"
print(f"[Screening] Complete for {caller_id}: name={state.get('caller_name')}, topic={state.get('topic')}")
def get_queue(self) -> list[dict]:
"""Get queue with screening info enrichment"""
now = time.time()
with self._lock:
result = []
for c in self._queue:
entry = {
"caller_id": c["caller_id"],
"phone": c["phone"],
"wait_time": int(now - c["queued_at"]),
}
screening = self._screening_state.get(c["caller_id"])
if screening:
entry["screening_status"] = screening["status"]
entry["caller_name"] = screening.get("caller_name")
entry["screening_summary"] = screening.get("topic")
else:
entry["screening_status"] = None
entry["caller_name"] = None
entry["screening_summary"] = None
result.append(entry)
return result
def register_websocket(self, caller_id: str, websocket):
"""Register a WebSocket for a caller"""
self._websockets[caller_id] = websocket
def unregister_websocket(self, caller_id: str):
"""Unregister a WebSocket"""
self._websockets.pop(caller_id, None)
async def send_audio_to_caller(self, caller_id: str, pcm_data: bytes, sample_rate: int):
"""Send small audio chunk to caller via SignalWire WebSocket.
Encodes L16 PCM as base64 JSON per SignalWire protocol.
"""
if caller_id in self._streaming_tts:
return # Don't send host audio during TTS streaming
ws = self._websockets.get(caller_id)
if not ws:
return
lock = self._get_send_lock(caller_id)
async with lock:
try:
if sample_rate != 16000:
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
ratio = 16000 / sample_rate
out_len = int(len(audio) * ratio)
indices = (np.arange(out_len) / ratio).astype(int)
indices = np.clip(indices, 0, len(audio) - 1)
audio = audio[indices]
pcm_data = (audio * 32767).astype(np.int16).tobytes()
payload = base64.b64encode(pcm_data).decode('ascii')
stream_sid = self._stream_sids.get(caller_id, "")
await ws.send_text(json.dumps({
"event": "media",
"streamSid": stream_sid,
"media": {"payload": payload}
}))
except Exception as e:
print(f"[Caller] Failed to send audio: {e}")
async def stream_audio_to_caller(self, caller_id: str, pcm_data: bytes, sample_rate: int):
"""Stream large audio (TTS) to caller in real-time chunks via SignalWire WebSocket."""
ws = self._websockets.get(caller_id)
if not ws:
return
lock = self._get_send_lock(caller_id)
self._streaming_tts.add(caller_id)
chunks_sent = 0
try:
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
if sample_rate != 16000:
ratio = 16000 / sample_rate
out_len = int(len(audio) * ratio)
indices = (np.arange(out_len) / ratio).astype(int)
indices = np.clip(indices, 0, len(audio) - 1)
audio = audio[indices]
total_chunks = (len(audio) + 959) // 960
duration_s = len(audio) / 16000
print(f"[Caller] TTS stream starting: {duration_s:.1f}s audio, {total_chunks} chunks")
chunk_samples = 960
chunk_duration = chunk_samples / 16000 # 60ms per chunk
for i in range(0, len(audio), chunk_samples):
if caller_id not in self._websockets:
print(f"[Caller] TTS stream aborted: caller {caller_id} disconnected at chunk {chunks_sent}/{total_chunks}")
break
t0 = time.time()
chunk = audio[i:i + chunk_samples]
pcm_chunk = (chunk * 32767).astype(np.int16).tobytes()
payload = base64.b64encode(pcm_chunk).decode('ascii')
stream_sid = self._stream_sids.get(caller_id, "")
async with lock:
await ws.send_text(json.dumps({
"event": "media",
"streamSid": stream_sid,
"media": {"payload": payload}
}))
chunks_sent += 1
# Sleep to match real-time playback rate
elapsed = time.time() - t0
sleep_time = max(0, chunk_duration - elapsed)
await asyncio.sleep(sleep_time)
print(f"[Caller] TTS stream finished: {chunks_sent}/{total_chunks} chunks sent")
except Exception as e:
print(f"[Caller] TTS stream failed at chunk {chunks_sent}: {e}")
finally:
self._streaming_tts.discard(caller_id)
def register_call_sid(self, caller_id: str, call_sid: str):
"""Track SignalWire callSid for a caller"""
self._call_sids[caller_id] = call_sid
def get_call_sid(self, caller_id: str) -> str | None:
"""Get SignalWire callSid for a caller"""
return self._call_sids.get(caller_id)
def unregister_call_sid(self, caller_id: str):
"""Remove callSid tracking"""
self._call_sids.pop(caller_id, None)
def register_stream_sid(self, caller_id: str, stream_sid: str):
"""Track SignalWire streamSid for a caller"""
self._stream_sids[caller_id] = stream_sid
def unregister_stream_sid(self, caller_id: str):
"""Remove streamSid tracking"""
self._stream_sids.pop(caller_id, None)