Add show improvement features: crossfade, emotions, returning callers, transcripts, screening

- Music crossfade: smooth 3-second blend between tracks instead of hard stop/start
- Emotional detection: analyze host mood from recent messages so callers adapt tone
- AI caller summaries: generate call summaries with timestamps for show history
- Returning callers: persist regular callers across sessions with call history
- Session export: generate transcripts with speaker labels and chapter markers
- Caller screening: AI pre-screens phone callers to get name and topic while queued

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-07 02:43:01 -07:00
parent de5577e582
commit 356bf145b8
13 changed files with 3736 additions and 40 deletions

View File

@@ -25,6 +25,7 @@ from .services.llm import llm_service
from .services.tts import generate_speech
from .services.audio import audio_service
from .services.news import news_service, extract_keywords, STOP_WORDS
from .services.regulars import regular_caller_service
app = FastAPI(title="AI Radio Show")
@@ -115,7 +116,8 @@ CALLER_BASES = {
def _randomize_callers():
"""Assign random names and voices to callers, unique per gender."""
"""Assign random names and voices to callers, unique per gender.
Overrides 2-3 slots with returning regulars when available."""
num_m = sum(1 for c in CALLER_BASES.values() if c["gender"] == "male")
num_f = sum(1 for c in CALLER_BASES.values() if c["gender"] == "female")
males = random.sample(MALE_NAMES, num_m)
@@ -125,6 +127,8 @@ def _randomize_callers():
f_voices = random.sample(female_pool, min(num_f, len(female_pool)))
mi, fi = 0, 0
for base in CALLER_BASES.values():
base["returning"] = False
base["regular_id"] = None
if base["gender"] == "male":
base["name"] = males[mi]
base["voice"] = m_voices[mi]
@@ -134,6 +138,32 @@ def _randomize_callers():
base["voice"] = f_voices[fi]
fi += 1
# Override 2-3 random slots with returning callers
try:
returning = regular_caller_service.get_returning_callers(random.randint(2, 3))
if returning:
keys_by_gender = {"male": [], "female": []}
for k, v in CALLER_BASES.items():
keys_by_gender[v["gender"]].append(k)
for regular in returning:
gender = regular["gender"]
candidates = keys_by_gender.get(gender, [])
if not candidates:
continue
key = random.choice(candidates)
candidates.remove(key)
base = CALLER_BASES[key]
base["name"] = regular["name"]
base["returning"] = True
base["regular_id"] = regular["id"]
# Keep the randomly assigned voice — regulars sound different each time
if returning:
names = [r["name"] for r in returning]
print(f"[Regulars] Injected returning callers: {', '.join(names)}")
except Exception as e:
print(f"[Regulars] Failed to inject returning callers: {e}")
_randomize_callers() # Initial assignment
# Background components for dynamic generation
@@ -1239,10 +1269,68 @@ def pick_location() -> str:
return random.choice(LOCATIONS_OUT_OF_STATE)
def _generate_returning_caller_background(base: dict) -> str:
    """Generate background for a returning regular caller.

    Looks up the regular referenced by ``base["regular_id"]`` in
    ``regular_caller_service`` and builds a background string from the stored
    age/job/location/traits, freshly randomized personality layers, and up to
    the three most recent call summaries.

    If the regular cannot be found, falls back to the standard generator.
    """
    regular_id = base.get("regular_id")
    regular = next(
        (r for r in regular_caller_service.get_regulars() if r["id"] == regular_id),
        None,
    )
    if not regular:
        # generate_caller_background() sends any base flagged as returning
        # straight back to this function, so passing `base` unchanged would
        # recurse forever. Fall back on a copy with the flags cleared.
        fallback_base = {**base, "returning": False, "regular_id": None}
        return generate_caller_background(fallback_base)

    gender = regular["gender"]
    age = regular["age"]
    job = regular["job"]
    location = regular["location"]
    traits = regular.get("personality_traits", [])

    # Build previous calls section (most recent three summaries only)
    prev_calls = regular.get("call_history", [])
    prev_section = ""
    if prev_calls:
        lines = [f"- {c['summary']}" for c in prev_calls[-3:]]
        prev_section = "\nPREVIOUS CALLS:\n" + "\n".join(lines)
        prev_section += "\nYou're calling back with an update — something has changed since last time. Reference your previous call(s) naturally."

    # Reuse standard personality layers
    interest1, interest2 = random.sample(INTERESTS, 2)
    quirk1, quirk2 = random.sample(QUIRKS, 2)
    people_pool = PEOPLE_MALE if gender == "male" else PEOPLE_FEMALE
    person1, person2 = random.sample(people_pool, 2)
    tic1, tic2 = random.sample(VERBAL_TICS, 2)
    arc = random.choice(EMOTIONAL_ARCS)
    vehicle = random.choice(VEHICLES)
    having = random.choice(HAVING_RIGHT_NOW)

    time_ctx = _get_time_context()
    moon = _get_moon_phase()
    season_ctx = _get_seasonal_context()

    trait_str = ", ".join(traits) if traits else "a regular caller"

    parts = [
        f"{age}, {job} {location}. Returning caller — {trait_str}.",
        f"{interest1.capitalize()}, {interest2}.",
        f"{quirk1.capitalize()}, {quirk2}.",
        f"\nRIGHT NOW: {time_ctx} Moon: {moon}.",
        f"\nSEASON: {season_ctx}",
        f"\nPEOPLE IN THEIR LIFE: {person1.capitalize()}. {person2.capitalize()}. Use their names when talking about them.",
        f"\nDRIVES: {vehicle.capitalize()}.",
        f"\nHAVING RIGHT NOW: {having}",
        f"\nVERBAL HABITS: Tends to say \"{tic1}\" and \"{tic2}\" — use these naturally in conversation.",
        f"\nEMOTIONAL ARC: {arc}",
        f"\nRELATIONSHIP TO THE SHOW: Has called before. Comfortable on air. Knows Luke a bit. Might reference their last call.",
        prev_section,
    ]
    # The first three parts form the space-separated opening line; the rest
    # already carry their own leading newline.
    return " ".join(parts[:3]) + "".join(parts[3:])
def generate_caller_background(base: dict) -> str:
"""Generate a unique background for a caller (sync, no research).
~30% of callers are 'topic callers' who call about something interesting
instead of a personal problem. Includes full personality layers for realism."""
if base.get("returning") and base.get("regular_id"):
return _generate_returning_caller_background(base)
gender = base["gender"]
age = random.randint(*base["age_range"])
jobs = JOBS_MALE if gender == "male" else JOBS_FEMALE
@@ -1491,8 +1579,58 @@ async def enrich_caller_background(background: str) -> str:
return background
def detect_host_mood(messages: list[dict]) -> str:
    """Analyze recent host messages to detect mood signals for caller adaptation.

    Only the last five messages whose role is ``"user"`` or ``"host"`` are
    considered. Messages with missing or non-string content are ignored rather
    than raising, so a malformed history entry cannot break prompt building.

    Returns:
        A prompt fragment headed ``EMOTIONAL READ ON THE HOST:`` with at most
        two bullet points, or ``""`` when there is nothing to report.
    """
    host_msgs = [
        m["content"]
        for m in messages
        if m.get("role") in ("user", "host") and isinstance(m.get("content"), str)
    ][-5:]
    if not host_msgs:
        return ""

    # Lowercase once instead of once per (message, phrase) pair.
    lowered = [m.lower() for m in host_msgs]

    def _hits(phrases: list[str]) -> int:
        """Count (message, phrase) pairs where the phrase occurs in the message."""
        return sum(1 for text in lowered for p in phrases if p in text)

    signals = []

    # Check average word count — short responses suggest dismissiveness
    avg_words = sum(len(m.split()) for m in host_msgs) / len(host_msgs)
    if avg_words < 8:
        signals.append("The host is giving short responses — they might be losing interest, testing you, or waiting for you to bring something real. Don't ramble. Get to the point or change the subject.")

    # Pushback patterns
    pushback_phrases = ["i don't think", "that's not", "come on", "really?", "i disagree",
                        "that doesn't", "are you sure", "i don't buy", "no way", "but that's",
                        "hold on", "wait a minute", "let's be honest"]
    if _hits(pushback_phrases) >= 2:
        signals.append("The host is pushing back — they're challenging you. Don't fold immediately. Defend your position or concede specifically, not generically.")

    # Supportive patterns
    supportive_phrases = ["i hear you", "that makes sense", "i get it", "that's real",
                          "i feel you", "you're right", "absolutely", "exactly", "good for you",
                          "i respect that", "that took guts", "i'm glad you"]
    if _hits(supportive_phrases) >= 2:
        signals.append("The host is being supportive — they're with you. You can go deeper. Share something you've been holding back.")

    # Joking patterns
    joke_indicators = ["haha", "lmao", "lol", "that's hilarious", "no way", "you're killing me",
                       "shut up", "get out", "are you serious", "you're joking"]
    if _hits(joke_indicators) >= 2:
        signals.append("The host is in a playful mood — joking around. You can joke back, lean into the humor, but you can also use it as a door to something real.")

    # Probing — lots of questions
    question_count = sum(m.count("?") for m in host_msgs)
    if question_count >= 3:
        signals.append("The host is asking a lot of questions — they're digging. Give them real answers. Don't deflect.")

    if not signals:
        return ""

    # Cap at 2 signals; order above is the priority order.
    signals = signals[:2]
    return "\nEMOTIONAL READ ON THE HOST:\n" + "\n".join(f"- {s}" for s in signals) + "\n"
def get_caller_prompt(caller: dict, conversation_summary: str = "", show_history: str = "",
news_context: str = "", research_context: str = "") -> str:
news_context: str = "", research_context: str = "",
emotional_read: str = "") -> str:
"""Generate a natural system prompt for a caller"""
context = ""
if conversation_summary:
@@ -1519,7 +1657,7 @@ Continue naturally. Don't repeat yourself.
return f"""You're {caller['name']}, calling a late-night radio show called "Luke at the Roost." It's late. You trust this host.
{caller['vibe']}
{history}{context}{world_context}
{history}{context}{world_context}{emotional_read}
HOW TO TALK:
- Sound like a real person on the phone, not an essay. This is a conversation, not a monologue.
- VARY YOUR LENGTH. Sometimes one sentence. Sometimes two or three. Match the moment.
@@ -1607,6 +1745,8 @@ class CallRecord:
caller_name: str # "Tony" or "Caller #3"
summary: str # LLM-generated summary after hangup
transcript: list[dict] = field(default_factory=list)
started_at: float = 0.0
ended_at: float = 0.0
class Session:
@@ -1616,6 +1756,7 @@ class Session:
self.conversation: list[dict] = []
self.caller_backgrounds: dict[str, str] = {} # Generated backgrounds for this session
self.call_history: list[CallRecord] = []
self._call_started_at: float = 0.0
self.active_real_caller: dict | None = None
self.ai_respond_mode: str = "manual" # "manual" or "auto"
self.auto_followup: bool = False
@@ -1626,13 +1767,14 @@ class Session:
def start_call(self, caller_key: str):
self.current_caller_key = caller_key
self.conversation = []
self._call_started_at = time.time()
def end_call(self):
self.current_caller_key = None
self.conversation = []
def add_message(self, role: str, content: str):
self.conversation.append({"role": role, "content": content})
self.conversation.append({"role": role, "content": content, "timestamp": time.time()})
def get_caller_background(self, caller_key: str) -> str:
"""Get or generate background for a caller in this session"""
@@ -1977,7 +2119,7 @@ async def get_callers():
"""Get list of available callers"""
return {
"callers": [
{"key": k, "name": v["name"]}
{"key": k, "name": v["name"], "returning": v.get("returning", False)}
for k, v in CALLER_BASES.items()
],
"current": session.current_caller_key,
@@ -1985,6 +2127,12 @@ async def get_callers():
}
@app.get("/api/regulars")
async def get_regulars():
    """Return every regular caller known to the regular-caller service."""
    regulars = regular_caller_service.get_regulars()
    return {"regulars": regulars}
@app.post("/api/session/reset")
async def reset_session():
"""Reset session - all callers get fresh backgrounds"""
@@ -2037,6 +2185,9 @@ async def hangup():
session._research_task = None
caller_name = session.caller["name"] if session.caller else None
caller_key = session.current_caller_key
conversation_snapshot = list(session.conversation)
call_started = getattr(session, '_call_started_at', 0.0)
session.end_call()
# Play hangup sound in background so response returns immediately
@@ -2044,9 +2195,74 @@ async def hangup():
if hangup_sound.exists():
threading.Thread(target=audio_service.play_sfx, args=(str(hangup_sound),), daemon=True).start()
# Generate summary for AI caller in background
if caller_name and conversation_snapshot:
asyncio.create_task(_summarize_ai_call(caller_key, caller_name, conversation_snapshot, call_started))
return {"status": "disconnected", "caller": caller_name}
def _split_job_location(background: str) -> tuple[str, str]:
    """Return ``(job, location)`` parsed from the first sentence of a background.

    The text after the first comma is split on its last ``" in "``; the
    location keeps its ``"in "`` prefix. When no ``" in "`` is present the
    location is ``"unknown"``.
    """
    # NOTE(review): splitting on "." truncates place names containing a period
    # (e.g. "St. Louis") — confirm whether backgrounds can contain those.
    first_sentence = background.split(".")[0] if background else ""
    _, _, job_loc = first_sentence.partition(",")
    job_loc = job_loc.strip()
    job, sep, place = job_loc.rpartition(" in ")
    if not sep:
        return job_loc, "unknown"
    return job.strip(), "in " + place.strip()


def _extract_traits(background: str) -> list[str]:
    """Collect the value of the first background line matching each trait label."""
    traits = []
    bg_lines = background.split("\n")
    for label in ["QUIRK", "STRONG OPINION", "SECRET SIDE", "FOOD OPINION"]:
        for line in bg_lines:
            if label in line:
                traits.append(line.split(":", 1)[-1].strip()[:80])
                break
    return traits


async def _summarize_ai_call(caller_key: str, caller_name: str, conversation: list[dict], started_at: float):
    """Background task: summarize AI caller conversation and store in history.

    Asks the LLM for a 1-2 sentence summary, appends a ``CallRecord`` to
    ``session.call_history``, then either updates the caller's existing
    regular entry or, with 20% probability for calls of 6+ messages, promotes
    the caller to a new regular. Failures in either LLM or promotion logic are
    logged and never propagate.
    """
    ended_at = time.time()
    summary = ""
    if conversation:
        transcript_text = "\n".join(
            f"{msg['role']}: {msg['content']}" for msg in conversation
        )
        try:
            summary = await llm_service.generate(
                messages=[{"role": "user", "content": f"Summarize this radio show call in 1-2 sentences:\n{transcript_text}"}],
                system_prompt="You summarize radio show conversations concisely. Focus on what the caller talked about and any emotional moments.",
            )
        except Exception as e:
            print(f"[AI Summary] Failed to generate summary: {e}")
            summary = f"{caller_name} called in."
    # Guard against a None result so the slicing and truthiness checks below are safe.
    summary = summary or ""

    session.call_history.append(CallRecord(
        caller_type="ai",
        caller_name=caller_name,
        summary=summary,
        transcript=conversation,
        started_at=started_at,
        ended_at=ended_at,
    ))
    print(f"[AI Summary] {caller_name} call summarized: {summary[:80]}...")

    # Returning caller promotion/update logic
    try:
        base = CALLER_BASES.get(caller_key) if caller_key else None
        if base and summary:
            if base.get("returning") and base.get("regular_id"):
                # Update existing regular's call history
                regular_caller_service.update_after_call(base["regular_id"], summary)
            elif len(conversation) >= 6 and random.random() < 0.20:
                # 20% chance to promote first-timer with 6+ messages
                bg = session.caller_backgrounds.get(caller_key, "")
                job, location = _split_job_location(bg)
                # NOTE(review): age is re-rolled here rather than taken from the
                # background the caller used on air — confirm that is intended.
                regular_caller_service.add_regular(
                    name=caller_name,
                    gender=base.get("gender", "male"),
                    age=random.randint(*base.get("age_range", (30, 50))),
                    job=job,
                    location=location,
                    personality_traits=_extract_traits(bg)[:4],
                    first_call_summary=summary,
                )
    except Exception as e:
        print(f"[Regulars] Promotion logic error: {e}")
# --- Chat & TTS Endpoints ---
import re
@@ -2174,7 +2390,8 @@ async def chat(request: ChatRequest):
conversation_summary = session.get_conversation_summary()
show_history = session.get_show_history()
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history)
mood = detect_host_mood(session.conversation)
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history, emotional_read=mood)
messages = _normalize_messages_for_llm(session.conversation[-10:])
response = await llm_service.generate(
@@ -2276,13 +2493,16 @@ async def get_music():
@app.post("/api/music/play")
async def play_music(request: MusicRequest):
    """Load and play a music track, crossfading if already playing.

    Raises:
        HTTPException: 404 when the track name is absolute, contains a ``..``
            segment, or does not exist under the configured music directory.
    """
    from pathlib import PurePath

    # The track name comes from the client; refuse anything that could escape
    # the music directory when joined onto it.
    requested = PurePath(request.track)
    if requested.is_absolute() or ".." in requested.parts:
        raise HTTPException(404, "Track not found")

    track_path = settings.music_dir / request.track
    if not track_path.exists():
        raise HTTPException(404, "Track not found")

    if audio_service.is_music_playing():
        audio_service.crossfade_to(str(track_path))
    else:
        audio_service.load_music(str(track_path))
        audio_service.play_music()
    return {"status": "playing", "track": request.track}
@@ -2352,6 +2572,9 @@ async def play_ad(request: MusicRequest):
if not ad_path.exists():
raise HTTPException(404, "Ad not found")
if audio_service._music_playing:
audio_service.stop_music(fade_duration=1.0)
await asyncio.sleep(1.1)
audio_service.play_ad(str(ad_path))
return {"status": "playing", "track": request.track}
@@ -2393,6 +2616,126 @@ async def update_settings(data: dict):
return llm_service.get_settings()
# --- Caller Screening ---
SCREENING_PROMPT = """You are a friendly, brief phone screener for "Luke at the Roost" radio show.
Your job: Get the caller's first name and what they want to talk about. That's it.
Rules:
- Be warm but brief (1-2 sentences per response)
- First ask their name, then ask what they want to talk about
- After you have both, say something like "Great, sit tight and we'll get you on with Luke!"
- Never pretend to be Luke or the host
- Keep it casual and conversational
- If they're hard to understand, ask them to repeat"""
_screening_audio_buffers: dict[str, bytearray] = {}
async def _start_screening_greeting(caller_id: str):
    """Greet a newly queued caller and open their screening conversation.

    Waits two seconds, then does nothing if the caller no longer has a
    websocket registered. Otherwise starts screening, records the greeting as
    the screener's first line, and streams the synthesized greeting to the
    caller. TTS/streaming errors are logged, not raised.
    """
    await asyncio.sleep(2)  # Wait for stream to stabilize

    if not caller_service._websockets.get(caller_id):
        return

    greeting = "Hey there! Thanks for calling Luke at the Roost. What's your name?"
    caller_service.start_screening(caller_id)
    caller_service.update_screening(caller_id, screener_text=greeting)

    try:
        speech = await generate_speech(greeting, "Sarah", "none")
        if not speech:
            return
        await caller_service.stream_audio_to_caller(caller_id, speech, 24000)
    except Exception as e:
        print(f"[Screening] Greeting TTS failed: {e}")
async def _handle_screening_audio(caller_id: str, pcm_data: bytes, sample_rate: int):
    """Run one turn of the screening conversation for a queued caller.

    Transcribes the audio chunk, records it as the caller's line, asks the LLM
    for the screener's reply, and speaks that reply back. Once the caller has
    responded at least twice, a second LLM call tries to extract name and
    topic; when both are present screening is marked complete and a
    ``screening_complete`` event is broadcast. Each stage logs and bails out
    on failure instead of raising.
    """
    state = caller_service.get_screening_state(caller_id)
    if not state or state["status"] == "complete":
        return

    # Don't listen while our own TTS is still streaming to this caller
    if caller_service.is_streaming_tts(caller_id):
        return

    # Speech -> text
    try:
        text = await transcribe_audio(pcm_data, source_sample_rate=sample_rate)
    except Exception as e:
        print(f"[Screening] Transcription failed: {e}")
        return
    if not text or not text.strip():
        return

    print(f"[Screening] Caller {caller_id}: {text}")
    caller_service.update_screening(caller_id, caller_text=text)

    # Map the stored screener/caller turns onto LLM chat roles
    messages = [
        {
            "role": "assistant" if turn["role"] == "screener" else "user",
            "content": turn["content"],
        }
        for turn in state["conversation"]
    ]

    # Screener's reply
    try:
        response = await llm_service.generate(
            messages=messages,
            system_prompt=SCREENING_PROMPT
        )
    except Exception as e:
        print(f"[Screening] LLM failed: {e}")
        return
    if not response or not response.strip():
        return

    response = response.strip()
    print(f"[Screening] Screener → {caller_id}: {response}")
    caller_service.update_screening(caller_id, screener_text=response)

    # After 2+ caller responses, try to extract name and topic
    if state["response_count"] >= 2:
        try:
            dialogue = chr(10).join(f'{m["role"]}: {m["content"]}' for m in state["conversation"])
            extract_prompt = f"""From this screening conversation, extract the caller's name and topic.
Conversation:
{dialogue}
Respond with ONLY JSON: {{"name": "their first name or null", "topic": "brief topic or null"}}"""
            extract = await llm_service.generate(
                messages=[{"role": "user", "content": extract_prompt}],
                system_prompt="You extract structured data from conversations. Respond with only valid JSON."
            )
            json_match = re.search(r'\{[^}]+\}', extract)
            if json_match:
                info = json.loads(json_match.group())
                found_name = info.get("name")
                found_topic = info.get("topic")
                if found_name:
                    caller_service.update_screening(caller_id, caller_name=found_name)
                if found_topic:
                    caller_service.update_screening(caller_id, topic=found_topic)
                if found_name and found_topic:
                    caller_service.end_screening(caller_id)
                    broadcast_event("screening_complete", {
                        "caller_id": caller_id,
                        "name": found_name,
                        "topic": found_topic
                    })
        except Exception as e:
            print(f"[Screening] Extract failed: {e}")

    # Speak the screener's reply back to the caller
    try:
        speech = await generate_speech(response, "Sarah", "none")
        if speech:
            await caller_service.stream_audio_to_caller(caller_id, speech, 24000)
    except Exception as e:
        print(f"[Screening] Response TTS failed: {e}")
@app.websocket("/api/signalwire/stream")
async def signalwire_audio_stream(websocket: WebSocket):
"""Handle SignalWire bidirectional audio stream"""
@@ -2402,6 +2745,7 @@ async def signalwire_audio_stream(websocket: WebSocket):
caller_phone = "Unknown"
call_sid = ""
audio_buffer = bytearray()
screening_buffer = bytearray()
CHUNK_DURATION_S = 3
SAMPLE_RATE = 16000
chunk_samples = CHUNK_DURATION_S * SAMPLE_RATE
@@ -2448,6 +2792,9 @@ async def signalwire_audio_stream(websocket: WebSocket):
if stream_sid:
caller_service.register_stream_sid(caller_id, stream_sid)
# Start screening conversation
asyncio.create_task(_start_screening_greeting(caller_id))
elif event == "media" and stream_started:
try:
payload = msg.get("media", {}).get("payload", "")
@@ -2458,6 +2805,16 @@ async def signalwire_audio_stream(websocket: WebSocket):
call_info = caller_service.active_calls.get(caller_id)
if not call_info:
# Caller is queued, not on air — route to screening
screening_buffer.extend(pcm_data)
if len(screening_buffer) >= chunk_samples * 2:
pcm_chunk = bytes(screening_buffer[:chunk_samples * 2])
screening_buffer = screening_buffer[chunk_samples * 2:]
audio_check = np.frombuffer(pcm_chunk, dtype=np.int16).astype(np.float32) / 32768.0
if np.abs(audio_check).max() >= 0.01:
asyncio.create_task(
_handle_screening_audio(caller_id, pcm_chunk, SAMPLE_RATE)
)
continue
audio_buffer.extend(pcm_data)
@@ -2713,7 +3070,8 @@ async def _trigger_ai_auto_respond(accumulated_text: str):
conversation_summary = session.get_conversation_summary()
show_history = session.get_show_history()
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history)
mood = detect_host_mood(session.conversation)
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history, emotional_read=mood)
messages = _normalize_messages_for_llm(session.conversation[-10:])
response = await llm_service.generate(
@@ -2785,7 +3143,8 @@ async def ai_respond():
conversation_summary = session.get_conversation_summary()
show_history = session.get_show_history()
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history)
mood = detect_host_mood(session.conversation)
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history, emotional_read=mood)
messages = _normalize_messages_for_llm(session.conversation[-10:])
response = await llm_service.generate(
@@ -2856,6 +3215,7 @@ async def hangup_real_caller():
caller_id = session.active_real_caller["caller_id"]
caller_phone = session.active_real_caller["phone"]
conversation_snapshot = list(session.conversation)
call_started = getattr(session, '_call_started_at', 0.0)
auto_followup_enabled = session.auto_followup
# End the phone call via SignalWire
@@ -2875,7 +3235,7 @@ async def hangup_real_caller():
threading.Thread(target=audio_service.play_sfx, args=(str(hangup_sound),), daemon=True).start()
asyncio.create_task(
_summarize_real_call(caller_phone, conversation_snapshot, auto_followup_enabled)
_summarize_real_call(caller_phone, conversation_snapshot, call_started, auto_followup_enabled)
)
return {
@@ -2884,8 +3244,9 @@ async def hangup_real_caller():
}
async def _summarize_real_call(caller_phone: str, conversation: list, auto_followup_enabled: bool):
async def _summarize_real_call(caller_phone: str, conversation: list, started_at: float, auto_followup_enabled: bool):
"""Background task: summarize call and store in history"""
ended_at = time.time()
summary = ""
if conversation:
transcript_text = "\n".join(
@@ -2901,6 +3262,8 @@ async def _summarize_real_call(caller_phone: str, conversation: list, auto_follo
caller_name=caller_phone,
summary=summary,
transcript=conversation,
started_at=started_at,
ended_at=ended_at,
))
print(f"[Real Caller] {caller_phone} call summarized: {summary[:80]}...")
@@ -2963,6 +3326,70 @@ async def set_auto_followup(data: dict):
return {"enabled": session.auto_followup}
# --- Transcript & Chapter Export ---
@app.get("/api/session/export")
async def export_session():
    """Export the session as a labelled transcript plus chapter markers.

    Offsets are measured from the earliest recorded call start in the session.
    Each call becomes one chapter titled with the caller name and, when a
    summary exists, the summary's first sentence.

    Raises:
        HTTPException: 400 when the session has no call history.
    """
    history = session.call_history
    if not history:
        raise HTTPException(400, "No calls in this session to export")

    # Earliest known call start is time zero; fall back to "now" if none recorded
    known_starts = [rec.started_at for rec in history if rec.started_at > 0]
    base_time = min(known_starts) if known_starts else time.time()

    def _speaker_for(role: str, caller_name: str) -> str:
        """Map a message role onto the upper-case speaker label."""
        if role in ("user", "host"):
            return "HOST"
        if role.startswith(("real_caller:", "ai_caller:")):
            return role.split(":", 1)[1].upper()
        if role == "assistant":
            return caller_name.upper()
        return role.upper()

    out_lines = []
    chapter_list = []

    for call_no, rec in enumerate(history, start=1):
        # Chapter marker at the call's start offset
        if rec.started_at > 0:
            call_offset = max(0, rec.started_at - base_time)
        else:
            call_offset = 0

        title = f"{rec.caller_name}"
        if rec.summary:
            lead_sentence = rec.summary.split(".")[0].strip()
            if lead_sentence:
                title += f" \u2014 {lead_sentence}"
        chapter_list.append({"startTime": round(call_offset), "title": title})

        # Blank line between calls, then the call header
        if call_no > 1:
            out_lines.append("")
        out_lines.append(f"--- Call {call_no}: {rec.caller_name} ---")
        out_lines.append("")

        for msg in rec.transcript:
            stamp = msg.get("timestamp")
            # Messages without a timestamp inherit the call's start offset
            elapsed = stamp - base_time if stamp else call_offset
            if elapsed < 0:
                elapsed = 0
            mins = int(elapsed // 60)
            secs = int(elapsed % 60)
            speaker = _speaker_for(msg.get("role", ""), rec.caller_name)
            out_lines.append(f"[{mins:02d}:{secs:02d}] {speaker}: {msg['content']}")

    return {
        "session_id": session.id,
        "transcript": "\n".join(out_lines),
        "chapters": chapter_list,
        "call_count": len(history),
    }
# --- Server Control Endpoints ---
import subprocess

View File

@@ -53,6 +53,14 @@ class AudioService:
self._music_volume: float = 0.3
self._music_loop: bool = True
# Music crossfade state
self._crossfade_active: bool = False
self._crossfade_old_data: Optional[np.ndarray] = None
self._crossfade_old_position: int = 0
self._crossfade_progress: float = 0.0
self._crossfade_samples: int = 0
self._crossfade_step: float = 0.0
# Caller playback state
self._caller_stop_event = threading.Event()
self._caller_thread: Optional[threading.Thread] = None
@@ -578,6 +586,55 @@ class AudioService:
print(f"Failed to load music: {e}")
return False
def crossfade_to(self, file_path: str, duration: float = 3.0):
    """Blend from the currently playing track into ``file_path``.

    When nothing is playing this simply loads and starts the new track.
    Otherwise the new file is decoded and resampled for the output device,
    the current buffer is kept as the outgoing track, and crossfade state is
    armed for ``duration`` seconds. Load failures are logged and leave
    playback untouched.
    """
    import librosa

    nothing_playing = not self._music_playing or self._music_resampled is None
    if nothing_playing:
        if self.load_music(file_path):
            self.play_music()
        return

    # Decode the incoming track
    track = Path(file_path)
    if not track.exists():
        print(f"Music file not found: {file_path}")
        return

    try:
        decoded, _sr = librosa.load(str(track), sr=self.output_sample_rate, mono=True)
        incoming = decoded.astype(np.float32)
    except Exception as e:
        print(f"Failed to load music for crossfade: {e}")
        return

    # Work out the rate the output device runs at
    if self.output_device is None:
        device_rate = self.output_sample_rate
    else:
        device_rate = int(sd.query_devices(self.output_device)['default_samplerate'])

    if self.output_sample_rate == device_rate:
        incoming_resampled = incoming.copy()
    else:
        incoming_resampled = librosa.resample(
            incoming, orig_sr=self.output_sample_rate, target_sr=device_rate
        )

    # Swap: current becomes outgoing, new becomes current
    self._crossfade_old_data = self._music_resampled
    self._crossfade_old_position = self._music_position
    self._music_resampled = incoming_resampled
    self._music_data = incoming
    self._music_position = 0

    # Arm the fade; the active flag is set last
    fade_len = int(device_rate * duration)
    self._crossfade_samples = fade_len
    self._crossfade_progress = 0.0
    self._crossfade_step = 1.0 / fade_len if fade_len > 0 else 1.0
    self._crossfade_active = True

    print(f"Crossfading to {track.name} over {duration}s")
def play_music(self):
"""Start music playback to specific channel"""
import librosa
@@ -625,24 +682,54 @@ class AudioService:
if not self._music_playing or self._music_resampled is None:
return
# Read new track samples
end_pos = self._music_position + frames
if end_pos <= len(self._music_resampled):
outdata[:, channel_idx] = self._music_resampled[self._music_position:end_pos] * self._music_volume
new_samples = self._music_resampled[self._music_position:end_pos].copy()
self._music_position = end_pos
else:
remaining = len(self._music_resampled) - self._music_position
new_samples = np.zeros(frames, dtype=np.float32)
if remaining > 0:
outdata[:remaining, channel_idx] = self._music_resampled[self._music_position:] * self._music_volume
new_samples[:remaining] = self._music_resampled[self._music_position:]
if self._music_loop:
self._music_position = 0
wrap_frames = frames - remaining
if wrap_frames > 0:
outdata[remaining:, channel_idx] = self._music_resampled[:wrap_frames] * self._music_volume
new_samples[remaining:] = self._music_resampled[:wrap_frames]
self._music_position = wrap_frames
else:
self._music_playing = False
self._music_position = len(self._music_resampled)
if remaining <= 0:
self._music_playing = False
if self._crossfade_active and self._crossfade_old_data is not None:
# Read old track samples
old_end = self._crossfade_old_position + frames
if old_end <= len(self._crossfade_old_data):
old_samples = self._crossfade_old_data[self._crossfade_old_position:old_end]
self._crossfade_old_position = old_end
else:
old_remaining = len(self._crossfade_old_data) - self._crossfade_old_position
old_samples = np.zeros(frames, dtype=np.float32)
if old_remaining > 0:
old_samples[:old_remaining] = self._crossfade_old_data[self._crossfade_old_position:]
self._crossfade_old_position = len(self._crossfade_old_data)
# Compute fade curves for this chunk
start_progress = self._crossfade_progress
end_progress = min(1.0, start_progress + self._crossfade_step * frames)
fade_in = np.linspace(start_progress, end_progress, frames, dtype=np.float32)
fade_out = 1.0 - fade_in
outdata[:, channel_idx] = (old_samples * fade_out + new_samples * fade_in) * self._music_volume
self._crossfade_progress = end_progress
if self._crossfade_progress >= 1.0:
self._crossfade_active = False
self._crossfade_old_data = None
print("Crossfade complete")
else:
outdata[:, channel_idx] = new_samples * self._music_volume
try:
self._music_stream = sd.OutputStream(
@@ -659,15 +746,48 @@ class AudioService:
print(f"Music playback error: {e}")
self._music_playing = False
def stop_music(self, fade_duration: float = 2.0):
    """Stop music playback, optionally fading out first.

    Args:
        fade_duration: Seconds to ramp the music volume down before the
            stream is closed. ``0`` or less stops immediately.

    When music is playing and ``fade_duration`` is positive, the fade runs on
    a daemon thread and this method returns at once. A call made while a fade
    is already running is a no-op, so the pre-fade volume is captured exactly
    once and always restored afterwards.
    """
    import threading
    import time

    stream = self._music_stream

    # Immediate stop: nothing playing, no stream, or no fade requested.
    if not self._music_playing or not stream or fade_duration <= 0:
        was_active = bool(self._music_playing and stream)
        self._music_playing = False
        if stream:
            stream.stop()
            stream.close()
            self._music_stream = None
        self._music_position = 0
        if was_active:
            print("Music stopped")
        return

    # A fade is already in flight: starting another would record the
    # partially faded volume as the "original" and leave music quieter.
    running_fade = getattr(self, "_music_fade_thread", None)
    if running_fade is not None and running_fade.is_alive():
        return

    original_volume = self._music_volume
    steps = 20
    step_time = fade_duration / steps

    def _fade():
        try:
            for i in range(steps):
                # Abort if playback ended or a different stream took over.
                if not self._music_playing or self._music_stream is not stream:
                    break
                self._music_volume = original_volume * (1 - (i + 1) / steps)
                time.sleep(step_time)
            # Only tear down the stream this fade was started for.
            if self._music_stream is stream:
                self._music_playing = False
                stream.stop()
                stream.close()
                self._music_stream = None
                self._music_position = 0
                print("Music faded out and stopped")
        finally:
            self._music_volume = original_volume

    fade_thread = threading.Thread(target=_fade, daemon=True)
    self._music_fade_thread = fade_thread
    fade_thread.start()
def play_ad(self, file_path: str):
"""Load and play an ad file once (no loop) on the ad channel"""

View File

@@ -25,6 +25,7 @@ class CallerService:
self._stream_sids: dict[str, str] = {} # caller_id -> SignalWire streamSid
self._send_locks: dict[str, asyncio.Lock] = {} # per-caller send lock
self._streaming_tts: set[str] = set() # caller_ids currently receiving TTS
self._screening_state: dict[str, dict] = {} # caller_id -> screening conversation
def _get_send_lock(self, caller_id: str) -> asyncio.Lock:
if caller_id not in self._send_locks:
@@ -51,18 +52,6 @@ class CallerService:
self._queue = [c for c in self._queue if c["caller_id"] != caller_id]
print(f"[Caller] {caller_id} removed from queue")
def get_queue(self) -> list[dict]:
    """Return a snapshot of the queue with each caller's wait in whole seconds."""
    now = time.time()
    with self._lock:
        snapshot = []
        for queued in self._queue:
            snapshot.append({
                "caller_id": queued["caller_id"],
                "phone": queued["phone"],
                "wait_time": int(now - queued["queued_at"]),
            })
        return snapshot
def allocate_channel(self) -> int:
with self._lock:
ch = self.FIRST_REAL_CHANNEL
@@ -111,6 +100,7 @@ class CallerService:
self._call_sids.pop(caller_id, None)
self._stream_sids.pop(caller_id, None)
self._send_locks.pop(caller_id, None)
self._screening_state.pop(caller_id, None)
def reset(self):
with self._lock:
@@ -125,8 +115,72 @@ class CallerService:
self._stream_sids.clear()
self._send_locks.clear()
self._streaming_tts.clear()
self._screening_state.clear()
print("[Caller] Service reset")
# --- Screening ---
def start_screening(self, caller_id: str):
    """Create a fresh screening record for a queued caller.

    Any record already stored for ``caller_id`` is replaced.
    """
    fresh_record = {
        "conversation": [],
        "caller_name": None,
        "topic": None,
        # Lifecycle marker; values used here are "screening" and "complete".
        "status": "screening",
        "response_count": 0,
    }
    self._screening_state[caller_id] = fresh_record
    print(f"[Screening] Started for {caller_id}")
def get_screening_state(self, caller_id: str) -> Optional[dict]:
    """Return the stored screening record for ``caller_id``, or None if absent."""
    records = self._screening_state
    if caller_id in records:
        return records[caller_id]
    return None
def update_screening(self, caller_id: str, caller_text: str = None,
                     screener_text: str = None, caller_name: str = None,
                     topic: str = None):
    """Record new screening dialogue and/or extracted details for a caller.

    Does nothing when no screening record exists for ``caller_id``. Only
    truthy arguments are applied, so empty strings leave the record as is.
    A caller utterance also increments the record's ``response_count``.
    """
    record = self._screening_state.get(caller_id)
    if not record:
        return

    if caller_text:
        record["conversation"].append({"role": "caller", "content": caller_text})
        record["response_count"] += 1
    if screener_text:
        record["conversation"].append({"role": "screener", "content": screener_text})

    # Extracted details overwrite earlier values only when a new value is given.
    for field, value in (("caller_name", caller_name), ("topic", topic)):
        if value:
            record[field] = value
def end_screening(self, caller_id: str):
    """Flag a caller's screening record as complete and log what was captured.

    Has no effect when no record exists for ``caller_id``.
    """
    state = self._screening_state.get(caller_id)
    if not state:
        return
    state["status"] = "complete"
    print(f"[Screening] Complete for {caller_id}: name={state.get('caller_name')}, topic={state.get('topic')}")
def get_queue(self) -> list[dict]:
    """Return the waiting callers, each annotated with screening details.

    Every entry carries the caller id, stored phone value and whole seconds
    waited, plus ``screening_status``, ``caller_name`` and
    ``screening_summary`` (the screening topic). The three screening fields
    are None when no screening record exists for the caller.
    """
    def _screening_fields(caller_id):
        # Map a caller's screening record onto the queue-entry field names.
        info = self._screening_state.get(caller_id)
        if not info:
            return {
                "screening_status": None,
                "caller_name": None,
                "screening_summary": None,
            }
        return {
            "screening_status": info["status"],
            "caller_name": info.get("caller_name"),
            "screening_summary": info.get("topic"),
        }

    snapshot_time = time.time()
    with self._lock:
        return [
            {
                "caller_id": queued["caller_id"],
                "phone": queued["phone"],
                "wait_time": int(snapshot_time - queued["queued_at"]),
                **_screening_fields(queued["caller_id"]),
            }
            for queued in self._queue
        ]
def register_websocket(self, caller_id: str, websocket):
"""Register a WebSocket for a caller"""
self._websockets[caller_id] = websocket

View File

@@ -0,0 +1,95 @@
"""Returning caller persistence service"""
import json
import time
import uuid
from pathlib import Path
from typing import Optional
# JSON file holding the regulars list: "data/regulars.json" located two
# directories above the directory that contains this module.
DATA_FILE = Path(__file__).parent.parent.parent / "data" / "regulars.json"
# Cap on stored regulars; add_regular retires the entry with the oldest
# "last_call" when the list has reached this size.
MAX_REGULARS = 12
class RegularCallerService:
    """Manages persistent 'regular' callers who return across sessions.

    Regulars are plain dicts kept in memory and mirrored to ``DATA_FILE`` as
    ``{"regulars": [...]}``. Each regular has an ``id``, profile fields, a
    ``call_history`` list of ``{"summary", "timestamp"}`` entries, and
    ``last_call`` / ``created_at`` epoch timestamps. Persistence is
    best-effort: load and save failures are logged, never raised.
    """

    def __init__(self):
        self._regulars: list[dict] = []
        self._load()

    def _load(self):
        """Populate the in-memory list from ``DATA_FILE`` if it exists.

        Any failure (unreadable file, invalid JSON, unexpected structure)
        is logged and leaves the service with an empty list.
        """
        if not DATA_FILE.exists():
            return
        try:
            with open(DATA_FILE, encoding="utf-8") as f:
                data = json.load(f)
            regulars = data.get("regulars", [])
            if not isinstance(regulars, list):
                raise ValueError("'regulars' must be a list")
            # Drop malformed (non-dict) entries so later lookups cannot crash.
            self._regulars = [r for r in regulars if isinstance(r, dict)]
            print(f"[Regulars] Loaded {len(self._regulars)} regular callers")
        except Exception as e:
            # Deliberately broad: a damaged store must not stop the module
            # (and its shared instance) from being created.
            print(f"[Regulars] Failed to load: {e}")
            self._regulars = []

    def _save(self):
        """Write the current list to ``DATA_FILE``; failures are only logged.

        The JSON is written to a sibling temporary file and then moved over
        the target, so an interrupted write cannot truncate the existing
        store.
        """
        try:
            DATA_FILE.parent.mkdir(parents=True, exist_ok=True)
            tmp_file = DATA_FILE.with_name(DATA_FILE.name + ".tmp")
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump({"regulars": self._regulars}, f, indent=2)
            tmp_file.replace(DATA_FILE)
        except Exception as e:
            print(f"[Regulars] Failed to save: {e}")

    def get_regulars(self) -> list[dict]:
        """Return a shallow copy of the regulars list (entries are shared)."""
        return list(self._regulars)

    def get_returning_callers(self, count: int = 2) -> list[dict]:
        """Get up to `count` regulars for returning caller slots.

        Only regulars with at least one recorded call are eligible. Returns
        an empty list when none are eligible or ``count`` is not positive.
        """
        import random

        available = [r for r in self._regulars if r.get("call_history")]
        # Clamp at zero: random.sample rejects a negative sample size.
        wanted = max(0, min(count, len(available)))
        if not wanted:
            return []
        return random.sample(available, wanted)

    def add_regular(self, name: str, gender: str, age: int, job: str,
                    location: str, personality_traits: list[str],
                    first_call_summary: str) -> dict:
        """Promote a first-time caller to regular.

        Before adding, regulars with the oldest ``last_call`` are retired
        until there is room under ``MAX_REGULARS``; the order of the
        remaining entries is left untouched. The new regular is persisted
        and returned.
        """
        # One timestamp so created_at, last_call and the first history
        # entry agree exactly.
        now = time.time()

        # Loop rather than retire once, so a store that is already over the
        # cap shrinks back to it.
        while self._regulars and len(self._regulars) >= MAX_REGULARS:
            oldest_index = min(
                range(len(self._regulars)),
                key=lambda i: self._regulars[i].get("last_call") or 0,
            )
            retired = self._regulars.pop(oldest_index)
            print(f"[Regulars] Retired {retired.get('name')} to make room")

        # Short ids are truncated UUIDs; regenerate on the unlikely clash.
        existing_ids = {r.get("id") for r in self._regulars}
        new_id = str(uuid.uuid4())[:8]
        while new_id in existing_ids:
            new_id = str(uuid.uuid4())[:8]

        regular = {
            "id": new_id,
            "name": name,
            "gender": gender,
            "age": age,
            "job": job,
            "location": location,
            "personality_traits": personality_traits,
            "call_history": [
                {"summary": first_call_summary, "timestamp": now}
            ],
            "last_call": now,
            "created_at": now,
        }
        self._regulars.append(regular)
        self._save()
        print(f"[Regulars] Promoted {name} to regular (total: {len(self._regulars)})")
        return regular

    def update_after_call(self, regular_id: str, call_summary: str):
        """Update a regular's history after a returning call.

        Appends the summary to the matching regular's ``call_history``,
        refreshes ``last_call`` and persists. Logs a message and changes
        nothing when no regular has the given id.
        """
        now = time.time()
        for regular in self._regulars:
            if regular.get("id") != regular_id:
                continue
            history = regular.setdefault("call_history", [])
            history.append({"summary": call_summary, "timestamp": now})
            regular["last_call"] = now
            self._save()
            print(f"[Regulars] Updated {regular.get('name')} call history ({len(history)} calls)")
            return
        print(f"[Regulars] Regular {regular_id} not found for update")
# Shared module-level instance; constructing it runs _load(), which reads any
# saved regulars from DATA_FILE at import time.
regular_caller_service = RegularCallerService()