Add show improvement features: crossfade, emotions, returning callers, transcripts, screening
- Music crossfade: smooth 3-second blend between tracks instead of hard stop/start - Emotional detection: analyze host mood from recent messages so callers adapt tone - AI caller summaries: generate call summaries with timestamps for show history - Returning callers: persist regular callers across sessions with call history - Session export: generate transcripts with speaker labels and chapter markers - Caller screening: AI pre-screens phone callers to get name and topic while queued Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
453
backend/main.py
453
backend/main.py
@@ -25,6 +25,7 @@ from .services.llm import llm_service
|
||||
from .services.tts import generate_speech
|
||||
from .services.audio import audio_service
|
||||
from .services.news import news_service, extract_keywords, STOP_WORDS
|
||||
from .services.regulars import regular_caller_service
|
||||
|
||||
app = FastAPI(title="AI Radio Show")
|
||||
|
||||
@@ -115,7 +116,8 @@ CALLER_BASES = {
|
||||
|
||||
|
||||
def _randomize_callers():
|
||||
"""Assign random names and voices to callers, unique per gender."""
|
||||
"""Assign random names and voices to callers, unique per gender.
|
||||
Overrides 2-3 slots with returning regulars when available."""
|
||||
num_m = sum(1 for c in CALLER_BASES.values() if c["gender"] == "male")
|
||||
num_f = sum(1 for c in CALLER_BASES.values() if c["gender"] == "female")
|
||||
males = random.sample(MALE_NAMES, num_m)
|
||||
@@ -125,6 +127,8 @@ def _randomize_callers():
|
||||
f_voices = random.sample(female_pool, min(num_f, len(female_pool)))
|
||||
mi, fi = 0, 0
|
||||
for base in CALLER_BASES.values():
|
||||
base["returning"] = False
|
||||
base["regular_id"] = None
|
||||
if base["gender"] == "male":
|
||||
base["name"] = males[mi]
|
||||
base["voice"] = m_voices[mi]
|
||||
@@ -134,6 +138,32 @@ def _randomize_callers():
|
||||
base["voice"] = f_voices[fi]
|
||||
fi += 1
|
||||
|
||||
# Override 2-3 random slots with returning callers
|
||||
try:
|
||||
returning = regular_caller_service.get_returning_callers(random.randint(2, 3))
|
||||
if returning:
|
||||
keys_by_gender = {"male": [], "female": []}
|
||||
for k, v in CALLER_BASES.items():
|
||||
keys_by_gender[v["gender"]].append(k)
|
||||
|
||||
for regular in returning:
|
||||
gender = regular["gender"]
|
||||
candidates = keys_by_gender.get(gender, [])
|
||||
if not candidates:
|
||||
continue
|
||||
key = random.choice(candidates)
|
||||
candidates.remove(key)
|
||||
base = CALLER_BASES[key]
|
||||
base["name"] = regular["name"]
|
||||
base["returning"] = True
|
||||
base["regular_id"] = regular["id"]
|
||||
# Keep the randomly assigned voice — regulars sound different each time
|
||||
if returning:
|
||||
names = [r["name"] for r in returning]
|
||||
print(f"[Regulars] Injected returning callers: {', '.join(names)}")
|
||||
except Exception as e:
|
||||
print(f"[Regulars] Failed to inject returning callers: {e}")
|
||||
|
||||
_randomize_callers() # Initial assignment
|
||||
|
||||
# Background components for dynamic generation
|
||||
@@ -1239,10 +1269,68 @@ def pick_location() -> str:
|
||||
return random.choice(LOCATIONS_OUT_OF_STATE)
|
||||
|
||||
|
||||
def _generate_returning_caller_background(base: dict) -> str:
    """Generate background for a returning regular caller.

    Looks up the persisted regular record referenced by ``base["regular_id"]``
    and rebuilds a full caller background from the regular's stable identity
    (age, job, location, personality traits) plus freshly randomized
    personality layers (interests, quirks, people, tics, arc, vehicle).
    Appends a "PREVIOUS CALLS" section built from up to the last three stored
    call summaries so the caller can reference earlier appearances on air.

    Falls back to generate_caller_background() when the regular record
    cannot be found (e.g. stale id after the regulars store changed).
    """
    regular_id = base.get("regular_id")
    regulars = regular_caller_service.get_regulars()
    regular = next((r for r in regulars if r["id"] == regular_id), None)
    if not regular:
        # Stale or missing regular record — treat as a fresh first-time caller.
        return generate_caller_background(base)

    # Stable identity persisted across sessions.
    gender = regular["gender"]
    age = regular["age"]
    job = regular["job"]
    # NOTE(review): location appears to already carry its preposition
    # (e.g. "in Cleveland") — it is interpolated below without adding one.
    location = regular["location"]
    traits = regular.get("personality_traits", [])

    # Build previous calls section
    prev_calls = regular.get("call_history", [])
    prev_section = ""
    if prev_calls:
        # Only the three most recent calls, newest last.
        lines = [f"- {c['summary']}" for c in prev_calls[-3:]]
        prev_section = "\nPREVIOUS CALLS:\n" + "\n".join(lines)
        prev_section += "\nYou're calling back with an update — something has changed since last time. Reference your previous call(s) naturally."

    # Reuse standard personality layers
    interest1, interest2 = random.sample(INTERESTS, 2)
    quirk1, quirk2 = random.sample(QUIRKS, 2)
    people_pool = PEOPLE_MALE if gender == "male" else PEOPLE_FEMALE
    person1, person2 = random.sample(people_pool, 2)
    tic1, tic2 = random.sample(VERBAL_TICS, 2)
    arc = random.choice(EMOTIONAL_ARCS)
    vehicle = random.choice(VEHICLES)
    having = random.choice(HAVING_RIGHT_NOW)

    # Live context (time of day, moon phase, season) so the prompt feels
    # anchored to "right now" rather than generic.
    time_ctx = _get_time_context()
    moon = _get_moon_phase()
    season_ctx = _get_seasonal_context()

    trait_str = ", ".join(traits) if traits else "a regular caller"

    parts = [
        f"{age}, {job} {location}. Returning caller — {trait_str}.",
        f"{interest1.capitalize()}, {interest2}.",
        f"{quirk1.capitalize()}, {quirk2}.",
        f"\nRIGHT NOW: {time_ctx} Moon: {moon}.",
        f"\nSEASON: {season_ctx}",
        f"\nPEOPLE IN THEIR LIFE: {person1.capitalize()}. {person2.capitalize()}. Use their names when talking about them.",
        f"\nDRIVES: {vehicle.capitalize()}.",
        f"\nHAVING RIGHT NOW: {having}",
        f"\nVERBAL HABITS: Tends to say \"{tic1}\" and \"{tic2}\" — use these naturally in conversation.",
        f"\nEMOTIONAL ARC: {arc}",
        f"\nRELATIONSHIP TO THE SHOW: Has called before. Comfortable on air. Knows Luke a bit. Might reference their last call.",
        prev_section,
    ]

    # First three parts flow as one sentence-separated line; the rest each
    # begin with "\n" already, so they are concatenated without a separator.
    return " ".join(parts[:3]) + "".join(parts[3:])
|
||||
|
||||
|
||||
def generate_caller_background(base: dict) -> str:
|
||||
"""Generate a unique background for a caller (sync, no research).
|
||||
~30% of callers are 'topic callers' who call about something interesting
|
||||
instead of a personal problem. Includes full personality layers for realism."""
|
||||
if base.get("returning") and base.get("regular_id"):
|
||||
return _generate_returning_caller_background(base)
|
||||
gender = base["gender"]
|
||||
age = random.randint(*base["age_range"])
|
||||
jobs = JOBS_MALE if gender == "male" else JOBS_FEMALE
|
||||
@@ -1491,8 +1579,58 @@ async def enrich_caller_background(background: str) -> str:
|
||||
|
||||
return background
|
||||
|
||||
def detect_host_mood(messages: list[dict]) -> str:
    """Analyze recent host messages to detect mood signals for caller adaptation.

    Scans the last five host turns (roles "user" or "host") for coarse
    signals — terseness, pushback, support, humor, probing questions — and
    renders at most two of them as an "EMOTIONAL READ" prompt section.
    Returns "" when there are no host messages or no signal fires.
    """
    recent = [m["content"] for m in messages if m.get("role") in ("user", "host")][-5:]
    if not recent:
        return ""

    lowered = [text.lower() for text in recent]

    def hits(phrases):
        # Total occurrences of any phrase across the recent host turns.
        return sum(1 for text in lowered for phrase in phrases if phrase in text)

    detected = []

    # Terseness: short host turns read as disinterest or testing the caller.
    mean_words = sum(len(text.split()) for text in recent) / len(recent)
    if mean_words < 8:
        detected.append("The host is giving short responses — they might be losing interest, testing you, or waiting for you to bring something real. Don't ramble. Get to the point or change the subject.")

    # Pushback: the host is challenging what the caller says.
    if hits(("i don't think", "that's not", "come on", "really?", "i disagree",
             "that doesn't", "are you sure", "i don't buy", "no way", "but that's",
             "hold on", "wait a minute", "let's be honest")) >= 2:
        detected.append("The host is pushing back — they're challenging you. Don't fold immediately. Defend your position or concede specifically, not generically.")

    # Support: the host is validating the caller.
    if hits(("i hear you", "that makes sense", "i get it", "that's real",
             "i feel you", "you're right", "absolutely", "exactly", "good for you",
             "i respect that", "that took guts", "i'm glad you")) >= 2:
        detected.append("The host is being supportive — they're with you. You can go deeper. Share something you've been holding back.")

    # Humor: the host is joking around.
    if hits(("haha", "lmao", "lol", "that's hilarious", "no way", "you're killing me",
             "shut up", "get out", "are you serious", "you're joking")) >= 2:
        detected.append("The host is in a playful mood — joking around. You can joke back, lean into the humor, but you can also use it as a door to something real.")

    # Probing: lots of question marks means the host is digging.
    if sum(text.count("?") for text in recent) >= 3:
        detected.append("The host is asking a lot of questions — they're digging. Give them real answers. Don't deflect.")

    if not detected:
        return ""

    # Keep the read focused: at most the first two signals fire.
    bullets = "\n".join(f"- {signal}" for signal in detected[:2])
    return "\nEMOTIONAL READ ON THE HOST:\n" + bullets + "\n"
|
||||
|
||||
|
||||
def get_caller_prompt(caller: dict, conversation_summary: str = "", show_history: str = "",
|
||||
news_context: str = "", research_context: str = "") -> str:
|
||||
news_context: str = "", research_context: str = "",
|
||||
emotional_read: str = "") -> str:
|
||||
"""Generate a natural system prompt for a caller"""
|
||||
context = ""
|
||||
if conversation_summary:
|
||||
@@ -1519,7 +1657,7 @@ Continue naturally. Don't repeat yourself.
|
||||
return f"""You're {caller['name']}, calling a late-night radio show called "Luke at the Roost." It's late. You trust this host.
|
||||
|
||||
{caller['vibe']}
|
||||
{history}{context}{world_context}
|
||||
{history}{context}{world_context}{emotional_read}
|
||||
HOW TO TALK:
|
||||
- Sound like a real person on the phone, not an essay. This is a conversation, not a monologue.
|
||||
- VARY YOUR LENGTH. Sometimes one sentence. Sometimes two or three. Match the moment.
|
||||
@@ -1607,6 +1745,8 @@ class CallRecord:
|
||||
caller_name: str # "Tony" or "Caller #3"
|
||||
summary: str # LLM-generated summary after hangup
|
||||
transcript: list[dict] = field(default_factory=list)
|
||||
started_at: float = 0.0
|
||||
ended_at: float = 0.0
|
||||
|
||||
|
||||
class Session:
|
||||
@@ -1616,6 +1756,7 @@ class Session:
|
||||
self.conversation: list[dict] = []
|
||||
self.caller_backgrounds: dict[str, str] = {} # Generated backgrounds for this session
|
||||
self.call_history: list[CallRecord] = []
|
||||
self._call_started_at: float = 0.0
|
||||
self.active_real_caller: dict | None = None
|
||||
self.ai_respond_mode: str = "manual" # "manual" or "auto"
|
||||
self.auto_followup: bool = False
|
||||
@@ -1626,13 +1767,14 @@ class Session:
|
||||
def start_call(self, caller_key: str):
|
||||
self.current_caller_key = caller_key
|
||||
self.conversation = []
|
||||
self._call_started_at = time.time()
|
||||
|
||||
def end_call(self):
|
||||
self.current_caller_key = None
|
||||
self.conversation = []
|
||||
|
||||
def add_message(self, role: str, content: str):
|
||||
self.conversation.append({"role": role, "content": content})
|
||||
self.conversation.append({"role": role, "content": content, "timestamp": time.time()})
|
||||
|
||||
def get_caller_background(self, caller_key: str) -> str:
|
||||
"""Get or generate background for a caller in this session"""
|
||||
@@ -1977,7 +2119,7 @@ async def get_callers():
|
||||
"""Get list of available callers"""
|
||||
return {
|
||||
"callers": [
|
||||
{"key": k, "name": v["name"]}
|
||||
{"key": k, "name": v["name"], "returning": v.get("returning", False)}
|
||||
for k, v in CALLER_BASES.items()
|
||||
],
|
||||
"current": session.current_caller_key,
|
||||
@@ -1985,6 +2127,12 @@ async def get_callers():
|
||||
}
|
||||
|
||||
|
||||
@app.get("/api/regulars")
|
||||
async def get_regulars():
|
||||
"""Get list of regular callers"""
|
||||
return {"regulars": regular_caller_service.get_regulars()}
|
||||
|
||||
|
||||
@app.post("/api/session/reset")
|
||||
async def reset_session():
|
||||
"""Reset session - all callers get fresh backgrounds"""
|
||||
@@ -2037,6 +2185,9 @@ async def hangup():
|
||||
session._research_task = None
|
||||
|
||||
caller_name = session.caller["name"] if session.caller else None
|
||||
caller_key = session.current_caller_key
|
||||
conversation_snapshot = list(session.conversation)
|
||||
call_started = getattr(session, '_call_started_at', 0.0)
|
||||
session.end_call()
|
||||
|
||||
# Play hangup sound in background so response returns immediately
|
||||
@@ -2044,9 +2195,74 @@ async def hangup():
|
||||
if hangup_sound.exists():
|
||||
threading.Thread(target=audio_service.play_sfx, args=(str(hangup_sound),), daemon=True).start()
|
||||
|
||||
# Generate summary for AI caller in background
|
||||
if caller_name and conversation_snapshot:
|
||||
asyncio.create_task(_summarize_ai_call(caller_key, caller_name, conversation_snapshot, call_started))
|
||||
|
||||
return {"status": "disconnected", "caller": caller_name}
|
||||
|
||||
|
||||
async def _summarize_ai_call(caller_key: str, caller_name: str, conversation: list[dict], started_at: float):
    """Background task: summarize an AI caller's conversation and store it in history.

    Generates a 1-2 sentence LLM summary of the call, appends a CallRecord to
    the session's call history, then either updates the regular-caller record
    (if this slot was a returning regular) or, with 20% probability, promotes
    a first-time caller with a substantial conversation (6+ messages) into a
    persisted regular.

    Args:
        caller_key: Key into CALLER_BASES for this caller slot (may be falsy).
        caller_name: Display name used in the record and fallback summary.
        conversation: Snapshot of the call's messages ({"role", "content", ...}).
        started_at: Epoch timestamp when the call began.
    """
    ended_at = time.time()
    summary = ""
    if conversation:
        transcript_text = "\n".join(
            f"{msg['role']}: {msg['content']}" for msg in conversation
        )
        try:
            summary = await llm_service.generate(
                messages=[{"role": "user", "content": f"Summarize this radio show call in 1-2 sentences:\n{transcript_text}"}],
                system_prompt="You summarize radio show conversations concisely. Focus on what the caller talked about and any emotional moments.",
            )
        except Exception as e:
            print(f"[AI Summary] Failed to generate summary: {e}")
            # Minimal fallback so the history entry is never blank.
            summary = f"{caller_name} called in."

    session.call_history.append(CallRecord(
        caller_type="ai",
        caller_name=caller_name,
        summary=summary,
        transcript=conversation,
        started_at=started_at,
        ended_at=ended_at,
    ))
    print(f"[AI Summary] {caller_name} call summarized: {summary[:80]}...")

    # Returning caller promotion/update logic — best-effort: never let a
    # parsing problem here break the hangup path.
    try:
        base = CALLER_BASES.get(caller_key) if caller_key else None
        if base and summary:
            if base.get("returning") and base.get("regular_id"):
                # Update existing regular's call history
                regular_caller_service.update_after_call(base["regular_id"], summary)
            elif len(conversation) >= 6 and random.random() < 0.20:
                # 20% chance to promote first-timer with 6+ messages
                bg = session.caller_backgrounds.get(caller_key, "")
                # Pull up to four labeled personality traits out of the
                # generated background text.
                traits = []
                for label in ["QUIRK", "STRONG OPINION", "SECRET SIDE", "FOOD OPINION"]:
                    for line in bg.split("\n"):
                        if label in line:
                            traits.append(line.split(":", 1)[-1].strip()[:80])
                            break
                # Extract job and location from the background's first line,
                # formatted like "34, a plumber in Cleveland."
                first_line = bg.split(".")[0] if bg else ""
                parts = first_line.split(",", 1)
                job_loc = parts[1].strip() if len(parts) > 1 else ""
                # BUGFIX: str.rsplit returns a *list*, so the previous
                # isinstance(job_parts, tuple) checks always misfired —
                # location became "unknown" whenever " in " was present and
                # "in unknown" whenever it wasn't. Parse explicitly instead.
                if " in " in job_loc:
                    job_name, loc_name = job_loc.rsplit(" in ", 1)
                    job_name = job_name.strip()
                    location = "in " + loc_name.strip()
                else:
                    job_name = job_loc
                    location = "unknown"
                regular_caller_service.add_regular(
                    name=caller_name,
                    gender=base.get("gender", "male"),
                    age=random.randint(*base.get("age_range", (30, 50))),
                    job=job_name,
                    location=location,
                    personality_traits=traits[:4],
                    first_call_summary=summary,
                )
    except Exception as e:
        print(f"[Regulars] Promotion logic error: {e}")
|
||||
|
||||
|
||||
# --- Chat & TTS Endpoints ---
|
||||
|
||||
import re
|
||||
@@ -2174,7 +2390,8 @@ async def chat(request: ChatRequest):
|
||||
|
||||
conversation_summary = session.get_conversation_summary()
|
||||
show_history = session.get_show_history()
|
||||
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history)
|
||||
mood = detect_host_mood(session.conversation)
|
||||
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history, emotional_read=mood)
|
||||
|
||||
messages = _normalize_messages_for_llm(session.conversation[-10:])
|
||||
response = await llm_service.generate(
|
||||
@@ -2276,13 +2493,16 @@ async def get_music():
|
||||
|
||||
@app.post("/api/music/play")
|
||||
async def play_music(request: MusicRequest):
|
||||
"""Load and play a music track"""
|
||||
"""Load and play a music track, crossfading if already playing"""
|
||||
track_path = settings.music_dir / request.track
|
||||
if not track_path.exists():
|
||||
raise HTTPException(404, "Track not found")
|
||||
|
||||
audio_service.load_music(str(track_path))
|
||||
audio_service.play_music()
|
||||
if audio_service.is_music_playing():
|
||||
audio_service.crossfade_to(str(track_path))
|
||||
else:
|
||||
audio_service.load_music(str(track_path))
|
||||
audio_service.play_music()
|
||||
return {"status": "playing", "track": request.track}
|
||||
|
||||
|
||||
@@ -2352,6 +2572,9 @@ async def play_ad(request: MusicRequest):
|
||||
if not ad_path.exists():
|
||||
raise HTTPException(404, "Ad not found")
|
||||
|
||||
if audio_service._music_playing:
|
||||
audio_service.stop_music(fade_duration=1.0)
|
||||
await asyncio.sleep(1.1)
|
||||
audio_service.play_ad(str(ad_path))
|
||||
return {"status": "playing", "track": request.track}
|
||||
|
||||
@@ -2393,6 +2616,126 @@ async def update_settings(data: dict):
|
||||
return llm_service.get_settings()
|
||||
|
||||
|
||||
# --- Caller Screening ---
# AI pre-screening for queued phone callers: a screener persona collects the
# caller's first name and topic before they are put on air.

# System prompt for the screener persona used by the screening LLM calls.
SCREENING_PROMPT = """You are a friendly, brief phone screener for "Luke at the Roost" radio show.
Your job: Get the caller's first name and what they want to talk about. That's it.

Rules:
- Be warm but brief (1-2 sentences per response)
- First ask their name, then ask what they want to talk about
- After you have both, say something like "Great, sit tight and we'll get you on with Luke!"
- Never pretend to be Luke or the host
- Keep it casual and conversational
- If they're hard to understand, ask them to repeat"""

# Per-caller raw PCM accumulation buffers for screening audio, keyed by
# caller id. NOTE(review): not referenced by the visible code in this file
# (the websocket handler uses a local `screening_buffer` instead) — possibly
# dead; confirm before relying on it.
_screening_audio_buffers: dict[str, bytearray] = {}
|
||||
|
||||
|
||||
async def _start_screening_greeting(caller_id: str):
    """Kick off the screening conversation for a freshly queued phone caller.

    Waits two seconds so the media stream settles, marks the caller as
    in-screening, records the canned greeting in the screening state, and
    speaks it over the caller's audio stream. Bails silently when the
    caller's websocket is gone.
    """
    # Give the SignalWire stream a moment to stabilize before speaking.
    await asyncio.sleep(2)

    # NOTE(review): reaches into caller_service._websockets (private attr) —
    # confirm there is no public accessor for this.
    if not caller_service._websockets.get(caller_id):
        return

    caller_service.start_screening(caller_id)
    greeting = "Hey there! Thanks for calling Luke at the Roost. What's your name?"
    caller_service.update_screening(caller_id, screener_text=greeting)

    try:
        spoken = await generate_speech(greeting, "Sarah", "none")
        if spoken:
            await caller_service.stream_audio_to_caller(caller_id, spoken, 24000)
    except Exception as e:
        # Best-effort: a TTS failure should not crash the screening flow.
        print(f"[Screening] Greeting TTS failed: {e}")
|
||||
|
||||
|
||||
async def _handle_screening_audio(caller_id: str, pcm_data: bytes, sample_rate: int):
    """Process audio from a queued caller for screening conversation.

    Pipeline: transcribe the caller's speech chunk, append it to the
    screening conversation, generate a screener reply with the LLM, try to
    extract the caller's name/topic once they have responded at least twice
    (marking screening complete when both are found), and finally speak the
    screener reply back over the caller's stream. All failures are logged and
    swallowed — screening is strictly best-effort.

    Args:
        caller_id: Queue identifier for the caller being screened.
        pcm_data: Raw 16-bit PCM audio chunk from the caller.
        sample_rate: Sample rate of pcm_data in Hz.
    """
    state = caller_service.get_screening_state(caller_id)
    if not state or state["status"] == "complete":
        return

    # Skip if TTS is currently streaming to this caller
    # (otherwise we would transcribe our own screener voice).
    if caller_service.is_streaming_tts(caller_id):
        return

    # Transcribe caller speech
    try:
        text = await transcribe_audio(pcm_data, source_sample_rate=sample_rate)
    except Exception as e:
        print(f"[Screening] Transcription failed: {e}")
        return

    if not text or not text.strip():
        return

    print(f"[Screening] Caller {caller_id}: {text}")
    caller_service.update_screening(caller_id, caller_text=text)

    # Build conversation for LLM
    # NOTE(review): assumes update_screening mutates the same dict returned by
    # get_screening_state, so state["conversation"] here already includes the
    # caller line just recorded — confirm against caller_service.
    messages = []
    for msg in state["conversation"]:
        role = "assistant" if msg["role"] == "screener" else "user"
        messages.append({"role": role, "content": msg["content"]})

    # Generate screener response
    try:
        response = await llm_service.generate(
            messages=messages,
            system_prompt=SCREENING_PROMPT
        )
    except Exception as e:
        print(f"[Screening] LLM failed: {e}")
        return

    if not response or not response.strip():
        return

    response = response.strip()
    print(f"[Screening] Screener → {caller_id}: {response}")
    caller_service.update_screening(caller_id, screener_text=response)

    # After 2+ caller responses, try to extract name and topic
    if state["response_count"] >= 2:
        try:
            extract_prompt = f"""From this screening conversation, extract the caller's name and topic.
Conversation:
{chr(10).join(f'{m["role"]}: {m["content"]}' for m in state["conversation"])}

Respond with ONLY JSON: {{"name": "their first name or null", "topic": "brief topic or null"}}"""
            extract = await llm_service.generate(
                messages=[{"role": "user", "content": extract_prompt}],
                system_prompt="You extract structured data from conversations. Respond with only valid JSON."
            )
            # Pull the first {...} object out of the reply; the model may wrap
            # the JSON in extra prose.
            json_match = re.search(r'\{[^}]+\}', extract)
            if json_match:
                info = json.loads(json_match.group())
                if info.get("name"):
                    caller_service.update_screening(caller_id, caller_name=info["name"])
                if info.get("topic"):
                    caller_service.update_screening(caller_id, topic=info["topic"])
                if info.get("name") and info.get("topic"):
                    # Both fields collected — screening is done; tell the UI.
                    caller_service.end_screening(caller_id)
                    broadcast_event("screening_complete", {
                        "caller_id": caller_id,
                        "name": info["name"],
                        "topic": info["topic"]
                    })
        except Exception as e:
            print(f"[Screening] Extract failed: {e}")

    # TTS the screener response back to caller
    try:
        audio_bytes = await generate_speech(response, "Sarah", "none")
        if audio_bytes:
            await caller_service.stream_audio_to_caller(caller_id, audio_bytes, 24000)
    except Exception as e:
        print(f"[Screening] Response TTS failed: {e}")
|
||||
|
||||
|
||||
@app.websocket("/api/signalwire/stream")
|
||||
async def signalwire_audio_stream(websocket: WebSocket):
|
||||
"""Handle SignalWire bidirectional audio stream"""
|
||||
@@ -2402,6 +2745,7 @@ async def signalwire_audio_stream(websocket: WebSocket):
|
||||
caller_phone = "Unknown"
|
||||
call_sid = ""
|
||||
audio_buffer = bytearray()
|
||||
screening_buffer = bytearray()
|
||||
CHUNK_DURATION_S = 3
|
||||
SAMPLE_RATE = 16000
|
||||
chunk_samples = CHUNK_DURATION_S * SAMPLE_RATE
|
||||
@@ -2448,6 +2792,9 @@ async def signalwire_audio_stream(websocket: WebSocket):
|
||||
if stream_sid:
|
||||
caller_service.register_stream_sid(caller_id, stream_sid)
|
||||
|
||||
# Start screening conversation
|
||||
asyncio.create_task(_start_screening_greeting(caller_id))
|
||||
|
||||
elif event == "media" and stream_started:
|
||||
try:
|
||||
payload = msg.get("media", {}).get("payload", "")
|
||||
@@ -2458,6 +2805,16 @@ async def signalwire_audio_stream(websocket: WebSocket):
|
||||
|
||||
call_info = caller_service.active_calls.get(caller_id)
|
||||
if not call_info:
|
||||
# Caller is queued, not on air — route to screening
|
||||
screening_buffer.extend(pcm_data)
|
||||
if len(screening_buffer) >= chunk_samples * 2:
|
||||
pcm_chunk = bytes(screening_buffer[:chunk_samples * 2])
|
||||
screening_buffer = screening_buffer[chunk_samples * 2:]
|
||||
audio_check = np.frombuffer(pcm_chunk, dtype=np.int16).astype(np.float32) / 32768.0
|
||||
if np.abs(audio_check).max() >= 0.01:
|
||||
asyncio.create_task(
|
||||
_handle_screening_audio(caller_id, pcm_chunk, SAMPLE_RATE)
|
||||
)
|
||||
continue
|
||||
|
||||
audio_buffer.extend(pcm_data)
|
||||
@@ -2713,7 +3070,8 @@ async def _trigger_ai_auto_respond(accumulated_text: str):
|
||||
|
||||
conversation_summary = session.get_conversation_summary()
|
||||
show_history = session.get_show_history()
|
||||
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history)
|
||||
mood = detect_host_mood(session.conversation)
|
||||
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history, emotional_read=mood)
|
||||
|
||||
messages = _normalize_messages_for_llm(session.conversation[-10:])
|
||||
response = await llm_service.generate(
|
||||
@@ -2785,7 +3143,8 @@ async def ai_respond():
|
||||
|
||||
conversation_summary = session.get_conversation_summary()
|
||||
show_history = session.get_show_history()
|
||||
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history)
|
||||
mood = detect_host_mood(session.conversation)
|
||||
system_prompt = get_caller_prompt(session.caller, conversation_summary, show_history, emotional_read=mood)
|
||||
|
||||
messages = _normalize_messages_for_llm(session.conversation[-10:])
|
||||
response = await llm_service.generate(
|
||||
@@ -2856,6 +3215,7 @@ async def hangup_real_caller():
|
||||
caller_id = session.active_real_caller["caller_id"]
|
||||
caller_phone = session.active_real_caller["phone"]
|
||||
conversation_snapshot = list(session.conversation)
|
||||
call_started = getattr(session, '_call_started_at', 0.0)
|
||||
auto_followup_enabled = session.auto_followup
|
||||
|
||||
# End the phone call via SignalWire
|
||||
@@ -2875,7 +3235,7 @@ async def hangup_real_caller():
|
||||
threading.Thread(target=audio_service.play_sfx, args=(str(hangup_sound),), daemon=True).start()
|
||||
|
||||
asyncio.create_task(
|
||||
_summarize_real_call(caller_phone, conversation_snapshot, auto_followup_enabled)
|
||||
_summarize_real_call(caller_phone, conversation_snapshot, call_started, auto_followup_enabled)
|
||||
)
|
||||
|
||||
return {
|
||||
@@ -2884,8 +3244,9 @@ async def hangup_real_caller():
|
||||
}
|
||||
|
||||
|
||||
async def _summarize_real_call(caller_phone: str, conversation: list, auto_followup_enabled: bool):
|
||||
async def _summarize_real_call(caller_phone: str, conversation: list, started_at: float, auto_followup_enabled: bool):
|
||||
"""Background task: summarize call and store in history"""
|
||||
ended_at = time.time()
|
||||
summary = ""
|
||||
if conversation:
|
||||
transcript_text = "\n".join(
|
||||
@@ -2901,6 +3262,8 @@ async def _summarize_real_call(caller_phone: str, conversation: list, auto_follo
|
||||
caller_name=caller_phone,
|
||||
summary=summary,
|
||||
transcript=conversation,
|
||||
started_at=started_at,
|
||||
ended_at=ended_at,
|
||||
))
|
||||
print(f"[Real Caller] {caller_phone} call summarized: {summary[:80]}...")
|
||||
|
||||
@@ -2963,6 +3326,70 @@ async def set_auto_followup(data: dict):
|
||||
return {"enabled": session.auto_followup}
|
||||
|
||||
|
||||
# --- Transcript & Chapter Export ---
|
||||
|
||||
@app.get("/api/session/export")
|
||||
async def export_session():
|
||||
"""Export session transcript with speaker labels and chapters from call boundaries"""
|
||||
if not session.call_history:
|
||||
raise HTTPException(400, "No calls in this session to export")
|
||||
|
||||
# Find the earliest call start as session base time
|
||||
session_start = min(
|
||||
(r.started_at for r in session.call_history if r.started_at > 0),
|
||||
default=time.time()
|
||||
)
|
||||
|
||||
transcript_lines = []
|
||||
chapters = []
|
||||
|
||||
for i, record in enumerate(session.call_history):
|
||||
# Chapter from call start time
|
||||
offset_seconds = max(0, record.started_at - session_start) if record.started_at > 0 else 0
|
||||
chapter_title = f"{record.caller_name}"
|
||||
if record.summary:
|
||||
# Use first sentence of summary for chapter title
|
||||
short_summary = record.summary.split(".")[0].strip()
|
||||
if short_summary:
|
||||
chapter_title += f" \u2014 {short_summary}"
|
||||
chapters.append({"startTime": round(offset_seconds), "title": chapter_title})
|
||||
|
||||
# Separator between calls
|
||||
if i > 0:
|
||||
transcript_lines.append("")
|
||||
transcript_lines.append(f"--- Call {i + 1}: {record.caller_name} ---")
|
||||
transcript_lines.append("")
|
||||
|
||||
# Transcript lines with timestamps
|
||||
for msg in record.transcript:
|
||||
msg_offset = msg.get("timestamp", 0) - session_start if msg.get("timestamp") else offset_seconds
|
||||
if msg_offset < 0:
|
||||
msg_offset = 0
|
||||
mins = int(msg_offset // 60)
|
||||
secs = int(msg_offset % 60)
|
||||
|
||||
role = msg.get("role", "")
|
||||
if role in ("user", "host"):
|
||||
speaker = "HOST"
|
||||
elif role.startswith("real_caller:"):
|
||||
speaker = role.split(":", 1)[1].upper()
|
||||
elif role.startswith("ai_caller:"):
|
||||
speaker = role.split(":", 1)[1].upper()
|
||||
elif role == "assistant":
|
||||
speaker = record.caller_name.upper()
|
||||
else:
|
||||
speaker = role.upper()
|
||||
|
||||
transcript_lines.append(f"[{mins:02d}:{secs:02d}] {speaker}: {msg['content']}")
|
||||
|
||||
return {
|
||||
"session_id": session.id,
|
||||
"transcript": "\n".join(transcript_lines),
|
||||
"chapters": chapters,
|
||||
"call_count": len(session.call_history),
|
||||
}
|
||||
|
||||
|
||||
# --- Server Control Endpoints ---
|
||||
|
||||
import subprocess
|
||||
|
||||
Reference in New Issue
Block a user