Fix Whisper misspelling caller names — hint + fuzzy correction

- Pass all caller names as Whisper initial_prompt hint for correct spelling - Post-transcription fuzzy match corrects remaining misspellings (Levenshtein) - Prevents AI callers from "correcting" the host on their own name Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 07:42:18 -06:00
parent eb1e18a997
commit 4589670b37
1 changed files with 109 additions and 2 deletions
@@ -7633,6 +7633,104 @@ async def start_recording():
    return {"status": "recording"}
 def _get_all_caller_names() -> list[str]:
    """Get all current caller names (from backgrounds or base assignments)."""
    names = []
    for key in CALLER_BASES:
        bg = session.caller_backgrounds.get(key)
        if bg and hasattr(bg, "name"):
            names.append(bg.name)
        elif isinstance(bg, str):
            pass  # raw string background, no structured name
        elif "name" in CALLER_BASES[key]:
            names.append(CALLER_BASES[key]["name"])
    # Always include Devon (the intern)
    names.append("Devon")
    return names
 def _fix_caller_names(text: str, names: list[str]) -> str:
    """Fix Whisper misspellings of caller names using fuzzy matching.
    Compares each word against known names — if within edit distance 2
    and the word isn't a common English word, replace it."""
    if not names or not text:
        return text
    # Build lookup: lowercase name -> original name
    name_map = {n.lower(): n for n in names if n}
    if not name_map:
        return text
    # Common short words that happen to be close to names — never replace these
    _common_words = {
        "the", "and", "but", "for", "not", "you", "all", "can", "had", "her",
        "was", "one", "our", "out", "are", "has", "his", "how", "its", "may",
        "new", "now", "old", "see", "way", "who", "did", "get", "got", "him",
        "let", "say", "she", "too", "use", "been", "call", "come", "each",
        "from", "have", "just", "know", "like", "long", "look", "make", "many",
        "much", "over", "said", "some", "take", "tell", "than", "that", "them",
        "then", "they", "this", "time", "very", "want", "well", "went", "were",
        "what", "when", "will", "with", "your", "been", "yeah", "okay", "sure",
        "right", "about", "think", "really", "gonna", "gotta", "would", "could",
        "should", "never", "still", "here", "there", "where", "being", "doing",
        "going", "having", "saying", "man", "hey", "yes", "no",
    }
    def _edit_distance(a: str, b: str) -> int:
        """Levenshtein distance between two strings."""
        if len(a) < len(b):
            return _edit_distance(b, a)
        if len(b) == 0:
            return len(a)
        prev = list(range(len(b) + 1))
        for i, ca in enumerate(a):
            curr = [i + 1]
            for j, cb in enumerate(b):
                cost = 0 if ca == cb else 1
                curr.append(min(curr[j] + 1, prev[j + 1] + 1, prev[j] + cost))
            prev = curr
        return prev[len(b)]
    words = text.split()
    changed = False
    for i, word in enumerate(words):
        # Strip punctuation for matching but preserve it
        stripped = word.strip(".,!?;:\"'—-")
        if not stripped or len(stripped) < 3:
            continue
        low = stripped.lower()
        if low in _common_words:
            continue
        # Exact match (already correct)
        if low in name_map:
            # Fix capitalization if needed
            correct = name_map[low]
            if stripped != correct:
                words[i] = word.replace(stripped, correct)
                changed = True
            continue
        # Fuzzy match against all names
        for name_low, name_orig in name_map.items():
            if abs(len(low) - len(name_low)) > 2:
                continue
            dist = _edit_distance(low, name_low)
            # Allow distance 1 for short names (3-4 chars), distance 2 for longer
            max_dist = 1 if len(name_low) <= 4 else 2
            if dist <= max_dist and dist > 0:
                words[i] = word.replace(stripped, name_orig)
                changed = True
                break
    if changed:
        result = " ".join(words)
        if result != text:
            print(f"[NameFix] '{text}' -> '{result}'")
        return result
    return text
@app.post("/api/record/stop")
 async def stop_recording():
    """Stop recording and transcribe"""
@@ -7641,14 +7739,23 @@ async def stop_recording():
    if len(audio_bytes) < 100:
        return {"text": "", "status": "no_audio"}
-    # Build context hint from current caller for better transcription accuracy
+    # Build context hint with ALL caller names for Whisper's initial_prompt
    caller_names = _get_all_caller_names()
    context_hint = ""
    if caller_names:
        names_str = ", ".join(caller_names)
        context_hint = f"Callers on today's show: {names_str}."
    if session.caller:
        caller_name = session.caller.get("name", "")
-        context_hint = f"Host Luke is talking to a caller named {caller_name}."
+        context_hint += f" Host Luke is currently talking to {caller_name}."
    # Transcribe the recorded audio (16kHz raw PCM from audio service)
    text = await transcribe_audio(audio_bytes, source_sample_rate=16000, context_hint=context_hint)
    # Post-transcription: fix Whisper misspellings of caller names
    if text and caller_names:
        text = _fix_caller_names(text, caller_names)
    return {"text": text, "status": "transcribed"}