Fix Whisper misspelling caller names — hint + fuzzy correction

- Pass all caller names as Whisper's initial_prompt hint so names are spelled correctly
- Post-transcription fuzzy matching (Levenshtein distance) corrects remaining misspellings
- Prevents AI callers from "correcting" the host on their own name

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 07:42:18 -06:00
parent eb1e18a997
commit 4589670b37
1 changed files with 109 additions and 2 deletions
@@ -7633,6 +7633,104 @@ async def start_recording():
    return {"status": "recording"}


def _get_all_caller_names() -> list[str]:
    """Collect the display names of every caller slot, plus the intern.

    For each key in ``CALLER_BASES`` the session background is consulted
    first: a truthy background object exposing a ``name`` attribute
    contributes that name. A background that is a plain string contributes
    nothing. Otherwise the ``"name"`` entry of the base assignment is used
    when it exists. ``"Devon"`` is always appended last.

    Returns:
        The names in ``CALLER_BASES`` iteration order, followed by "Devon".
    """
    collected: list[str] = []
    for caller_key in CALLER_BASES:
        background = session.caller_backgrounds.get(caller_key)

        # Structured background: it carries its own name.
        if background and hasattr(background, "name"):
            collected.append(background.name)
            continue

        # Raw string background: there is no structured name to read.
        if isinstance(background, str):
            continue

        # No usable background: fall back to the base assignment, if named.
        base_entry = CALLER_BASES[caller_key]
        if "name" in base_entry:
            collected.append(base_entry["name"])

    # Devon (the intern) is always part of the roster.
    return collected + ["Devon"]
+
+
+def _fix_caller_names(text: str, names: list[str]) -> str:
+    """Fix Whisper misspellings of caller names using fuzzy matching.
+    Compares each word against known names — if within edit distance 2
+    and the word isn't a common English word, replace it."""
+    if not names or not text:
+        return text
+
+    # Build lookup: lowercase name -> original name
+    name_map = {n.lower(): n for n in names if n}
+    if not name_map:
+        return text
+
+    # Common short words that happen to be close to names — never replace these
+    _common_words = {
+        "the", "and", "but", "for", "not", "you", "all", "can", "had", "her",
+        "was", "one", "our", "out", "are", "has", "his", "how", "its", "may",
+        "new", "now", "old", "see", "way", "who", "did", "get", "got", "him",
+        "let", "say", "she", "too", "use", "been", "call", "come", "each",
+        "from", "have", "just", "know", "like", "long", "look", "make", "many",
+        "much", "over", "said", "some", "take", "tell", "than", "that", "them",
+        "then", "they", "this", "time", "very", "want", "well", "went", "were",
+        "what", "when", "will", "with", "your", "been", "yeah", "okay", "sure",
+        "right", "about", "think", "really", "gonna", "gotta", "would", "could",
+        "should", "never", "still", "here", "there", "where", "being", "doing",
+        "going", "having", "saying", "man", "hey", "yes", "no",
+    }
+
+    def _edit_distance(a: str, b: str) -> int:
+        """Levenshtein distance between two strings."""
+        if len(a) < len(b):
+            return _edit_distance(b, a)
+        if len(b) == 0:
+            return len(a)
+        prev = list(range(len(b) + 1))
+        for i, ca in enumerate(a):
+            curr = [i + 1]
+            for j, cb in enumerate(b):
+                cost = 0 if ca == cb else 1
+                curr.append(min(curr[j] + 1, prev[j + 1] + 1, prev[j] + cost))
+            prev = curr
+        return prev[len(b)]
+
+    words = text.split()
+    changed = False
+    for i, word in enumerate(words):
+        # Strip punctuation for matching but preserve it
+        stripped = word.strip(".,!?;:\"'—-")
+        if not stripped or len(stripped) < 3:
+            continue
+        low = stripped.lower()
+        if low in _common_words:
+            continue
+
+        # Exact match (already correct)
+        if low in name_map:
+            # Fix capitalization if needed
+            correct = name_map[low]
+            if stripped != correct:
+                words[i] = word.replace(stripped, correct)
+                changed = True
+            continue
+
+        # Fuzzy match against all names
+        for name_low, name_orig in name_map.items():
+            if abs(len(low) - len(name_low)) > 2:
+                continue
+            dist = _edit_distance(low, name_low)
+            # Allow distance 1 for short names (3-4 chars), distance 2 for longer
+            max_dist = 1 if len(name_low) <= 4 else 2
+            if dist <= max_dist and dist > 0:
+                words[i] = word.replace(stripped, name_orig)
+                changed = True
+                break
+
+    if changed:
+        result = " ".join(words)
+        if result != text:
+            print(f"[NameFix] '{text}' -> '{result}'")
+        return result
+    return text
+
+
@app.post("/api/record/stop")
 async def stop_recording():
    """Stop recording and transcribe"""
@@ -7641,14 +7739,23 @@ async def stop_recording():
    if len(audio_bytes) < 100:
        return {"text": "", "status": "no_audio"}

-    # Build context hint from current caller for better transcription accuracy
+    # Build context hint with ALL caller names for Whisper's initial_prompt
+    caller_names = _get_all_caller_names()
    context_hint = ""
+    if caller_names:
+        names_str = ", ".join(caller_names)
+        context_hint = f"Callers on today's show: {names_str}."
    if session.caller:
        caller_name = session.caller.get("name", "")
-        context_hint = f"Host Luke is talking to a caller named {caller_name}."
+        context_hint += f" Host Luke is currently talking to {caller_name}."

    # Transcribe the recorded audio (16kHz raw PCM from audio service)
    text = await transcribe_audio(audio_bytes, source_sample_rate=16000, context_hint=context_hint)
+
+    # Post-transcription: fix Whisper misspellings of caller names
+    if text and caller_names:
+        text = _fix_caller_names(text, caller_names)
+
    return {"text": text, "status": "transcribed"}