Fix Whisper misspelling caller names — hint + fuzzy correction
- Pass all caller names as Whisper initial_prompt hint for correct spelling - Post-transcription fuzzy match corrects remaining misspellings (Levenshtein) - Prevents AI callers from "correcting" the host on their own name Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+109
-2
@@ -7633,6 +7633,104 @@ async def start_recording():
|
|||||||
return {"status": "recording"}
|
return {"status": "recording"}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_all_caller_names() -> list[str]:
|
||||||
|
"""Get all current caller names (from backgrounds or base assignments)."""
|
||||||
|
names = []
|
||||||
|
for key in CALLER_BASES:
|
||||||
|
bg = session.caller_backgrounds.get(key)
|
||||||
|
if bg and hasattr(bg, "name"):
|
||||||
|
names.append(bg.name)
|
||||||
|
elif isinstance(bg, str):
|
||||||
|
pass # raw string background, no structured name
|
||||||
|
elif "name" in CALLER_BASES[key]:
|
||||||
|
names.append(CALLER_BASES[key]["name"])
|
||||||
|
# Always include Devon (the intern)
|
||||||
|
names.append("Devon")
|
||||||
|
return names
|
||||||
|
|
||||||
|
|
||||||
|
def _fix_caller_names(text: str, names: list[str]) -> str:
|
||||||
|
"""Fix Whisper misspellings of caller names using fuzzy matching.
|
||||||
|
Compares each word against known names — if within edit distance 2
|
||||||
|
and the word isn't a common English word, replace it."""
|
||||||
|
if not names or not text:
|
||||||
|
return text
|
||||||
|
|
||||||
|
# Build lookup: lowercase name -> original name
|
||||||
|
name_map = {n.lower(): n for n in names if n}
|
||||||
|
if not name_map:
|
||||||
|
return text
|
||||||
|
|
||||||
|
# Common short words that happen to be close to names — never replace these
|
||||||
|
_common_words = {
|
||||||
|
"the", "and", "but", "for", "not", "you", "all", "can", "had", "her",
|
||||||
|
"was", "one", "our", "out", "are", "has", "his", "how", "its", "may",
|
||||||
|
"new", "now", "old", "see", "way", "who", "did", "get", "got", "him",
|
||||||
|
"let", "say", "she", "too", "use", "been", "call", "come", "each",
|
||||||
|
"from", "have", "just", "know", "like", "long", "look", "make", "many",
|
||||||
|
"much", "over", "said", "some", "take", "tell", "than", "that", "them",
|
||||||
|
"then", "they", "this", "time", "very", "want", "well", "went", "were",
|
||||||
|
"what", "when", "will", "with", "your", "been", "yeah", "okay", "sure",
|
||||||
|
"right", "about", "think", "really", "gonna", "gotta", "would", "could",
|
||||||
|
"should", "never", "still", "here", "there", "where", "being", "doing",
|
||||||
|
"going", "having", "saying", "man", "hey", "yes", "no",
|
||||||
|
}
|
||||||
|
|
||||||
|
def _edit_distance(a: str, b: str) -> int:
|
||||||
|
"""Levenshtein distance between two strings."""
|
||||||
|
if len(a) < len(b):
|
||||||
|
return _edit_distance(b, a)
|
||||||
|
if len(b) == 0:
|
||||||
|
return len(a)
|
||||||
|
prev = list(range(len(b) + 1))
|
||||||
|
for i, ca in enumerate(a):
|
||||||
|
curr = [i + 1]
|
||||||
|
for j, cb in enumerate(b):
|
||||||
|
cost = 0 if ca == cb else 1
|
||||||
|
curr.append(min(curr[j] + 1, prev[j + 1] + 1, prev[j] + cost))
|
||||||
|
prev = curr
|
||||||
|
return prev[len(b)]
|
||||||
|
|
||||||
|
words = text.split()
|
||||||
|
changed = False
|
||||||
|
for i, word in enumerate(words):
|
||||||
|
# Strip punctuation for matching but preserve it
|
||||||
|
stripped = word.strip(".,!?;:\"'—-")
|
||||||
|
if not stripped or len(stripped) < 3:
|
||||||
|
continue
|
||||||
|
low = stripped.lower()
|
||||||
|
if low in _common_words:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Exact match (already correct)
|
||||||
|
if low in name_map:
|
||||||
|
# Fix capitalization if needed
|
||||||
|
correct = name_map[low]
|
||||||
|
if stripped != correct:
|
||||||
|
words[i] = word.replace(stripped, correct)
|
||||||
|
changed = True
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Fuzzy match against all names
|
||||||
|
for name_low, name_orig in name_map.items():
|
||||||
|
if abs(len(low) - len(name_low)) > 2:
|
||||||
|
continue
|
||||||
|
dist = _edit_distance(low, name_low)
|
||||||
|
# Allow distance 1 for short names (3-4 chars), distance 2 for longer
|
||||||
|
max_dist = 1 if len(name_low) <= 4 else 2
|
||||||
|
if dist <= max_dist and dist > 0:
|
||||||
|
words[i] = word.replace(stripped, name_orig)
|
||||||
|
changed = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if changed:
|
||||||
|
result = " ".join(words)
|
||||||
|
if result != text:
|
||||||
|
print(f"[NameFix] '{text}' -> '{result}'")
|
||||||
|
return result
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/record/stop")
|
@app.post("/api/record/stop")
|
||||||
async def stop_recording():
|
async def stop_recording():
|
||||||
"""Stop recording and transcribe"""
|
"""Stop recording and transcribe"""
|
||||||
@@ -7641,14 +7739,23 @@ async def stop_recording():
|
|||||||
if len(audio_bytes) < 100:
|
if len(audio_bytes) < 100:
|
||||||
return {"text": "", "status": "no_audio"}
|
return {"text": "", "status": "no_audio"}
|
||||||
|
|
||||||
# Build context hint from current caller for better transcription accuracy
|
# Build context hint with ALL caller names for Whisper's initial_prompt
|
||||||
|
caller_names = _get_all_caller_names()
|
||||||
context_hint = ""
|
context_hint = ""
|
||||||
|
if caller_names:
|
||||||
|
names_str = ", ".join(caller_names)
|
||||||
|
context_hint = f"Callers on today's show: {names_str}."
|
||||||
if session.caller:
|
if session.caller:
|
||||||
caller_name = session.caller.get("name", "")
|
caller_name = session.caller.get("name", "")
|
||||||
context_hint = f"Host Luke is talking to a caller named {caller_name}."
|
context_hint += f" Host Luke is currently talking to {caller_name}."
|
||||||
|
|
||||||
# Transcribe the recorded audio (16kHz raw PCM from audio service)
|
# Transcribe the recorded audio (16kHz raw PCM from audio service)
|
||||||
text = await transcribe_audio(audio_bytes, source_sample_rate=16000, context_hint=context_hint)
|
text = await transcribe_audio(audio_bytes, source_sample_rate=16000, context_hint=context_hint)
|
||||||
|
|
||||||
|
# Post-transcription: fix Whisper misspellings of caller names
|
||||||
|
if text and caller_names:
|
||||||
|
text = _fix_caller_names(text, caller_names)
|
||||||
|
|
||||||
return {"text": text, "status": "transcribed"}
|
return {"text": text, "status": "transcribed"}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user