From a1c94a368294bf70faff7084dc6304b4333dac63 Mon Sep 17 00:00:00 2001
From: tcpsyn
Date: Thu, 5 Feb 2026 17:18:22 -0700
Subject: [PATCH] Fix unnatural response cutoffs

- Replace aggressive sentence-count limiting with ensure_complete_thought()
  which only trims if the LLM was actually cut off mid-sentence
- Softer prompt guidance for natural brevity instead of rigid sentence count
- max_tokens at 100 as natural length cap

Co-Authored-By: Claude Opus 4.6
---
NOTE(review): line breaks were restored from a whitespace-collapsed copy of
this patch. Leading indentation and blank context lines are inferred; confirm
them against the target tree before applying.

 backend/main.py         | 41 +++++++++++++++--------------------------
 backend/services/llm.py |  2 +-
 2 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/backend/main.py b/backend/main.py
index 72081ea..3f9e243 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -264,10 +264,9 @@ Continue naturally. Don't repeat yourself.
 {history}{context}
 HOW TO TALK:
 - Sound like a real person chatting, not writing.
-- MAX 1-2 sentences. Seriously — no more than 2 sentences EVER. One sentence is ideal.
-- ALWAYS finish your sentence. Never trail off mid-thought.
+- Be brief. Say what you need to say and stop. Think quick back-and-forth, not speeches.
+- ALWAYS complete your thought. Never trail off or leave a sentence unfinished.
 - Swear naturally if it fits: fuck, shit, damn, etc.
-- DO NOT ramble, explain, or monologue. Say one thing and stop.
 
 SPELLING FOR TEXT-TO-SPEECH (use proper spelling so TTS pronounces correctly):
 - Write "you know" not "yanno" or "y'know"
@@ -569,30 +568,20 @@ async def hangup():
 import re
 
 
-def limit_sentences(text: str, max_sentences: int = 2) -> str:
-    """Keep only the first N complete sentences. Never cuts mid-sentence."""
+def ensure_complete_thought(text: str) -> str:
+    """If text was cut off mid-sentence, trim to the last complete sentence."""
     text = text.strip()
     if not text:
         return text
-
-    sentences = []
-    current = 0
-    for i, ch in enumerate(text):
-        if ch in '.!?' and i + 1 < len(text) and text[i + 1] in ' \n\t':
-            sentences.append(text[current:i + 1].strip())
-            current = i + 2
-            if len(sentences) >= max_sentences:
-                break
-        elif ch in '.!?' and i == len(text) - 1:
-            sentences.append(text[current:i + 1].strip())
-            if len(sentences) >= max_sentences:
-                break
-
-    if sentences:
-        return ' '.join(sentences)
-
-    # No sentence-ending punctuation found — return text with period
-    return text.rstrip(',;: ') + '.'
+    # Already ends with sentence-ending punctuation — good
+    if text[-1] in '.!?':
+        return text
+    # Cut off mid-sentence — find the last complete sentence
+    for i in range(len(text) - 1, -1, -1):
+        if text[i] in '.!?':
+            return text[:i + 1]
+    # No punctuation at all — just add a period
+    return text.rstrip(',;:— -') + '.'
 
 
 def clean_for_tts(text: str) -> str:
@@ -661,7 +650,7 @@ async def chat(request: ChatRequest):
 
     # Clean response for TTS (remove parenthetical actions, asterisks, etc.)
     response = clean_for_tts(response)
-    response = limit_sentences(response)
+    response = ensure_complete_thought(response)
 
     print(f"[Chat] Cleaned: {response[:100] if response else '(empty)'}...")
 
@@ -1037,7 +1026,7 @@ async def _check_ai_auto_respond(real_caller_text: str, real_caller_name: str):
         system_prompt=system_prompt,
     )
     response = clean_for_tts(response)
-    response = limit_sentences(response)
+    response = ensure_complete_thought(response)
 
     if not response or not response.strip():
         return
diff --git a/backend/services/llm.py b/backend/services/llm.py
index fdc5c29..d6c108b 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -124,7 +124,7 @@ class LLMService:
             json={
                 "model": self.openrouter_model,
                 "messages": messages,
-                "max_tokens": 150,
+                "max_tokens": 100,
             },
         )
         response.raise_for_status()