From 6a56967540b1822199a03aeaabb2f7cddcfcad97 Mon Sep 17 00:00:00 2001
From: tcpsyn <tcpsyn@gmail.com>
Date: Thu, 5 Feb 2026 17:07:41 -0700
Subject: [PATCH] Enforce shorter AI responses and prevent cut-off sentences

- Reduce max_tokens from 100 to 75 for shorter output
- Add truncate_to_complete_sentence() to trim at last punctuation
- Applied to both chat and auto-respond paths

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/main.py         | 21 +++++++++++++++++++++
 backend/services/llm.py |  2 +-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/backend/main.py b/backend/main.py
index bf1868f..aee7a31 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -569,6 +569,25 @@ async def hangup():
 
 import re
 
+def truncate_to_complete_sentence(text: str) -> str:
+    """Trim text to its last complete sentence.
+
+    Text already ending in '.', '!' or '?' (optionally followed by closing
+    quotes/brackets) is returned stripped. Otherwise it is cut after the
+    last such mark that is followed by whitespace, so the '.' in "3.5" or
+    "example.com" is not mistaken for a sentence end. If no sentence
+    boundary is found, a period is appended to the stripped text.
+    """
+    text = text.strip()
+    if not text or re.search(r'[.!?]["\')\]]*$', text):
+        return text
+    # Greedy .+ backtracks to the LAST terminator that precedes whitespace.
+    match = re.match(r'.+[.!?]["\')\]]*(?=\s)', text, re.DOTALL)
+    if match:
+        return match.group(0)
+    return text + '.'
+
+
 def clean_for_tts(text: str) -> str:
     """Strip out non-speakable content and fix phonetic spellings for TTS"""
     # Remove content in parentheses: (laughs), (pausing), (looking away), etc.
@@ -635,6 +654,7 @@ async def chat(request: ChatRequest):
 
     # Clean response for TTS (remove parenthetical actions, asterisks, etc.)
     response = clean_for_tts(response)
+    response = truncate_to_complete_sentence(response)
 
     print(f"[Chat] Cleaned: {response[:100] if response else '(empty)'}...")
 
@@ -1010,6 +1030,7 @@ async def _check_ai_auto_respond(real_caller_text: str, real_caller_name: str):
         system_prompt=system_prompt,
     )
     response = clean_for_tts(response)
+    response = truncate_to_complete_sentence(response)
     if not response or not response.strip():
         return
 
diff --git a/backend/services/llm.py b/backend/services/llm.py
index d6c108b..053957e 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -124,7 +124,7 @@ class LLMService:
                         json={
                             "model": self.openrouter_model,
                             "messages": messages,
-                            "max_tokens": 100,
+                            "max_tokens": 75,
                         },
                     )
                     response.raise_for_status()