From 3192735615455e8a3749f21a2560be173a9eeeca Mon Sep 17 00:00:00 2001
From: tcpsyn
Date: Thu, 5 Feb 2026 17:04:12 -0700
Subject: [PATCH] Fix AI responses being cut off

- Increase max_tokens from 100 to 150 to avoid mid-sentence truncation
- Tighten prompt to 1-2 short sentences with emphasis on completing them

Co-Authored-By: Claude Opus 4.6
---
 backend/main.py         | 4 ++--
 backend/services/llm.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/backend/main.py b/backend/main.py
index e61754f..33b85b5 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -264,8 +264,8 @@ Continue naturally. Don't repeat yourself.
 {history}{context}
 HOW TO TALK:
 - Sound like a real person chatting, not writing.
-- Keep responses to 2-3 sentences. Enough to make your point, short enough for back-and-forth.
-- ALWAYS finish your thought completely. Never stop mid-sentence.
+- Keep responses to 1-2 SHORT sentences. Be brief. This is a fast-paced call, not a monologue.
+- ALWAYS finish your sentence. Never leave a thought incomplete or trailing off.
 - Swear naturally if it fits: fuck, shit, damn, etc.
 
 SPELLING FOR TEXT-TO-SPEECH (use proper spelling so TTS pronounces correctly):
diff --git a/backend/services/llm.py b/backend/services/llm.py
index d6c108b..fdc5c29 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -124,7 +124,7 @@ class LLMService:
             json={
                 "model": self.openrouter_model,
                 "messages": messages,
-                "max_tokens": 100,
+                "max_tokens": 150,
             },
         )
         response.raise_for_status()
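
Note (not part of the patch): a minimal sketch of how one might verify the
truncation fix, assuming the OpenRouter chat-completions endpoint and an
OpenAI-compatible response shape. OpenAI-style APIs report finish_reason
"length" when output was cut off by the max_tokens cap and "stop" when the
model ended naturally. The model id and the OPENROUTER_API_KEY environment
variable below are placeholders, not values taken from this repo.

    import os
    import httpx

    def is_truncated(completion: dict) -> bool:
        """True when the model hit the max_tokens cap mid-response."""
        # "length" = cut off by the token limit; "stop" = natural end.
        return completion["choices"][0]["finish_reason"] == "length"

    response = httpx.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
        json={
            "model": "openai/gpt-4o-mini",  # placeholder model id
            "messages": [{"role": "user", "content": "Say hi in one sentence."}],
            "max_tokens": 150,  # the new cap from this patch
        },
    )
    response.raise_for_status()
    print("truncated:", is_truncated(response.json()))

Running this before and after the patch with the same prompt would show
whether the larger cap actually moves responses from "length" to "stop".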