From 3192735615455e8a3749f21a2560be173a9eeeca Mon Sep 17 00:00:00 2001
From: tcpsyn
Date: Thu, 5 Feb 2026 17:04:12 -0700
Subject: [PATCH] Fix AI responses being cut off

- Increase max_tokens from 100 to 150 to avoid mid-sentence truncation
- Tighten prompt to 1-2 short sentences with emphasis on completing them

Co-Authored-By: Claude Opus 4.6
---
 backend/main.py         | 4 ++--
 backend/services/llm.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/backend/main.py b/backend/main.py
index e61754f..33b85b5 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -264,8 +264,8 @@ Continue naturally. Don't repeat yourself.
 {history}{context}
 HOW TO TALK:
 - Sound like a real person chatting, not writing.
-- Keep responses to 2-3 sentences. Enough to make your point, short enough for back-and-forth.
-- ALWAYS finish your thought completely. Never stop mid-sentence.
+- Keep responses to 1-2 SHORT sentences. Be brief. This is a fast-paced call, not a monologue.
+- ALWAYS finish your sentence. Never leave a thought incomplete or trailing off.
 - Swear naturally if it fits: fuck, shit, damn, etc.
 
 SPELLING FOR TEXT-TO-SPEECH (use proper spelling so TTS pronounces correctly):
diff --git a/backend/services/llm.py b/backend/services/llm.py
index d6c108b..fdc5c29 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -124,7 +124,7 @@ class LLMService:
             json={
                 "model": self.openrouter_model,
                 "messages": messages,
-                "max_tokens": 100,
+                "max_tokens": 150,
             },
         )
         response.raise_for_status()
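
Note (not part of the patch): a minimal sketch of how one might verify the
truncation fix, assuming the OpenRouter chat-completions endpoint and an
OpenAI-compatible response shape. OpenAI-style APIs report finish_reason
"length" when output was cut off by the max_tokens cap and "stop" when the
model ended naturally. The model id and the OPENROUTER_API_KEY environment
variable below are placeholders, not values taken from this repo.

    import os
    import httpx

    def is_truncated(completion: dict) -> bool:
        """True when the model hit the max_tokens cap mid-response."""
        # "length" = cut off by the token limit; "stop" = natural end.
        return completion["choices"][0]["finish_reason"] == "length"

    response = httpx.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
        json={
            "model": "openai/gpt-4o-mini",  # placeholder model id
            "messages": [{"role": "user", "content": "Say hi in one sentence."}],
            "max_tokens": 150,  # the new cap from this patch
        },
    )
    response.raise_for_status()
    print("truncated:", is_truncated(response.json()))

Running this before and after the patch with the same prompt would show
whether the larger cap actually moves responses from "length" to "stop".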