From 0e65fa5084865c7b86d6e91f18a9b3a4659732aa Mon Sep 17 00:00:00 2001
From: tcpsyn <tcpsyn@gmail.com>
Date: Thu, 5 Feb 2026 17:05:51 -0700
Subject: [PATCH] =?UTF-8?q?Force=20shorter=20AI=20responses=20=E2=80=94=20?=
 =?UTF-8?q?max=201-2=20sentences?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Much stronger prompt language: "no more than 2 sentences EVER"
- Added "DO NOT ramble" instruction
- Reduced max_tokens from 150 back to 100 as a hard limit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/main.py         | 5 +++--
 backend/services/llm.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/backend/main.py b/backend/main.py
index 33b85b5..bf1868f 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -264,9 +264,10 @@ Continue naturally. Don't repeat yourself.
 {history}{context}
 HOW TO TALK:
 - Sound like a real person chatting, not writing.
-- Keep responses to 1-2 SHORT sentences. Be brief. This is a fast-paced call, not a monologue.
-- ALWAYS finish your sentence. Never leave a thought incomplete or trailing off.
+- MAX 1-2 sentences. Seriously — no more than 2 sentences EVER. One sentence is ideal.
+- ALWAYS finish your sentence. Never trail off mid-thought.
 - Swear naturally if it fits: fuck, shit, damn, etc.
+- DO NOT ramble, explain, or monologue. Say one thing and stop.
 
 SPELLING FOR TEXT-TO-SPEECH (use proper spelling so TTS pronounces correctly):
 - Write "you know" not "yanno" or "y'know"
diff --git a/backend/services/llm.py b/backend/services/llm.py
index fdc5c29..d6c108b 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -124,7 +124,7 @@ class LLMService:
                         json={
                             "model": self.openrouter_model,
                             "messages": messages,
-                            "max_tokens": 150,
+                            "max_tokens": 100,
                         },
                     )
                     response.raise_for_status()