From 0e65fa5084865c7b86d6e91f18a9b3a4659732aa Mon Sep 17 00:00:00 2001 From: tcpsyn Date: Thu, 5 Feb 2026 17:05:51 -0700 Subject: [PATCH] =?UTF-8?q?Force=20shorter=20AI=20responses=20=E2=80=94=20?= =?UTF-8?q?max=201-2=20sentences?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Much stronger prompt language: "no more than 2 sentences EVER" - Added "DO NOT ramble" instruction - Reduced max_tokens back to 100 as hard limit Co-Authored-By: Claude Opus 4.6 --- backend/main.py | 5 +++-- backend/services/llm.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/main.py b/backend/main.py index 33b85b5..bf1868f 100644 --- a/backend/main.py +++ b/backend/main.py @@ -264,9 +264,10 @@ Continue naturally. Don't repeat yourself. {history}{context} HOW TO TALK: - Sound like a real person chatting, not writing. -- Keep responses to 1-2 SHORT sentences. Be brief. This is a fast-paced call, not a monologue. -- ALWAYS finish your sentence. Never leave a thought incomplete or trailing off. +- MAX 1-2 sentences. Seriously — no more than 2 sentences EVER. One sentence is ideal. +- ALWAYS finish your sentence. Never trail off mid-thought. - Swear naturally if it fits: fuck, shit, damn, etc. +- DO NOT ramble, explain, or monologue. Say one thing and stop. SPELLING FOR TEXT-TO-SPEECH (use proper spelling so TTS pronounces correctly): - Write "you know" not "yanno" or "y'know" diff --git a/backend/services/llm.py b/backend/services/llm.py index fdc5c29..d6c108b 100644 --- a/backend/services/llm.py +++ b/backend/services/llm.py @@ -124,7 +124,7 @@ class LLMService: json={ "model": self.openrouter_model, "messages": messages, - "max_tokens": 150, + "max_tokens": 100, }, ) response.raise_for_status()