Caller model routing — cycle, style-matched, mid-show override

- Three strategies: single model, cycle through pool, style-matched
- 17 communication styles mapped to 7 models (Grok, Sonnet, Mistral, Qwen, DeepSeek, Gemini, Llama)
- Per-caller model locked for entire call, overridable mid-show
- Model badges on caller buttons and info panel
- Settings UI for strategy, pool, style mapping, fallback
- Fallback to Sonnet on model failure
- 6 new models added to pricing and dropdown
- Checkpoint persistence for all model state

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-21 01:58:03 -06:00
parent e0fb3cac68
commit 314d5f9452
6 changed files with 487 additions and 4 deletions
+145
View File
@@ -6237,6 +6237,39 @@ class Session:
self.relationship_context: dict[str, str] = {} # caller_key → relationship prompt injection
self.intern_monitoring: bool = True # Devon monitors conversations by default
self.show_theme: str = "" # Current show theme (e.g. "St. Patrick's Day")
# Caller model routing
self.caller_model_strategy: str = "single" # "single" | "cycle" | "style_matched"
self.caller_model_pool: list[str] = [
"x-ai/grok-4",
"anthropic/claude-sonnet-4-5",
"mistralai/mistral-medium-3",
"qwen/qwen3-235b-a22b",
"deepseek/deepseek-chat-v3-0324",
"google/gemini-2.5-pro",
"meta-llama/llama-4-maverick",
]
self.caller_model_map: dict[str, str] = {
"high_energy": "x-ai/grok-4",
"confrontational": "x-ai/grok-4",
"angry_venting": "x-ai/grok-4",
"bragger": "x-ai/grok-4",
"comedian": "x-ai/grok-4",
"quiet_nervous": "anthropic/claude-sonnet-4-5",
"sweet_earnest": "anthropic/claude-sonnet-4-5",
"emotional": "anthropic/claude-sonnet-4-5",
"deadpan": "mistralai/mistral-medium-3",
"mysterious": "mistralai/mistral-medium-3",
"world_weary": "mistralai/mistral-medium-3",
"storyteller": "qwen/qwen3-235b-a22b",
"rambling": "qwen/qwen3-235b-a22b",
"oversharer": "deepseek/deepseek-chat-v3-0324",
"conspiracy": "deepseek/deepseek-chat-v3-0324",
"know_it_all": "google/gemini-2.5-pro",
"first_time": "meta-llama/llama-4-maverick",
}
self.caller_model_fallback: str = "anthropic/claude-sonnet-4-5"
self.caller_models: dict[str, str] = {} # caller_key → assigned model
self._caller_model_cycle_idx: int = 0
def start_call(self, caller_key: str):
self.current_caller_key = caller_key
@@ -6253,6 +6286,35 @@ class Session:
def add_message(self, role: str, content: str):
    """Record one conversation turn, stamped with the current wall-clock time."""
    entry = {"role": role, "content": content, "timestamp": time.time()}
    self.conversation.append(entry)
def get_caller_model(self, caller_key: str) -> str | None:
"""Get the assigned model for a caller, or assign one based on strategy.
Returns None to use default category routing."""
if self.caller_model_strategy == "single":
return None # use default category_models["caller_dialog"]
# Already assigned — keep consistent for the whole call
if caller_key in self.caller_models:
return self.caller_models[caller_key]
model = None
if self.caller_model_strategy == "cycle":
if self.caller_model_pool:
model = self.caller_model_pool[self._caller_model_cycle_idx % len(self.caller_model_pool)]
self._caller_model_cycle_idx += 1
elif self.caller_model_strategy == "style_matched":
raw_style = self.caller_styles.get(caller_key, "")
style_key = _normalize_style_key(raw_style) if raw_style else ""
model = self.caller_model_map.get(style_key)
if not model and self.caller_model_pool:
model = self.caller_model_pool[0]
if model:
self.caller_models[caller_key] = model
caller_name = CALLER_BASES.get(caller_key, {}).get("name", caller_key)
print(f"[CallerModel] Assigned {model} to {caller_name} (strategy={self.caller_model_strategy})")
return model
def get_caller_background(self, caller_key: str) -> str:
"""Get or generate background for a caller in this session.
Returns the natural_description string for prompt injection."""
@@ -6607,6 +6669,12 @@ def _save_checkpoint():
"caller_queue": session.caller_queue,
"relationship_context": session.relationship_context,
"intern_monitoring": session.intern_monitoring,
"caller_model_strategy": session.caller_model_strategy,
"caller_model_pool": session.caller_model_pool,
"caller_model_map": session.caller_model_map,
"caller_model_fallback": session.caller_model_fallback,
"caller_models": session.caller_models,
"caller_model_cycle_idx": session._caller_model_cycle_idx,
"costs": cost_tracker.get_live_summary(),
"cost_records": {
"llm": [asdict(r) for r in cost_tracker.llm_records],
@@ -6653,6 +6721,12 @@ def _load_checkpoint() -> bool:
session.caller_queue = data.get("caller_queue", [])
session.relationship_context = data.get("relationship_context", {})
session.intern_monitoring = data.get("intern_monitoring", True)
session.caller_model_strategy = data.get("caller_model_strategy", "single")
session.caller_model_pool = data.get("caller_model_pool", ["anthropic/claude-sonnet-4-5"])
session.caller_model_map = data.get("caller_model_map", {})
session.caller_model_fallback = data.get("caller_model_fallback", "anthropic/claude-sonnet-4-5")
session.caller_models = data.get("caller_models", {})
session._caller_model_cycle_idx = data.get("caller_model_cycle_idx", 0)
for key, snapshot in data.get("caller_bases", {}).items():
if key in CALLER_BASES:
CALLER_BASES[key]["name"] = snapshot["name"]
@@ -8563,6 +8637,7 @@ async def chat(request: ChatRequest):
max_tokens=max_tokens,
category="caller_dialog",
caller_name=session.caller.get("name", "") if session.caller else "",
model_override=session.get_caller_model(session.current_caller_key) if session.current_caller_key else None,
)
# Discard if call changed while we were generating
@@ -8953,6 +9028,74 @@ async def set_show_theme(data: dict):
return {"theme": session.show_theme}
# --- Caller Model Routing ---
@app.get("/api/caller-models")
async def get_caller_models():
    """Report the routing configuration and the model held by each known caller.

    Callers with no entry in ``session.caller_models`` are reported with
    the placeholder model ``"(default)"``.
    """
    per_caller = {
        caller_key: {
            "name": base.get("name", caller_key),
            "model": session.caller_models.get(caller_key) or "(default)",
        }
        for caller_key, base in CALLER_BASES.items()
    }
    return {
        "strategy": session.caller_model_strategy,
        "pool": session.caller_model_pool,
        "map": session.caller_model_map,
        "fallback": session.caller_model_fallback,
        "assignments": per_caller,
    }
@app.post("/api/caller-models")
async def set_caller_models(data: dict):
    """Update caller model routing strategy, pool, map, or fallback.

    Only the keys present in ``data`` are changed. All supplied fields are
    validated before anything is applied, so a rejected request leaves the
    session exactly as it was.

    Args:
        data: JSON object with any of ``strategy`` (``"single"``,
            ``"cycle"`` or ``"style_matched"``), ``pool`` (non-empty list
            of model ID strings), ``map`` (object of style key to model
            ID) and ``fallback`` (model ID string).

    Returns:
        The same payload as ``GET /api/caller-models`` after the update.

    Raises:
        HTTPException: 400 when any supplied field is invalid.
    """
    # --- Validate first: no session state is touched until every field passes ---
    if "strategy" in data:
        strategy = data["strategy"]
        if strategy not in ("single", "cycle", "style_matched"):
            raise HTTPException(400, f"Invalid strategy: {strategy}")
    if "pool" in data:
        pool = data["pool"]
        if (
            not isinstance(pool, list)
            or not pool
            or not all(isinstance(model_id, str) for model_id in pool)
        ):
            raise HTTPException(400, "pool must be a non-empty list of model IDs")
    if "map" in data:
        style_map = data["map"]
        if not isinstance(style_map, dict) or not all(
            value is None or isinstance(value, str) for value in style_map.values()
        ):
            raise HTTPException(400, "map must be an object mapping style keys to model IDs")
    if "fallback" in data and not isinstance(data["fallback"], str):
        raise HTTPException(400, "fallback must be a model ID string")

    # --- Apply ---
    if "strategy" in data:
        session.caller_model_strategy = data["strategy"]
        print(f"[CallerModel] Strategy set to: {data['strategy']}")
    if "pool" in data:
        session.caller_model_pool = data["pool"]
        print(f"[CallerModel] Pool set to: {data['pool']}")
    if "map" in data:
        session.caller_model_map = data["map"]
        print(f"[CallerModel] Style map set: {len(data['map'])} entries")
    if "fallback" in data:
        session.caller_model_fallback = data["fallback"]
        print(f"[CallerModel] Fallback set to: {data['fallback']}")

    # Clear existing assignments so the new config takes effect. A
    # fallback-only change does not affect assignments and keeps them.
    if "strategy" in data or "pool" in data or "map" in data:
        session.caller_models.clear()
        session._caller_model_cycle_idx = 0
        print("[CallerModel] Cleared caller assignments (new config)")

    _save_checkpoint()
    return await get_caller_models()
@app.post("/api/caller-models/{caller_key}")
async def set_caller_model_override(caller_key: str, data: dict):
    """Override the model for a specific caller mid-show.

    Args:
        caller_key: Key of the caller; must exist in ``CALLER_BASES``.
        data: JSON object with a ``model`` field. A non-empty string sets
            the override. A missing, ``null``, empty or whitespace-only
            value clears it.

    Returns:
        ``{"caller_key": ..., "model": ...}``, where ``model`` is
        ``"(default)"`` when the caller has no stored model.

    Raises:
        HTTPException: 404 for an unknown caller key, 400 when ``model``
            is neither a string nor ``null``.
    """
    if caller_key not in CALLER_BASES:
        raise HTTPException(404, f"Unknown caller key: {caller_key}")

    raw_model = data.get("model")
    if raw_model is None:
        # JSON null is treated like an absent field: clear the override.
        raw_model = ""
    if not isinstance(raw_model, str):
        # Reject with 400 here; calling .strip() on a non-string would
        # otherwise raise AttributeError and return a 500.
        raise HTTPException(400, "model must be a string (empty or null clears the override)")
    model = raw_model.strip()

    name = CALLER_BASES[caller_key].get("name", caller_key)
    if model:
        session.caller_models[caller_key] = model
        print(f"[CallerModel] Override {name} → {model}")
    else:
        # Clear override
        session.caller_models.pop(caller_key, None)
        print(f"[CallerModel] Cleared override for {name}")

    _save_checkpoint()
    return {"caller_key": caller_key, "model": session.caller_models.get(caller_key, "(default)")}
# --- Cost Tracking Endpoints ---
@app.get("/api/costs")
@@ -9442,6 +9585,7 @@ async def _trigger_ai_auto_respond(accumulated_text: str):
max_tokens=max_tokens,
category="caller_dialog",
caller_name=session.caller.get("name", "") if session.caller else "",
model_override=session.get_caller_model(session.current_caller_key) if session.current_caller_key else None,
)
# Discard if call changed during generation
@@ -9543,6 +9687,7 @@ async def ai_respond():
max_tokens=max_tokens,
category="caller_dialog",
caller_name=session.caller.get("name", "") if session.caller else "",
model_override=session.get_caller_model(session.current_caller_key) if session.current_caller_key else None,
)
if _session_epoch != epoch:
+6
View File
@@ -45,6 +45,12 @@ OPENROUTER_PRICING = {
"openai/gpt-4o-mini": {"prompt": 0.15, "completion": 0.60},
"openai/gpt-4o": {"prompt": 2.50, "completion": 10.00},
"meta-llama/llama-3.1-8b-instruct": {"prompt": 0.06, "completion": 0.06},
"deepseek/deepseek-chat-v3-0324": {"prompt": 0.27, "completion": 1.10},
"moonshotai/kimi-k2": {"prompt": 0.60, "completion": 2.00},
"mistralai/mistral-medium-3": {"prompt": 0.40, "completion": 2.00},
"meta-llama/llama-4-maverick": {"prompt": 0.20, "completion": 0.60},
"qwen/qwen3-235b-a22b": {"prompt": 0.20, "completion": 0.60},
"google/gemini-2.5-pro": {"prompt": 1.25, "completion": 10.00},
}
# TTS pricing per character
+12 -4
View File
@@ -23,6 +23,13 @@ OPENROUTER_MODELS = [
"google/gemini-2.5-flash",
"openai/gpt-4o-mini",
"openai/gpt-4o",
# New dialog models
"deepseek/deepseek-chat-v3-0324",
"moonshotai/kimi-k2",
"mistralai/mistral-medium-3",
"meta-llama/llama-4-maverick",
"qwen/qwen3-235b-a22b",
"google/gemini-2.5-pro",
# Legacy
"anthropic/claude-3-haiku",
"google/gemini-flash-1.5",
@@ -125,12 +132,13 @@ class LLMService:
response_format: Optional[dict] = None,
category: str = "unknown",
caller_name: str = "",
model_override: Optional[str] = None,
) -> str:
if system_prompt:
messages = [{"role": "system", "content": system_prompt}] + messages
if self.provider == "openrouter":
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name)
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name, model_override=model_override)
else:
return await self._call_ollama(messages, max_tokens=max_tokens)
@@ -295,11 +303,11 @@ class LLMService:
"""Get the best model for a given category based on config routing."""
return settings.category_models.get(category, self.openrouter_model)
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None, category: str = "unknown", caller_name: str = "") -> str:
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None, category: str = "unknown", caller_name: str = "", model_override: Optional[str] = None) -> str:
"""Try category-specific model, then fallback models. Always returns a response."""
# Use category-specific model if configured, otherwise primary
model = self._get_model_for_category(category)
# Use explicit override if provided, else category routing, else primary
model = model_override or self._get_model_for_category(category)
result = await self._call_openrouter_once(messages, model, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name)
if result is not None:
return result