Update model routing with latest OpenRouter models
Style-matched defaults:
- Grok 4.1 Fast for edgy callers (high_energy, confrontational, comedian, etc.)
- Claude Sonnet 4.6 for emotional callers (quiet_nervous, sweet_earnest, emotional)
- Mistral Large 2512 for deadpan/mysterious/world-weary
- DeepSeek R1 Distill for storyteller/oversharer/conspiracy/rambler
- Gemini 2.5 Flash for know_it_all
- Llama 3.3 70B for first_time/reluctant callers

Category routing: Grok 4.1 Fast for dialog/devon/backgrounds, Gemini Flash for monitor/summary

Updated OPENROUTER_MODELS and OPENROUTER_PRICING with all new models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+10
-11
@@ -29,21 +29,20 @@ class Settings(BaseSettings):
|
|||||||
|
|
||||||
# LLM Settings
|
# LLM Settings
|
||||||
llm_provider: str = "openrouter" # "openrouter" or "ollama"
|
llm_provider: str = "openrouter" # "openrouter" or "ollama"
|
||||||
openrouter_model: str = "anthropic/claude-sonnet-4-5" # primary/default model
|
openrouter_model: str = "anthropic/claude-sonnet-4.6" # primary/default model
|
||||||
ollama_model: str = "llama3.2"
|
ollama_model: str = "llama3.2"
|
||||||
ollama_host: str = "http://localhost:11434"
|
ollama_host: str = "http://localhost:11434"
|
||||||
|
|
||||||
# Per-category model routing — cheaper models for non-critical tasks
|
# Per-category model routing
|
||||||
# Categories: caller_dialog, devon_monitor, devon_ask, background_gen,
|
# caller_dialog is overridden by style_matched routing (see Session.caller_model_map)
|
||||||
# call_summary, news_summary, topic_gen, unknown
|
|
||||||
category_models: dict = {
|
category_models: dict = {
|
||||||
"caller_dialog": "x-ai/grok-4", # full Grok 4 — edgier dialog, latency OK (gaps cut in post)
|
"caller_dialog": "x-ai/grok-4.1-fast", # fallback if style_matched disabled ($0.20/$0.50)
|
||||||
"devon_ask": "x-ai/grok-4", # Devon should match the show's edgy energy
|
"devon_ask": "x-ai/grok-4.1-fast", # Devon matches show energy, cheap ($0.20/$0.50)
|
||||||
"devon_monitor": "google/gemini-2.5-flash", # Devon polling — just decisions, keep cheap
|
"devon_monitor": "google/gemini-2.5-flash", # just yes/no decisions, keep cheap ($0.30/$2.50)
|
||||||
"background_gen": "x-ai/grok-4", # wilder, more specific caller backgrounds
|
"background_gen": "x-ai/grok-4.1-fast", # wilder caller backgrounds ($0.20/$0.50)
|
||||||
"call_summary": "google/gemini-2.5-flash", # post-call summaries
|
"call_summary": "google/gemini-2.5-flash", # post-call, no personality needed ($0.30/$2.50)
|
||||||
"news_summary": "google/gemini-2.5-flash", # news digests
|
"news_summary": "google/gemini-2.5-flash", # just digesting headlines ($0.30/$2.50)
|
||||||
"topic_gen": "google/gemini-2.5-flash", # topic generation
|
"topic_gen": "google/gemini-2.5-flash", # structured output ($0.30/$2.50)
|
||||||
}
|
}
|
||||||
|
|
||||||
# TTS Settings
|
# TTS Settings
|
||||||
|
|||||||
+31
-25
@@ -6240,34 +6240,40 @@ class Session:
|
|||||||
# Caller model routing
|
# Caller model routing
|
||||||
self.caller_model_strategy: str = "style_matched" # "single" | "cycle" | "style_matched"
|
self.caller_model_strategy: str = "style_matched" # "single" | "cycle" | "style_matched"
|
||||||
self.caller_model_pool: list[str] = [
|
self.caller_model_pool: list[str] = [
|
||||||
"x-ai/grok-4",
|
"x-ai/grok-4.1-fast", # edgy, casual, great value ($0.20/$0.50)
|
||||||
"anthropic/claude-sonnet-4-5",
|
"anthropic/claude-sonnet-4.6", # empathetic, nuanced ($3/$15)
|
||||||
"mistralai/mistral-medium-3",
|
"mistralai/mistral-large-2512", # dry wit, precise ($0.50/$1.50)
|
||||||
"qwen/qwen3-235b-a22b",
|
"deepseek/deepseek-r1-distill-llama-70b", # raw reasoning ($0.70/$0.80)
|
||||||
"deepseek/deepseek-chat-v3-0324",
|
"meta-llama/llama-3.3-70b-instruct", # casual, natural ($0.10/$0.32)
|
||||||
"google/gemini-2.5-pro",
|
"google/gemini-2.5-flash", # analytical ($0.30/$2.50)
|
||||||
"meta-llama/llama-4-maverick",
|
|
||||||
]
|
]
|
||||||
self.caller_model_map: dict[str, str] = {
|
self.caller_model_map: dict[str, str] = {
|
||||||
"high_energy": "x-ai/grok-4",
|
# Grok 4.1 Fast — edgy, provocative, unfiltered humor
|
||||||
"confrontational": "x-ai/grok-4",
|
"high_energy": "x-ai/grok-4.1-fast",
|
||||||
"angry_venting": "x-ai/grok-4",
|
"confrontational": "x-ai/grok-4.1-fast",
|
||||||
"bragger": "x-ai/grok-4",
|
"angry_venting": "x-ai/grok-4.1-fast",
|
||||||
"comedian": "x-ai/grok-4",
|
"bragger": "x-ai/grok-4.1-fast",
|
||||||
"quiet_nervous": "anthropic/claude-sonnet-4-5",
|
"comedian": "x-ai/grok-4.1-fast",
|
||||||
"sweet_earnest": "anthropic/claude-sonnet-4-5",
|
# Claude Sonnet 4.6 — empathetic, genuine emotional depth
|
||||||
"emotional": "anthropic/claude-sonnet-4-5",
|
"quiet_nervous": "anthropic/claude-sonnet-4.6",
|
||||||
"deadpan": "mistralai/mistral-medium-3",
|
"sweet_earnest": "anthropic/claude-sonnet-4.6",
|
||||||
"mysterious": "mistralai/mistral-medium-3",
|
"emotional": "anthropic/claude-sonnet-4.6",
|
||||||
"world_weary": "mistralai/mistral-medium-3",
|
# Mistral Large — dry, witty, precise delivery
|
||||||
"storyteller": "qwen/qwen3-235b-a22b",
|
"deadpan": "mistralai/mistral-large-2512",
|
||||||
"rambling": "qwen/qwen3-235b-a22b",
|
"mysterious": "mistralai/mistral-large-2512",
|
||||||
"oversharer": "deepseek/deepseek-chat-v3-0324",
|
"world_weary": "mistralai/mistral-large-2512",
|
||||||
"conspiracy": "deepseek/deepseek-chat-v3-0324",
|
# DeepSeek R1 Distill — raw, unfiltered, commits to the bit
|
||||||
"know_it_all": "google/gemini-2.5-pro",
|
"storyteller": "deepseek/deepseek-r1-distill-llama-70b",
|
||||||
"first_time": "meta-llama/llama-4-maverick",
|
"oversharer": "deepseek/deepseek-r1-distill-llama-70b",
|
||||||
|
"conspiracy": "deepseek/deepseek-r1-distill-llama-70b",
|
||||||
|
"rambling": "deepseek/deepseek-r1-distill-llama-70b",
|
||||||
|
# Gemini 2.5 Flash — articulate, analytical, cites facts
|
||||||
|
"know_it_all": "google/gemini-2.5-flash",
|
||||||
|
# Llama 3.3 70B — casual, natural hesitation, first-timer energy
|
||||||
|
"first_time": "meta-llama/llama-3.3-70b-instruct",
|
||||||
|
"reluctant_caller": "meta-llama/llama-3.3-70b-instruct",
|
||||||
}
|
}
|
||||||
self.caller_model_fallback: str = "anthropic/claude-sonnet-4-5"
|
self.caller_model_fallback: str = "anthropic/claude-sonnet-4.6"
|
||||||
self.caller_models: dict[str, str] = {} # caller_key → assigned model
|
self.caller_models: dict[str, str] = {} # caller_key → assigned model
|
||||||
self._caller_model_cycle_idx: int = 0
|
self._caller_model_cycle_idx: int = 0
|
||||||
|
|
||||||
|
|||||||
@@ -32,25 +32,38 @@ class TTSCallRecord:
|
|||||||
|
|
||||||
# OpenRouter pricing per 1M tokens (as of March 2026)
|
# OpenRouter pricing per 1M tokens (as of March 2026)
|
||||||
OPENROUTER_PRICING = {
|
OPENROUTER_PRICING = {
|
||||||
|
# Claude
|
||||||
|
"anthropic/claude-sonnet-4.6": {"prompt": 3.00, "completion": 15.00},
|
||||||
"anthropic/claude-sonnet-4-5": {"prompt": 3.00, "completion": 15.00},
|
"anthropic/claude-sonnet-4-5": {"prompt": 3.00, "completion": 15.00},
|
||||||
"anthropic/claude-haiku-4.5": {"prompt": 0.80, "completion": 4.00},
|
"anthropic/claude-haiku-4.5": {"prompt": 0.80, "completion": 4.00},
|
||||||
"anthropic/claude-3-haiku": {"prompt": 0.25, "completion": 1.25},
|
"anthropic/claude-3-haiku": {"prompt": 0.25, "completion": 1.25},
|
||||||
|
# Grok
|
||||||
|
"x-ai/grok-4.1-fast": {"prompt": 0.20, "completion": 0.50},
|
||||||
"x-ai/grok-4": {"prompt": 3.00, "completion": 15.00},
|
"x-ai/grok-4": {"prompt": 3.00, "completion": 15.00},
|
||||||
"x-ai/grok-4-fast": {"prompt": 5.00, "completion": 15.00},
|
"x-ai/grok-4-fast": {"prompt": 5.00, "completion": 15.00},
|
||||||
"minimax/minimax-m2-her": {"prompt": 0.50, "completion": 1.50},
|
# Mistral
|
||||||
"mistralai/mistral-small-creative": {"prompt": 0.20, "completion": 0.60},
|
"mistralai/mistral-large-2512": {"prompt": 0.50, "completion": 1.50},
|
||||||
|
"mistralai/mistral-small-2603": {"prompt": 0.15, "completion": 0.60},
|
||||||
|
"mistralai/mistral-medium-3": {"prompt": 0.40, "completion": 2.00},
|
||||||
|
"mistralai/mistral-small-creative": {"prompt": 0.10, "completion": 0.30},
|
||||||
|
# DeepSeek
|
||||||
|
"deepseek/deepseek-r1-distill-llama-70b": {"prompt": 0.70, "completion": 0.80},
|
||||||
|
"deepseek/deepseek-chat-v3-0324": {"prompt": 0.27, "completion": 1.10},
|
||||||
"deepseek/deepseek-v3.2": {"prompt": 0.14, "completion": 0.28},
|
"deepseek/deepseek-v3.2": {"prompt": 0.14, "completion": 0.28},
|
||||||
"google/gemini-2.5-flash": {"prompt": 0.15, "completion": 0.60},
|
# Google
|
||||||
|
"google/gemini-2.5-flash": {"prompt": 0.30, "completion": 2.50},
|
||||||
|
"google/gemini-2.5-pro": {"prompt": 1.25, "completion": 10.00},
|
||||||
|
"google/gemini-3-flash-preview": {"prompt": 0.50, "completion": 3.00},
|
||||||
"google/gemini-flash-1.5": {"prompt": 0.075, "completion": 0.30},
|
"google/gemini-flash-1.5": {"prompt": 0.075, "completion": 0.30},
|
||||||
|
# Meta
|
||||||
|
"meta-llama/llama-3.3-70b-instruct": {"prompt": 0.10, "completion": 0.32},
|
||||||
|
"meta-llama/llama-4-maverick": {"prompt": 0.20, "completion": 0.60},
|
||||||
|
# Other
|
||||||
|
"moonshotai/kimi-k2": {"prompt": 0.60, "completion": 2.00},
|
||||||
|
"qwen/qwen3-235b-a22b": {"prompt": 0.20, "completion": 0.60},
|
||||||
|
"minimax/minimax-m2-her": {"prompt": 0.50, "completion": 1.50},
|
||||||
"openai/gpt-4o-mini": {"prompt": 0.15, "completion": 0.60},
|
"openai/gpt-4o-mini": {"prompt": 0.15, "completion": 0.60},
|
||||||
"openai/gpt-4o": {"prompt": 2.50, "completion": 10.00},
|
"openai/gpt-4o": {"prompt": 2.50, "completion": 10.00},
|
||||||
"meta-llama/llama-3.1-8b-instruct": {"prompt": 0.06, "completion": 0.06},
|
|
||||||
"deepseek/deepseek-chat-v3-0324": {"prompt": 0.27, "completion": 1.10},
|
|
||||||
"moonshotai/kimi-k2": {"prompt": 0.60, "completion": 2.00},
|
|
||||||
"mistralai/mistral-medium-3": {"prompt": 0.40, "completion": 2.00},
|
|
||||||
"meta-llama/llama-4-maverick": {"prompt": 0.20, "completion": 0.60},
|
|
||||||
"qwen/qwen3-235b-a22b": {"prompt": 0.20, "completion": 0.60},
|
|
||||||
"google/gemini-2.5-pro": {"prompt": 1.25, "completion": 10.00},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# TTS pricing per character
|
# TTS pricing per character
|
||||||
|
|||||||
+16
-16
@@ -10,26 +10,26 @@ from .cost_tracker import cost_tracker
|
|||||||
|
|
||||||
# Available OpenRouter models
|
# Available OpenRouter models
|
||||||
OPENROUTER_MODELS = [
|
OPENROUTER_MODELS = [
|
||||||
# Default
|
# Primary
|
||||||
"anthropic/claude-sonnet-4-5",
|
"anthropic/claude-sonnet-4.6",
|
||||||
# Best for natural dialog
|
"x-ai/grok-4.1-fast",
|
||||||
"x-ai/grok-4",
|
"x-ai/grok-4",
|
||||||
"x-ai/grok-4-fast",
|
# Style-matched pool
|
||||||
"minimax/minimax-m2-her",
|
"mistralai/mistral-large-2512",
|
||||||
"mistralai/mistral-small-creative",
|
"deepseek/deepseek-r1-distill-llama-70b",
|
||||||
"deepseek/deepseek-v3.2",
|
"meta-llama/llama-3.3-70b-instruct",
|
||||||
# Other
|
|
||||||
"anthropic/claude-haiku-4.5",
|
|
||||||
"google/gemini-2.5-flash",
|
"google/gemini-2.5-flash",
|
||||||
"openai/gpt-4o-mini",
|
# Other good options
|
||||||
"openai/gpt-4o",
|
"anthropic/claude-sonnet-4-5",
|
||||||
# New dialog models
|
"anthropic/claude-haiku-4.5",
|
||||||
"deepseek/deepseek-chat-v3-0324",
|
"deepseek/deepseek-chat-v3-0324",
|
||||||
"moonshotai/kimi-k2",
|
"mistralai/mistral-small-2603",
|
||||||
"mistralai/mistral-medium-3",
|
|
||||||
"meta-llama/llama-4-maverick",
|
|
||||||
"qwen/qwen3-235b-a22b",
|
|
||||||
"google/gemini-2.5-pro",
|
"google/gemini-2.5-pro",
|
||||||
|
"google/gemini-3-flash-preview",
|
||||||
|
"x-ai/grok-4-fast",
|
||||||
|
"moonshotai/kimi-k2",
|
||||||
|
"qwen/qwen3-235b-a22b",
|
||||||
|
"meta-llama/llama-4-maverick",
|
||||||
# Legacy
|
# Legacy
|
||||||
"anthropic/claude-3-haiku",
|
"anthropic/claude-3-haiku",
|
||||||
"google/gemini-flash-1.5",
|
"google/gemini-flash-1.5",
|
||||||
|
|||||||
Reference in New Issue
Block a user