Update model routing with latest OpenRouter models

Style-matched defaults:
- Grok 4.1 Fast for edgy callers (high_energy, confrontational, comedian, etc.)
- Claude Sonnet 4.6 for emotional callers (quiet_nervous, sweet_earnest, emotional)
- Mistral Large 2512 for deadpan/mysterious/world-weary
- DeepSeek R1 Distill for storyteller/oversharer/conspiracy/rambler
- Gemini 2.5 Flash for know_it_all
- Llama 3.3 70B for first_time/reluctant callers

Category routing: Grok 4.1 Fast for dialog/devon/backgrounds, Gemini Flash for monitor/summary

Updated OPENROUTER_MODELS and OPENROUTER_PRICING with all new models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 02:31:33 -06:00
parent e614599650
commit c516402402
4 changed files with 80 additions and 62 deletions
@@ -32,25 +32,38 @@ class TTSCallRecord:

 # OpenRouter pricing per 1M tokens (as of March 2026)
 OPENROUTER_PRICING = {
+    # Claude
+    "anthropic/claude-sonnet-4.6":      {"prompt": 3.00,  "completion": 15.00},
    "anthropic/claude-sonnet-4-5":      {"prompt": 3.00,  "completion": 15.00},
    "anthropic/claude-haiku-4.5":       {"prompt": 0.80,  "completion": 4.00},
    "anthropic/claude-3-haiku":         {"prompt": 0.25,  "completion": 1.25},
+    # Grok
+    "x-ai/grok-4.1-fast":             {"prompt": 0.20,  "completion": 0.50},
    "x-ai/grok-4":                     {"prompt": 3.00,  "completion": 15.00},
    "x-ai/grok-4-fast":                {"prompt": 5.00,  "completion": 15.00},
-    "minimax/minimax-m2-her":           {"prompt": 0.50,  "completion": 1.50},
-    "mistralai/mistral-small-creative": {"prompt": 0.20,  "completion": 0.60},
+    # Mistral
+    "mistralai/mistral-large-2512":    {"prompt": 0.50,  "completion": 1.50},
+    "mistralai/mistral-small-2603":    {"prompt": 0.15,  "completion": 0.60},
+    "mistralai/mistral-medium-3":      {"prompt": 0.40,  "completion": 2.00},
+    "mistralai/mistral-small-creative": {"prompt": 0.10, "completion": 0.30},
+    # DeepSeek
+    "deepseek/deepseek-r1-distill-llama-70b": {"prompt": 0.70, "completion": 0.80},
+    "deepseek/deepseek-chat-v3-0324":  {"prompt": 0.27,  "completion": 1.10},
    "deepseek/deepseek-v3.2":          {"prompt": 0.14,  "completion": 0.28},
-    "google/gemini-2.5-flash":          {"prompt": 0.15,  "completion": 0.60},
+    # Google
+    "google/gemini-2.5-flash":          {"prompt": 0.30,  "completion": 2.50},
+    "google/gemini-2.5-pro":           {"prompt": 1.25,  "completion": 10.00},
+    "google/gemini-3-flash-preview":   {"prompt": 0.50,  "completion": 3.00},
    "google/gemini-flash-1.5":          {"prompt": 0.075, "completion": 0.30},
+    # Meta
+    "meta-llama/llama-3.3-70b-instruct": {"prompt": 0.10, "completion": 0.32},
+    "meta-llama/llama-4-maverick":     {"prompt": 0.20,  "completion": 0.60},
+    # Other
+    "moonshotai/kimi-k2":              {"prompt": 0.60,  "completion": 2.00},
+    "qwen/qwen3-235b-a22b":           {"prompt": 0.20,  "completion": 0.60},
+    "minimax/minimax-m2-her":           {"prompt": 0.50,  "completion": 1.50},
    "openai/gpt-4o-mini":              {"prompt": 0.15,  "completion": 0.60},
    "openai/gpt-4o":                   {"prompt": 2.50,  "completion": 10.00},
-    "meta-llama/llama-3.1-8b-instruct": {"prompt": 0.06, "completion": 0.06},
-    "deepseek/deepseek-chat-v3-0324":  {"prompt": 0.27,  "completion": 1.10},
-    "moonshotai/kimi-k2":              {"prompt": 0.60,  "completion": 2.00},
-    "mistralai/mistral-medium-3":      {"prompt": 0.40,  "completion": 2.00},
-    "meta-llama/llama-4-maverick":     {"prompt": 0.20,  "completion": 0.60},
-    "qwen/qwen3-235b-a22b":           {"prompt": 0.20,  "completion": 0.60},
-    "google/gemini-2.5-pro":           {"prompt": 1.25,  "completion": 10.00},
 }

 # TTS pricing per character
@@ -10,26 +10,26 @@ from .cost_tracker import cost_tracker

 # Available OpenRouter models
 OPENROUTER_MODELS = [
-    # Default
-    "anthropic/claude-sonnet-4-5",
-    # Best for natural dialog
+    # Primary
+    "anthropic/claude-sonnet-4.6",
+    "x-ai/grok-4.1-fast",
    "x-ai/grok-4",
-    "x-ai/grok-4-fast",
-    "minimax/minimax-m2-her",
-    "mistralai/mistral-small-creative",
-    "deepseek/deepseek-v3.2",
-    # Other
-    "anthropic/claude-haiku-4.5",
+    # Style-matched pool
+    "mistralai/mistral-large-2512",
+    "deepseek/deepseek-r1-distill-llama-70b",
+    "meta-llama/llama-3.3-70b-instruct",
    "google/gemini-2.5-flash",
-    "openai/gpt-4o-mini",
-    "openai/gpt-4o",
-    # New dialog models
+    # Other good options
+    "anthropic/claude-sonnet-4-5",
+    "anthropic/claude-haiku-4.5",
    "deepseek/deepseek-chat-v3-0324",
-    "moonshotai/kimi-k2",
-    "mistralai/mistral-medium-3",
-    "meta-llama/llama-4-maverick",
-    "qwen/qwen3-235b-a22b",
+    "mistralai/mistral-small-2603",
    "google/gemini-2.5-pro",
+    "google/gemini-3-flash-preview",
+    "x-ai/grok-4-fast",
+    "moonshotai/kimi-k2",
+    "qwen/qwen3-235b-a22b",
+    "meta-llama/llama-4-maverick",
    # Legacy
    "anthropic/claude-3-haiku",
    "google/gemini-flash-1.5",