diff --git a/backend/config.py b/backend/config.py
index c8891af..4e4c6f3 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -29,21 +29,20 @@ class Settings(BaseSettings):
 
     # LLM Settings
     llm_provider: str = "openrouter"  # "openrouter" or "ollama"
-    openrouter_model: str = "anthropic/claude-sonnet-4-5"  # primary/default model
+    openrouter_model: str = "anthropic/claude-sonnet-4.6"  # primary/default model
     ollama_model: str = "llama3.2"
     ollama_host: str = "http://localhost:11434"
 
-    # Per-category model routing — cheaper models for non-critical tasks
-    # Categories: caller_dialog, devon_monitor, devon_ask, background_gen,
-    #             call_summary, news_summary, topic_gen, unknown
+    # Per-category model routing
+    # caller_dialog is overridden by style_matched routing (see Session.caller_model_map)
     category_models: dict = {
-        "caller_dialog": "x-ai/grok-4",                         # full Grok 4 — edgier dialog, latency OK (gaps cut in post)
-        "devon_ask": "x-ai/grok-4",                             # Devon should match the show's edgy energy
-        "devon_monitor": "google/gemini-2.5-flash",           # Devon polling — just decisions, keep cheap
-        "background_gen": "x-ai/grok-4",                      # wilder, more specific caller backgrounds
-        "call_summary": "google/gemini-2.5-flash",            # post-call summaries
-        "news_summary": "google/gemini-2.5-flash",            # news digests
-        "topic_gen": "google/gemini-2.5-flash",               # topic generation
+        "caller_dialog": "x-ai/grok-4.1-fast",               # fallback if style_matched disabled ($0.20/$0.50)
+        "devon_ask": "x-ai/grok-4.1-fast",                   # Devon matches show energy, cheap ($0.20/$0.50)
+        "devon_monitor": "google/gemini-2.5-flash",          # just yes/no decisions, keep cheap ($0.30/$2.50)
+        "background_gen": "x-ai/grok-4.1-fast",              # wilder caller backgrounds ($0.20/$0.50)
+        "call_summary": "google/gemini-2.5-flash",           # post-call, no personality needed ($0.30/$2.50)
+        "news_summary": "google/gemini-2.5-flash",           # just digesting headlines ($0.30/$2.50)
+        "topic_gen": "google/gemini-2.5-flash",              # structured output ($0.30/$2.50)
     }
 
     # TTS Settings
diff --git a/backend/main.py b/backend/main.py
index d3f60d4..e9299d2 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -6240,34 +6240,40 @@ class Session:
         # Caller model routing
         self.caller_model_strategy: str = "style_matched"  # "single" | "cycle" | "style_matched"
         self.caller_model_pool: list[str] = [
-            "x-ai/grok-4",
-            "anthropic/claude-sonnet-4-5",
-            "mistralai/mistral-medium-3",
-            "qwen/qwen3-235b-a22b",
-            "deepseek/deepseek-chat-v3-0324",
-            "google/gemini-2.5-pro",
-            "meta-llama/llama-4-maverick",
+            "x-ai/grok-4.1-fast",               # edgy, casual, great value ($0.20/$0.50)
+            "anthropic/claude-sonnet-4.6",        # empathetic, nuanced ($3/$15)
+            "mistralai/mistral-large-2512",       # dry wit, precise ($0.50/$1.50)
+            "deepseek/deepseek-r1-distill-llama-70b",  # raw reasoning ($0.70/$0.80)
+            "meta-llama/llama-3.3-70b-instruct",  # casual, natural ($0.10/$0.32)
+            "google/gemini-2.5-flash",            # analytical ($0.30/$2.50)
         ]
         self.caller_model_map: dict[str, str] = {
-            "high_energy": "x-ai/grok-4",
-            "confrontational": "x-ai/grok-4",
-            "angry_venting": "x-ai/grok-4",
-            "bragger": "x-ai/grok-4",
-            "comedian": "x-ai/grok-4",
-            "quiet_nervous": "anthropic/claude-sonnet-4-5",
-            "sweet_earnest": "anthropic/claude-sonnet-4-5",
-            "emotional": "anthropic/claude-sonnet-4-5",
-            "deadpan": "mistralai/mistral-medium-3",
-            "mysterious": "mistralai/mistral-medium-3",
-            "world_weary": "mistralai/mistral-medium-3",
-            "storyteller": "qwen/qwen3-235b-a22b",
-            "rambling": "qwen/qwen3-235b-a22b",
-            "oversharer": "deepseek/deepseek-chat-v3-0324",
-            "conspiracy": "deepseek/deepseek-chat-v3-0324",
-            "know_it_all": "google/gemini-2.5-pro",
-            "first_time": "meta-llama/llama-4-maverick",
+            # Grok 4.1 Fast — edgy, provocative, unfiltered humor
+            "high_energy":       "x-ai/grok-4.1-fast",
+            "confrontational":   "x-ai/grok-4.1-fast",
+            "angry_venting":     "x-ai/grok-4.1-fast",
+            "bragger":           "x-ai/grok-4.1-fast",
+            "comedian":          "x-ai/grok-4.1-fast",
+            # Claude Sonnet 4.6 — empathetic, genuine emotional depth
+            "quiet_nervous":     "anthropic/claude-sonnet-4.6",
+            "sweet_earnest":     "anthropic/claude-sonnet-4.6",
+            "emotional":         "anthropic/claude-sonnet-4.6",
+            # Mistral Large — dry, witty, precise delivery
+            "deadpan":           "mistralai/mistral-large-2512",
+            "mysterious":        "mistralai/mistral-large-2512",
+            "world_weary":       "mistralai/mistral-large-2512",
+            # DeepSeek R1 Distill — raw, unfiltered, commits to the bit
+            "storyteller":       "deepseek/deepseek-r1-distill-llama-70b",
+            "oversharer":        "deepseek/deepseek-r1-distill-llama-70b",
+            "conspiracy":        "deepseek/deepseek-r1-distill-llama-70b",
+            "rambling":          "deepseek/deepseek-r1-distill-llama-70b",
+            # Gemini 2.5 Flash — articulate, analytical, cites facts
+            "know_it_all":       "google/gemini-2.5-flash",
+            # Llama 3.3 70B — casual, natural hesitation, first-timer energy
+            "first_time":        "meta-llama/llama-3.3-70b-instruct",
+            "reluctant_caller":  "meta-llama/llama-3.3-70b-instruct",
         }
-        self.caller_model_fallback: str = "anthropic/claude-sonnet-4-5"
+        self.caller_model_fallback: str = "anthropic/claude-sonnet-4.6"
         self.caller_models: dict[str, str] = {}  # caller_key → assigned model
         self._caller_model_cycle_idx: int = 0
 
diff --git a/backend/services/cost_tracker.py b/backend/services/cost_tracker.py
index c3d2c4c..355d180 100644
--- a/backend/services/cost_tracker.py
+++ b/backend/services/cost_tracker.py
@@ -32,25 +32,38 @@ class TTSCallRecord:
 
 # OpenRouter pricing per 1M tokens (as of March 2026)
 OPENROUTER_PRICING = {
+    # Claude
+    "anthropic/claude-sonnet-4.6":      {"prompt": 3.00,  "completion": 15.00},
     "anthropic/claude-sonnet-4-5":      {"prompt": 3.00,  "completion": 15.00},
     "anthropic/claude-haiku-4.5":       {"prompt": 0.80,  "completion": 4.00},
     "anthropic/claude-3-haiku":         {"prompt": 0.25,  "completion": 1.25},
+    # Grok
+    "x-ai/grok-4.1-fast":             {"prompt": 0.20,  "completion": 0.50},
     "x-ai/grok-4":                     {"prompt": 3.00,  "completion": 15.00},
     "x-ai/grok-4-fast":                {"prompt": 5.00,  "completion": 15.00},
-    "minimax/minimax-m2-her":           {"prompt": 0.50,  "completion": 1.50},
-    "mistralai/mistral-small-creative": {"prompt": 0.20,  "completion": 0.60},
+    # Mistral
+    "mistralai/mistral-large-2512":    {"prompt": 0.50,  "completion": 1.50},
+    "mistralai/mistral-small-2603":    {"prompt": 0.15,  "completion": 0.60},
+    "mistralai/mistral-medium-3":      {"prompt": 0.40,  "completion": 2.00},
+    "mistralai/mistral-small-creative": {"prompt": 0.10, "completion": 0.30},
+    # DeepSeek
+    "deepseek/deepseek-r1-distill-llama-70b": {"prompt": 0.70, "completion": 0.80},
+    "deepseek/deepseek-chat-v3-0324":  {"prompt": 0.27,  "completion": 1.10},
     "deepseek/deepseek-v3.2":          {"prompt": 0.14,  "completion": 0.28},
-    "google/gemini-2.5-flash":          {"prompt": 0.15,  "completion": 0.60},
+    # Google
+    "google/gemini-2.5-flash":          {"prompt": 0.30,  "completion": 2.50},
+    "google/gemini-2.5-pro":           {"prompt": 1.25,  "completion": 10.00},
+    "google/gemini-3-flash-preview":   {"prompt": 0.50,  "completion": 3.00},
     "google/gemini-flash-1.5":          {"prompt": 0.075, "completion": 0.30},
+    # Meta
+    "meta-llama/llama-3.3-70b-instruct": {"prompt": 0.10, "completion": 0.32},
+    "meta-llama/llama-4-maverick":     {"prompt": 0.20,  "completion": 0.60},
+    # Other
+    "moonshotai/kimi-k2":              {"prompt": 0.60,  "completion": 2.00},
+    "qwen/qwen3-235b-a22b":           {"prompt": 0.20,  "completion": 0.60},
+    "minimax/minimax-m2-her":           {"prompt": 0.50,  "completion": 1.50},
     "openai/gpt-4o-mini":              {"prompt": 0.15,  "completion": 0.60},
     "openai/gpt-4o":                   {"prompt": 2.50,  "completion": 10.00},
-    "meta-llama/llama-3.1-8b-instruct": {"prompt": 0.06, "completion": 0.06},
-    "deepseek/deepseek-chat-v3-0324":  {"prompt": 0.27,  "completion": 1.10},
-    "moonshotai/kimi-k2":              {"prompt": 0.60,  "completion": 2.00},
-    "mistralai/mistral-medium-3":      {"prompt": 0.40,  "completion": 2.00},
-    "meta-llama/llama-4-maverick":     {"prompt": 0.20,  "completion": 0.60},
-    "qwen/qwen3-235b-a22b":           {"prompt": 0.20,  "completion": 0.60},
-    "google/gemini-2.5-pro":           {"prompt": 1.25,  "completion": 10.00},
 }
 
 # TTS pricing per character
diff --git a/backend/services/llm.py b/backend/services/llm.py
index 3e5704f..772ae26 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -10,26 +10,26 @@ from .cost_tracker import cost_tracker
 
 # Available OpenRouter models
 OPENROUTER_MODELS = [
-    # Default
-    "anthropic/claude-sonnet-4-5",
-    # Best for natural dialog
+    # Primary
+    "anthropic/claude-sonnet-4.6",
+    "x-ai/grok-4.1-fast",
     "x-ai/grok-4",
-    "x-ai/grok-4-fast",
-    "minimax/minimax-m2-her",
-    "mistralai/mistral-small-creative",
-    "deepseek/deepseek-v3.2",
-    # Other
-    "anthropic/claude-haiku-4.5",
+    # Style-matched pool
+    "mistralai/mistral-large-2512",
+    "deepseek/deepseek-r1-distill-llama-70b",
+    "meta-llama/llama-3.3-70b-instruct",
     "google/gemini-2.5-flash",
-    "openai/gpt-4o-mini",
-    "openai/gpt-4o",
-    # New dialog models
+    # Other good options
+    "anthropic/claude-sonnet-4-5",
+    "anthropic/claude-haiku-4.5",
     "deepseek/deepseek-chat-v3-0324",
-    "moonshotai/kimi-k2",
-    "mistralai/mistral-medium-3",
-    "meta-llama/llama-4-maverick",
-    "qwen/qwen3-235b-a22b",
+    "mistralai/mistral-small-2603",
     "google/gemini-2.5-pro",
+    "google/gemini-3-flash-preview",
+    "x-ai/grok-4-fast",
+    "moonshotai/kimi-k2",
+    "qwen/qwen3-235b-a22b",
+    "meta-llama/llama-4-maverick",
     # Legacy
     "anthropic/claude-3-haiku",
     "google/gemini-flash-1.5",