diff --git a/backend/config.py b/backend/config.py index c8891af..4e4c6f3 100644 --- a/backend/config.py +++ b/backend/config.py @@ -29,21 +29,20 @@ class Settings(BaseSettings): # LLM Settings llm_provider: str = "openrouter" # "openrouter" or "ollama" - openrouter_model: str = "anthropic/claude-sonnet-4-5" # primary/default model + openrouter_model: str = "anthropic/claude-sonnet-4.6" # primary/default model ollama_model: str = "llama3.2" ollama_host: str = "http://localhost:11434" - # Per-category model routing — cheaper models for non-critical tasks - # Categories: caller_dialog, devon_monitor, devon_ask, background_gen, - # call_summary, news_summary, topic_gen, unknown + # Per-category model routing + # caller_dialog is overridden by style_matched routing (see Session.caller_model_map) category_models: dict = { - "caller_dialog": "x-ai/grok-4", # full Grok 4 — edgier dialog, latency OK (gaps cut in post) - "devon_ask": "x-ai/grok-4", # Devon should match the show's edgy energy - "devon_monitor": "google/gemini-2.5-flash", # Devon polling — just decisions, keep cheap - "background_gen": "x-ai/grok-4", # wilder, more specific caller backgrounds - "call_summary": "google/gemini-2.5-flash", # post-call summaries - "news_summary": "google/gemini-2.5-flash", # news digests - "topic_gen": "google/gemini-2.5-flash", # topic generation + "caller_dialog": "x-ai/grok-4.1-fast", # fallback if style_matched disabled ($0.20/$0.50) + "devon_ask": "x-ai/grok-4.1-fast", # Devon matches show energy, cheap ($0.20/$0.50) + "devon_monitor": "google/gemini-2.5-flash", # just yes/no decisions, keep cheap ($0.30/$2.50) + "background_gen": "x-ai/grok-4.1-fast", # wilder caller backgrounds ($0.20/$0.50) + "call_summary": "google/gemini-2.5-flash", # post-call, no personality needed ($0.30/$2.50) + "news_summary": "google/gemini-2.5-flash", # just digesting headlines ($0.30/$2.50) + "topic_gen": "google/gemini-2.5-flash", # structured output ($0.30/$2.50) } # TTS Settings diff --git 
a/backend/main.py b/backend/main.py index d3f60d4..e9299d2 100644 --- a/backend/main.py +++ b/backend/main.py @@ -6240,34 +6240,40 @@ class Session: # Caller model routing self.caller_model_strategy: str = "style_matched" # "single" | "cycle" | "style_matched" self.caller_model_pool: list[str] = [ - "x-ai/grok-4", - "anthropic/claude-sonnet-4-5", - "mistralai/mistral-medium-3", - "qwen/qwen3-235b-a22b", - "deepseek/deepseek-chat-v3-0324", - "google/gemini-2.5-pro", - "meta-llama/llama-4-maverick", + "x-ai/grok-4.1-fast", # edgy, casual, great value ($0.20/$0.50) + "anthropic/claude-sonnet-4.6", # empathetic, nuanced ($3/$15) + "mistralai/mistral-large-2512", # dry wit, precise ($0.50/$1.50) + "deepseek/deepseek-r1-distill-llama-70b", # raw reasoning ($0.70/$0.80) + "meta-llama/llama-3.3-70b-instruct", # casual, natural ($0.10/$0.32) + "google/gemini-2.5-flash", # analytical ($0.30/$2.50) ] self.caller_model_map: dict[str, str] = { - "high_energy": "x-ai/grok-4", - "confrontational": "x-ai/grok-4", - "angry_venting": "x-ai/grok-4", - "bragger": "x-ai/grok-4", - "comedian": "x-ai/grok-4", - "quiet_nervous": "anthropic/claude-sonnet-4-5", - "sweet_earnest": "anthropic/claude-sonnet-4-5", - "emotional": "anthropic/claude-sonnet-4-5", - "deadpan": "mistralai/mistral-medium-3", - "mysterious": "mistralai/mistral-medium-3", - "world_weary": "mistralai/mistral-medium-3", - "storyteller": "qwen/qwen3-235b-a22b", - "rambling": "qwen/qwen3-235b-a22b", - "oversharer": "deepseek/deepseek-chat-v3-0324", - "conspiracy": "deepseek/deepseek-chat-v3-0324", - "know_it_all": "google/gemini-2.5-pro", - "first_time": "meta-llama/llama-4-maverick", + # Grok 4.1 Fast — edgy, provocative, unfiltered humor + "high_energy": "x-ai/grok-4.1-fast", + "confrontational": "x-ai/grok-4.1-fast", + "angry_venting": "x-ai/grok-4.1-fast", + "bragger": "x-ai/grok-4.1-fast", + "comedian": "x-ai/grok-4.1-fast", + # Claude Sonnet 4.6 — empathetic, genuine emotional depth + "quiet_nervous": 
"anthropic/claude-sonnet-4.6", + "sweet_earnest": "anthropic/claude-sonnet-4.6", + "emotional": "anthropic/claude-sonnet-4.6", + # Mistral Large — dry, witty, precise delivery + "deadpan": "mistralai/mistral-large-2512", + "mysterious": "mistralai/mistral-large-2512", + "world_weary": "mistralai/mistral-large-2512", + # DeepSeek R1 Distill — raw, unfiltered, commits to the bit + "storyteller": "deepseek/deepseek-r1-distill-llama-70b", + "oversharer": "deepseek/deepseek-r1-distill-llama-70b", + "conspiracy": "deepseek/deepseek-r1-distill-llama-70b", + "rambling": "deepseek/deepseek-r1-distill-llama-70b", + # Gemini 2.5 Flash — articulate, analytical, cites facts + "know_it_all": "google/gemini-2.5-flash", + # Llama 3.3 70B — casual, natural hesitation, first-timer energy + "first_time": "meta-llama/llama-3.3-70b-instruct", + "reluctant_caller": "meta-llama/llama-3.3-70b-instruct", } - self.caller_model_fallback: str = "anthropic/claude-sonnet-4-5" + self.caller_model_fallback: str = "anthropic/claude-sonnet-4.6" self.caller_models: dict[str, str] = {} # caller_key → assigned model self._caller_model_cycle_idx: int = 0 diff --git a/backend/services/cost_tracker.py b/backend/services/cost_tracker.py index c3d2c4c..355d180 100644 --- a/backend/services/cost_tracker.py +++ b/backend/services/cost_tracker.py @@ -32,25 +32,38 @@ class TTSCallRecord: # OpenRouter pricing per 1M tokens (as of March 2026) OPENROUTER_PRICING = { + # Claude + "anthropic/claude-sonnet-4.6": {"prompt": 3.00, "completion": 15.00}, "anthropic/claude-sonnet-4-5": {"prompt": 3.00, "completion": 15.00}, "anthropic/claude-haiku-4.5": {"prompt": 0.80, "completion": 4.00}, "anthropic/claude-3-haiku": {"prompt": 0.25, "completion": 1.25}, + # Grok + "x-ai/grok-4.1-fast": {"prompt": 0.20, "completion": 0.50}, "x-ai/grok-4": {"prompt": 3.00, "completion": 15.00}, "x-ai/grok-4-fast": {"prompt": 5.00, "completion": 15.00}, - "minimax/minimax-m2-her": {"prompt": 0.50, "completion": 1.50}, - 
"mistralai/mistral-small-creative": {"prompt": 0.20, "completion": 0.60}, + # Mistral + "mistralai/mistral-large-2512": {"prompt": 0.50, "completion": 1.50}, + "mistralai/mistral-small-2603": {"prompt": 0.15, "completion": 0.60}, + "mistralai/mistral-medium-3": {"prompt": 0.40, "completion": 2.00}, + "mistralai/mistral-small-creative": {"prompt": 0.10, "completion": 0.30}, + # DeepSeek + "deepseek/deepseek-r1-distill-llama-70b": {"prompt": 0.70, "completion": 0.80}, + "deepseek/deepseek-chat-v3-0324": {"prompt": 0.27, "completion": 1.10}, "deepseek/deepseek-v3.2": {"prompt": 0.14, "completion": 0.28}, - "google/gemini-2.5-flash": {"prompt": 0.15, "completion": 0.60}, + # Google + "google/gemini-2.5-flash": {"prompt": 0.30, "completion": 2.50}, + "google/gemini-2.5-pro": {"prompt": 1.25, "completion": 10.00}, + "google/gemini-3-flash-preview": {"prompt": 0.50, "completion": 3.00}, "google/gemini-flash-1.5": {"prompt": 0.075, "completion": 0.30}, + # Meta + "meta-llama/llama-3.3-70b-instruct": {"prompt": 0.10, "completion": 0.32}, + "meta-llama/llama-4-maverick": {"prompt": 0.20, "completion": 0.60}, + # Other + "moonshotai/kimi-k2": {"prompt": 0.60, "completion": 2.00}, + "qwen/qwen3-235b-a22b": {"prompt": 0.20, "completion": 0.60}, + "minimax/minimax-m2-her": {"prompt": 0.50, "completion": 1.50}, "openai/gpt-4o-mini": {"prompt": 0.15, "completion": 0.60}, "openai/gpt-4o": {"prompt": 2.50, "completion": 10.00}, - "meta-llama/llama-3.1-8b-instruct": {"prompt": 0.06, "completion": 0.06}, - "deepseek/deepseek-chat-v3-0324": {"prompt": 0.27, "completion": 1.10}, - "moonshotai/kimi-k2": {"prompt": 0.60, "completion": 2.00}, - "mistralai/mistral-medium-3": {"prompt": 0.40, "completion": 2.00}, - "meta-llama/llama-4-maverick": {"prompt": 0.20, "completion": 0.60}, - "qwen/qwen3-235b-a22b": {"prompt": 0.20, "completion": 0.60}, - "google/gemini-2.5-pro": {"prompt": 1.25, "completion": 10.00}, } # TTS pricing per character diff --git a/backend/services/llm.py 
b/backend/services/llm.py index 3e5704f..772ae26 100644 --- a/backend/services/llm.py +++ b/backend/services/llm.py @@ -10,26 +10,26 @@ from .cost_tracker import cost_tracker # Available OpenRouter models OPENROUTER_MODELS = [ - # Default - "anthropic/claude-sonnet-4-5", - # Best for natural dialog + # Primary + "anthropic/claude-sonnet-4.6", + "x-ai/grok-4.1-fast", "x-ai/grok-4", - "x-ai/grok-4-fast", - "minimax/minimax-m2-her", - "mistralai/mistral-small-creative", - "deepseek/deepseek-v3.2", - # Other - "anthropic/claude-haiku-4.5", + # Style-matched pool + "mistralai/mistral-large-2512", + "deepseek/deepseek-r1-distill-llama-70b", + "meta-llama/llama-3.3-70b-instruct", "google/gemini-2.5-flash", - "openai/gpt-4o-mini", - "openai/gpt-4o", - # New dialog models + # Other good options + "anthropic/claude-sonnet-4-5", + "anthropic/claude-haiku-4.5", "deepseek/deepseek-chat-v3-0324", - "moonshotai/kimi-k2", - "mistralai/mistral-medium-3", - "meta-llama/llama-4-maverick", - "qwen/qwen3-235b-a22b", + "mistralai/mistral-small-2603", "google/gemini-2.5-pro", + "google/gemini-3-flash-preview", + "x-ai/grok-4-fast", + "moonshotai/kimi-k2", + "qwen/qwen3-235b-a22b", + "meta-llama/llama-4-maverick", # Legacy "anthropic/claude-3-haiku", "google/gemini-flash-1.5",