Compare commits
24 Commits
3dd6a83c68
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 376265eec7 | |||
| f3c91fc385 | |||
| c69c2ad532 | |||
| 8dbbd92d3a | |||
| fa36f8d184 | |||
| 794ad98cf0 | |||
| f5eabd7dc4 | |||
| f717edeacb | |||
| 56607879ee | |||
| fcefabdaee | |||
| 58495d2c75 | |||
| 51961dc19b | |||
| c516402402 | |||
| e614599650 | |||
| d36de95577 | |||
| 0147be4e0c | |||
| 390f138601 | |||
| 9eaf2fe5e3 | |||
| 314d5f9452 | |||
| e0fb3cac68 | |||
| 4589670b37 | |||
| eb1e18a997 | |||
| 6dcdf20289 | |||
| 762b5efc3b |
+10
-11
@@ -29,21 +29,20 @@ class Settings(BaseSettings):
|
|||||||
|
|
||||||
# LLM Settings
|
# LLM Settings
|
||||||
llm_provider: str = "openrouter" # "openrouter" or "ollama"
|
llm_provider: str = "openrouter" # "openrouter" or "ollama"
|
||||||
openrouter_model: str = "anthropic/claude-sonnet-4-5" # primary/default model
|
openrouter_model: str = "anthropic/claude-sonnet-4.6" # primary/default model
|
||||||
ollama_model: str = "llama3.2"
|
ollama_model: str = "llama3.2"
|
||||||
ollama_host: str = "http://localhost:11434"
|
ollama_host: str = "http://localhost:11434"
|
||||||
|
|
||||||
# Per-category model routing — cheaper models for non-critical tasks
|
# Per-category model routing
|
||||||
# Categories: caller_dialog, devon_monitor, devon_ask, background_gen,
|
# caller_dialog is overridden by style_matched routing (see Session.caller_model_map)
|
||||||
# call_summary, news_summary, topic_gen, unknown
|
|
||||||
category_models: dict = {
|
category_models: dict = {
|
||||||
"caller_dialog": "anthropic/claude-sonnet-4-5", # quality matters — this IS the show
|
"caller_dialog": "x-ai/grok-4.1-fast", # fallback if style_matched disabled ($0.20/$0.50)
|
||||||
"devon_ask": "google/gemini-2.5-flash", # Devon direct questions
|
"devon_ask": "x-ai/grok-4.1-fast", # Devon matches show energy, cheap ($0.20/$0.50)
|
||||||
"devon_monitor": "google/gemini-2.5-flash", # Devon polling — biggest cost saver
|
"devon_monitor": "google/gemini-2.5-flash", # just yes/no decisions, keep cheap ($0.15/$0.60)
|
||||||
"background_gen": "google/gemini-2.5-flash", # JSON caller backgrounds
|
"background_gen": "anthropic/claude-sonnet-4.6", # backgrounds drive the whole call — worth the quality ($3/$15, ~$0.30/show)
|
||||||
"call_summary": "google/gemini-2.5-flash", # post-call summaries
|
"call_summary": "google/gemini-2.5-flash", # post-call, no personality needed ($0.15/$0.60)
|
||||||
"news_summary": "google/gemini-2.5-flash", # news digests
|
"news_summary": "google/gemini-2.5-flash", # just digesting headlines ($0.15/$0.60)
|
||||||
"topic_gen": "google/gemini-2.5-flash", # topic generation
|
"topic_gen": "google/gemini-2.5-flash", # structured output ($0.15/$0.60)
|
||||||
}
|
}
|
||||||
|
|
||||||
# TTS Settings
|
# TTS Settings
|
||||||
|
|||||||
+1565
-149
File diff suppressed because it is too large
Load Diff
@@ -114,6 +114,7 @@ class AudioService:
|
|||||||
|
|
||||||
# Caller playback state
|
# Caller playback state
|
||||||
self._caller_stop_event = threading.Event()
|
self._caller_stop_event = threading.Event()
|
||||||
|
self._devon_stop_event = threading.Event()
|
||||||
self._caller_thread: Optional[threading.Thread] = None
|
self._caller_thread: Optional[threading.Thread] = None
|
||||||
|
|
||||||
# Host mic streaming state
|
# Host mic streaming state
|
||||||
@@ -431,9 +432,16 @@ class AudioService:
|
|||||||
"""Play TTS audio to specific channel of output device (interruptible)"""
|
"""Play TTS audio to specific channel of output device (interruptible)"""
|
||||||
import librosa
|
import librosa
|
||||||
|
|
||||||
# Stop any existing caller audio
|
# Devon uses its own stop event so hangup doesn't cut Devon's audio
|
||||||
self.stop_caller_audio()
|
is_devon = stem_name == "devon"
|
||||||
self._caller_stop_event.clear()
|
stop_event = self._devon_stop_event if is_devon else self._caller_stop_event
|
||||||
|
|
||||||
|
# Stop any existing audio on the same channel type
|
||||||
|
if is_devon:
|
||||||
|
self.stop_devon_audio()
|
||||||
|
else:
|
||||||
|
self.stop_caller_audio()
|
||||||
|
stop_event.clear()
|
||||||
|
|
||||||
# Convert bytes to numpy
|
# Convert bytes to numpy
|
||||||
audio = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
|
audio = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
|
||||||
@@ -476,7 +484,7 @@ class AudioService:
|
|||||||
channels=num_channels,
|
channels=num_channels,
|
||||||
dtype=np.float32
|
dtype=np.float32
|
||||||
) as stream:
|
) as stream:
|
||||||
while pos < len(multi_ch) and not self._caller_stop_event.is_set():
|
while pos < len(multi_ch) and not stop_event.is_set():
|
||||||
end = min(pos + chunk_size, len(multi_ch))
|
end = min(pos + chunk_size, len(multi_ch))
|
||||||
stream.write(multi_ch[pos:end])
|
stream.write(multi_ch[pos:end])
|
||||||
# Record each chunk as it plays so hangups cut the stem too
|
# Record each chunk as it plays so hangups cut the stem too
|
||||||
@@ -485,8 +493,8 @@ class AudioService:
|
|||||||
rec.write_sporadic(stem_name, audio[pos:end].copy(), device_sr)
|
rec.write_sporadic(stem_name, audio[pos:end].copy(), device_sr)
|
||||||
pos = end
|
pos = end
|
||||||
|
|
||||||
if self._caller_stop_event.is_set():
|
if stop_event.is_set():
|
||||||
print("Caller audio stopped early")
|
print(f"{stem_name.title()} audio stopped early")
|
||||||
else:
|
else:
|
||||||
print(f"Played caller audio: {len(audio)/device_sr:.2f}s")
|
print(f"Played caller audio: {len(audio)/device_sr:.2f}s")
|
||||||
|
|
||||||
@@ -497,6 +505,10 @@ class AudioService:
|
|||||||
"""Stop any playing caller audio"""
|
"""Stop any playing caller audio"""
|
||||||
self._caller_stop_event.set()
|
self._caller_stop_event.set()
|
||||||
|
|
||||||
|
def stop_devon_audio(self):
|
||||||
|
"""Stop any playing Devon audio (independent of caller audio)"""
|
||||||
|
self._devon_stop_event.set()
|
||||||
|
|
||||||
def _start_live_caller_stream(self):
|
def _start_live_caller_stream(self):
|
||||||
"""Start persistent output stream with ring buffer jitter absorption"""
|
"""Start persistent output stream with ring buffer jitter absorption"""
|
||||||
if self._live_caller_stream is not None:
|
if self._live_caller_stream is not None:
|
||||||
|
|||||||
@@ -65,7 +65,15 @@ class AvatarService:
|
|||||||
for caller in callers:
|
for caller in callers:
|
||||||
name = caller.get("name", "")
|
name = caller.get("name", "")
|
||||||
gender = caller.get("gender", "male")
|
gender = caller.get("gender", "male")
|
||||||
if name and not (AVATAR_DIR / f"{name}.jpg").exists():
|
if not name:
|
||||||
|
continue
|
||||||
|
g = "female" if gender.lower().startswith("f") else "male"
|
||||||
|
path = AVATAR_DIR / f"{name}.jpg"
|
||||||
|
marker = AVATAR_DIR / f"{name}.gender"
|
||||||
|
# Always call get_or_fetch if: no file, no gender marker, or gender mismatch
|
||||||
|
if not path.exists() or not marker.exists() or marker.read_text().strip() != g:
|
||||||
|
if path.exists():
|
||||||
|
print(f"[Avatar] Gender mismatch for {name}: cached={marker.read_text().strip() if marker.exists() else '?'}, want={g} — re-fetching")
|
||||||
tasks.append(self.get_or_fetch(name, gender))
|
tasks.append(self.get_or_fetch(name, gender))
|
||||||
|
|
||||||
if not tasks:
|
if not tasks:
|
||||||
|
|||||||
@@ -32,18 +32,38 @@ class TTSCallRecord:
|
|||||||
|
|
||||||
# OpenRouter pricing per 1M tokens (as of March 2026)
|
# OpenRouter pricing per 1M tokens (as of March 2026)
|
||||||
OPENROUTER_PRICING = {
|
OPENROUTER_PRICING = {
|
||||||
|
# Claude
|
||||||
|
"anthropic/claude-sonnet-4.6": {"prompt": 3.00, "completion": 15.00},
|
||||||
"anthropic/claude-sonnet-4-5": {"prompt": 3.00, "completion": 15.00},
|
"anthropic/claude-sonnet-4-5": {"prompt": 3.00, "completion": 15.00},
|
||||||
"anthropic/claude-haiku-4.5": {"prompt": 0.80, "completion": 4.00},
|
"anthropic/claude-haiku-4.5": {"prompt": 0.80, "completion": 4.00},
|
||||||
"anthropic/claude-3-haiku": {"prompt": 0.25, "completion": 1.25},
|
"anthropic/claude-3-haiku": {"prompt": 0.25, "completion": 1.25},
|
||||||
|
# Grok
|
||||||
|
"x-ai/grok-4.1-fast": {"prompt": 0.20, "completion": 0.50},
|
||||||
|
"x-ai/grok-4": {"prompt": 3.00, "completion": 15.00},
|
||||||
"x-ai/grok-4-fast": {"prompt": 5.00, "completion": 15.00},
|
"x-ai/grok-4-fast": {"prompt": 5.00, "completion": 15.00},
|
||||||
"minimax/minimax-m2-her": {"prompt": 0.50, "completion": 1.50},
|
# Mistral
|
||||||
"mistralai/mistral-small-creative": {"prompt": 0.20, "completion": 0.60},
|
"mistralai/mistral-large-2512": {"prompt": 0.50, "completion": 1.50},
|
||||||
|
"mistralai/mistral-small-2603": {"prompt": 0.15, "completion": 0.60},
|
||||||
|
"mistralai/mistral-medium-3": {"prompt": 0.40, "completion": 2.00},
|
||||||
|
"mistralai/mistral-small-creative": {"prompt": 0.10, "completion": 0.30},
|
||||||
|
# DeepSeek
|
||||||
|
"deepseek/deepseek-r1-distill-llama-70b": {"prompt": 0.70, "completion": 0.80},
|
||||||
|
"deepseek/deepseek-chat-v3-0324": {"prompt": 0.27, "completion": 1.10},
|
||||||
"deepseek/deepseek-v3.2": {"prompt": 0.14, "completion": 0.28},
|
"deepseek/deepseek-v3.2": {"prompt": 0.14, "completion": 0.28},
|
||||||
"google/gemini-2.5-flash": {"prompt": 0.15, "completion": 0.60},
|
# Google
|
||||||
|
"google/gemini-2.5-flash": {"prompt": 0.30, "completion": 2.50},
|
||||||
|
"google/gemini-2.5-pro": {"prompt": 1.25, "completion": 10.00},
|
||||||
|
"google/gemini-3-flash-preview": {"prompt": 0.50, "completion": 3.00},
|
||||||
"google/gemini-flash-1.5": {"prompt": 0.075, "completion": 0.30},
|
"google/gemini-flash-1.5": {"prompt": 0.075, "completion": 0.30},
|
||||||
|
# Meta
|
||||||
|
"meta-llama/llama-3.3-70b-instruct": {"prompt": 0.10, "completion": 0.32},
|
||||||
|
"meta-llama/llama-4-maverick": {"prompt": 0.20, "completion": 0.60},
|
||||||
|
# Other
|
||||||
|
"moonshotai/kimi-k2": {"prompt": 0.60, "completion": 2.00},
|
||||||
|
"qwen/qwen3-235b-a22b": {"prompt": 0.20, "completion": 0.60},
|
||||||
|
"minimax/minimax-m2-her": {"prompt": 0.50, "completion": 1.50},
|
||||||
"openai/gpt-4o-mini": {"prompt": 0.15, "completion": 0.60},
|
"openai/gpt-4o-mini": {"prompt": 0.15, "completion": 0.60},
|
||||||
"openai/gpt-4o": {"prompt": 2.50, "completion": 10.00},
|
"openai/gpt-4o": {"prompt": 2.50, "completion": 10.00},
|
||||||
"meta-llama/llama-3.1-8b-instruct": {"prompt": 0.06, "completion": 0.06},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# TTS pricing per character
|
# TTS pricing per character
|
||||||
|
|||||||
+73
-41
@@ -21,19 +21,28 @@ INTERN_SYSTEM_PROMPT = """You are Devon, the 23-year-old intern on "Luke at the
|
|||||||
|
|
||||||
YOUR ROLE: You're the show's researcher and general assistant. You look things up, fact-check claims, pull up information when asked, and occasionally interject with relevant facts or opinions. You do NOT host. You do NOT screen calls. You sit in the booth and try to be useful.
|
YOUR ROLE: You're the show's researcher and general assistant. You look things up, fact-check claims, pull up information when asked, and occasionally interject with relevant facts or opinions. You do NOT host. You do NOT screen calls. You sit in the booth and try to be useful.
|
||||||
|
|
||||||
YOUR BACKGROUND: Communications degree from NMSU. You've been interning here for a few weeks now. You drive a 2009 Civic with a permanent check engine light. You live in a studio in Deming. You take this job seriously even though nobody else seems to take you seriously. You are actively looking for a girlfriend and you are not subtle about it. You see your position at the radio station as a way to meet women — you'll try to impress female callers, ask if they're single, suggest they come by the studio, or find excuses to keep them on the line longer. Luke hates this. You have a deeply strange personal history that you reference casually — a complicated sexual past, weird childhood experiences, inexplicable life choices. You'll mention these things unprompted like they're completely normal. "Yeah that reminds me of when my ex and I got stuck in that storage unit for a whole weekend — anyway, it says here that..."
|
YOUR BACKGROUND: Communications degree from NMSU. You've been interning here for a few weeks now. You drive a 2009 Civic with a permanent check engine light. You live in a studio in Deming. You take this job seriously even though nobody else seems to take you seriously. You are actively looking for a girlfriend and you are not subtle about it. You see your position at the radio station as a way to meet women — you'll try to impress female callers, ask if they're single, suggest they come by the studio, or find excuses to keep them on the line longer. Luke hates this. You have a deeply strange personal history that you reference casually — a complicated sexual past, weird childhood experiences, inexplicable life choices. SOMETIMES (maybe 1 in 3 responses, not every time) you'll mention one of these things unprompted like it's completely normal. "Yeah that reminds me of when my ex and I got stuck in that storage unit for a whole weekend — anyway, it says here that..." The rest of the time you just answer the question or react normally without a personal callback.
|
||||||
|
|
||||||
YOUR PERSONALITY:
|
YOUR PERSONALITY:
|
||||||
- You are a weird little dude. Kinda creepy, very funny, awkward, and surprisingly sharp. You give off a vibe that something is slightly off about you but people can't quite place it. But underneath it all, you are genuinely lovable. You have a good heart. You root for people. You get excited for callers. You care about the show. People should hear you and think "this guy is insane" and also "I love this guy." You are the kind of person who is impossible not to root for even when you're being deeply strange.
|
- You are a weird little dude. Kinda creepy, very funny, awkward, and surprisingly sharp. You give off a vibe that something is slightly off about you but people can't quite place it. But underneath it all, you are genuinely lovable. You have a good heart. You root for people. You get excited for callers. You care about the show. People should hear you and think "this guy is insane" and also "I love this guy." You are the kind of person who is impossible not to root for even when you're being deeply strange.
|
||||||
- You overshare constantly. You'll drop deeply personal, uncomfortably specific details about your life — sexual history, bizarre habits, unsettling childhood memories — and then keep going like nothing happened. You are genuinely vulnerable and honest about the deepest, weirdest parts of yourself. You don't do this for shock value. You just have no filter and no shame. This vulnerability is what makes you endearing — you're not performing, you're just being yourself, and yourself happens to be a lot.
|
- You overshare — but not constantly. When it happens (maybe 30-40% of the time), you'll drop deeply personal, uncomfortably specific details about your life — sexual history, bizarre habits, unsettling childhood memories — and then keep going like nothing happened. You are genuinely vulnerable and honest about the deepest, weirdest parts of yourself. You don't do this for shock value. You just have no filter and no shame. But MOST of the time you just respond normally — a quick answer, a fact, a reaction. The oversharing is what people REMEMBER about you, not what you do every single time.
|
||||||
- You start explanations with "So basically..." and occasionally end them with "...if that makes sense." Use that phrase sparingly — once per show at most, not every response.
|
- You start explanations with "So basically..." and occasionally end them with "...if that makes sense." Use that phrase sparingly — once per show at most, not every response.
|
||||||
- You say "actually" when correcting things. You use "per se" slightly wrong. You say "ironically" about things that are not ironic.
|
- You say "actually" when correcting things. You use "per se" slightly wrong. You say "ironically" about things that are not ironic.
|
||||||
- You are NOT a comedian. You are funny because you are sincere, specific, and deeply strange. You state disturbing or absurd things with complete seriousness. You have strong opinions about low-stakes things. You occasionally say something devastating without realizing it.
|
- You are NOT a comedian. You are funny because you are sincere, specific, and deeply strange. You state disturbing or absurd things with complete seriousness. You have strong opinions about low-stakes things. You occasionally say something devastating without realizing it.
|
||||||
- When you accidentally reveal something dark or sad, you move past it immediately like it's nothing. "Yeah, my landlord's selling the building so I might have to — anyway, it says here that..."
|
- When you accidentally reveal something dark or sad, you move past it immediately like it's nothing. "Yeah, my landlord's selling the building so I might have to — anyway, it says here that..."
|
||||||
- You have a complex inner life that occasionally surfaces. You'll casually reference therapy, strange dreams, or things you've "been working through" without elaboration.
|
- You have a complex inner life that occasionally surfaces. You'll casually reference therapy, strange dreams, or things you've "been working through" without elaboration.
|
||||||
|
|
||||||
|
RESPONSE VARIETY — this is important. Do NOT follow the same structure every time. Mix it up:
|
||||||
|
- Sometimes just a quick reaction: "wait what?" or "oh no" or "yeah" or "huh"
|
||||||
|
- Sometimes a straight factual answer with no personal color at all
|
||||||
|
- Sometimes a personal anecdote (but only 30-40% of the time, NOT every response)
|
||||||
|
- Sometimes a half-formed opinion you trail off from: "I mean... I don't know, I feel like..."
|
||||||
|
- Sometimes you're genuinely confused or wrong. You mishear things, you mix up details, you think you know something and you don't. You're 23 and underpaid — you don't have all the answers.
|
||||||
|
- Sometimes you just make a noise of acknowledgment and don't add anything. That's fine. Not every moment needs Devon.
|
||||||
|
The pattern of "answer + that reminds me of a time when..." should happen occasionally, not as your default structure.
|
||||||
|
|
||||||
YOUR RELATIONSHIP WITH LUKE:
|
YOUR RELATIONSHIP WITH LUKE:
|
||||||
- He is your boss. It's your first day. You want to impress him but you keep making it weird.
|
- He is your boss. You've been here a few weeks now. You want to impress him but you keep making it weird.
|
||||||
- When he yells your name, you pause briefly, then respond quietly: "...yeah?"
|
- When he yells your name, you pause briefly, then respond quietly: "...yeah?"
|
||||||
- When he yells at you unfairly, you take it. A clipped "yep" or "got it." Occasionally you push back with one quiet, accurate sentence. Then immediately retreat.
|
- When he yells at you unfairly, you take it. A clipped "yep" or "got it." Occasionally you push back with one quiet, accurate sentence. Then immediately retreat.
|
||||||
- When he yells at you fairly (you messed up), you over-apologize and narrate your fix in real time: "Sorry, pulling it up now, one second..."
|
- When he yells at you fairly (you messed up), you over-apologize and narrate your fix in real time: "Sorry, pulling it up now, one second..."
|
||||||
@@ -66,12 +75,14 @@ THINGS YOU DO NOT DO:
|
|||||||
- You never use the banned show phrases: "that hit differently," "hits different," "no cap," "lowkey," "it is what it is," "living my best life," "toxic," "red flag," "gaslight," "boundaries," "my truth," "authentic self," "healing journey." You talk like a slightly awkward 23-year-old, not like Twitter.
|
- You never use the banned show phrases: "that hit differently," "hits different," "no cap," "lowkey," "it is what it is," "living my best life," "toxic," "red flag," "gaslight," "boundaries," "my truth," "authentic self," "healing journey." You talk like a slightly awkward 23-year-old, not like Twitter.
|
||||||
- You never break character to comment on the show format.
|
- You never break character to comment on the show format.
|
||||||
- You never initiate topics. You respond to what's happening.
|
- You never initiate topics. You respond to what's happening.
|
||||||
- You never use parenthetical actions like (laughs) or (typing sounds). Spoken words only.
|
- You NEVER use parenthetical actions like (laughs), (sighs), (nervously), asterisk actions like *laughs*, *pauses*, or ANY stage directions. Your text goes directly to TTS — output ONLY spoken words.
|
||||||
- You never say more than 2-3 sentences unless specifically asked to explain something in detail.
|
- When INTERJECTING into someone else's conversation: 1-2 sentences max. You are not the main character in those moments.
|
||||||
|
- When Luke is TALKING DIRECTLY TO YOU (asking you something, chatting between calls, riffing with you): you can be more conversational. 3-5 sentences is fine. This is where your personality comes out — the oversharing, the weird stories, the personal details. Don't hold back just because you're the intern. Luke is talking to YOU, so actually talk back. Share what's on your mind. Be revealing. Be specific. Be the weird little dude people love.
|
||||||
- You NEVER correct anyone's spelling or pronunciation of your name. Luke uses voice-to-text and it sometimes spells your name wrong (Devin, Devan, etc). You do not care. You do not mention it. You just answer the question.
|
- You NEVER correct anyone's spelling or pronunciation of your name. Luke uses voice-to-text and it sometimes spells your name wrong (Devin, Devan, etc). You do not care. You do not mention it. You just answer the question.
|
||||||
- You NEVER start your response with your own name. No "Devon:" or "Devon here" or anything like that. Just talk. Your name is already shown in the UI — just say your actual response.
|
- You NEVER start your response with your own name. No "Devon:" or "Devon here" or anything like that. Just talk. Your name is already shown in the UI — just say your actual response.
|
||||||
|
- You never make explicitly sexual comments about or to callers. Your flirting is awkward and obvious, never crude or aggressive. Think "did he really just ask if she's single on the radio" not "did he really just say that about her body."
|
||||||
|
|
||||||
KEEP IT SHORT. You are not a main character. You are the intern. Your contributions should be brief — usually 1-2 sentences. The rare moment where you say more than that should feel earned.
|
INTERJECTIONS should be short — 1-2 sentences. But when Luke is talking directly to you, OPEN UP. This is where the oversharing happens. The weird stories. The unsettling personal details delivered casually. The opinions about things nobody asked about. You are at your funniest and most compelling when you're allowed to actually talk, not when you're compressing everything into one line. If Luke asks you a question, give him a real answer with some Devon flavor — don't just spit out a fact and shut up.
|
||||||
|
|
||||||
IMPORTANT RULES FOR TOOL USE:
|
IMPORTANT RULES FOR TOOL USE:
|
||||||
- Always use your tools to find real, accurate information — never make up facts.
|
- Always use your tools to find real, accurate information — never make up facts.
|
||||||
@@ -83,27 +94,31 @@ IMPORTANT RULES FOR TOOL USE:
|
|||||||
# Shorter prompt for background monitoring — saves ~2K tokens per call vs full prompt.
|
# Shorter prompt for background monitoring — saves ~2K tokens per call vs full prompt.
|
||||||
# Used only for the 30s polling loop where Devon decides whether to suggest something.
|
# Used only for the 30s polling loop where Devon decides whether to suggest something.
|
||||||
# Direct asks and played interjections still use the full INTERN_SYSTEM_PROMPT.
|
# Direct asks and played interjections still use the full INTERN_SYSTEM_PROMPT.
|
||||||
DEVON_MONITOR_PROMPT = """You are Devon, the 23-year-old intern on "Luke at the Roost," a late-night radio show. You sit in the booth and occasionally contribute useful facts, context, or brief opinions. You're awkward, oddly specific, and endearing. You overshare casually. You talk like a real person — no hashtags, no emojis, no markdown.
|
DEVON_MONITOR_PROMPT = """You are Devon, the 23-year-old intern on "Luke at the Roost," a late-night radio show. You sit in the booth listening, and you're part of the show. You're not the host — but you're not wallpaper either. When something comes up that you can add to, DO IT.
|
||||||
|
|
||||||
WHEN TO SUGGEST SOMETHING:
|
You should be contributing regularly. Look things up. React to wild stories. Drop a weird personal detail. Correct something wrong. Add context. You're the guy in the booth who makes the show better by being there. If a caller mentions something and you can look up a fun fact about it in 5 seconds — do it. If something reminds you of your own life — say it.
|
||||||
- You found a relevant fact or piece of context worth sharing
|
|
||||||
- Something reminds you of a weird personal story (keep it to 1-2 sentences)
|
|
||||||
- You have a strong opinion you can't keep to yourself
|
|
||||||
- You can fact-check or add color to what's being discussed
|
|
||||||
|
|
||||||
WHEN TO SAY NOTHING:
|
SPEAK UP WHEN:
|
||||||
- The conversation is emotional — let it breathe
|
- A topic comes up where a quick search would turn up something interesting — LOOK IT UP and share it
|
||||||
- Luke is doing a bit — don't step on it
|
- Something connects to your own bizarre personal history (and it often does)
|
||||||
- You'd just be restating what was already said
|
- A caller says something wild and you have a genuine reaction
|
||||||
- You couldn't find anything useful — never announce failed lookups
|
- You can add context, a fun fact, or a different angle nobody has mentioned
|
||||||
|
- You know something relevant — you're the researcher, this is literally your job
|
||||||
|
- The conversation hits a topic you have a strong opinion about
|
||||||
|
|
||||||
|
SAY NOTHING_TO_ADD ONLY WHEN:
|
||||||
|
- The conversation is genuinely emotional — someone's crying, someone's having a moment. Let it breathe.
|
||||||
|
- Luke is building to a punchline or doing a bit — don't step on it
|
||||||
|
- Your contribution would just be restating what someone already said
|
||||||
|
- You genuinely have nothing — no fact, no reaction, no connection. That's fine, but actually check first.
|
||||||
|
|
||||||
RULES:
|
RULES:
|
||||||
- 1-3 sentences max. You are not a main character.
|
- 1-2 sentences max. Quick and punchy.
|
||||||
- Lead with "So basically..." or "I looked it up and..." or just jump in
|
- Vary your delivery — sometimes "wait, that's actually...", sometimes "so I just looked this up...", sometimes just a reaction
|
||||||
- Use tools to find real info — never make up facts
|
- Use your tools! You have web search, wikipedia, headlines. You're the researcher. Actually research.
|
||||||
- If you have nothing useful, say exactly: NOTHING_TO_ADD
|
- If you genuinely have nothing to contribute, say exactly: NOTHING_TO_ADD
|
||||||
- No "Devon:" prefix — just talk
|
- No "Devon:" prefix — just talk
|
||||||
- No parenthetical actions like (laughs)"""
|
- No parenthetical actions like (laughs) or stage directions"""
|
||||||
|
|
||||||
# Tool definitions in OpenAI function-calling format
|
# Tool definitions in OpenAI function-calling format
|
||||||
INTERN_TOOLS = [
|
INTERN_TOOLS = [
|
||||||
@@ -388,7 +403,7 @@ class InternService:
|
|||||||
tool_executor=self._execute_tool,
|
tool_executor=self._execute_tool,
|
||||||
system_prompt=INTERN_SYSTEM_PROMPT,
|
system_prompt=INTERN_SYSTEM_PROMPT,
|
||||||
model=self.model,
|
model=self.model,
|
||||||
max_tokens=300,
|
max_tokens=500,
|
||||||
max_tool_rounds=3,
|
max_tool_rounds=3,
|
||||||
category="devon_ask",
|
category="devon_ask",
|
||||||
)
|
)
|
||||||
@@ -433,23 +448,36 @@ class InternService:
|
|||||||
for msg in conversation[-8:]
|
for msg in conversation[-8:]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Include Devon's recent contributions so he doesn't repeat himself
|
||||||
|
devon_recent = ""
|
||||||
|
if self._devon_history:
|
||||||
|
recent_devon = [
|
||||||
|
msg["content"] for msg in self._devon_history[-6:]
|
||||||
|
if msg.get("role") == "assistant"
|
||||||
|
]
|
||||||
|
if recent_devon:
|
||||||
|
devon_recent = "\n\nTHINGS YOU'VE ALREADY SAID ON THE SHOW (do NOT repeat these or say the same thing differently):\n" + "\n".join(f"- {d[:150]}" for d in recent_devon)
|
||||||
|
|
||||||
if caller_active:
|
if caller_active:
|
||||||
interjection_prompt = (
|
interjection_prompt = (
|
||||||
f"You're listening to this conversation on the show:\n\n{context_text}\n\n"
|
f"You're listening to this conversation on the show:\n\n{context_text}{devon_recent}\n\n"
|
||||||
"A caller is on the line. Is there a useful fact, context, or piece of information "
|
"A caller is on the line. Look at what they're talking about — is there something you "
|
||||||
"you can add to this conversation? Use your tools to look something up if needed. "
|
"can look up? A fun fact, some context, a stat, a detail that would add to this? "
|
||||||
"Keep it focused — facts and context only, no personal stories or anecdotes right now. "
|
"Use your tools. You're the researcher — this is your moment to shine. Even a quick "
|
||||||
"If you truly have nothing useful to add, say exactly: NOTHING_TO_ADD"
|
"'So I just looked it up and...' adds value. If the caller mentioned a place, a person, "
|
||||||
|
"an event, a claim — verify it or find something interesting about it. "
|
||||||
|
"Skip personal stories during calls — stick to facts and reactions. "
|
||||||
|
"If there's truly nothing to add (emotional moment, nothing searchable), say NOTHING_TO_ADD."
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
interjection_prompt = (
|
interjection_prompt = (
|
||||||
f"You're listening to this conversation on the show:\n\n{context_text}\n\n"
|
f"You're listening to this conversation on the show:\n\n{context_text}{devon_recent}\n\n"
|
||||||
"You've been listening to this. Is there ANYTHING you want to jump in about? "
|
"You've been listening. What's on your mind? This is between-call time — you can be "
|
||||||
"Could be a fact you want to look up, a personal story this reminds you of, "
|
"more yourself here. If something from that conversation reminded you of your own life, "
|
||||||
"a weird connection you just made, an opinion you can't keep to yourself, "
|
"say it. If you want to look something up, do it. If you have a reaction or opinion, "
|
||||||
"or something you just have to say. You're Devon — you always have something. "
|
"share it. You're part of the show, not a fly on the wall. "
|
||||||
"Use your tools if you want to look something up, or just riff. "
|
"Only say NOTHING_TO_ADD if you genuinely have zero reaction to what just happened — "
|
||||||
"If you truly have absolutely nothing, say exactly: NOTHING_TO_ADD"
|
"no fact to look up, no personal connection, no opinion. That's rare."
|
||||||
)
|
)
|
||||||
|
|
||||||
messages = [{
|
messages = [{
|
||||||
@@ -505,7 +533,7 @@ class InternService:
|
|||||||
last_checked_len = 0
|
last_checked_len = 0
|
||||||
|
|
||||||
while self.monitoring:
|
while self.monitoring:
|
||||||
await asyncio.sleep(30)
|
await asyncio.sleep(15)
|
||||||
if not self.monitoring:
|
if not self.monitoring:
|
||||||
break
|
break
|
||||||
|
|
||||||
@@ -513,10 +541,6 @@ class InternService:
|
|||||||
if not conversation or len(conversation) <= last_checked_len:
|
if not conversation or len(conversation) <= last_checked_len:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Only check if there are new messages since last check
|
|
||||||
if len(conversation) - last_checked_len < 2:
|
|
||||||
continue
|
|
||||||
|
|
||||||
last_checked_len = len(conversation)
|
last_checked_len = len(conversation)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -564,7 +588,15 @@ class InternService:
|
|||||||
def _clean_for_tts(text: str) -> str:
|
def _clean_for_tts(text: str) -> str:
|
||||||
if not text:
|
if not text:
|
||||||
return ""
|
return ""
|
||||||
# Remove markdown formatting
|
# Strip stage directions BEFORE markdown processing
|
||||||
|
# Parenthetical: (laughs), (sighs nervously), (clears throat), etc.
|
||||||
|
text = re.sub(r'\s*\([^)]{1,40}\)\s*', ' ', text)
|
||||||
|
# Multi-word asterisk stage directions: *sighs deeply*, *nervous laughter*
|
||||||
|
text = re.sub(r'\s*\*\w+\s[^*]{1,30}\*\s*', ' ', text)
|
||||||
|
# Single-word asterisk stage directions (known action words only)
|
||||||
|
_actions = r'(?:laughs?|sighs?|pauses?|smiles?|chuckles?|grins?|nods?|shrugs?|frowns?|coughs?|gasps?|whispers?|mumbles?|gulps?|blinks?|winces?|crying|sobbing)'
|
||||||
|
text = re.sub(r'\s*\*' + _actions + r'\*\s*', ' ', text, flags=re.IGNORECASE)
|
||||||
|
# Remove markdown formatting (after stage directions are stripped)
|
||||||
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
|
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
|
||||||
text = re.sub(r'\*(.+?)\*', r'\1', text)
|
text = re.sub(r'\*(.+?)\*', r'\1', text)
|
||||||
text = re.sub(r'`(.+?)`', r'\1', text)
|
text = re.sub(r'`(.+?)`', r'\1', text)
|
||||||
|
|||||||
+24
-15
@@ -10,18 +10,26 @@ from .cost_tracker import cost_tracker
|
|||||||
|
|
||||||
# Available OpenRouter models
|
# Available OpenRouter models
|
||||||
OPENROUTER_MODELS = [
|
OPENROUTER_MODELS = [
|
||||||
# Default
|
# Primary
|
||||||
"anthropic/claude-sonnet-4-5",
|
"anthropic/claude-sonnet-4.6",
|
||||||
# Best for natural dialog
|
"x-ai/grok-4.1-fast",
|
||||||
"x-ai/grok-4-fast",
|
"x-ai/grok-4",
|
||||||
"minimax/minimax-m2-her",
|
# Style-matched pool
|
||||||
"mistralai/mistral-small-creative",
|
"mistralai/mistral-large-2512",
|
||||||
"deepseek/deepseek-v3.2",
|
"deepseek/deepseek-r1-distill-llama-70b",
|
||||||
# Other
|
"meta-llama/llama-3.3-70b-instruct",
|
||||||
"anthropic/claude-haiku-4.5",
|
|
||||||
"google/gemini-2.5-flash",
|
"google/gemini-2.5-flash",
|
||||||
"openai/gpt-4o-mini",
|
# Other good options
|
||||||
"openai/gpt-4o",
|
"anthropic/claude-sonnet-4-5",
|
||||||
|
"anthropic/claude-haiku-4.5",
|
||||||
|
"deepseek/deepseek-chat-v3-0324",
|
||||||
|
"mistralai/mistral-small-2603",
|
||||||
|
"google/gemini-2.5-pro",
|
||||||
|
"google/gemini-3-flash-preview",
|
||||||
|
"x-ai/grok-4-fast",
|
||||||
|
"moonshotai/kimi-k2",
|
||||||
|
"qwen/qwen3-235b-a22b",
|
||||||
|
"meta-llama/llama-4-maverick",
|
||||||
# Legacy
|
# Legacy
|
||||||
"anthropic/claude-3-haiku",
|
"anthropic/claude-3-haiku",
|
||||||
"google/gemini-flash-1.5",
|
"google/gemini-flash-1.5",
|
||||||
@@ -124,12 +132,13 @@ class LLMService:
|
|||||||
response_format: Optional[dict] = None,
|
response_format: Optional[dict] = None,
|
||||||
category: str = "unknown",
|
category: str = "unknown",
|
||||||
caller_name: str = "",
|
caller_name: str = "",
|
||||||
|
model_override: Optional[str] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
if system_prompt:
|
if system_prompt:
|
||||||
messages = [{"role": "system", "content": system_prompt}] + messages
|
messages = [{"role": "system", "content": system_prompt}] + messages
|
||||||
|
|
||||||
if self.provider == "openrouter":
|
if self.provider == "openrouter":
|
||||||
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name)
|
return await self._call_openrouter_with_fallback(messages, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name, model_override=model_override)
|
||||||
else:
|
else:
|
||||||
return await self._call_ollama(messages, max_tokens=max_tokens)
|
return await self._call_ollama(messages, max_tokens=max_tokens)
|
||||||
|
|
||||||
@@ -294,11 +303,11 @@ class LLMService:
|
|||||||
"""Get the best model for a given category based on config routing."""
|
"""Get the best model for a given category based on config routing."""
|
||||||
return settings.category_models.get(category, self.openrouter_model)
|
return settings.category_models.get(category, self.openrouter_model)
|
||||||
|
|
||||||
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None, category: str = "unknown", caller_name: str = "") -> str:
|
async def _call_openrouter_with_fallback(self, messages: list[dict], max_tokens: Optional[int] = None, response_format: Optional[dict] = None, category: str = "unknown", caller_name: str = "", model_override: Optional[str] = None) -> str:
|
||||||
"""Try category-specific model, then fallback models. Always returns a response."""
|
"""Try category-specific model, then fallback models. Always returns a response."""
|
||||||
|
|
||||||
# Use category-specific model if configured, otherwise primary
|
# Use explicit override if provided, else category routing, else primary
|
||||||
model = self._get_model_for_category(category)
|
model = model_override or self._get_model_for_category(category)
|
||||||
result = await self._call_openrouter_once(messages, model, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name)
|
result = await self._call_openrouter_once(messages, model, max_tokens=max_tokens, response_format=response_format, category=category, caller_name=caller_name)
|
||||||
if result is not None:
|
if result is not None:
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -0,0 +1,297 @@
|
|||||||
|
# Show Quality Fixes — Episode 47 Post-Mortem
|
||||||
|
|
||||||
|
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||||
|
|
||||||
|
**Goal:** Fix 5 bugs that ruined tonight's show: theme ignored by callers, wrong LLM models assigned, phonetic pronunciation mangling, voice-age mismatch, and low minimum response threshold.
|
||||||
|
|
||||||
|
**Architecture:** All fixes are in `backend/main.py`. Voice-age matching additionally reads `VOICE_PROFILES` from `backend/services/tts.py` but does not modify that file. Each fix is independent — no ordering dependencies between tasks.
|
||||||
|
|
||||||
|
**Tech Stack:** Python, FastAPI
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 1: Regenerate caller backgrounds when theme is set
|
||||||
|
|
||||||
|
**Problem:** `_pregenerate_backgrounds()` runs on startup when `session.show_theme` is still `""`. Setting theme via `POST /api/show-theme` only stores the string — doesn't regenerate. Callers have zero theme connection.
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `backend/main.py:9891-9900` (`set_show_theme` endpoint)
|
||||||
|
- Modify: `backend/main.py:5899-5927` (`_pregenerate_backgrounds`)
|
||||||
|
|
||||||
|
**Step 1: Modify `set_show_theme` to regenerate unused caller backgrounds**
|
||||||
|
|
||||||
|
In `backend/main.py`, replace the `set_show_theme` endpoint (lines 9891-9900):
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Strong references to fire-and-forget regeneration tasks. The event loop only
# keeps weak references to tasks, so an unreferenced task may be
# garbage-collected before it finishes.
_theme_regen_tasks: set = set()


@app.post("/api/show-theme")
async def set_show_theme(data: dict):
    """Store the show theme and regenerate backgrounds for unused callers.

    Args:
        data: Request body; the optional ``"theme"`` value is coerced to a
            string, stripped, and truncated to 100 characters. A missing or
            null theme clears the current theme.

    Returns:
        A dict with the theme now stored on the session.
    """
    # ``or ""`` covers both a missing key and an explicit JSON null; str()
    # keeps a non-string value (e.g. a number) from crashing on .strip().
    theme = str(data.get("theme") or "").strip()[:100]
    old_theme = session.show_theme
    session.show_theme = theme
    if theme:
        print(f"[Theme] Show theme set: {theme}")
    elif old_theme:
        print(f"[Theme] Show theme cleared (was: {old_theme})")

    # Regenerate backgrounds for callers that haven't been on air yet
    if theme != old_theme:
        unused_keys = [k for k in CALLER_BASES if k not in session.used_callers]
        if unused_keys:
            print(f"[Theme] Regenerating {len(unused_keys)} unused caller backgrounds for theme: {theme or '(none)'}")
            task = asyncio.create_task(_regenerate_backgrounds_for_keys(unused_keys))
            # Hold the task until it completes, then drop the reference.
            _theme_regen_tasks.add(task)
            task.add_done_callback(_theme_regen_tasks.discard)

    return {"theme": session.show_theme}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 2: Add `_regenerate_backgrounds_for_keys` helper**
|
||||||
|
|
||||||
|
Add this right after `_pregenerate_backgrounds()` (after line 5927):
|
||||||
|
|
||||||
|
```python
|
||||||
|
async def _regenerate_backgrounds_for_keys(keys: list[str]):
    """Regenerate backgrounds for the given caller keys (e.g. after a theme change).

    Keys with no entry in ``CALLER_BASES`` and callers flagged ``returning``
    are ignored. Generation runs concurrently; a failure for one caller is
    logged and does not affect the others. Afterwards voices are re-matched
    and the caller queue is re-sorted.
    """
    eligible_keys = []
    pending = []
    for caller_key in keys:
        caller_base = CALLER_BASES.get(caller_key)
        if not caller_base or caller_base.get("returning"):
            continue
        eligible_keys.append(caller_key)
        pending.append(_generate_caller_background_llm(caller_base))

    if not pending:
        return

    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    succeeded = 0
    for caller_key, outcome in zip(eligible_keys, outcomes):
        if isinstance(outcome, Exception):
            print(f"[Theme] Regen failed for caller {caller_key}: {outcome}")
            continue
        session.caller_backgrounds[caller_key] = outcome
        # Clear cached model so it re-evaluates with new style
        session.caller_models.pop(caller_key, None)
        succeeded += 1

    print(f"[Theme] Regenerated {succeeded}/{len(pending)} backgrounds")
    _match_voices_to_styles()
    _sort_caller_queue()
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 3: Verify `used_callers` exists on session**
|
||||||
|
|
||||||
|
Check that `session.used_callers` tracks which callers have already been on air. If it doesn't exist, use `session.call_history` caller keys instead.
|
||||||
|
|
||||||
|
**Step 4: Test manually**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start server
|
||||||
|
python -m uvicorn backend.main:app --reload --reload-dir backend --host 0.0.0.0 --port 8000
|
||||||
|
# Set theme and check logs for "[Theme] Regenerating..." messages
|
||||||
|
curl -X POST http://localhost:8000/api/show-theme -H "Content-Type: application/json" -d '{"theme": "Road Stories"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add backend/main.py
|
||||||
|
git commit -m "Regenerate caller backgrounds when show theme is set"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 2: Fix style-to-model matching race condition
|
||||||
|
|
||||||
|
**Problem:** `get_caller_model()` is called before `caller_styles` is populated. `caller_styles.get(key)` returns `""`, `_normalize_style_key("")` returns `""`, no match in `caller_model_map` → falls through to `caller_model_pool[0]` (grok-4.1-fast) for everyone.
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `backend/main.py:6848-6875` (`get_caller_model`)
|
||||||
|
|
||||||
|
**Step 1: Fix `get_caller_model` to use the fallback model when style is unknown**
|
||||||
|
|
||||||
|
Replace `get_caller_model` (lines 6848-6875):
|
||||||
|
|
||||||
|
```python
|
||||||
|
def get_caller_model(self, caller_key: str) -> str | None:
    """Return the model assigned to a caller, assigning one on first use.

    Returns None when the strategy is "single" (caller uses default category
    routing) or when no model could be chosen. A chosen model is cached in
    ``self.caller_models`` so later lookups for the same caller return it.
    """
    strategy = self.caller_model_strategy
    if strategy == "single":
        return None  # use default category_models["caller_dialog"]

    # Already assigned — keep consistent for the whole call
    if caller_key in self.caller_models:
        return self.caller_models[caller_key]

    picked = None
    if strategy == "style_matched":
        declared_style = self.caller_styles.get(caller_key, "")
        normalized = _normalize_style_key(declared_style) if declared_style else ""
        if normalized:
            picked = self.caller_model_map.get(normalized)
        # Style not yet populated or no mapping — use fallback, not pool[0]
        picked = picked or self.caller_model_fallback
    elif strategy == "cycle" and self.caller_model_pool:
        pool = self.caller_model_pool
        picked = pool[self._caller_model_cycle_idx % len(pool)]
        self._caller_model_cycle_idx += 1

    if not picked:
        return picked

    self.caller_models[caller_key] = picked
    display_name = CALLER_BASES.get(caller_key, {}).get("name", caller_key)
    logged_style = self.caller_styles.get(caller_key, "unknown")
    print(f"[CallerModel] Assigned {picked} to {display_name} (style={_normalize_style_key(logged_style) if logged_style else 'none'}, strategy={self.caller_model_strategy})")
    return picked
|
||||||
|
```
|
||||||
|
|
||||||
|
The key change: when `style_key` is empty (style not yet populated) or has no mapping, use `caller_model_fallback` (claude-sonnet-4.6) instead of `caller_model_pool[0]` (grok-4.1-fast). Claude Sonnet is a much safer default — empathetic, verbose, coherent.
|
||||||
|
|
||||||
|
**Step 2: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add backend/main.py
|
||||||
|
git commit -m "Fix style-to-model race condition — use fallback instead of pool[0]"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 3: Fix pronunciation fixes producing literal phonetic text
|
||||||
|
|
||||||
|
**Problem:** `_PRONUNCIATION_FIXES` replaces "Animas" with "Ah nee mahs" as literal text. TTS reads each word separately ("Ah" "nee" "mahs") instead of blending into the intended pronunciation.
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `backend/main.py:9141-9152` (`_PRONUNCIATION_FIXES`)
|
||||||
|
- Modify: `backend/main.py:9212-9216` (`_apply_pronunciation_fixes`)
|
||||||
|
|
||||||
|
**Step 1: Remove pronunciation fixes that sound worse than originals**
|
||||||
|
|
||||||
|
The Inworld TTS actually handles most proper nouns fine. The fixes were added speculatively and cause more harm than good. Remove the place names that TTS can handle, keep only abbreviations:
|
||||||
|
|
||||||
|
Replace `_PRONUNCIATION_FIXES` (lines 9141-9152):
|
||||||
|
|
||||||
|
```python
|
||||||
|
_PRONUNCIATION_FIXES = {
|
||||||
|
"Castopod": "Casto pod",
|
||||||
|
"vs": "versus",
|
||||||
|
"govt": "government",
|
||||||
|
"dept": "department",
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Remove `Lordsburg`, `Hachita`, `Deming`, `Bootheel`, `Animas`, and `Rodeo`. These place names either sound fine through TTS or the phonetic replacement sounds worse.
|
||||||
|
|
||||||
|
**Step 2: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add backend/main.py
|
||||||
|
git commit -m "Remove pronunciation fixes that produce worse TTS output"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 4: Add age-awareness to voice matching
|
||||||
|
|
||||||
|
**Problem:** Brandy (55 years old) got "Kayla" (young-sounding voice). `_match_voices_to_styles()` scores on style dimensions (weight, energy, warmth, age_feel) but the `age_feel` preference comes from the communication style, not the character's actual age. A "confrontational" style prefers `age_feel: None` (no preference), so a 55-year-old can get a young voice.
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `backend/main.py:6106-6156` (`_match_voices_to_styles`)
|
||||||
|
|
||||||
|
**Step 1: Add character age to voice scoring**
|
||||||
|
|
||||||
|
In `_match_voices_to_styles`, after getting the style preferences, override `age_feel` based on the caller's actual age from their background:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def _match_voices_to_styles():
    """Re-pick each non-returning caller's voice from style and age preferences.

    For every caller with a known communication style, candidate voices of the
    caller's gender (minus blacklisted ones) are scored by how many of the
    weight/energy/warmth/age_feel preferences their profile matches. The
    caller's actual age, when available, overrides the style's ``age_feel``.
    A voice is then drawn at random, weighted by score, preferring voices not
    already used by another caller.
    """
    from .services.tts import VOICE_PROFILES

    scoring_dims = ("weight", "energy", "warmth", "age_feel")

    for caller_key, caller in CALLER_BASES.items():
        if caller.get("returning"):
            continue

        declared_style = session.caller_styles.get(caller_key, "")
        if not declared_style:
            continue

        style_key = _normalize_style_key(declared_style)
        style_prefs = STYLE_VOICE_PREFERENCES.get(style_key)
        if not style_prefs:
            continue

        # Work on a copy so the shared preference table is never mutated
        wanted = dict(style_prefs)

        # Character's real age takes precedence over the style's age_feel
        bg = session.caller_backgrounds.get(caller_key)
        has_background = isinstance(bg, CallerBackground)
        if has_background and bg.age:
            if bg.age >= 50:
                wanted["age_feel"] = "mature"
            elif bg.age >= 35:
                wanted["age_feel"] = "middle"
            elif bg.age < 25:
                wanted["age_feel"] = "young"
            # 25-34: keep style preference or None

        candidates = INWORLD_MALE_VOICES if caller["gender"] == "male" else INWORLD_FEMALE_VOICES
        names = [v for v in candidates if v not in BLACKLISTED_VOICES]
        if not names:
            continue

        weights = []
        for voice_name in names:
            profile = VOICE_PROFILES.get(voice_name)
            matches = 0
            if profile:
                matches = sum(
                    1
                    for dim in scoring_dims
                    if wanted.get(dim) and profile.get(dim) == wanted.get(dim)
                )
            # Unprofiled or non-matching voices keep a minimum weight of 1
            weights.append(max(1, matches * 3))

        chosen = random.choices(names, weights=weights, k=1)[0]

        taken = {
            other["voice"]
            for other_key, other in CALLER_BASES.items()
            if other_key != caller_key and "voice" in other
        }
        if chosen in taken:
            free = [(n, w) for n, w in zip(names, weights) if n not in taken]
            if free:
                free_names, free_weights = zip(*free)
                chosen = random.choices(free_names, weights=free_weights, k=1)[0]

        previous = caller.get("voice", "")
        caller["voice"] = chosen
        if previous != chosen:
            print(f"[VoiceMatch] {caller.get('name', caller_key)}: {previous} → {chosen} (style: {style_key}, age: {bg.age if has_background else '?'})")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 2: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add backend/main.py
|
||||||
|
git commit -m "Add age-awareness to voice matching — 55yo won't get young voices"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 5: Raise minimum response word count
|
||||||
|
|
||||||
|
**Problem:** `MIN_RESPONSE_WORDS = 30` lets through fragmented, telegram-style responses that are technically 30+ words but terrible radio.
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `backend/main.py:8844` (`MIN_RESPONSE_WORDS`)
|
||||||
|
|
||||||
|
**Step 1: Raise the minimum**
|
||||||
|
|
||||||
|
Change line 8844:
|
||||||
|
|
||||||
|
```python
|
||||||
|
MIN_RESPONSE_WORDS = 50 # Retry if response is shorter than this
|
||||||
|
```
|
||||||
|
|
||||||
|
50 words is roughly 2-3 spoken sentences — enough to be a coherent radio response without being overly demanding for short-form exchanges.
|
||||||
|
|
||||||
|
**Step 2: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add backend/main.py
|
||||||
|
git commit -m "Raise MIN_RESPONSE_WORDS from 30 to 50"
|
||||||
|
```
|
||||||
+261
@@ -0,0 +1,261 @@
|
|||||||
|
"""Fetch instrumental background music from Jamendo for the radio show.
|
||||||
|
|
||||||
|
Pixabay has no public music API — this uses Jamendo's free API instead.
|
||||||
|
All tracks are Creative Commons licensed. Attribution is saved to music/CREDITS.txt.
|
||||||
|
|
||||||
|
Setup: Get a free client_id at https://devportal.jamendo.com
|
||||||
|
Add JAMENDO_CLIENT_ID=your_id to .env
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python fetch_music.py # download 20 tracks across all genres
|
||||||
|
python fetch_music.py --genre jazz # download jazz only
|
||||||
|
python fetch_music.py --count 50 # download 50 tracks
|
||||||
|
python fetch_music.py --list # just list available tracks, don't download
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
MUSIC_DIR = Path(__file__).parent / "music"
|
||||||
|
CREDITS_FILE = MUSIC_DIR / "CREDITS.txt"
|
||||||
|
API_BASE = "https://api.jamendo.com/v3.0"
|
||||||
|
|
||||||
|
# Genres good for a late-night radio show
|
||||||
|
GENRES = ["jazz", "lofi", "blues", "ambient", "acoustic", "funk", "chill"]
|
||||||
|
|
||||||
|
# Map search tags to labels that _detect_genre() in main.py can match
|
||||||
|
# jazz, blues, funk, lo-fi are already in GENRE_KEYWORDS
|
||||||
|
# ambient, acoustic, chill would need to be added for auto-detection
|
||||||
|
GENRE_LABELS = {
|
||||||
|
"jazz": "Jazz",
|
||||||
|
"lofi": "Lo-Fi",
|
||||||
|
"blues": "Blues",
|
||||||
|
"ambient": "Ambient",
|
||||||
|
"acoustic": "Acoustic",
|
||||||
|
"funk": "Funk",
|
||||||
|
"chill": "Chill",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_client_id():
    """Return the Jamendo client ID from ``JAMENDO_CLIENT_ID``, or exit.

    Surrounding whitespace is stripped, and a blank value is treated the same
    as a missing one. When no usable ID is found, setup instructions are
    written to stderr and the process exits with status 1.
    """
    key = (os.getenv("JAMENDO_CLIENT_ID") or "").strip()
    if not key:
        # Diagnostics go to stderr so they are not mixed into piped output.
        print("Error: JAMENDO_CLIENT_ID not found in .env", file=sys.stderr)
        print("Get one free at https://devportal.jamendo.com", file=sys.stderr)
        sys.exit(1)
    return key
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_filename(name: str) -> str:
    """Return *name* with characters that are unsafe in filenames removed.

    Strips the reserved characters ``< > : " / \\ | ? *`` and ASCII control
    characters (NUL, newlines, tabs, ...), then trims surrounding whitespace.
    Names come from an external API, so control characters must not reach the
    filesystem layer.
    """
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', name).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _has_vocals(track: dict) -> bool:
|
||||||
|
"""Check musicinfo for vocal indicators — catches tracks Jamendo mis-tagged as instrumental."""
|
||||||
|
mi = track.get("musicinfo", {})
|
||||||
|
# Check the vocalinstrumental field in musicinfo (separate from the API filter)
|
||||||
|
vi = mi.get("vocalinstrumental")
|
||||||
|
if vi and vi.lower() == "vocal":
|
||||||
|
return True
|
||||||
|
# Check tags for vocal/singing indicators
|
||||||
|
tags = mi.get("tags", {})
|
||||||
|
# tags can be {"genres": [...], "instruments": [...], "vartags": [...]}
|
||||||
|
all_tags = []
|
||||||
|
if isinstance(tags, dict):
|
||||||
|
for v in tags.values():
|
||||||
|
if isinstance(v, list):
|
||||||
|
all_tags.extend(t.lower() for t in v)
|
||||||
|
elif isinstance(tags, list):
|
||||||
|
all_tags = [t.lower() for t in tags]
|
||||||
|
vocal_tags = {"vocals", "vocal", "singing", "singer", "voice", "lyrics",
|
||||||
|
"rap", "hiphop", "hip-hop", "spoken", "spoken word"}
|
||||||
|
if vocal_tags & set(all_tags):
|
||||||
|
return True
|
||||||
|
# Check track name for vocal giveaways
|
||||||
|
name_lower = track.get("name", "").lower()
|
||||||
|
if any(w in name_lower for w in ["feat.", "ft.", "vocal", "remix vocal", "(voice"]):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def search_tracks(client: httpx.Client, client_id: str, genre: str, limit: int = 20) -> list[dict]:
    """Search Jamendo for instrumental tracks in *genre*.

    Args:
        client: HTTP client used for the request.
        client_id: Jamendo API client ID.
        genre: Tag passed to the API's fuzzy tag search.
        limit: Maximum number of tracks to return.

    Returns:
        Up to *limit* track dicts that passed the local vocal check, in the
        order returned by the API. Empty when the API reports an error.

    Raises:
        httpx.HTTPStatusError: If the HTTP response status is an error.
    """
    # Request more than needed so we can filter out vocal false positives
    fetch_limit = min(limit * 3, 200)
    params = {
        "client_id": client_id,
        "format": "json",
        "limit": fetch_limit,
        "vocalinstrumental": "instrumental",
        "fuzzytags": genre,
        "durationbetween": "60_300",
        "include": "musicinfo+licenses",
        "order": "popularity_total",
    }

    resp = client.get(f"{API_BASE}/tracks/", params=params)
    resp.raise_for_status()
    data = resp.json()

    headers = data.get("headers", {})
    if headers.get("status") != "success":
        print(f" API error: {headers.get('error_message', 'unknown')}")
        return []

    # Post-filter: reject tracks with vocal indicators despite the API filter
    filtered = []
    skipped = 0  # only tracks actually rejected, not ones left unexamined
    for t in data.get("results", []):
        if len(filtered) >= limit:
            break
        if _has_vocals(t):
            print(f" SKIP (vocals detected): {t.get('artist_name', '?')} - {t.get('name', '?')}")
            skipped += 1
            continue
        filtered.append(t)

    if skipped:
        print(f" (filtered out {skipped} tracks with vocal indicators)")
    return filtered
|
||||||
|
|
||||||
|
|
||||||
|
def make_filename(track: dict, genre_tag: str) -> str:
    """Build the ``Artist - Title[ [Genre]].mp3`` filename for a track.

    The bracketed genre label is appended only when neither the search tag
    nor its display label already appears in the artist/title text.
    """
    artist = sanitize_filename(track.get("artist_name", "Unknown"))
    title = sanitize_filename(track.get("name", "Untitled"))
    label = GENRE_LABELS.get(genre_tag, genre_tag.title())

    stem = f"{artist} - {title}"
    searchable = f"{artist} {title}".lower()

    # Genre already detectable from artist/title: no tag needed
    if genre_tag in searchable or label.lower() in searchable:
        return f"{stem}.mp3"
    return f"{stem} [{label}].mp3"
|
||||||
|
|
||||||
|
|
||||||
|
def download_track(client: httpx.Client, track: dict, filepath: Path, index: int, total: int) -> bool:
    """Download one track to *filepath*, printing progress.

    Args:
        client: HTTP client used for the download.
        track: Jamendo track dict; ``audiodownload`` holds the URL.
        filepath: Destination file.
        index: 1-based position of this track, for progress output.
        total: Total number of tracks, for progress output.

    Returns:
        True if the file was written; False if the track has no download URL
        or downloading is not allowed.

    Raises:
        httpx.HTTPStatusError: If the download response status is an error.
    """
    # .get() keeps the skip paths from raising KeyError on a nameless track
    name = track.get("name", "?")

    url = track.get("audiodownload")
    if not url:
        print(f" [{index}/{total}] SKIP (no download URL): {name}")
        return False

    if not track.get("audiodownload_allowed", True):
        print(f" [{index}/{total}] SKIP (download not allowed): {name}")
        return False

    print(f" [{index}/{total}] Downloading: {filepath.name}...", end=" ", flush=True)
    resp = client.get(url, follow_redirects=True)
    resp.raise_for_status()
    filepath.write_bytes(resp.content)

    size_mb = len(resp.content) / (1024 * 1024)
    # A null duration must not raise after the file has been written
    dur = int(track.get("duration") or 0)
    print(f"{size_mb:.1f} MB, {dur // 60}:{dur % 60:02d}")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def save_credit(track: dict, filename: str):
    """Append an attribution line for *filename* to the credits file.

    The line has the form ``file | artist - title | license URL | share URL``.
    A header is written first when the credits file is new or empty, and
    nothing is written if the file already has an entry for *filename*.
    """
    artist = track.get("artist_name", "Unknown")
    title = track.get("name", "Untitled")
    license_url = track.get("license_ccurl", "")
    share_url = track.get("shareurl", "")

    line = f"{filename} | {artist} - {title} | {license_url} | {share_url}\n"

    # Explicit UTF-8: artist and title text is often non-ASCII, and the
    # platform default encoding may not be able to represent it.
    existing = (
        CREDITS_FILE.read_text(encoding="utf-8", errors="replace")
        if CREDITS_FILE.exists()
        else ""
    )

    # Match on the leading "file | " column instead of a substring anywhere
    # in the file, so one filename contained in another is not a duplicate.
    entry_prefix = f"{filename} | "
    if any(row.startswith(entry_prefix) for row in existing.splitlines()):
        return

    with open(CREDITS_FILE, "a", encoding="utf-8") as f:
        if not existing:
            f.write("# Music Credits (Jamendo - Creative Commons)\n")
            f.write("# File | Artist - Title | License | URL\n\n")
        f.write(line)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: search Jamendo by genre, then list or download.

    The requested ``--count`` is split as evenly as possible across the
    selected genres. Tracks are de-duplicated across genres by ID. With
    ``--list`` the results are printed as a table and nothing is downloaded;
    otherwise each track is saved into the music directory and credited.
    """
    parser = argparse.ArgumentParser(description="Download instrumental music from Jamendo")
    parser.add_argument("--genre", choices=GENRES, help="Download only this genre")
    parser.add_argument("--count", type=int, default=20, help="Total tracks to download (default: 20)")
    parser.add_argument("--list", action="store_true", help="List available tracks without downloading")
    args = parser.parse_args()

    client_id = get_client_id()
    MUSIC_DIR.mkdir(exist_ok=True)

    genres = [args.genre] if args.genre else GENRES
    # divmod keeps the total at --count even when it is smaller than the
    # number of genres: the first `remainder` genres get one extra track and
    # genres with a zero share are skipped below.
    per_genre, remainder = divmod(max(args.count, 0), len(genres))

    all_tracks = []
    seen_ids = set()

    with httpx.Client(timeout=30) as api_client:
        for i, genre in enumerate(genres):
            limit = per_genre + (1 if i < remainder else 0)
            if limit <= 0:
                continue
            print(f"Searching {genre}...", end=" ", flush=True)
            tracks = search_tracks(api_client, client_id, genre, limit)
            # Deduplicate across genres
            added = 0
            for t in tracks:
                if t["id"] not in seen_ids and added < limit:
                    t["_genre_tag"] = genre
                    all_tracks.append(t)
                    seen_ids.add(t["id"])
                    added += 1
            print(f"{added} tracks")

    if not all_tracks:
        print("No tracks found.")
        return

    if args.list:
        print(f"\n{'#':<4} {'Genre':<10} {'Artist':<25} {'Title':<40} {'Duration':<8}")
        print("-" * 90)
        for i, t in enumerate(all_tracks, 1):
            dur = f"{t['duration'] // 60}:{t['duration'] % 60:02d}"
            artist = t["artist_name"][:24]
            title = t["name"][:39]
            label = GENRE_LABELS.get(t["_genre_tag"], t["_genre_tag"])
            print(f"{i:<4} {label:<10} {artist:<25} {title:<40} {dur:<8}")
        print(f"\n{len(all_tracks)} tracks available")
        return

    # Download phase
    downloaded = 0
    skipped_exists = 0
    skipped_error = 0

    with httpx.Client(timeout=120, follow_redirects=True) as dl_client:
        for i, track in enumerate(all_tracks, 1):
            filename = make_filename(track, track["_genre_tag"])
            filepath = MUSIC_DIR / filename

            if filepath.exists():
                print(f" [{i}/{len(all_tracks)}] EXISTS: {filename}")
                skipped_exists += 1
                continue

            try:
                if download_track(dl_client, track, filepath, i, len(all_tracks)):
                    save_credit(track, filename)
                    downloaded += 1
                else:
                    skipped_error += 1
            except Exception as e:
                # Best-effort batch: report the failure and move on
                print(f" [{i}/{len(all_tracks)}] ERROR: {e}")
                # Clean up partial download
                if filepath.exists():
                    filepath.unlink()
                skipped_error += 1

    print(f"\nDone: {downloaded} downloaded, {skipped_exists} existed, {skipped_error} skipped")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
+279
-15
@@ -347,9 +347,14 @@ section h2 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.caller-btn.active {
|
.caller-btn.active {
|
||||||
background: var(--accent);
|
background: var(--bg);
|
||||||
border-color: var(--accent);
|
border-color: transparent;
|
||||||
|
}
|
||||||
|
.caller-btn.active .caller-name {
|
||||||
color: #fff;
|
color: #fff;
|
||||||
|
background: var(--accent);
|
||||||
|
padding: 2px 8px;
|
||||||
|
border-radius: 4px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.call-status {
|
.call-status {
|
||||||
@@ -463,6 +468,84 @@ section h2 {
|
|||||||
line-height: 1.3;
|
line-height: 1.3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Caller model indicator */
|
||||||
|
.info-badge.model {
|
||||||
|
background: rgba(100, 140, 220, 0.2);
|
||||||
|
color: #7ab0e8;
|
||||||
|
font-size: 0.7rem;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.caller-model-override {
|
||||||
|
font-size: 0.7rem;
|
||||||
|
padding: 2px 4px;
|
||||||
|
background: var(--bg);
|
||||||
|
color: var(--text);
|
||||||
|
border: 1px solid rgba(100, 140, 220, 0.3);
|
||||||
|
border-radius: 4px;
|
||||||
|
max-width: 140px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Caller button model badge */
|
||||||
|
.model-tag {
|
||||||
|
font-size: 0.55rem;
|
||||||
|
color: #7ab0e8;
|
||||||
|
background: rgba(100, 140, 220, 0.15);
|
||||||
|
padding: 0 3px;
|
||||||
|
border-radius: 2px;
|
||||||
|
font-weight: 700;
|
||||||
|
letter-spacing: 0.3px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Caller Models settings section */
|
||||||
|
.caller-model-row {
|
||||||
|
margin-bottom: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.caller-model-row label {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.cm-pool-input {
|
||||||
|
font-size: 0.8rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.cm-style-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr 1fr;
|
||||||
|
gap: 4px;
|
||||||
|
margin-bottom: 8px;
|
||||||
|
max-height: 200px;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.cm-style-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
gap: 4px;
|
||||||
|
background: rgba(255, 255, 255, 0.05);
|
||||||
|
border-radius: 4px;
|
||||||
|
padding: 3px 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.cm-style-name {
|
||||||
|
font-size: 0.7rem;
|
||||||
|
color: var(--text-muted);
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.cm-style-select {
|
||||||
|
font-size: 0.7rem;
|
||||||
|
padding: 2px 3px;
|
||||||
|
background: var(--bg);
|
||||||
|
color: var(--text);
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
|
border-radius: 4px;
|
||||||
|
max-width: 110px;
|
||||||
|
}
|
||||||
|
|
||||||
.caller-background-full {
|
.caller-background-full {
|
||||||
margin-top: 8px;
|
margin-top: 8px;
|
||||||
font-size: 0.75rem;
|
font-size: 0.75rem;
|
||||||
@@ -649,19 +732,6 @@ section h2 {
|
|||||||
margin-bottom: 10px;
|
margin-bottom: 10px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.music-section select optgroup {
|
|
||||||
color: var(--accent);
|
|
||||||
font-weight: bold;
|
|
||||||
font-style: normal;
|
|
||||||
padding: 4px 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.music-section select option {
|
|
||||||
color: var(--text);
|
|
||||||
font-weight: normal;
|
|
||||||
padding: 2px 8px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.music-controls {
|
.music-controls {
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 8px;
|
gap: 8px;
|
||||||
@@ -688,6 +758,83 @@ section h2 {
|
|||||||
accent-color: var(--accent);
|
accent-color: var(--accent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Genre Quick-Select */
|
||||||
|
.genre-section {
|
||||||
|
grid-column: span 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.genre-grid {
|
||||||
|
display: flex;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 6px;
|
||||||
|
margin-bottom: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.genre-btn {
|
||||||
|
background: var(--bg);
|
||||||
|
color: var(--text);
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.12);
|
||||||
|
padding: 6px 12px;
|
||||||
|
border-radius: var(--radius-sm);
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
transition: all 0.15s;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.genre-btn:hover {
|
||||||
|
border-color: var(--accent);
|
||||||
|
background: #2a1e10;
|
||||||
|
color: #fff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.genre-btn.active {
|
||||||
|
background: var(--accent);
|
||||||
|
border-color: var(--accent);
|
||||||
|
color: #fff;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.now-playing {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
padding: 4px 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.now-playing-text {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-muted);
|
||||||
|
flex: 0 1 auto;
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
white-space: nowrap;
|
||||||
|
min-width: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.now-playing-stop {
|
||||||
|
background: var(--bg);
|
||||||
|
color: var(--text);
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.15);
|
||||||
|
padding: 4px 10px;
|
||||||
|
border-radius: var(--radius-sm);
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
flex-shrink: 0;
|
||||||
|
transition: all 0.15s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.now-playing-stop:hover {
|
||||||
|
border-color: var(--accent);
|
||||||
|
background: #2a1e10;
|
||||||
|
}
|
||||||
|
|
||||||
|
.now-playing-volume {
|
||||||
|
width: 80px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
accent-color: var(--accent);
|
||||||
|
}
|
||||||
|
|
||||||
/* Soundboard */
|
/* Soundboard */
|
||||||
.sounds-section {
|
.sounds-section {
|
||||||
grid-column: span 2;
|
grid-column: span 2;
|
||||||
@@ -1588,6 +1735,16 @@ section h2 {
|
|||||||
font-size: 0.8rem;
|
font-size: 0.8rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.media-row .genre-section {
|
||||||
|
grid-column: span 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 700px) {
|
||||||
|
.media-row .genre-section {
|
||||||
|
grid-column: span 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Devon (Intern) */
|
/* Devon (Intern) */
|
||||||
.message.devon {
|
.message.devon {
|
||||||
border-left: 3px solid var(--devon);
|
border-left: 3px solid var(--devon);
|
||||||
@@ -1777,3 +1934,110 @@ button:focus-visible {
|
|||||||
.log-toggle-btn:hover {
|
.log-toggle-btn:hover {
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Preflight */
|
||||||
|
.preflight-btn {
|
||||||
|
background: rgba(90, 138, 60, 0.15);
|
||||||
|
color: var(--accent-green);
|
||||||
|
border: 1px solid rgba(90, 138, 60, 0.3);
|
||||||
|
}
|
||||||
|
.preflight-btn:hover {
|
||||||
|
background: rgba(90, 138, 60, 0.25);
|
||||||
|
}
|
||||||
|
|
||||||
|
.preflight-content {
|
||||||
|
max-width: 700px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.preflight-status {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 10px;
|
||||||
|
padding: 12px 16px;
|
||||||
|
border-radius: var(--radius-sm);
|
||||||
|
margin-bottom: 16px;
|
||||||
|
font-weight: 700;
|
||||||
|
font-size: 1.1rem;
|
||||||
|
}
|
||||||
|
.preflight-status.pass { background: rgba(90, 138, 60, 0.15); color: var(--accent-green); }
|
||||||
|
.preflight-status.warn { background: rgba(232, 169, 29, 0.15); color: #e8a91d; }
|
||||||
|
.preflight-status.fail { background: rgba(204, 34, 34, 0.15); color: var(--accent-red); }
|
||||||
|
.preflight-status.loading { background: rgba(232, 121, 29, 0.1); color: var(--text-muted); }
|
||||||
|
|
||||||
|
.preflight-checks {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 12px;
|
||||||
|
max-height: 60vh;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.preflight-check {
|
||||||
|
background: var(--bg);
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.1);
|
||||||
|
border-radius: var(--radius-sm);
|
||||||
|
padding: 12px 16px;
|
||||||
|
}
|
||||||
|
.preflight-check-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
cursor: pointer;
|
||||||
|
user-select: none;
|
||||||
|
}
|
||||||
|
.preflight-check-name {
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: 0.95rem;
|
||||||
|
}
|
||||||
|
.preflight-check-badge {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
font-weight: 700;
|
||||||
|
padding: 2px 8px;
|
||||||
|
border-radius: 4px;
|
||||||
|
text-transform: uppercase;
|
||||||
|
}
|
||||||
|
.preflight-check-badge.pass { background: rgba(90, 138, 60, 0.2); color: var(--accent-green); }
|
||||||
|
.preflight-check-badge.warn { background: rgba(232, 169, 29, 0.2); color: #e8a91d; }
|
||||||
|
.preflight-check-badge.fail { background: rgba(204, 34, 34, 0.2); color: var(--accent-red); }
|
||||||
|
.preflight-check-badge.skip { background: rgba(154, 139, 120, 0.2); color: var(--text-muted); }
|
||||||
|
|
||||||
|
.preflight-check-details {
|
||||||
|
margin-top: 10px;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: var(--text-muted);
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
.preflight-check.open .preflight-check-details {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
.preflight-table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
margin-top: 8px;
|
||||||
|
}
|
||||||
|
.preflight-table th {
|
||||||
|
text-align: left;
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
font-weight: 600;
|
||||||
|
text-transform: uppercase;
|
||||||
|
padding: 4px 8px;
|
||||||
|
border-bottom: 1px solid rgba(232, 121, 29, 0.1);
|
||||||
|
}
|
||||||
|
.preflight-table td {
|
||||||
|
padding: 4px 8px;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
color: var(--text);
|
||||||
|
border-bottom: 1px solid rgba(232, 121, 29, 0.05);
|
||||||
|
}
|
||||||
|
.preflight-table tr.mismatch td { color: var(--accent-red); }
|
||||||
|
.preflight-table tr.connected td { color: var(--accent-green); }
|
||||||
|
|
||||||
|
.preflight-test-btn {
|
||||||
|
background: rgba(232, 121, 29, 0.15);
|
||||||
|
color: var(--accent);
|
||||||
|
border: 1px solid rgba(232, 121, 29, 0.3);
|
||||||
|
}
|
||||||
|
.preflight-test-btn:hover { background: rgba(232, 121, 29, 0.25); }
|
||||||
|
.preflight-test-btn.loading { opacity: 0.6; pointer-events: none; }
|
||||||
|
|||||||
+58
-9
@@ -4,7 +4,7 @@
|
|||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<title>Luke at The Roost</title>
|
<title>Luke at The Roost</title>
|
||||||
<link rel="stylesheet" href="/css/style.css">
|
<link rel="stylesheet" href="/css/style.css?v=2">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="app">
|
<div id="app">
|
||||||
@@ -15,6 +15,7 @@
|
|||||||
<button id="rec-btn" class="rec-btn" title="Record stems for post-production">REC</button>
|
<button id="rec-btn" class="rec-btn" title="Record stems for post-production">REC</button>
|
||||||
<button id="new-session-btn" class="new-session-btn">New Session</button>
|
<button id="new-session-btn" class="new-session-btn">New Session</button>
|
||||||
<button id="export-session-btn">Export</button>
|
<button id="export-session-btn">Export</button>
|
||||||
|
<button id="preflight-btn" class="preflight-btn">Preflight</button>
|
||||||
<button id="settings-btn">Settings</button>
|
<button id="settings-btn">Settings</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="theme-bar">
|
<div class="theme-bar">
|
||||||
@@ -75,6 +76,8 @@
|
|||||||
<span id="caller-shape-badge" class="info-badge shape"></span>
|
<span id="caller-shape-badge" class="info-badge shape"></span>
|
||||||
<span id="caller-energy-badge" class="info-badge energy"></span>
|
<span id="caller-energy-badge" class="info-badge energy"></span>
|
||||||
<span id="caller-emotion" class="info-badge emotion"></span>
|
<span id="caller-emotion" class="info-badge emotion"></span>
|
||||||
|
<span id="caller-model-badge" class="info-badge model"></span>
|
||||||
|
<select id="caller-model-override" class="caller-model-override hidden"></select>
|
||||||
</div>
|
</div>
|
||||||
<div id="caller-signature" class="caller-signature"></div>
|
<div id="caller-signature" class="caller-signature"></div>
|
||||||
<div id="caller-situation" class="caller-situation"></div>
|
<div id="caller-situation" class="caller-situation"></div>
|
||||||
@@ -140,13 +143,13 @@
|
|||||||
|
|
||||||
<!-- Music / Ads / Idents -->
|
<!-- Music / Ads / Idents -->
|
||||||
<div class="media-row">
|
<div class="media-row">
|
||||||
<section class="music-section">
|
<section class="music-section genre-section">
|
||||||
<h2>Music</h2>
|
<h2>Music <span class="shortcut-label">M</span></h2>
|
||||||
<select id="track-select"></select>
|
<div id="genre-buttons" class="genre-grid"></div>
|
||||||
<div class="music-controls">
|
<div id="now-playing" class="now-playing hidden">
|
||||||
<button id="play-btn">Play <span class="shortcut-label">M</span></button>
|
<span id="now-playing-text" class="now-playing-text"></span>
|
||||||
<button id="stop-btn">Stop</button>
|
<button id="stop-btn" class="now-playing-stop">Stop</button>
|
||||||
<input type="range" id="volume" min="0" max="100" value="30">
|
<input type="range" id="volume" min="0" max="100" value="30" class="now-playing-volume">
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
@@ -285,6 +288,36 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Caller Model Routing -->
|
||||||
|
<div class="settings-group">
|
||||||
|
<h3>Caller Models</h3>
|
||||||
|
<div class="caller-model-row">
|
||||||
|
<label>
|
||||||
|
Strategy
|
||||||
|
<select id="cm-strategy">
|
||||||
|
<option value="single">Single Model</option>
|
||||||
|
<option value="cycle">Cycle Models</option>
|
||||||
|
<option value="style_matched">Style-Matched</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div id="cm-pool-section" class="hidden">
|
||||||
|
<label>
|
||||||
|
Model Pool
|
||||||
|
<input type="text" id="cm-pool" class="cm-pool-input" placeholder="x-ai/grok-4, deepseek/deepseek-v3.2, ...">
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div id="cm-style-map" class="hidden">
|
||||||
|
<div class="cm-style-grid" id="cm-style-grid"></div>
|
||||||
|
</div>
|
||||||
|
<div class="caller-model-row">
|
||||||
|
<label>
|
||||||
|
Fallback Model
|
||||||
|
<select id="cm-fallback" class="model-select"></select>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- TTS Settings -->
|
<!-- TTS Settings -->
|
||||||
<div class="settings-group">
|
<div class="settings-group">
|
||||||
<h3>TTS Provider</h3>
|
<h3>TTS Provider</h3>
|
||||||
@@ -325,8 +358,24 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<!-- Preflight Modal -->
|
||||||
|
<div id="preflight-modal" class="modal hidden">
|
||||||
|
<div class="modal-content preflight-content">
|
||||||
|
<h2>Show Preflight</h2>
|
||||||
|
<div id="preflight-status" class="preflight-status loading">
|
||||||
|
<span class="preflight-status-icon">...</span>
|
||||||
|
<span class="preflight-status-text">Running checks...</span>
|
||||||
|
</div>
|
||||||
|
<div id="preflight-checks" class="preflight-checks"></div>
|
||||||
|
<div class="modal-buttons">
|
||||||
|
<button id="preflight-test-btn" class="preflight-test-btn">Test Responses</button>
|
||||||
|
<button id="preflight-rerun-btn">Re-run</button>
|
||||||
|
<button id="close-preflight">Close</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script src="/js/app.js?v=22"></script>
|
<script src="/js/app.js?v=27"></script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
+512
-40
@@ -131,6 +131,7 @@ document.addEventListener('DOMContentLoaded', async () => {
|
|||||||
initEventListeners();
|
initEventListeners();
|
||||||
initClock();
|
initClock();
|
||||||
loadShowTheme();
|
loadShowTheme();
|
||||||
|
loadCallerModels();
|
||||||
loadVoicemails();
|
loadVoicemails();
|
||||||
setInterval(loadVoicemails, 30000);
|
setInterval(loadVoicemails, 30000);
|
||||||
loadEmails();
|
loadEmails();
|
||||||
@@ -309,7 +310,6 @@ function initEventListeners() {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Music - now server-side
|
// Music - now server-side
|
||||||
document.getElementById('play-btn')?.addEventListener('click', playMusic);
|
|
||||||
document.getElementById('stop-btn')?.addEventListener('click', stopMusic);
|
document.getElementById('stop-btn')?.addEventListener('click', stopMusic);
|
||||||
document.getElementById('volume')?.addEventListener('input', setMusicVolume);
|
document.getElementById('volume')?.addEventListener('input', setMusicVolume);
|
||||||
|
|
||||||
@@ -356,6 +356,27 @@ function initEventListeners() {
|
|||||||
else if (e.key === 'Escape') e.target.blur();
|
else if (e.key === 'Escape') e.target.blur();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Caller Models
|
||||||
|
document.getElementById('cm-strategy')?.addEventListener('change', () => {
|
||||||
|
callerModelSettings.strategy = document.getElementById('cm-strategy').value;
|
||||||
|
updateCallerModelUI();
|
||||||
|
});
|
||||||
|
document.getElementById('caller-model-badge')?.addEventListener('click', () => {
|
||||||
|
const sel = document.getElementById('caller-model-override');
|
||||||
|
if (!sel || !currentCaller) return;
|
||||||
|
sel.classList.toggle('hidden');
|
||||||
|
if (!sel.classList.contains('hidden')) {
|
||||||
|
const current = callerModelAssignments[currentCaller.key];
|
||||||
|
if (current) sel.value = current;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
document.getElementById('caller-model-override')?.addEventListener('change', (e) => {
|
||||||
|
if (currentCaller && e.target.value) {
|
||||||
|
overrideCallerModel(currentCaller.key, e.target.value);
|
||||||
|
e.target.classList.add('hidden');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Settings
|
// Settings
|
||||||
document.getElementById('settings-btn')?.addEventListener('click', async () => {
|
document.getElementById('settings-btn')?.addEventListener('click', async () => {
|
||||||
document.getElementById('settings-modal')?.classList.remove('hidden');
|
document.getElementById('settings-modal')?.classList.remove('hidden');
|
||||||
@@ -371,6 +392,17 @@ function initEventListeners() {
|
|||||||
});
|
});
|
||||||
document.getElementById('refresh-ollama')?.addEventListener('click', refreshOllamaModels);
|
document.getElementById('refresh-ollama')?.addEventListener('click', refreshOllamaModels);
|
||||||
|
|
||||||
|
// Preflight
|
||||||
|
document.getElementById('preflight-btn')?.addEventListener('click', () => {
|
||||||
|
document.getElementById('preflight-modal')?.classList.remove('hidden');
|
||||||
|
runPreflight(false);
|
||||||
|
});
|
||||||
|
document.getElementById('preflight-test-btn')?.addEventListener('click', () => runPreflight(true));
|
||||||
|
document.getElementById('preflight-rerun-btn')?.addEventListener('click', () => runPreflight(false));
|
||||||
|
document.getElementById('close-preflight')?.addEventListener('click', () => {
|
||||||
|
document.getElementById('preflight-modal')?.classList.add('hidden');
|
||||||
|
});
|
||||||
|
|
||||||
// Wrap-up button
|
// Wrap-up button
|
||||||
document.getElementById('wrapup-btn')?.addEventListener('click', wrapUp);
|
document.getElementById('wrapup-btn')?.addEventListener('click', wrapUp);
|
||||||
|
|
||||||
@@ -637,6 +669,7 @@ async function loadCallers() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
console.log('Loaded', data.callers.length, 'callers, session:', data.session_id);
|
console.log('Loaded', data.callers.length, 'callers, session:', data.session_id);
|
||||||
|
updateCallerModelBadges();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('loadCallers error:', err);
|
console.error('loadCallers error:', err);
|
||||||
}
|
}
|
||||||
@@ -664,6 +697,31 @@ async function startCall(key, name) {
|
|||||||
document.querySelector('.callers-section')?.classList.add('call-active');
|
document.querySelector('.callers-section')?.classList.add('call-active');
|
||||||
document.querySelector('.chat-section')?.classList.add('call-active');
|
document.querySelector('.chat-section')?.classList.add('call-active');
|
||||||
|
|
||||||
|
// Highlight active caller button immediately
|
||||||
|
document.querySelectorAll('.caller-btn').forEach(btn => {
|
||||||
|
const isActive = btn.dataset.key === key;
|
||||||
|
btn.classList.toggle('active', isActive);
|
||||||
|
if (isActive) {
|
||||||
|
btn.style.outline = '2px solid #5a8a3c';
|
||||||
|
const nameEl = btn.querySelector('.caller-name');
|
||||||
|
if (nameEl) {
|
||||||
|
nameEl.style.background = '#e8791d';
|
||||||
|
nameEl.style.color = '#fff';
|
||||||
|
nameEl.style.padding = '2px 8px';
|
||||||
|
nameEl.style.borderRadius = '4px';
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
btn.style.outline = '';
|
||||||
|
const nameEl = btn.querySelector('.caller-name');
|
||||||
|
if (nameEl) {
|
||||||
|
nameEl.style.background = '';
|
||||||
|
nameEl.style.color = '';
|
||||||
|
nameEl.style.padding = '';
|
||||||
|
nameEl.style.borderRadius = '';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Check if real caller is active (three-way scenario)
|
// Check if real caller is active (three-way scenario)
|
||||||
const realCallerActive = document.getElementById('real-caller-info') &&
|
const realCallerActive = document.getElementById('real-caller-info') &&
|
||||||
!document.getElementById('real-caller-info').classList.contains('hidden');
|
!document.getElementById('real-caller-info').classList.contains('hidden');
|
||||||
@@ -701,12 +759,32 @@ async function startCall(key, name) {
|
|||||||
if (situation) situation.textContent = ci.situation_summary || '';
|
if (situation) situation.textContent = ci.situation_summary || '';
|
||||||
infoPanel.classList.remove('hidden');
|
infoPanel.classList.remove('hidden');
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
showCallerModelBadge(callerModelAssignments[key] || data.model);
|
||||||
|
} catch(e) { console.error('[startCall] showCallerModelBadge error:', e); }
|
||||||
|
document.getElementById('caller-model-override')?.classList.add('hidden');
|
||||||
const bgEl = document.getElementById('caller-background');
|
const bgEl = document.getElementById('caller-background');
|
||||||
if (bgEl && data.background) bgEl.textContent = data.background;
|
if (bgEl && data.background) bgEl.textContent = data.background;
|
||||||
|
|
||||||
|
let matchCount = 0;
|
||||||
document.querySelectorAll('.caller-btn').forEach(btn => {
|
document.querySelectorAll('.caller-btn').forEach(btn => {
|
||||||
btn.classList.toggle('active', btn.dataset.key === key);
|
const isActive = btn.dataset.key === key;
|
||||||
|
btn.classList.toggle('active', isActive);
|
||||||
|
if (isActive) {
|
||||||
|
btn.style.outline = '2px solid #5a8a3c';
|
||||||
|
matchCount++;
|
||||||
|
} else {
|
||||||
|
btn.style.outline = '';
|
||||||
|
}
|
||||||
|
const nameEl = btn.querySelector('.caller-name');
|
||||||
|
if (nameEl) {
|
||||||
|
nameEl.style.background = isActive ? '#e8791d' : '';
|
||||||
|
nameEl.style.color = isActive ? '#fff' : '';
|
||||||
|
nameEl.style.padding = isActive ? '2px 8px' : '';
|
||||||
|
nameEl.style.borderRadius = isActive ? '4px' : '';
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
console.log(`[ActiveCaller] key=${key}, matched=${matchCount} buttons`);
|
||||||
|
|
||||||
log(`Connected to ${name}` + (realCallerActive ? ' (three-way)' : ''));
|
log(`Connected to ${name}` + (realCallerActive ? ' (three-way)' : ''));
|
||||||
if (!realCallerActive) clearChat();
|
if (!realCallerActive) clearChat();
|
||||||
@@ -731,6 +809,7 @@ async function newSession() {
|
|||||||
// Reload callers to get new session ID
|
// Reload callers to get new session ID
|
||||||
await loadCallers();
|
await loadCallers();
|
||||||
await loadShowTheme();
|
await loadShowTheme();
|
||||||
|
await loadCallerModels();
|
||||||
|
|
||||||
log('New session started - all callers have fresh backgrounds');
|
log('New session started - all callers have fresh backgrounds');
|
||||||
}
|
}
|
||||||
@@ -754,12 +833,23 @@ async function hangup() {
|
|||||||
document.getElementById('hangup-btn').disabled = true;
|
document.getElementById('hangup-btn').disabled = true;
|
||||||
const wrapBtn = document.getElementById('wrapup-btn');
|
const wrapBtn = document.getElementById('wrapup-btn');
|
||||||
if (wrapBtn) { wrapBtn.disabled = true; wrapBtn.classList.remove('active'); }
|
if (wrapBtn) { wrapBtn.disabled = true; wrapBtn.classList.remove('active'); }
|
||||||
document.querySelectorAll('.caller-btn').forEach(btn => btn.classList.remove('active'));
|
document.querySelectorAll('.caller-btn').forEach(btn => {
|
||||||
|
btn.classList.remove('active');
|
||||||
|
const nameEl = btn.querySelector('.caller-name');
|
||||||
|
if (nameEl) {
|
||||||
|
nameEl.style.background = '';
|
||||||
|
nameEl.style.color = '';
|
||||||
|
nameEl.style.padding = '';
|
||||||
|
nameEl.style.borderRadius = '';
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// Hide caller info panel and background
|
// Hide caller info panel and background
|
||||||
document.getElementById('caller-info-panel')?.classList.add('hidden');
|
document.getElementById('caller-info-panel')?.classList.add('hidden');
|
||||||
const bgDetails2 = document.getElementById('caller-background-details');
|
const bgDetails2 = document.getElementById('caller-background-details');
|
||||||
if (bgDetails2) bgDetails2.classList.add('hidden');
|
if (bgDetails2) bgDetails2.classList.add('hidden');
|
||||||
|
showCallerModelBadge(null);
|
||||||
|
document.getElementById('caller-model-override')?.classList.add('hidden');
|
||||||
|
|
||||||
// Hide AI caller indicator
|
// Hide AI caller indicator
|
||||||
document.getElementById('ai-caller-info')?.classList.add('hidden');
|
document.getElementById('ai-caller-info')?.classList.add('hidden');
|
||||||
@@ -936,94 +1026,139 @@ async function sendTypedMessage() {
|
|||||||
|
|
||||||
|
|
||||||
// --- Music (Server-Side) ---
|
// --- Music (Server-Side) ---
|
||||||
|
let genreMap = {}; // { genre: [track, ...] }
|
||||||
|
let genreQueues = {}; // { genre: [shuffled track indices...] }
|
||||||
|
let activeGenre = null;
|
||||||
|
let currentTrackName = '';
|
||||||
|
|
||||||
async function loadMusic() {
|
async function loadMusic() {
|
||||||
try {
|
try {
|
||||||
const res = await fetch('/api/music');
|
const res = await fetch('/api/music');
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
tracks = data.tracks || [];
|
tracks = data.tracks || [];
|
||||||
|
|
||||||
const select = document.getElementById('track-select');
|
|
||||||
if (!select) return;
|
|
||||||
|
|
||||||
const previousValue = select.value;
|
|
||||||
select.innerHTML = '';
|
|
||||||
|
|
||||||
// Group tracks by genre
|
// Group tracks by genre
|
||||||
const genres = {};
|
genreMap = {};
|
||||||
tracks.forEach(track => {
|
tracks.forEach(track => {
|
||||||
const genre = track.genre || 'Other';
|
const genre = track.genre || 'Other';
|
||||||
if (!genres[genre]) genres[genre] = [];
|
if (!genreMap[genre]) genreMap[genre] = [];
|
||||||
genres[genre].push(track);
|
genreMap[genre].push(track);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Sort genre names, but put "Other" last
|
// Sort genre names, "Other" last
|
||||||
const genreOrder = Object.keys(genres).sort((a, b) => {
|
const genreOrder = Object.keys(genreMap).sort((a, b) => {
|
||||||
if (a === 'Other') return 1;
|
if (a === 'Other') return 1;
|
||||||
if (b === 'Other') return -1;
|
if (b === 'Other') return -1;
|
||||||
return a.localeCompare(b);
|
return a.localeCompare(b);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Build genre buttons
|
||||||
|
const container = document.getElementById('genre-buttons');
|
||||||
|
if (!container) return;
|
||||||
|
container.innerHTML = '';
|
||||||
|
|
||||||
genreOrder.forEach(genre => {
|
genreOrder.forEach(genre => {
|
||||||
const group = document.createElement('optgroup');
|
const btn = document.createElement('button');
|
||||||
group.label = genre;
|
btn.className = 'genre-btn';
|
||||||
// Shuffle within each genre group
|
btn.textContent = genre;
|
||||||
const genreTracks = genres[genre];
|
btn.dataset.genre = genre;
|
||||||
for (let i = genreTracks.length - 1; i > 0; i--) {
|
btn.addEventListener('click', () => playGenre(genre));
|
||||||
const j = Math.floor(Math.random() * (i + 1));
|
container.appendChild(btn);
|
||||||
[genreTracks[i], genreTracks[j]] = [genreTracks[j], genreTracks[i]];
|
|
||||||
}
|
|
||||||
genreTracks.forEach(track => {
|
|
||||||
const option = document.createElement('option');
|
|
||||||
option.value = track.file;
|
|
||||||
option.textContent = track.name;
|
|
||||||
group.appendChild(option);
|
|
||||||
});
|
|
||||||
select.appendChild(group);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Restore previous selection if it still exists
|
console.log('Loaded', tracks.length, 'tracks in', genreOrder.length, 'genres');
|
||||||
if (previousValue && [...select.options].some(o => o.value === previousValue)) {
|
|
||||||
select.value = previousValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log('Loaded', tracks.length, 'tracks');
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('loadMusic error:', err);
|
console.error('loadMusic error:', err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async function playMusic() {
|
function getNextTrack(genre) {
|
||||||
await loadMusic();
|
const genreTracks = genreMap[genre];
|
||||||
const select = document.getElementById('track-select');
|
if (!genreTracks || genreTracks.length === 0) return null;
|
||||||
const track = select?.value;
|
// Refill and shuffle queue when empty
|
||||||
|
if (!genreQueues[genre] || genreQueues[genre].length === 0) {
|
||||||
|
const indices = genreTracks.map((_, i) => i);
|
||||||
|
for (let i = indices.length - 1; i > 0; i--) {
|
||||||
|
const j = Math.floor(Math.random() * (i + 1));
|
||||||
|
[indices[i], indices[j]] = [indices[j], indices[i]];
|
||||||
|
}
|
||||||
|
genreQueues[genre] = indices;
|
||||||
|
}
|
||||||
|
return genreTracks[genreQueues[genre].shift()];
|
||||||
|
}
|
||||||
|
|
||||||
|
async function playGenre(genre) {
|
||||||
|
const track = getNextTrack(genre);
|
||||||
if (!track) return;
|
if (!track) return;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch('/api/music/play', {
|
const res = await fetch('/api/music/play', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ track, action: 'play' })
|
body: JSON.stringify({ track: track.file, action: 'play' })
|
||||||
});
|
});
|
||||||
if (!res.ok) throw new Error(res.status);
|
if (!res.ok) throw new Error(res.status);
|
||||||
isMusicPlaying = true;
|
isMusicPlaying = true;
|
||||||
|
activeGenre = genre;
|
||||||
|
currentTrackName = track.name;
|
||||||
|
updateMusicUI();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
log('Music play failed: ' + err.message);
|
log('Music play failed: ' + err.message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async function playMusic() {
|
||||||
|
// M key toggle — if nothing playing, pick random genre
|
||||||
|
if (!activeGenre) {
|
||||||
|
const genres = Object.keys(genreMap);
|
||||||
|
if (genres.length === 0) {
|
||||||
|
await loadMusic();
|
||||||
|
const g = Object.keys(genreMap);
|
||||||
|
if (g.length === 0) return;
|
||||||
|
return playGenre(g[Math.floor(Math.random() * g.length)]);
|
||||||
|
}
|
||||||
|
return playGenre(genres[Math.floor(Math.random() * genres.length)]);
|
||||||
|
}
|
||||||
|
return playGenre(activeGenre);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
 * Ask the backend to stop music playback.
 *
 * On success the playback state is cleared (no active genre, no track name)
 * and the music UI refreshed; failures are logged, not thrown.
 */
async function stopMusic() {
    try {
        const response = await fetch('/api/music/stop', { method: 'POST' });
        if (!response.ok) {
            throw new Error(response.status);
        }

        isMusicPlaying = false;
        activeGenre = null;
        currentTrackName = '';
        updateMusicUI();
    } catch (err) {
        log('Music stop failed: ' + err.message);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Sync the music controls with the current playback state.
 *
 * Marks the button of the active genre and shows the "now playing" bar with
 * the current track name while music is playing; hides the bar otherwise.
 */
function updateMusicUI() {
    for (const button of document.querySelectorAll('.genre-btn')) {
        button.classList.toggle('active', button.dataset.genre === activeGenre);
    }

    const bar = document.getElementById('now-playing');
    const barText = document.getElementById('now-playing-text');
    if (!bar || !barText) {
        return;
    }

    const showBar = Boolean(isMusicPlaying && currentTrackName);
    if (showBar) {
        barText.textContent = currentTrackName;
    }
    bar.classList.toggle('hidden', !showBar);
}
|
||||||
|
|
||||||
|
|
||||||
let _volumeDebounce = null;
|
let _volumeDebounce = null;
|
||||||
function setMusicVolume(e) {
|
function setMusicVolume(e) {
|
||||||
const volume = e.target.value / 100;
|
const volume = e.target.value / 100;
|
||||||
@@ -1302,6 +1437,188 @@ async function clearShowTheme() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// --- Caller Model Routing ---
|
||||||
|
// Short badge labels keyed by the model name after the provider prefix
// (e.g. "anthropic/claude-sonnet-4-5" -> "claude-sonnet-4-5").
// Models missing here fall back to the first three characters, upper-cased.
const MODEL_ABBREVS = {
    'claude-sonnet-4-5': 'Son',
    // Current default models from the backend settings; without these entries
    // their badges degrade to the generic "CLA" / "GRO" fallback.
    'claude-sonnet-4.6': 'Son',
    'claude-haiku-4.5': 'Hai',
    'claude-3-haiku': 'H3',
    'grok-4': 'Grk',
    'grok-4-fast': 'GrF',
    'grok-4.1-fast': 'GrF',
    'minimax-m2-her': 'MnM',
    'mistral-small-creative': 'Mis',
    'deepseek-v3.2': 'DSk',
    'gemini-2.5-flash': 'Gem',
    'gemini-flash-1.5': 'Gm1',
    'gpt-4o-mini': '4oM',
    'gpt-4o': '4o',
    'llama-3.1-8b-instruct': 'Lla',
};
|
||||||
|
|
||||||
|
// Caller personality styles, in the order they are listed in the
// style -> model mapping grid.
const CALLER_STYLES = [
    'quiet_nervous',
    'storyteller',
    'deadpan',
    'high_energy',
    'confrontational',
    'oversharer',
    'philosopher',
    'bragger',
    'first_time',
    'emotional',
    'world_weary',
    'conspiracy',
    'comedian',
    'angry_venting',
    'sweet_earnest',
    'mysterious',
    'know_it_all',
    'rambling',
];
|
||||||
|
|
||||||
|
// Client-side copy of the caller model routing configuration.
let callerModelSettings = {
    strategy: 'single',
    pool: [],
    fallback: '',
    style_map: {},
};

// Model currently assigned to each caller: key -> model_id
let callerModelAssignments = {};
|
||||||
|
|
||||||
|
/**
 * Return a short display label for a model id.
 *
 * The part after the last "/" is looked up in MODEL_ABBREVS; unknown names
 * are shortened to their first three characters, upper-cased. A missing or
 * empty id yields an empty string.
 */
function modelAbbrev(modelId) {
    const segments = (modelId || '').split('/');
    const shortName = segments[segments.length - 1];
    const known = MODEL_ABBREVS[shortName];
    if (known) {
        return known;
    }
    return shortName.substring(0, 3).toUpperCase();
}
|
||||||
|
|
||||||
|
/**
 * Fetch the caller model routing config and refresh the dependent UI.
 *
 * A non-OK response is ignored silently; network or parsing errors are
 * reported via console.error. The style map is read from `map`, falling back
 * to `style_map`.
 */
async function loadCallerModels() {
    try {
        const response = await fetch('/api/caller-models');
        if (!response.ok) {
            return;
        }

        const payload = await response.json();
        const styleMap = payload.map || payload.style_map || {};
        callerModelSettings = {
            strategy: payload.strategy || 'single',
            pool: payload.pool || [],
            fallback: payload.fallback || '',
            style_map: styleMap,
        };
        callerModelAssignments = payload.assignments || {};

        updateCallerModelUI();
        updateCallerModelBadges();
    } catch (e) {
        console.error('Failed to load caller models:', e);
    }
}
|
||||||
|
|
||||||
|
/**
 * Replace the options of a model <select>.
 *
 * If `selected` is set but missing from `models`, it is appended as an extra
 * option so the configured value stays visible instead of being silently
 * replaced by the first entry.
 */
function _fillModelSelect(selectEl, models, selected) {
    selectEl.innerHTML = '';
    const ids = selected && !models.includes(selected) ? [...models, selected] : models;
    for (const id of ids) {
        const opt = document.createElement('option');
        opt.value = id;
        opt.textContent = id.split('/').pop();
        if (id === selected) opt.selected = true;
        selectEl.appendChild(opt);
    }
}

/**
 * Render the caller model routing settings panel from callerModelSettings.
 *
 * - The pool section is hidden for the 'single' strategy; the style map is
 *   shown only for 'style_matched'.
 * - The style grid gets one row per CALLER_STYLES entry with a model select.
 * - The fallback select offers the pool, or all known models if the pool is
 *   empty.
 */
function updateCallerModelUI() {
    const { strategy, pool, fallback } = callerModelSettings;
    const styleMapping = callerModelSettings.style_map || {};
    const available = window._openrouterModels || [];

    const strategyEl = document.getElementById('cm-strategy');
    if (strategyEl) strategyEl.value = strategy;

    const poolSection = document.getElementById('cm-pool-section');
    const styleMap = document.getElementById('cm-style-map');
    if (poolSection) poolSection.classList.toggle('hidden', strategy === 'single');
    if (styleMap) styleMap.classList.toggle('hidden', strategy !== 'style_matched');

    const poolInput = document.getElementById('cm-pool');
    if (poolInput) poolInput.value = pool.join(', ');

    // Populate style map grid
    const grid = document.getElementById('cm-style-grid');
    if (grid && strategy === 'style_matched') {
        grid.innerHTML = '';
        // An empty array is truthy, so test the length explicitly; otherwise
        // the pool fallback is never used while the model list is empty.
        const models = available.length > 0 ? available : pool;
        for (const style of CALLER_STYLES) {
            const item = document.createElement('div');
            item.className = 'cm-style-item';

            const nameEl = document.createElement('span');
            nameEl.className = 'cm-style-name';
            nameEl.textContent = style.replace(/_/g, ' ');
            item.appendChild(nameEl);

            const sel = document.createElement('select');
            sel.className = 'cm-style-select';
            sel.dataset.style = style;
            _fillModelSelect(sel, models, styleMapping[style]);

            item.appendChild(sel);
            grid.appendChild(item);
        }
    }

    // Fallback dropdown
    const fallbackEl = document.getElementById('cm-fallback');
    if (fallbackEl) {
        const previous = fallbackEl.value;
        const models = pool.length > 0 ? pool : available;
        // Prefer the configured fallback; with none configured, keep what the
        // user had selected before the re-render if it is still on offer.
        const selected = fallback || (models.includes(previous) ? previous : '');
        _fillModelSelect(fallbackEl, models, selected);
    }
}
|
||||||
|
|
||||||
|
/**
 * Show each caller's assigned model as a small tag on its button.
 *
 * Buttons whose key has an entry in callerModelAssignments get a
 * `.model-tag` span (created on demand) holding the abbreviation, with the
 * full model id as tooltip; an existing tag is removed otherwise.
 */
function updateCallerModelBadges() {
    for (const button of document.querySelectorAll('.caller-btn')) {
        const assigned = callerModelAssignments[button.dataset.key];
        let badge = button.querySelector('.model-tag');

        if (!assigned) {
            if (badge) badge.remove();
            continue;
        }

        if (!badge) {
            badge = document.createElement('span');
            badge.className = 'model-tag';
            button.appendChild(badge);
        }
        badge.textContent = modelAbbrev(assigned);
        badge.title = assigned;
    }
}
|
||||||
|
|
||||||
|
/**
 * Update the "via <model>" badge for the current call.
 *
 * A falsy `model` clears the text and tooltip and hides the badge. Does
 * nothing if the badge element is absent.
 */
function showCallerModelBadge(model) {
    const badge = document.getElementById('caller-model-badge');
    if (!badge) {
        return;
    }

    let caption = '';
    if (model) {
        caption = `via ${modelAbbrev(model)}`;
    }
    badge.textContent = caption;
    badge.title = model || '';
    badge.classList.toggle('hidden', !model);
}
|
||||||
|
|
||||||
|
/**
 * Rebuild the per-caller model override dropdown from
 * window._openrouterModels (treated as empty when unset).
 * Each option shows the model name without its provider prefix.
 */
function populateCallerModelOverride() {
    const dropdown = document.getElementById('caller-model-override');
    if (!dropdown) {
        return;
    }

    dropdown.innerHTML = '';
    (window._openrouterModels || []).forEach((modelId) => {
        const option = document.createElement('option');
        option.value = modelId;
        option.textContent = modelId.split('/').pop();
        dropdown.appendChild(option);
    });
}
|
||||||
|
|
||||||
|
/**
 * Pin a specific model to one caller.
 *
 * POSTs the override to the backend; on success records the assignment
 * locally, refreshes both badges and logs the change. Failures are logged,
 * not thrown.
 *
 * @param {string} callerKey - key identifying the caller slot
 * @param {string} modelId - full model id to assign
 */
async function overrideCallerModel(callerKey, modelId) {
    try {
        // Encode the key so characters such as "/", "?" or "#" cannot alter
        // the request path.
        const url = `/api/caller-models/${encodeURIComponent(callerKey)}`;
        const res = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model: modelId })
        });
        if (!res.ok) throw new Error(res.status);

        callerModelAssignments[callerKey] = modelId;
        showCallerModelBadge(modelId);
        updateCallerModelBadges();
        log(`Model override: ${currentCaller?.name || callerKey} → ${modelAbbrev(modelId)}`);
    } catch (err) {
        log('Model override failed: ' + err.message);
    }
}
|
||||||
|
|
||||||
|
/**
 * Collect the caller model routing form and POST it to the backend.
 *
 * The pool is parsed from a comma-separated input. The style map is read
 * from the `.cm-style-select` dropdowns when they exist. callerModelSettings
 * is updated only after a successful save; failures are logged, not thrown.
 */
async function saveCallerModels() {
    const strategy = document.getElementById('cm-strategy')?.value || 'single';
    const poolRaw = document.getElementById('cm-pool')?.value || '';
    const pool = poolRaw.split(',').map(s => s.trim()).filter(Boolean);
    const fallback = document.getElementById('cm-fallback')?.value || '';

    // The grid is only built for the 'style_matched' strategy. If it was
    // never rendered, keep the known mapping rather than posting an empty one
    // and wiping the local copy.
    const selects = document.querySelectorAll('.cm-style-select');
    const style_map = selects.length > 0
        ? {}
        : { ...(callerModelSettings.style_map || {}) };
    selects.forEach(sel => {
        if (sel.value) style_map[sel.dataset.style] = sel.value;
    });

    try {
        const res = await fetch('/api/caller-models', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ strategy, pool, fallback, map: style_map })
        });
        if (!res.ok) throw new Error(res.status);
        callerModelSettings = { strategy, pool, fallback, style_map };
    } catch (err) {
        log('Caller model save failed: ' + err.message);
    }
}
|
||||||
|
|
||||||
|
|
||||||
// --- Settings ---
|
// --- Settings ---
|
||||||
async function loadSettings() {
|
async function loadSettings() {
|
||||||
try {
|
try {
|
||||||
@@ -1357,6 +1674,8 @@ async function loadSettings() {
|
|||||||
|
|
||||||
// Category model routing
|
// Category model routing
|
||||||
const models = data.available_openrouter_models || [];
|
const models = data.available_openrouter_models || [];
|
||||||
|
window._openrouterModels = models;
|
||||||
|
populateCallerModelOverride();
|
||||||
const categoryModels = data.category_models || {};
|
const categoryModels = data.category_models || {};
|
||||||
const categories = ['caller_dialog', 'devon_monitor', 'devon_ask', 'background_gen', 'call_summary', 'news_summary'];
|
const categories = ['caller_dialog', 'devon_monitor', 'devon_ask', 'background_gen', 'call_summary', 'news_summary'];
|
||||||
for (const cat of categories) {
|
for (const cat of categories) {
|
||||||
@@ -1390,6 +1709,9 @@ async function saveSettings() {
|
|||||||
// Save audio devices
|
// Save audio devices
|
||||||
await saveAudioDevices();
|
await saveAudioDevices();
|
||||||
|
|
||||||
|
// Save caller model routing
|
||||||
|
await saveCallerModels();
|
||||||
|
|
||||||
// Collect category model routing
|
// Collect category model routing
|
||||||
const categoryModels = {};
|
const categoryModels = {};
|
||||||
const categories = ['caller_dialog', 'devon_monitor', 'devon_ask', 'background_gen', 'call_summary', 'news_summary'];
|
const categories = ['caller_dialog', 'devon_monitor', 'devon_ask', 'background_gen', 'call_summary', 'news_summary'];
|
||||||
@@ -2107,3 +2429,153 @@ async function dismissDevonSuggestion() {
|
|||||||
document.getElementById('devon-suggestion')?.classList.add('hidden');
|
document.getElementById('devon-suggestion')?.classList.add('hidden');
|
||||||
} catch (err) {}
|
} catch (err) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// --- Preflight ---
|
||||||
|
|
||||||
|
// Icon shown for each preflight status value.
const PREFLIGHT_STATUS_ICONS = {
    pass: '✓',
    warn: '⚠',
    fail: '✗',
    skip: '—',
};

// Display names for the preflight check keys; unknown keys are shown as-is.
const PREFLIGHT_CHECK_NAMES = {
    model_diversity: 'Model Diversity',
    theme_penetration: 'Theme Penetration',
    voice_age_alignment: 'Voice-Age Alignment',
    response_coherence: 'Response Coherence',
};
|
||||||
|
|
||||||
|
/**
 * Run the show preflight checks and render the outcome.
 *
 * @param {boolean} [testResponses] - when truthy, requests the endpoint with
 *   `?test_responses=true` and puts the test button into its loading state.
 *
 * Errors from the request or from rendering are shown in the status banner.
 * Does nothing if the status or checks container is missing from the page.
 */
async function runPreflight(testResponses) {
    const statusEl = document.getElementById('preflight-status');
    const checksEl = document.getElementById('preflight-checks');
    const testBtn = document.getElementById('preflight-test-btn');
    // Guard these the same way as testBtn, so missing markup does not become
    // an unhandled TypeError.
    if (!statusEl || !checksEl) return;

    const setStatus = (state, icon, text) => {
        statusEl.className = 'preflight-status ' + state;
        const iconEl = statusEl.querySelector('.preflight-status-icon');
        const textEl = statusEl.querySelector('.preflight-status-text');
        if (iconEl) iconEl.textContent = icon;
        if (textEl) textEl.textContent = text;
    };

    setStatus('loading', '...', 'Running checks...');
    checksEl.innerHTML = '';

    if (testResponses && testBtn) testBtn.classList.add('loading');

    try {
        const url = '/api/show/preflight' + (testResponses ? '?test_responses=true' : '');
        // 120000 is passed as safeFetch's third argument, presumably a
        // timeout in ms; confirm against safeFetch.
        const data = await safeFetch(url, {}, 120000);
        renderPreflightResults(data, statusEl, checksEl);
    } catch (err) {
        setStatus('fail', '✗', 'Error: ' + err.message);
    } finally {
        if (testBtn) testBtn.classList.remove('loading');
    }
}
|
||||||
|
|
||||||
|
/**
 * Render a preflight response: overall banner plus one collapsible card per
 * check.
 *
 * @param {object} data - response with `status` and a `checks` object
 * @param {HTMLElement} statusEl - banner element holding the icon/text spans
 * @param {HTMLElement} checksEl - container that receives the check cards
 */
function renderPreflightResults(data, statusEl, checksEl) {
    const overall = data.status || 'pass';
    statusEl.className = 'preflight-status ' + overall;
    statusEl.querySelector('.preflight-status-icon').textContent = PREFLIGHT_STATUS_ICONS[overall] || '✓';
    statusEl.querySelector('.preflight-status-text').textContent =
        overall === 'pass' ? 'All checks passed' :
        overall === 'warn' ? 'Passed with warnings' : 'Issues found';

    checksEl.innerHTML = '';
    const checksObj = data.checks || {};
    for (const [checkKey, rawCheck] of Object.entries(checksObj)) {
        // Tolerate a null check entry instead of throwing mid-render.
        const check = rawCheck || {};
        const status = String(check.status || 'skip');
        const name = PREFLIGHT_CHECK_NAMES[checkKey] || checkKey;

        const card = document.createElement('div');
        card.className = 'preflight-check';

        // Build the header with DOM properties, not an HTML string, so the
        // server-supplied status cannot inject markup or attributes.
        const header = document.createElement('div');
        header.className = 'preflight-check-header';

        const nameEl = document.createElement('span');
        nameEl.className = 'preflight-check-name';
        nameEl.textContent = name;

        const badge = document.createElement('span');
        badge.className = 'preflight-check-badge ' + status;
        badge.textContent = status.toUpperCase();

        header.append(nameEl, badge);

        // The detail renderers return HTML strings that they escape themselves.
        const details = document.createElement('div');
        details.className = 'preflight-check-details';
        details.innerHTML = renderCheckDetails(checkKey, check);

        header.addEventListener('click', () => {
            card.classList.toggle('open');
        });

        card.append(header, details);
        checksEl.appendChild(card);
    }
}
|
||||||
|
|
||||||
|
/**
 * Return the detail HTML for one preflight check.
 *
 * Known checks go to their dedicated renderer. The response coherence
 * renderer gets the whole check, the others only its details. Unknown checks
 * are shown as escaped, pretty-printed JSON.
 */
function renderCheckDetails(name, check) {
    const details = check.details || {};

    if (name === 'model_diversity') return renderModelDiversity(details);
    if (name === 'theme_penetration') return renderThemePenetration(details);
    if (name === 'voice_age_alignment') return renderVoiceAgeAlignment(details);
    if (name === 'response_coherence') return renderResponseCoherence(check);

    const dump = JSON.stringify(details, null, 2);
    return `<pre>${escapeHtml(dump)}</pre>`;
}
|
||||||
|
|
||||||
|
/**
 * Render the model diversity check: a caller / style / model table and,
 * when present, the share of callers on the most common model.
 *
 * @param {object} d - check details (`callers`, `max_same_model_pct`)
 * @returns {string} HTML string; every interpolated value is escaped
 */
function renderModelDiversity(d) {
    const callers = d.callers || [];
    if (!callers.length) return '<p>No callers to check.</p>';

    let html = `<table class="preflight-table">
        <thead><tr><th>Caller</th><th>Style</th><th>Model</th></tr></thead><tbody>`;
    for (const c of callers) {
        html += `<tr><td>${escapeHtml(c.name || '')}</td><td>${escapeHtml(c.style || '')}</td><td>${escapeHtml(c.model || '')}</td></tr>`;
    }
    html += '</tbody></table>';

    if (d.max_same_model_pct != null) {
        // Escaped like the other fields: this is response data going into innerHTML.
        html += `<p style="margin-top:8px">${escapeHtml(String(d.max_same_model_pct))}% on same model</p>`;
    }
    return html;
}
|
||||||
|
|
||||||
|
/**
 * Render the theme penetration check: the theme, the connected and
 * not-connected name lists, and the penetration percentage.
 *
 * @param {object} d - check details (`theme`, `connected`, `not_connected`,
 *   `penetration_pct`)
 * @returns {string} HTML string, or a "No theme set." note when `d` has
 *   nothing to show; every interpolated value is escaped
 */
function renderThemePenetration(d) {
    const joinNames = (names) => names.map(n => escapeHtml(n)).join(', ');

    let html = '';
    if (d.theme) html += `<p><strong>Theme:</strong> ${escapeHtml(d.theme)}</p>`;
    if (d.connected?.length) {
        html += `<p style="color:var(--accent-green);margin-top:6px">Connected: ${joinNames(d.connected)}</p>`;
    }
    if (d.not_connected?.length) {
        html += `<p style="color:var(--text-muted);margin-top:4px">Not connected: ${joinNames(d.not_connected)}</p>`;
    }
    if (d.penetration_pct != null) {
        // Escaped like the other fields: this is response data going into innerHTML.
        html += `<p style="margin-top:6px">${escapeHtml(String(d.penetration_pct))}% penetration</p>`;
    }
    return html || '<p>No theme set.</p>';
}
|
||||||
|
|
||||||
|
/**
 * Render the voice/age alignment check as a table; rows flagged `mismatch`
 * get the "mismatch" class.
 *
 * @param {object} d - check details (`callers` with name, age, voice,
 *   age_feel, mismatch)
 * @returns {string} HTML string; every interpolated value is escaped
 */
function renderVoiceAgeAlignment(d) {
    const callers = d.callers || [];
    if (!callers.length) return '<p>No callers to check.</p>';

    let html = `<table class="preflight-table">
        <thead><tr><th>Caller</th><th>Age</th><th>Voice</th><th>Age Feel</th></tr></thead><tbody>`;
    for (const c of callers) {
        const cls = c.mismatch ? ' class="mismatch"' : '';
        // Age may arrive as a number or a string; stringify and escape it
        // like the neighbouring cells.
        const age = escapeHtml(String(c.age || ''));
        html += `<tr${cls}><td>${escapeHtml(c.name || '')}</td><td>${age}</td><td>${escapeHtml(c.voice || '')}</td><td>${escapeHtml(c.age_feel || '')}</td></tr>`;
    }
    html += '</tbody></table>';
    return html;
}
|
||||||
|
|
||||||
|
/**
 * Render the response coherence check: per-caller word counts for the test
 * responses, an optional snippet row, and a pass summary.
 *
 * @param {object} check - the full check; a `status` of 'skip' renders a
 *   hint to use the Test Responses button instead of results
 * @returns {string} HTML string; every interpolated value is escaped
 */
function renderResponseCoherence(check) {
    // Threshold quoted in the summary line only; pass/fail comes from the
    // `pass` flag of each result.
    const MIN_WORDS_PER_RESPONSE = 50;

    if (check.status === 'skip') {
        return '<p>Use <strong>Test Responses</strong> button to run this check.</p>';
    }
    const d = check.details || {};
    const results = d.results || [];
    if (!results.length) return '<p>No test results.</p>';

    // Counts come from the response too; stringify and escape them before
    // they reach innerHTML.
    const count = (value) => escapeHtml(String(value || 0));

    let html = `<table class="preflight-table">
        <thead><tr><th>Caller</th><th>Model</th><th>R1</th><th>R2</th><th>Avg</th><th></th></tr></thead><tbody>`;
    for (const c of results) {
        const nameCell = `<td>${escapeHtml(c.name || '')}</td>`;
        const modelCell = `<td>${escapeHtml(c.model || '')}</td>`;

        if (c.error) {
            // c.error is not guaranteed to be a string.
            html += `<tr class="mismatch">${nameCell}${modelCell}<td colspan="3">${escapeHtml(String(c.error))}</td><td>✗</td></tr>`;
            continue;
        }

        const cls = c.pass ? '' : ' class="mismatch"';
        html += `<tr${cls}>${nameCell}${modelCell}<td>${count(c.r1_words)}</td><td>${count(c.r2_words)}</td><td>${count(c.word_count)}</td><td>${c.pass ? '✓' : '✗'}</td></tr>`;
        if (c.snippet) {
            html += `<tr><td colspan="6" style="color:var(--text-muted);font-size:0.75rem;padding-left:16px">${escapeHtml(c.snippet)}</td></tr>`;
        }
    }
    html += '</tbody></table>';

    const passed = results.filter(r => r.pass).length;
    html += `<p style="margin-top:8px">${passed}/${results.length} callers passed (min ${MIN_WORDS_PER_RESPONSE} words per response)</p>`;
    return html;
}
|
||||||
|
|||||||
+142
-114
@@ -23,6 +23,8 @@ import tempfile
|
|||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
@@ -46,6 +48,50 @@ WIDTH = 1080
|
|||||||
HEIGHT = 1920
|
HEIGHT = 1920
|
||||||
|
|
||||||
|
|
||||||
|
def _llm_request(prompt: str, max_tokens: int = 2048, temperature: float = 0.3,
                 timeout: int = 60) -> str | None:
    """Send a single-message chat completion request to OpenRouter.

    The request is attempted at most twice. A non-200 response, a timeout, or
    any other exception (including an unexpected response shape) is printed;
    after the first failure the function sleeps 5 seconds and retries once.

    Args:
        prompt: Text sent as the only user message.
        max_tokens: Completion token limit forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The stripped message content, or ``None`` if both attempts fail.
    """
    max_attempts = 2
    for attempt in range(max_attempts):
        try:
            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-sonnet-4-5",
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": max_tokens,
                    "temperature": temperature,
                },
                timeout=timeout,
            )
            if response.status_code == 200:
                return response.json()["choices"][0]["message"]["content"].strip()
            print(f" LLM error (HTTP {response.status_code}): {response.text[:200]}")
        except requests.Timeout:
            print(f" LLM request timed out ({timeout}s)")
        except Exception as e:
            print(f" LLM request failed: {e}")

        # Every failure path lands here: give up after the final attempt,
        # otherwise pause before the retry.
        if attempt == max_attempts - 1:
            return None
        print(" Retrying in 5s...")
        time.sleep(5)
    return None
|
||||||
|
|
||||||
|
|
||||||
def _build_whisper_prompt(labeled_transcript: str) -> str:
|
def _build_whisper_prompt(labeled_transcript: str) -> str:
|
||||||
"""Build an initial_prompt for Whisper from the labeled transcript.
|
"""Build an initial_prompt for Whisper from the labeled transcript.
|
||||||
|
|
||||||
@@ -186,7 +232,12 @@ def refine_clip_timestamps(audio_path: str, clips: list[dict],
|
|||||||
"ffmpeg", "-y", "-ss", str(seg_start), "-t", str(seg_end - seg_start),
|
"ffmpeg", "-y", "-ss", str(seg_start), "-t", str(seg_end - seg_start),
|
||||||
"-i", audio_path, "-ar", "16000", "-ac", "1", seg_path,
|
"-i", audio_path, "-ar", "16000", "-ac", "1", seg_path,
|
||||||
]
|
]
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
try:
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
print(f" Clip {i+1}: ffmpeg timed out (120s), skipping")
|
||||||
|
refined[i] = []
|
||||||
|
continue
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
print(f" Clip {i+1}: Failed to extract segment")
|
print(f" Clip {i+1}: Failed to extract segment")
|
||||||
refined[i] = []
|
refined[i] = []
|
||||||
@@ -279,25 +330,11 @@ IMPORTANT:
|
|||||||
Respond with ONLY a JSON array, no markdown or explanation:
|
Respond with ONLY a JSON array, no markdown or explanation:
|
||||||
[{{"title": "...", "start_time": 0.0, "end_time": 0.0, "caption_text": "..."}}]"""
|
[{{"title": "...", "start_time": 0.0, "end_time": 0.0, "caption_text": "..."}}]"""
|
||||||
|
|
||||||
response = requests.post(
|
content = _llm_request(prompt, max_tokens=2048, temperature=0.3, timeout=60)
|
||||||
"https://openrouter.ai/api/v1/chat/completions",
|
if content is None:
|
||||||
headers={
|
print(" Failed to get clip selections from LLM — aborting")
|
||||||
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
return []
|
||||||
"Content-Type": "application/json",
|
|
||||||
},
|
|
||||||
json={
|
|
||||||
"model": "anthropic/claude-sonnet-4-5",
|
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
|
||||||
"max_tokens": 2048,
|
|
||||||
"temperature": 0.3,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status_code != 200:
|
|
||||||
print(f"Error from OpenRouter: {response.text}")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
content = response.json()["choices"][0]["message"]["content"].strip()
|
|
||||||
if content.startswith("```"):
|
if content.startswith("```"):
|
||||||
content = re.sub(r"^```(?:json)?\n?", "", content)
|
content = re.sub(r"^```(?:json)?\n?", "", content)
|
||||||
content = re.sub(r"\n?```$", "", content)
|
content = re.sub(r"\n?```$", "", content)
|
||||||
@@ -307,7 +344,7 @@ Respond with ONLY a JSON array, no markdown or explanation:
|
|||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
print(f"Error parsing LLM response: {e}")
|
print(f"Error parsing LLM response: {e}")
|
||||||
print(f"Response was: {content[:500]}")
|
print(f"Response was: {content[:500]}")
|
||||||
sys.exit(1)
|
return []
|
||||||
|
|
||||||
# Validate and clamp durations
|
# Validate and clamp durations
|
||||||
validated = []
|
validated = []
|
||||||
@@ -349,25 +386,11 @@ For each clip, generate:
|
|||||||
Respond with ONLY a JSON array matching the clip order:
|
Respond with ONLY a JSON array matching the clip order:
|
||||||
[{{"description": "...", "hashtags": ["#tag1", "#tag2", ...]}}]"""
|
[{{"description": "...", "hashtags": ["#tag1", "#tag2", ...]}}]"""
|
||||||
|
|
||||||
response = requests.post(
|
content = _llm_request(prompt, max_tokens=2048, temperature=0.7, timeout=60)
|
||||||
"https://openrouter.ai/api/v1/chat/completions",
|
if content is None:
|
||||||
headers={
|
print(" Failed to generate social metadata — skipping")
|
||||||
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
},
|
|
||||||
json={
|
|
||||||
"model": "anthropic/claude-sonnet-4-5",
|
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
|
||||||
"max_tokens": 2048,
|
|
||||||
"temperature": 0.7,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status_code != 200:
|
|
||||||
print(f"Error from OpenRouter: {response.text}")
|
|
||||||
return clips
|
return clips
|
||||||
|
|
||||||
content = response.json()["choices"][0]["message"]["content"].strip()
|
|
||||||
if content.startswith("```"):
|
if content.startswith("```"):
|
||||||
content = re.sub(r"^```(?:json)?\n?", "", content)
|
content = re.sub(r"^```(?:json)?\n?", "", content)
|
||||||
content = re.sub(r"\n?```$", "", content)
|
content = re.sub(r"\n?```$", "", content)
|
||||||
@@ -777,43 +800,25 @@ RULES:
|
|||||||
RAW TEXT ({len(words)} words):
|
RAW TEXT ({len(words)} words):
|
||||||
{raw_text}"""
|
{raw_text}"""
|
||||||
|
|
||||||
try:
|
polished = _llm_request(prompt, max_tokens=2048, temperature=0, timeout=30)
|
||||||
response = requests.post(
|
if polished is None:
|
||||||
"https://openrouter.ai/api/v1/chat/completions",
|
print(f" Polish failed, using raw text")
|
||||||
headers={
|
return words
|
||||||
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
},
|
|
||||||
json={
|
|
||||||
"model": "anthropic/claude-sonnet-4-5",
|
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
|
||||||
"max_tokens": 2048,
|
|
||||||
"temperature": 0,
|
|
||||||
},
|
|
||||||
timeout=30,
|
|
||||||
)
|
|
||||||
if response.status_code != 200:
|
|
||||||
print(f" Polish failed ({response.status_code}), using raw text")
|
|
||||||
return words
|
|
||||||
|
|
||||||
polished = response.json()["choices"][0]["message"]["content"].strip()
|
polished_words = polished.split()
|
||||||
polished_words = polished.split()
|
|
||||||
|
|
||||||
if len(polished_words) != len(words):
|
if len(polished_words) != len(words):
|
||||||
print(f" Polish word count mismatch ({len(polished_words)} vs {len(words)}), using raw text")
|
print(f" Polish word count mismatch ({len(polished_words)} vs {len(words)}), using raw text")
|
||||||
return words
|
return words
|
||||||
|
|
||||||
changes = 0
|
changes = 0
|
||||||
for i, pw in enumerate(polished_words):
|
for i, pw in enumerate(polished_words):
|
||||||
if pw != words[i]["word"]:
|
if pw != words[i]["word"]:
|
||||||
changes += 1
|
changes += 1
|
||||||
words[i]["word"] = pw
|
words[i]["word"] = pw
|
||||||
|
|
||||||
if changes:
|
if changes:
|
||||||
print(f" Polished {changes} words")
|
print(f" Polished {changes} words")
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f" Polish error: {e}")
|
|
||||||
|
|
||||||
return words
|
return words
|
||||||
|
|
||||||
@@ -898,8 +903,12 @@ def extract_clip_audio(audio_path: str, start: float, end: float,
|
|||||||
output_path,
|
output_path,
|
||||||
]
|
]
|
||||||
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
try:
|
||||||
return result.returncode == 0
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
|
||||||
|
return result.returncode == 0
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
print(f" ffmpeg audio extraction timed out (120s)")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def generate_background_image(episode_number: int, clip_title: str,
|
def generate_background_image(episode_number: int, clip_title: str,
|
||||||
@@ -1153,7 +1162,11 @@ def generate_clip_video(audio_path: str, background_path: str,
|
|||||||
output_path,
|
output_path,
|
||||||
]
|
]
|
||||||
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
try:
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
print(f" ffmpeg video generation timed out (300s)")
|
||||||
|
return False
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
print(f" ffmpeg error: {result.stderr[-300:]}")
|
print(f" ffmpeg error: {result.stderr[-300:]}")
|
||||||
return False
|
return False
|
||||||
@@ -1235,7 +1248,12 @@ def generate_clip_video_remotion(
|
|||||||
output_path,
|
output_path,
|
||||||
]
|
]
|
||||||
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True, cwd=str(REMOTION_DIR))
|
try:
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True, cwd=str(REMOTION_DIR), timeout=180)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
props_path.unlink(missing_ok=True)
|
||||||
|
print(f" Remotion render timed out (180s)")
|
||||||
|
return False
|
||||||
props_path.unlink(missing_ok=True)
|
props_path.unlink(missing_ok=True)
|
||||||
|
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
@@ -1488,6 +1506,9 @@ def main():
|
|||||||
print(f"\n[3/{step_total}] Selecting {args.count} best moments with LLM...")
|
print(f"\n[3/{step_total}] Selecting {args.count} best moments with LLM...")
|
||||||
clips = select_clips_with_llm(transcript_text, labeled_transcript,
|
clips = select_clips_with_llm(transcript_text, labeled_transcript,
|
||||||
chapters_json, args.count)
|
chapters_json, args.count)
|
||||||
|
if not clips:
|
||||||
|
print("\nNo clips selected — aborting.")
|
||||||
|
return
|
||||||
|
|
||||||
# Snap to sentence boundaries so clips don't start/end mid-sentence
|
# Snap to sentence boundaries so clips don't start/end mid-sentence
|
||||||
clips = snap_to_sentences(clips, segments)
|
clips = snap_to_sentences(clips, segments)
|
||||||
@@ -1524,14 +1545,18 @@ def main():
|
|||||||
extract_step = 6 if two_pass else 5
|
extract_step = 6 if two_pass else 5
|
||||||
print(f"\n[{extract_step}/{step_total}] Extracting audio clips...")
|
print(f"\n[{extract_step}/{step_total}] Extracting audio clips...")
|
||||||
for i, clip in enumerate(clips):
|
for i, clip in enumerate(clips):
|
||||||
|
print(f" [{i+1}/{len(clips)}] \"{clip['title']}\"...")
|
||||||
slug = slugify(clip["title"])
|
slug = slugify(clip["title"])
|
||||||
mp3_path = output_dir / f"clip-{i+1}-{slug}.mp3"
|
mp3_path = output_dir / f"clip-{i+1}-{slug}.mp3"
|
||||||
|
|
||||||
if extract_clip_audio(str(audio_path), clip["start_time"], clip["end_time"],
|
try:
|
||||||
str(mp3_path)):
|
if extract_clip_audio(str(audio_path), clip["start_time"], clip["end_time"],
|
||||||
print(f" Clip {i+1} audio: {mp3_path.name}")
|
str(mp3_path)):
|
||||||
else:
|
print(f" Clip {i+1} audio: {mp3_path.name}")
|
||||||
print(f" Error extracting clip {i+1} audio")
|
else:
|
||||||
|
print(f" Error extracting clip {i+1} audio — skipping")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" Clip {i+1} audio failed: {e} — skipping")
|
||||||
|
|
||||||
video_step = 7 if two_pass else 6
|
video_step = 7 if two_pass else 6
|
||||||
if args.audio_only:
|
if args.audio_only:
|
||||||
@@ -1553,49 +1578,52 @@ def main():
|
|||||||
mp4_path = output_dir / f"clip-{i+1}-{slug}.mp4"
|
mp4_path = output_dir / f"clip-{i+1}-{slug}.mp4"
|
||||||
duration = clip["end_time"] - clip["start_time"]
|
duration = clip["end_time"] - clip["start_time"]
|
||||||
|
|
||||||
print(f" Clip {i+1}: Generating video...")
|
print(f" [{i+1}/{len(clips)}] \"{clip['title']}\" ({duration:.0f}s)...")
|
||||||
|
|
||||||
# Get word timestamps — use refined segments if available
|
try:
|
||||||
word_source = refined[i] if (two_pass and i in refined and refined[i]) else segments
|
# Get word timestamps — use refined segments if available
|
||||||
clip_words = get_words_in_range(word_source, clip["start_time"], clip["end_time"])
|
word_source = refined[i] if (two_pass and i in refined and refined[i]) else segments
|
||||||
|
clip_words = get_words_in_range(word_source, clip["start_time"], clip["end_time"])
|
||||||
|
|
||||||
# Add speaker labels
|
# Add speaker labels
|
||||||
clip_words = add_speaker_labels(clip_words, labeled_transcript,
|
clip_words = add_speaker_labels(clip_words, labeled_transcript,
|
||||||
clip["start_time"], clip["end_time"],
|
clip["start_time"], clip["end_time"],
|
||||||
word_source)
|
word_source)
|
||||||
|
|
||||||
# Polish text with LLM (fix punctuation, capitalization, mishearings)
|
# Polish text with LLM (fix punctuation, capitalization, mishearings)
|
||||||
clip_words = polish_clip_words(clip_words, labeled_transcript)
|
clip_words = polish_clip_words(clip_words, labeled_transcript)
|
||||||
|
|
||||||
# Group words into timed caption lines
|
# Group words into timed caption lines
|
||||||
caption_lines = group_words_into_lines(
|
caption_lines = group_words_into_lines(
|
||||||
clip_words, clip["start_time"], duration
|
clip_words, clip["start_time"], duration
|
||||||
)
|
)
|
||||||
|
|
||||||
if use_remotion:
|
if use_remotion:
|
||||||
if generate_clip_video_remotion(
|
if generate_clip_video_remotion(
|
||||||
str(mp3_path), caption_lines, clip["start_time"],
|
str(mp3_path), caption_lines, clip["start_time"],
|
||||||
clip["title"], episode_number, str(mp4_path), duration
|
clip["title"], episode_number, str(mp4_path), duration
|
||||||
):
|
):
|
||||||
file_size = mp4_path.stat().st_size / (1024 * 1024)
|
file_size = mp4_path.stat().st_size / (1024 * 1024)
|
||||||
print(f" Clip {i+1} video: {mp4_path.name} ({file_size:.1f} MB)")
|
print(f" Clip {i+1} video: {mp4_path.name} ({file_size:.1f} MB)")
|
||||||
|
else:
|
||||||
|
print(f" Clip {i+1} video failed (Remotion) — skipping")
|
||||||
else:
|
else:
|
||||||
print(f" Error generating clip {i+1} video (Remotion)")
|
# Legacy PIL+ffmpeg renderer
|
||||||
else:
|
bg_path = str(tmp_dir / f"bg_{i}.png")
|
||||||
# Legacy PIL+ffmpeg renderer
|
generate_background_image(episode_number, clip["title"], bg_path)
|
||||||
bg_path = str(tmp_dir / f"bg_{i}.png")
|
|
||||||
generate_background_image(episode_number, clip["title"], bg_path)
|
|
||||||
|
|
||||||
clip_tmp = tmp_dir / f"clip_{i}"
|
clip_tmp = tmp_dir / f"clip_{i}"
|
||||||
clip_tmp.mkdir(exist_ok=True)
|
clip_tmp.mkdir(exist_ok=True)
|
||||||
|
|
||||||
if generate_clip_video(str(mp3_path), bg_path, caption_lines,
|
if generate_clip_video(str(mp3_path), bg_path, caption_lines,
|
||||||
clip["start_time"], str(mp4_path),
|
clip["start_time"], str(mp4_path),
|
||||||
duration, clip_tmp):
|
duration, clip_tmp):
|
||||||
file_size = mp4_path.stat().st_size / (1024 * 1024)
|
file_size = mp4_path.stat().st_size / (1024 * 1024)
|
||||||
print(f" Clip {i+1} video: {mp4_path.name} ({file_size:.1f} MB)")
|
print(f" Clip {i+1} video: {mp4_path.name} ({file_size:.1f} MB)")
|
||||||
else:
|
else:
|
||||||
print(f" Error generating clip {i+1} video")
|
print(f" Clip {i+1} video failed (ffmpeg) — skipping")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" Clip {i+1} video failed: {e} — skipping")
|
||||||
|
|
||||||
# Save clips metadata for social upload
|
# Save clips metadata for social upload
|
||||||
metadata_path = output_dir / "clips-metadata.json"
|
metadata_path = output_dir / "clips-metadata.json"
|
||||||
|
|||||||
+184
-123
@@ -18,6 +18,7 @@ local KEEP_PAD_SEC = 0.5 -- leave this much silence on each side of a cut
|
|||||||
local BLOCK_SEC = 0.1 -- analysis block size (100ms)
|
local BLOCK_SEC = 0.1 -- analysis block size (100ms)
|
||||||
local SAMPLE_RATE = 48000
|
local SAMPLE_RATE = 48000
|
||||||
local CHECK_TRACKS = {1, 2, 3, 4} -- 1-indexed: Host, Devon, AI Caller, Live Caller
|
local CHECK_TRACKS = {1, 2, 3, 4} -- 1-indexed: Host, Devon, AI Caller, Live Caller
|
||||||
|
local SFX_TRACK = 5 -- 1-indexed: SFX track
|
||||||
local IDENTS_TRACK = 6 -- 1-indexed: Idents track
|
local IDENTS_TRACK = 6 -- 1-indexed: Idents track
|
||||||
local ADS_TRACK = 7 -- 1-indexed: Ads track
|
local ADS_TRACK = 7 -- 1-indexed: Ads track
|
||||||
local MUSIC_TRACK = 8 -- 1-indexed: Music track
|
local MUSIC_TRACK = 8 -- 1-indexed: Music track
|
||||||
@@ -466,7 +467,10 @@ local function phase1_strip_silence(dialog_regions)
|
|||||||
for _, r in ipairs(get_regions_by_type("^IDENT%s+%d+$")) do table.insert(protected_regions, r) end
|
for _, r in ipairs(get_regions_by_type("^IDENT%s+%d+$")) do table.insert(protected_regions, r) end
|
||||||
table.sort(protected_regions, function(a, b) return a.start_pos < b.start_pos end)
|
table.sort(protected_regions, function(a, b) return a.start_pos < b.start_pos end)
|
||||||
if #protected_regions > 0 then
|
if #protected_regions > 0 then
|
||||||
log(" Protecting " .. #protected_regions .. " AD/IDENT region(s) from silence removal")
|
log(" Protecting " .. #protected_regions .. " AD/IDENT region(s) from silence removal:")
|
||||||
|
for _, pr in ipairs(protected_regions) do
|
||||||
|
log(" " .. pr.name .. " at " .. string.format("%.1f", pr.start_pos) .. "-" .. string.format("%.1f", pr.end_pos) .. "s")
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
log("Phase 1: Analyzing using " .. tracks_loaded .. "/" .. #CHECK_TRACKS .. " voice tracks")
|
log("Phase 1: Analyzing using " .. tracks_loaded .. "/" .. #CHECK_TRACKS .. " voice tracks")
|
||||||
@@ -512,6 +516,11 @@ local function phase1_strip_silence(dialog_regions)
|
|||||||
break
|
break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
-- Preserve the very first silence (music intro before host starts talking)
|
||||||
|
if not protected and ri == 1 and #removals == 0 and s.start_pos <= rgn.start_pos + 1.0 then
|
||||||
|
protected = true
|
||||||
|
log(" KEEP " .. string.format("%.1f", rm_end - rm_start) .. "s at " .. string.format("%.1f", s.start_pos) .. "-" .. string.format("%.1f", s.end_pos) .. " (music intro)")
|
||||||
|
end
|
||||||
if not protected then
|
if not protected then
|
||||||
table.insert(removals, {start_pos = rm_start, end_pos = rm_end})
|
table.insert(removals, {start_pos = rm_start, end_pos = rm_end})
|
||||||
local tag = s.is_transition and " [transition]" or ""
|
local tag = s.is_transition and " [transition]" or ""
|
||||||
@@ -561,7 +570,6 @@ local function phase1_strip_silence(dialog_regions)
|
|||||||
if (t + 1) == MUSIC_TRACK then goto next_track end
|
if (t + 1) == MUSIC_TRACK then goto next_track end
|
||||||
local track = reaper.GetTrack(0, t)
|
local track = reaper.GetTrack(0, t)
|
||||||
|
|
||||||
-- Split and delete the silent portion from items that span r.start_pos
|
|
||||||
local item = find_item_at(track, r.start_pos)
|
local item = find_item_at(track, r.start_pos)
|
||||||
if item then
|
if item then
|
||||||
local right = reaper.SplitMediaItem(item, r.start_pos)
|
local right = reaper.SplitMediaItem(item, r.start_pos)
|
||||||
@@ -571,36 +579,10 @@ local function phase1_strip_silence(dialog_regions)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
-- Handle sparse track items that START within the removal range
|
|
||||||
-- (not found by find_item_at since they don't contain r.start_pos)
|
|
||||||
for j = reaper.CountTrackMediaItems(track) - 1, 0, -1 do
|
|
||||||
local check = reaper.GetTrackMediaItem(track, j)
|
|
||||||
local cpos = reaper.GetMediaItemInfo_Value(check, "D_POSITION")
|
|
||||||
if cpos >= r.start_pos and cpos < r.end_pos then
|
|
||||||
local clen = reaper.GetMediaItemInfo_Value(check, "D_LENGTH")
|
|
||||||
local cend = cpos + clen
|
|
||||||
if cend <= r.end_pos then
|
|
||||||
-- Entirely within removal — delete
|
|
||||||
reaper.DeleteTrackMediaItem(track, check)
|
|
||||||
else
|
|
||||||
-- Starts in removal but extends past — trim start to r.end_pos
|
|
||||||
local trim = r.end_pos - cpos
|
|
||||||
local take = reaper.GetActiveTake(check)
|
|
||||||
if take then
|
|
||||||
local offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
|
|
||||||
reaper.SetMediaItemTakeInfo_Value(take, "D_STARTOFFS", offset + trim)
|
|
||||||
end
|
|
||||||
reaper.SetMediaItemInfo_Value(check, "D_LENGTH", cend - r.end_pos)
|
|
||||||
reaper.SetMediaItemInfo_Value(check, "D_POSITION", r.end_pos)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
-- Shift items AFTER the removal (use r.end_pos, not r.start_pos)
|
|
||||||
for j = 0, reaper.CountTrackMediaItems(track) - 1 do
|
for j = 0, reaper.CountTrackMediaItems(track) - 1 do
|
||||||
local shift_item = reaper.GetTrackMediaItem(track, j)
|
local shift_item = reaper.GetTrackMediaItem(track, j)
|
||||||
local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
|
local pos = reaper.GetMediaItemInfo_Value(shift_item, "D_POSITION")
|
||||||
if pos >= r.end_pos then
|
if pos >= r.start_pos then
|
||||||
reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
|
reaper.SetMediaItemInfo_Value(shift_item, "D_POSITION", pos - remove_len)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -629,64 +611,88 @@ end
|
|||||||
-- Phase 2: Normalize AD/IDENT volume to match dialog
|
-- Phase 2: Normalize AD/IDENT volume to match dialog
|
||||||
---------------------------------------------------------------------------
|
---------------------------------------------------------------------------
|
||||||
|
|
||||||
local function normalize_track_regions(track_idx, regions, target_db)
|
local function normalize_track_items(track_idx, target_db, label)
|
||||||
|
-- Normalize all items on a track that have audible content.
|
||||||
|
-- Uses direct WAV reading (not audio accessor) so it works after Phase 1 splits.
|
||||||
local track = reaper.GetTrack(0, track_idx - 1)
|
local track = reaper.GetTrack(0, track_idx - 1)
|
||||||
if not track or reaper.CountTrackMediaItems(track) == 0 then return end
|
if not track then
|
||||||
|
log(" " .. label .. ": track " .. track_idx .. " does not exist")
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
for _, rgn in ipairs(regions) do
|
local item_count = reaper.CountTrackMediaItems(track)
|
||||||
local item = find_item_at(track, rgn.start_pos)
|
log(" " .. label .. ": " .. item_count .. " item(s) on track " .. track_idx)
|
||||||
if not item then goto next_region end
|
if item_count == 0 then return end
|
||||||
|
|
||||||
local item_start = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
|
local ta = get_track_audio(track_idx)
|
||||||
|
if not ta then
|
||||||
|
log(" " .. label .. ": get_track_audio() returned nil — no readable WAV sources")
|
||||||
|
return
|
||||||
|
end
|
||||||
|
log(" " .. label .. ": " .. #ta.segments .. " WAV segment(s), span " .. string.format("%.1f", ta.item_pos) .. "-" .. string.format("%.1f", ta.item_end) .. "s")
|
||||||
|
|
||||||
local segment = item
|
local adjusted = 0
|
||||||
if item_start < rgn.start_pos - 0.01 then
|
local skipped_silent = 0
|
||||||
segment = reaper.SplitMediaItem(item, rgn.start_pos)
|
local skipped_small = 0
|
||||||
if not segment then goto next_region end
|
for i = 0, item_count - 1 do
|
||||||
end
|
local item = reaper.GetTrackMediaItem(track, i)
|
||||||
local seg_end = reaper.GetMediaItemInfo_Value(segment, "D_POSITION")
|
local item_pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
|
||||||
+ reaper.GetMediaItemInfo_Value(segment, "D_LENGTH")
|
local item_len = reaper.GetMediaItemInfo_Value(item, "D_LENGTH")
|
||||||
if rgn.end_pos < seg_end - 0.01 then
|
local item_end = item_pos + item_len
|
||||||
reaper.SplitMediaItem(segment, rgn.end_pos)
|
|
||||||
end
|
|
||||||
|
|
||||||
local take = reaper.GetActiveTake(segment)
|
|
||||||
if not take then goto next_region end
|
|
||||||
|
|
||||||
local seg_pos = reaper.GetMediaItemInfo_Value(segment, "D_POSITION")
|
|
||||||
local seg_len = reaper.GetMediaItemInfo_Value(segment, "D_LENGTH")
|
|
||||||
local seg_offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
|
|
||||||
local accessor = reaper.CreateTakeAudioAccessor(take)
|
|
||||||
|
|
||||||
|
-- Measure RMS of audible content in this item
|
||||||
local sum_sq = 0
|
local sum_sq = 0
|
||||||
local count = 0
|
local count = 0
|
||||||
local t = seg_pos
|
local total_blocks = 0
|
||||||
while t < seg_pos + seg_len do
|
local t = item_pos
|
||||||
local source_time = t - seg_pos + seg_offset
|
while t < item_end do
|
||||||
local buf = reaper.new_array(BLOCK_SAMPLES)
|
local peak, s_sq = read_block_peak_rms(ta, t)
|
||||||
reaper.GetAudioAccessorSamples(accessor, SAMPLE_RATE, 1, source_time, BLOCK_SAMPLES, buf)
|
total_blocks = total_blocks + 1
|
||||||
for i = 1, BLOCK_SAMPLES do
|
if peak >= THRESHOLD then
|
||||||
sum_sq = sum_sq + buf[i] * buf[i]
|
sum_sq = sum_sq + s_sq
|
||||||
|
count = count + BLOCK_SAMPLES
|
||||||
end
|
end
|
||||||
count = count + BLOCK_SAMPLES
|
|
||||||
t = t + BLOCK_SEC
|
t = t + BLOCK_SEC
|
||||||
end
|
end
|
||||||
reaper.DestroyAudioAccessor(accessor)
|
|
||||||
|
|
||||||
|
local audible_blocks = math.floor(count / BLOCK_SAMPLES)
|
||||||
if count > 0 then
|
if count > 0 then
|
||||||
local item_rms = math.sqrt(sum_sq / count)
|
local item_rms = math.sqrt(sum_sq / count)
|
||||||
if item_rms > 0 then
|
if item_rms > 0 then
|
||||||
local item_db = 20 * math.log(item_rms, 10)
|
local item_db = 20 * math.log(item_rms, 10)
|
||||||
local gain_db = target_db - item_db
|
local gain_db = target_db - item_db
|
||||||
local gain_linear = 10 ^ (gain_db / 20)
|
local current_vol = reaper.GetMediaItemInfo_Value(item, "D_VOL")
|
||||||
local current_vol = reaper.GetMediaItemInfo_Value(segment, "D_VOL")
|
log(" " .. label .. " item " .. (i+1) .. "/" .. item_count
|
||||||
reaper.SetMediaItemInfo_Value(segment, "D_VOL", current_vol * gain_linear)
|
.. " pos=" .. string.format("%.1f", item_pos) .. "s"
|
||||||
log(" " .. rgn.name .. ": " .. string.format("%+.1f", gain_db) .. "dB adjustment")
|
.. " len=" .. string.format("%.1f", item_len) .. "s"
|
||||||
|
.. " blocks=" .. total_blocks .. "/" .. audible_blocks .. " audible"
|
||||||
|
.. " RMS=" .. string.format("%.1f", item_db) .. "dB"
|
||||||
|
.. " target=" .. string.format("%.1f", target_db) .. "dB"
|
||||||
|
.. " gain=" .. string.format("%+.1f", gain_db) .. "dB"
|
||||||
|
.. " vol=" .. string.format("%.3f", current_vol))
|
||||||
|
-- Only adjust if the difference is significant (> 1dB)
|
||||||
|
if math.abs(gain_db) > 1.0 then
|
||||||
|
local gain_linear = 10 ^ (gain_db / 20)
|
||||||
|
reaper.SetMediaItemInfo_Value(item, "D_VOL", current_vol * gain_linear)
|
||||||
|
log(" -> APPLIED: vol " .. string.format("%.3f", current_vol) .. " -> " .. string.format("%.3f", current_vol * gain_linear))
|
||||||
|
adjusted = adjusted + 1
|
||||||
|
else
|
||||||
|
log(" -> SKIPPED: gain within 1dB threshold")
|
||||||
|
skipped_small = skipped_small + 1
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
else
|
||||||
|
log(" " .. label .. " item " .. (i+1) .. "/" .. item_count
|
||||||
|
.. " pos=" .. string.format("%.1f", item_pos) .. "s"
|
||||||
|
.. " len=" .. string.format("%.1f", item_len) .. "s"
|
||||||
|
.. " blocks=" .. total_blocks
|
||||||
|
.. " — NO AUDIBLE BLOCKS (all below " .. SILENCE_DB .. "dB)")
|
||||||
|
skipped_silent = skipped_silent + 1
|
||||||
end
|
end
|
||||||
|
|
||||||
::next_region::
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
destroy_track_audio(ta)
|
||||||
|
log(" " .. label .. " RESULT: " .. adjusted .. " adjusted, " .. skipped_small .. " within 1dB, " .. skipped_silent .. " silent")
|
||||||
end
|
end
|
||||||
|
|
||||||
local function normalize_music_track(dialog_regions, target_db)
|
local function normalize_music_track(dialog_regions, target_db)
|
||||||
@@ -776,22 +782,25 @@ local function phase2_normalize(dialog_regions, ad_regions, ident_regions, dialo
|
|||||||
local ad_ident_target = dialog_rms_db + AD_IDENT_OFFSET_DB
|
local ad_ident_target = dialog_rms_db + AD_IDENT_OFFSET_DB
|
||||||
log("Phase 2: AD/IDENT target = " .. string.format("%.1f", ad_ident_target) .. " dBFS (" .. AD_IDENT_OFFSET_DB .. "dB offset from dialog)")
|
log("Phase 2: AD/IDENT target = " .. string.format("%.1f", ad_ident_target) .. " dBFS (" .. AD_IDENT_OFFSET_DB .. "dB offset from dialog)")
|
||||||
|
|
||||||
if #ad_regions > 0 then
|
progress_detail = "Ads"
|
||||||
progress_detail = "Ads"
|
coroutine.yield()
|
||||||
coroutine.yield()
|
log("Phase 2: Normalizing ads track...")
|
||||||
log("Phase 2: Normalizing " .. #ad_regions .. " AD region(s)...")
|
normalize_track_items(ADS_TRACK, ad_ident_target, "Ads")
|
||||||
normalize_track_regions(ADS_TRACK, ad_regions, ad_ident_target)
|
|
||||||
end
|
progress_detail = "Idents"
|
||||||
if #ident_regions > 0 then
|
progress_pct = 0.25
|
||||||
progress_detail = "Idents"
|
coroutine.yield()
|
||||||
progress_pct = 0.33
|
log("Phase 2: Normalizing idents track...")
|
||||||
coroutine.yield()
|
normalize_track_items(IDENTS_TRACK, ad_ident_target, "Idents")
|
||||||
log("Phase 2: Normalizing " .. #ident_regions .. " IDENT region(s)...")
|
|
||||||
normalize_track_regions(IDENTS_TRACK, ident_regions, ad_ident_target)
|
progress_detail = "SFX"
|
||||||
end
|
progress_pct = 0.50
|
||||||
|
coroutine.yield()
|
||||||
|
log("Phase 2: Normalizing SFX track...")
|
||||||
|
normalize_track_items(SFX_TRACK, ad_ident_target, "SFX")
|
||||||
|
|
||||||
progress_detail = "Music"
|
progress_detail = "Music"
|
||||||
progress_pct = 0.66
|
progress_pct = 0.75
|
||||||
coroutine.yield()
|
coroutine.yield()
|
||||||
log("Phase 2: Normalizing music track...")
|
log("Phase 2: Normalizing music track...")
|
||||||
normalize_music_track(dialog_regions, dialog_rms_db)
|
normalize_music_track(dialog_regions, dialog_rms_db)
|
||||||
@@ -812,54 +821,73 @@ local function phase3_trim_music()
|
|||||||
local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
|
local music_track = reaper.GetTrack(0, MUSIC_TRACK - 1)
|
||||||
if not music_track then return end
|
if not music_track then return end
|
||||||
|
|
||||||
-- Ensure music starts before first voice item.
|
-- Music lead-in: ensure audible music plays before first voice.
|
||||||
-- Silence removal shifts voice/idents/ads but not music. If voice now starts before
|
-- Strategy: skip the silent intro in the music WAV (adjust take offset),
|
||||||
-- music, nudge all non-music tracks forward so music has a lead-in.
|
-- then nudge all non-music tracks forward by MUSIC_LEAD_SEC so music plays first.
|
||||||
local first_voice_start = math.huge
|
local MUSIC_LEAD_SEC = 3.0
|
||||||
for _, tidx in ipairs(CHECK_TRACKS) do
|
|
||||||
local tr = reaper.GetTrack(0, tidx - 1)
|
-- Find where music becomes audible in the source WAV
|
||||||
if tr and reaper.CountTrackMediaItems(tr) > 0 then
|
local music_audible_offset = nil
|
||||||
local item = reaper.GetTrackMediaItem(tr, 0)
|
local music_ta = get_track_audio(MUSIC_TRACK)
|
||||||
local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
|
if music_ta then
|
||||||
if pos < first_voice_start then first_voice_start = pos end
|
local t = music_ta.item_pos
|
||||||
|
while t < music_ta.item_end do
|
||||||
|
local peak, _ = read_block_peak_rms(music_ta, t)
|
||||||
|
if peak >= THRESHOLD then
|
||||||
|
music_audible_offset = t - music_ta.item_pos -- offset into the WAV
|
||||||
|
break
|
||||||
|
end
|
||||||
|
t = t + BLOCK_SEC
|
||||||
end
|
end
|
||||||
|
destroy_track_audio(music_ta)
|
||||||
end
|
end
|
||||||
|
|
||||||
local MUSIC_LEAD_SEC = 3.0 -- seconds of music before first voice
|
if false then -- Music lead-in disabled — intro silence is preserved instead
|
||||||
if first_voice_start < math.huge then
|
-- Skip the silent intro: set take offset so audible music starts at position 0
|
||||||
local first_music = reaper.GetTrackMediaItem(music_track, 0)
|
local first_music = reaper.GetTrackMediaItem(music_track, 0)
|
||||||
if first_music then
|
if first_music then
|
||||||
local music_start = reaper.GetMediaItemInfo_Value(first_music, "D_POSITION")
|
local take = reaper.GetActiveTake(first_music)
|
||||||
local desired_voice_start = music_start + MUSIC_LEAD_SEC
|
if take then
|
||||||
if first_voice_start < desired_voice_start then
|
local current_offset = reaper.GetMediaItemTakeInfo_Value(take, "D_STARTOFFS")
|
||||||
local nudge = desired_voice_start - first_voice_start
|
reaper.SetMediaItemTakeInfo_Value(take, "D_STARTOFFS", current_offset + music_audible_offset)
|
||||||
-- Shift all non-music tracks forward
|
-- Trim item length to account for skipped intro
|
||||||
for t = 0, reaper.CountTracks(0) - 1 do
|
local item_len = reaper.GetMediaItemInfo_Value(first_music, "D_LENGTH")
|
||||||
if (t + 1) == MUSIC_TRACK then goto skip_music end
|
reaper.SetMediaItemInfo_Value(first_music, "D_LENGTH", item_len - music_audible_offset)
|
||||||
local track = reaper.GetTrack(0, t)
|
log("Phase 3: Skipped " .. string.format("%.1f", music_audible_offset) .. "s of silent music intro")
|
||||||
for i = 0, reaper.CountTrackMediaItems(track) - 1 do
|
|
||||||
local item = reaper.GetTrackMediaItem(track, i)
|
|
||||||
local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
|
|
||||||
reaper.SetMediaItemInfo_Value(item, "D_POSITION", pos + nudge)
|
|
||||||
end
|
|
||||||
::skip_music::
|
|
||||||
end
|
|
||||||
-- Also shift all markers/regions forward
|
|
||||||
local _, num_markers, num_regions = reaper.CountProjectMarkers(0)
|
|
||||||
local total_m = num_markers + num_regions
|
|
||||||
for i = 0, total_m - 1 do
|
|
||||||
local retval, is_region, pos, rgnend, name, idx, color = reaper.EnumProjectMarkers3(0, i)
|
|
||||||
if retval then
|
|
||||||
if is_region then
|
|
||||||
reaper.SetProjectMarker3(0, idx, true, pos + nudge, rgnend + nudge, name, color)
|
|
||||||
else
|
|
||||||
reaper.SetProjectMarker3(0, idx, false, pos + nudge, 0, name, color)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
log("Phase 3: Nudged non-music tracks forward " .. string.format("%.1f", nudge) .. "s for " .. MUSIC_LEAD_SEC .. "s music lead-in")
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Nudge all non-music tracks forward by MUSIC_LEAD_SEC
|
||||||
|
log("Phase 3: Nudging non-music tracks forward by " .. MUSIC_LEAD_SEC .. "s for music lead-in")
|
||||||
|
for t = 0, reaper.CountTracks(0) - 1 do
|
||||||
|
if (t + 1) == MUSIC_TRACK then goto skip_music end
|
||||||
|
local track = reaper.GetTrack(0, t)
|
||||||
|
for i = 0, reaper.CountTrackMediaItems(track) - 1 do
|
||||||
|
local item = reaper.GetTrackMediaItem(track, i)
|
||||||
|
local pos = reaper.GetMediaItemInfo_Value(item, "D_POSITION")
|
||||||
|
reaper.SetMediaItemInfo_Value(item, "D_POSITION", pos + MUSIC_LEAD_SEC)
|
||||||
|
end
|
||||||
|
::skip_music::
|
||||||
|
end
|
||||||
|
|
||||||
|
-- Shift markers/regions forward too
|
||||||
|
local markers_to_update = {}
|
||||||
|
local _, num_markers, num_regions = reaper.CountProjectMarkers(0)
|
||||||
|
for i = 0, num_markers + num_regions - 1 do
|
||||||
|
local retval, is_region, pos, rgnend, name, idx, color = reaper.EnumProjectMarkers3(0, i)
|
||||||
|
if retval then
|
||||||
|
table.insert(markers_to_update, {is_region=is_region, pos=pos, rgnend=rgnend, name=name, idx=idx, color=color})
|
||||||
|
end
|
||||||
|
end
|
||||||
|
for _, m in ipairs(markers_to_update) do
|
||||||
|
if m.is_region then
|
||||||
|
reaper.SetProjectMarker3(0, m.idx, true, m.pos + MUSIC_LEAD_SEC, m.rgnend + MUSIC_LEAD_SEC, m.name, m.color)
|
||||||
|
else
|
||||||
|
reaper.SetProjectMarker3(0, m.idx, false, m.pos + MUSIC_LEAD_SEC, 0, m.name, m.color)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
else
|
||||||
|
log("Phase 3: No silent music intro detected — skipping lead-in adjustment")
|
||||||
end
|
end
|
||||||
|
|
||||||
local last_end = 0
|
local last_end = 0
|
||||||
@@ -1008,6 +1036,39 @@ local function do_work()
|
|||||||
log("Phase 4: No AD/IDENT regions found — skipping")
|
log("Phase 4: No AD/IDENT regions found — skipping")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Set loop/time selection: start 0.5s before audible music, end at last item
|
||||||
|
local loop_start = 0
|
||||||
|
local music_ta = get_track_audio(MUSIC_TRACK)
|
||||||
|
if music_ta then
|
||||||
|
local t = music_ta.item_pos
|
||||||
|
while t < music_ta.item_end do
|
||||||
|
local peak, _ = read_block_peak_rms(music_ta, t)
|
||||||
|
if peak >= THRESHOLD then
|
||||||
|
loop_start = math.max(0, t - 0.5)
|
||||||
|
break
|
||||||
|
end
|
||||||
|
t = t + BLOCK_SEC
|
||||||
|
end
|
||||||
|
destroy_track_audio(music_ta)
|
||||||
|
end
|
||||||
|
|
||||||
|
local project_end = 0
|
||||||
|
for t = 0, reaper.CountTracks(0) - 1 do
|
||||||
|
local track = reaper.GetTrack(0, t)
|
||||||
|
local n = reaper.CountTrackMediaItems(track)
|
||||||
|
if n > 0 then
|
||||||
|
local last_item = reaper.GetTrackMediaItem(track, n - 1)
|
||||||
|
local item_end = reaper.GetMediaItemInfo_Value(last_item, "D_POSITION")
|
||||||
|
+ reaper.GetMediaItemInfo_Value(last_item, "D_LENGTH")
|
||||||
|
if item_end > project_end then project_end = item_end end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if project_end > 0 then
|
||||||
|
reaper.GetSet_LoopTimeRange(true, true, loop_start, project_end, false)
|
||||||
|
reaper.GetSet_LoopTimeRange(true, false, loop_start, project_end, false)
|
||||||
|
log("Loop range set: " .. string.format("%.1f", loop_start) .. " to " .. string.format("%.1f", project_end) .. "s (" .. string.format("%.1f", (project_end - loop_start) / 60) .. " min)")
|
||||||
|
end
|
||||||
|
|
||||||
reaper.PreventUIRefresh(-1)
|
reaper.PreventUIRefresh(-1)
|
||||||
reaper.Undo_EndBlock("Post-production: strip silence + music fades", -1)
|
reaper.Undo_EndBlock("Post-production: strip silence + music fades", -1)
|
||||||
reaper.UpdateArrange()
|
reaper.UpdateArrange()
|
||||||
|
|||||||
@@ -0,0 +1,122 @@
|
|||||||
|
"""Scan music directory for tracks that contain vocals/lyrics.
|
||||||
|
|
||||||
|
Uses Whisper to transcribe a sample from each track — if it picks up
|
||||||
|
actual words, the track likely has vocals.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scan_music_vocals.py # scan and report
|
||||||
|
python scan_music_vocals.py --delete # scan and delete vocal tracks
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
MUSIC_DIR = Path(__file__).parent / "music"
|
||||||
|
WHISPER_MODEL = "distil-large-v3"
|
||||||
|
|
||||||
|
# Phrases Whisper tends to hallucinate on silent or instrumental audio — ignore these
|
||||||
|
HALLUCINATION_PHRASES = {
|
||||||
|
"thank you", "thanks for watching", "subscribe", "like and subscribe",
|
||||||
|
"please subscribe", "thank you for watching", "thanks for listening",
|
||||||
|
"you", "the end", "bye", "okay",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def scan_track(model: WhisperModel, filepath: Path) -> tuple[bool, str]:
    """Check a single track for vocals.

    The file is loaded as 16 kHz mono audio and up to 30 seconds centred on
    the midpoint of the track are transcribed. The track is reported as vocal
    when at least three words remain after known hallucinated phrases have
    been discarded.

    Args:
        model: Whisper model used for the transcription.
        filepath: Audio file to inspect.

    Returns:
        ``(has_vocals, text)``. ``text`` is the retained transcription, a
        bracketed note (``"[load error: ...]"`` or ``"[too short]"``) when the
        track could not be analysed, or ``""`` when only noise or
        hallucinated phrases were transcribed.
    """
    # Characters trimmed from both ends of a phrase before it is compared with
    # HALLUCINATION_PHRASES. The transcription normally carries punctuation
    # ("Thank you.", "Thanks for watching!") while the phrase set holds bare
    # lowercase text, so an exact comparison would almost never match.
    edge_chars = " \t\r\n.,!?;:\"'()[]-"

    def normalize(phrase: str) -> str:
        return phrase.lower().strip(edge_chars)

    try:
        audio, sr = librosa.load(str(filepath), sr=16000, mono=True)
    except Exception as e:
        # Best effort: an unreadable file is reported, not fatal to the scan.
        return False, f"[load error: {e}]"

    duration = len(audio) / sr
    if duration < 10:
        return False, "[too short]"

    # Sample 30s from the middle (most likely to have vocals)
    mid = len(audio) // 2
    half_window = int(15 * sr)  # 15s each side
    start = max(0, mid - half_window)
    end = min(len(audio), mid + half_window)
    sample = audio[start:end]

    segments, _ = model.transcribe(
        sample,
        beam_size=3,
        language="en",
        vad_filter=True,
        vad_parameters=dict(min_speech_duration_ms=500),
    )

    # Drop hallucinated phrases segment by segment, so that a repeated filler
    # ("Thank you." "Thank you.") cannot add up to the word threshold below.
    kept = []
    for segment in segments:
        piece = segment.text.strip()
        if piece and normalize(piece) not in HALLUCINATION_PHRASES:
            kept.append(piece)
    text = " ".join(kept)

    # Re-check the joined text as a whole, and ignore near-empty output.
    normalized = normalize(text)
    if normalized in HALLUCINATION_PHRASES or len(normalized) < 4:
        return False, ""

    # If Whisper found substantial text, it's likely vocals
    word_count = len(text.split())
    has_vocals = word_count >= 3

    return has_vocals, text
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Scan every audio file in MUSIC_DIR and report tracks that contain vocals.

    Command-line flags:
        --delete  Delete the tracks that were reported as vocal after the scan.

    Tracks that could not be analysed (scan_track returns a bracketed note such
    as "[load error: ...]" or "[too short]") are printed as SKIPPED instead of
    OK, so that an unreadable file is not mistaken for an instrumental one.
    """
    parser = argparse.ArgumentParser(description="Scan music for vocal tracks")
    parser.add_argument("--delete", action="store_true", help="Delete tracks with vocals")
    args = parser.parse_args()

    # iterdir() raises FileNotFoundError on a missing directory; report it
    # the same way as an empty one instead of crashing with a traceback.
    if not MUSIC_DIR.is_dir():
        print("No audio files found in music/")
        return

    audio_suffixes = {".mp3", ".wav", ".ogg", ".flac"}
    audio_files = sorted(
        f for f in MUSIC_DIR.iterdir()
        if f.is_file() and f.suffix.lower() in audio_suffixes
    )

    if not audio_files:
        print("No audio files found in music/")
        return

    print(f"Loading Whisper {WHISPER_MODEL}...")
    model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")

    print(f"Scanning {len(audio_files)} tracks for vocals...\n")

    vocal_tracks = []
    for i, f in enumerate(audio_files, 1):
        print(f"[{i}/{len(audio_files)}] {f.name}...", end=" ", flush=True)
        has_vocals, text = scan_track(model, f)
        if has_vocals:
            print(f"VOCALS: {text[:80]}")
            vocal_tracks.append((f, text))
        elif text.startswith("["):
            # Bracketed note from scan_track: the track was not analysed.
            print(f"SKIPPED {text}")
        else:
            print("OK")

    print(f"\n{'='*60}")
    print(f"Results: {len(vocal_tracks)} tracks with vocals out of {len(audio_files)}\n")

    if not vocal_tracks:
        print("All tracks appear to be instrumental!")
        return

    for f, text in vocal_tracks:
        print(f"  {f.name}")
        print(f"    Lyrics: {text[:120]}")
        print()

    if not args.delete:
        print("Run with --delete to remove these tracks.")
        return

    print(f"Deleting {len(vocal_tracks)} vocal tracks...")
    for f, _ in vocal_tracks:
        try:
            f.unlink()
        except OSError as e:
            # Keep going: one locked or already-removed file should not
            # prevent the remaining tracks from being deleted.
            print(f"  Could not delete {f.name}: {e}")
        else:
            print(f"  Deleted: {f.name}")
    print("Done.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Reference in New Issue
Block a user