TTS fixes, Inworld improvements, footer redesign, episodes 15-25, invoice script fix

- Fix TTS text pipeline: new caps handling (spell out unknown acronyms, lowercase
  emphasis words), action-word lookahead for parenthetical stripping, abbreviation
  expansions (US→United States, NM→New Mexico), pronunciation fixes
- Inworld TTS: camelCase API fields, speakingRate per-voice overrides, retry logic
  with exponential backoff (3 attempts)
- Footer redesign: SVG icons for social/podcast links across all pages
- Stats page: show "Rate us on Spotify" instead of "not public" placeholder
- New voices, expanded caller prompts and problem scenarios
- Social posting via Postiz, YouTube upload in publish pipeline
- Episode transcripts 15-25, terms page, sitemap updates
- Fix invoice script: match Timing totals using merged Task+App intervals

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-02 12:38:58 -07:00
parent 08a35bddeb
commit 6eeab58464
34 changed files with 6545 additions and 512 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -155,11 +155,11 @@ class LLMService:
json={
"model": model,
"messages": messages,
"max_tokens": max_tokens or 300,
"temperature": 0.8,
"top_p": 0.92,
"frequency_penalty": 0.5,
"presence_penalty": 0.3,
"max_tokens": max_tokens or 500,
"temperature": 0.65,
"top_p": 0.9,
"frequency_penalty": 0.3,
"presence_penalty": 0.15,
},
timeout=timeout,
)

View File

@@ -51,7 +51,8 @@ class RegularCallerService:
def add_regular(self, name: str, gender: str, age: int, job: str,
location: str, personality_traits: list[str],
first_call_summary: str, voice: str = None) -> dict:
first_call_summary: str, voice: str = None,
stable_seeds: dict = None) -> dict:
"""Promote a first-time caller to regular"""
# Retire oldest if at cap
if len(self._regulars) >= MAX_REGULARS:
@@ -68,6 +69,7 @@ class RegularCallerService:
"location": location,
"personality_traits": personality_traits,
"voice": voice,
"stable_seeds": stable_seeds or {},
"call_history": [
{"summary": first_call_summary, "timestamp": time.time()}
],

View File

@@ -82,9 +82,14 @@ VITS_SPEAKERS = {
DEFAULT_VITS_SPEAKER = "p225"
# Inworld voice mapping - maps ElevenLabs voice IDs to Inworld voices
# Full voice list from API: Alex, Ashley, Blake, Carter, Clive, Craig, Deborah,
# Dennis, Dominus, Edward, Elizabeth, Hades, Hana, Julia, Luna, Mark, Olivia,
# Pixie, Priya, Ronald, Sarah, Shaun, Theodore, Timothy, Wendy
# Full voice list from API (English): Abby, Alex, Amina, Anjali, Arjun, Ashley,
# Blake, Brian, Callum, Carter, Celeste, Chloe, Claire, Clive, Craig, Darlene,
# Deborah, Dennis, Derek, Dominus, Edward, Elizabeth, Elliot, Ethan, Evan, Evelyn,
# Gareth, Graham, Grant, Hades, Hamish, Hana, Hank, Jake, James, Jason, Jessica,
# Julia, Kayla, Kelsey, Lauren, Liam, Loretta, Luna, Malcolm, Mark, Marlene,
# Miranda, Mortimer, Nate, Oliver, Olivia, Pippa, Pixie, Priya, Ronald, Rupert,
# Saanvi, Sarah, Sebastian, Serena, Shaun, Simon, Snik, Tessa, Theodore, Timothy,
# Tyler, Veronica, Victor, Victoria, Vinny, Wendy
INWORLD_VOICES = {
# Original voice IDs
"VR6AewLTigWG4xSOukaG": "Edward", # Tony - fast-talking, emphatic, streetwise
@@ -111,6 +116,20 @@ INWORLD_VOICES = {
}
DEFAULT_INWORLD_VOICE = "Dennis"
# Inworld voices that speak too slowly at default rate — bump them up
# Range is 0.5 to 1.5, where 1.0 is the voice's native speed
INWORLD_SPEED_OVERRIDES = {
"Wendy": 1.15,
"Craig": 1.15,
"Deborah": 1.15,
"Sarah": 1.1,
"Hana": 1.1,
"Theodore": 1.15,
"Blake": 1.1,
"Priya": 1.1,
}
DEFAULT_INWORLD_SPEED = 1.1 # Slight bump for all voices
def preprocess_text_for_kokoro(text: str) -> str:
"""
@@ -598,7 +617,8 @@ async def generate_speech_inworld(text: str, voice_id: str) -> tuple[np.ndarray,
if not api_key:
raise RuntimeError("INWORLD_API_KEY not set in environment")
print(f"[Inworld TTS] Voice: {voice}, Text: {text[:50]}...")
speed = INWORLD_SPEED_OVERRIDES.get(voice, DEFAULT_INWORLD_SPEED)
print(f"[Inworld TTS] Voice: {voice}, Speed: {speed}, Text: {text[:50]}...")
url = "https://api.inworld.ai/tts/v1/voice"
headers = {
@@ -607,11 +627,12 @@ async def generate_speech_inworld(text: str, voice_id: str) -> tuple[np.ndarray,
}
payload = {
"text": text,
"voice_id": voice,
"model_id": "inworld-tts-1.5-max",
"audio_config": {
"encoding": "LINEAR16",
"sample_rate_hertz": 48000,
"voiceId": voice,
"modelId": "inworld-tts-1.5-max",
"audioConfig": {
"audioEncoding": "LINEAR16",
"sampleRateHertz": 48000,
"speakingRate": speed,
},
}
@@ -650,6 +671,21 @@ async def generate_speech_inworld(text: str, voice_id: str) -> tuple[np.ndarray,
return audio.astype(np.float32), 24000
_TTS_PROVIDERS = {
"kokoro": lambda text, vid: generate_speech_kokoro(text, vid),
"f5tts": lambda text, vid: generate_speech_f5tts(text, vid),
"inworld": lambda text, vid: generate_speech_inworld(text, vid),
"chattts": lambda text, vid: generate_speech_chattts(text, vid),
"styletts2": lambda text, vid: generate_speech_styletts2(text, vid),
"bark": lambda text, vid: generate_speech_bark(text, vid),
"vits": lambda text, vid: generate_speech_vits(text, vid),
"elevenlabs": lambda text, vid: generate_speech_elevenlabs(text, vid),
}
TTS_MAX_RETRIES = 3
TTS_RETRY_DELAYS = [1.0, 2.0, 4.0] # seconds between retries
async def generate_speech(
text: str,
voice_id: str,
@@ -657,7 +693,7 @@ async def generate_speech(
apply_filter: bool = True
) -> bytes:
"""
Generate speech from text.
Generate speech from text with automatic retry on failure.
Args:
text: Text to speak
@@ -668,29 +704,32 @@ async def generate_speech(
Returns:
Raw PCM audio bytes (16-bit signed int, 24kHz)
"""
# Choose TTS provider
import asyncio
provider = settings.tts_provider
print(f"[TTS] Provider: {provider}, Text: {text[:50]}...")
if provider == "kokoro":
audio, sample_rate = await generate_speech_kokoro(text, voice_id)
elif provider == "f5tts":
audio, sample_rate = await generate_speech_f5tts(text, voice_id)
elif provider == "inworld":
audio, sample_rate = await generate_speech_inworld(text, voice_id)
elif provider == "chattts":
audio, sample_rate = await generate_speech_chattts(text, voice_id)
elif provider == "styletts2":
audio, sample_rate = await generate_speech_styletts2(text, voice_id)
elif provider == "bark":
audio, sample_rate = await generate_speech_bark(text, voice_id)
elif provider == "vits":
audio, sample_rate = await generate_speech_vits(text, voice_id)
elif provider == "elevenlabs":
audio, sample_rate = await generate_speech_elevenlabs(text, voice_id)
else:
gen_fn = _TTS_PROVIDERS.get(provider)
if not gen_fn:
raise ValueError(f"Unknown TTS provider: {provider}")
last_error = None
for attempt in range(TTS_MAX_RETRIES):
try:
audio, sample_rate = await gen_fn(text, voice_id)
if attempt > 0:
print(f"[TTS] Succeeded on retry {attempt}")
break
except Exception as e:
last_error = e
if attempt < TTS_MAX_RETRIES - 1:
delay = TTS_RETRY_DELAYS[attempt]
print(f"[TTS] {provider} attempt {attempt + 1} failed: {e} — retrying in {delay}s...")
await asyncio.sleep(delay)
else:
print(f"[TTS] {provider} failed after {TTS_MAX_RETRIES} attempts: {e}")
raise
# Apply phone filter if requested
# Skip filter for Bark - it already has rough audio quality
if apply_filter and phone_quality not in ("none", "studio") and provider != "bark":