Initial commit: AI Radio Show web application
- FastAPI backend with multiple TTS providers (Inworld, ElevenLabs, Kokoro, F5-TTS, etc.) - Web frontend with caller management, music, and soundboard - Whisper transcription integration - OpenRouter/Ollama LLM support - Castopod podcast publishing script Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
200
backend/services/voices.py
Normal file
200
backend/services/voices.py
Normal file
@@ -0,0 +1,200 @@
|
||||
"""Voice configuration and TTS provider management"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class TTSProvider(str, Enum):
    """Text-to-speech backends a voice can belong to.

    The enum mixes in ``str``, so each member compares equal to its value.
    """

    ELEVENLABS = "elevenlabs"
    # Microsoft Edge TTS (free)
    EDGE = "edge"
    # Local Piper via sherpa-onnx (free, fast)
    PIPER = "piper"
|
||||
|
||||
|
||||
@dataclass
class Voice:
    """One selectable voice together with the provider that owns it."""

    # Identifier used for lookups within this module
    id: str
    # Human-readable label
    name: str
    # Which TTS backend this voice belongs to
    provider: TTSProvider
    # The actual ID used by the provider
    provider_voice_id: str
    # Free-form description; empty when not supplied
    description: str = ""
    # Language code; defaults to "en"
    language: str = "en"
    # Gender label; defaults to "neutral"
    gender: str = "neutral"
|
||||
|
||||
|
||||
# Voices served by ElevenLabs; list order is significant because the first
# entry is used as a fallback elsewhere in this module.
ELEVENLABS_VOICES = [
    Voice(
        id="el_tony", name="Tony (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="IKne3meq5aSn9XLyUdCD",
        description="Male, New York accent, expressive", language="en", gender="male",
    ),
    Voice(
        id="el_jasmine", name="Jasmine (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="FGY2WhTYpPnrIDTdsKH5",
        description="Female, confident, direct", language="en", gender="female",
    ),
    Voice(
        id="el_rick", name="Rick (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="JBFqnCBsd6RMkjVDRZzb",
        description="Male, Texas accent, older", language="en", gender="male",
    ),
    Voice(
        id="el_megan", name="Megan (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="XrExE9yKIg1WjnnlVkGX",
        description="Female, young, casual", language="en", gender="female",
    ),
    Voice(
        id="el_dennis", name="Dennis (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="cjVigY5qzO86Huf0OWal",
        description="Male, middle-aged, anxious", language="en", gender="male",
    ),
    Voice(
        id="el_tanya", name="Tanya (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="N2lVS1w4EtoT3dr4eOWO",
        description="Female, Miami, sassy", language="en", gender="female",
    ),
    Voice(
        id="el_earl", name="Earl (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="EXAVITQu4vr4xnSDxMaL",
        description="Male, elderly, Southern", language="en", gender="male",
    ),
    Voice(
        id="el_carla", name="Carla (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="CwhRBWXzGAHq8TQ4Fs17",
        description="Female, Jersey, sharp", language="en", gender="female",
    ),
    Voice(
        id="el_marcus", name="Marcus (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="bIHbv24MWmeRgasZH58o",
        description="Male, young, urban", language="en", gender="male",
    ),
    Voice(
        id="el_brenda", name="Brenda (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="Xb7hH8MSUJpSbSDYk0k2",
        description="Female, middle-aged, worried", language="en", gender="female",
    ),
    Voice(
        id="el_jake", name="Jake (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="SOYHLrjzK2X1ezoPC6cr",
        description="Male, Boston, insecure", language="en", gender="male",
    ),
    Voice(
        id="el_diane", name="Diane (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="cgSgspJ2msm6clMCkdW9",
        description="Female, mature, conflicted", language="en", gender="female",
    ),
    Voice(
        id="el_bobby", name="Bobby (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="nPczCjzI2devNBz1zQrb",
        description="Male, sidekick, wisecracking", language="en", gender="male",
    ),
    Voice(
        id="el_announcer", name="Announcer (ElevenLabs)",
        provider=TTSProvider.ELEVENLABS, provider_voice_id="ErXwobaYiN019PkySvjV",
        description="Male, radio announcer", language="en", gender="male",
    ),
]
|
||||
|
||||
# Microsoft Edge TTS voices (free).
# NOTE(review): the provider short names below cannot be checked from this
# file — confirm each one is offered by the Edge TTS service in use.
EDGE_VOICES = [
    # --- United States ---
    Voice(
        id="edge_jenny", name="Jenny (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-JennyNeural",
        description="Female, American, friendly", language="en", gender="female",
    ),
    Voice(
        id="edge_guy", name="Guy (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-GuyNeural",
        description="Male, American, casual", language="en", gender="male",
    ),
    Voice(
        id="edge_aria", name="Aria (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-AriaNeural",
        description="Female, American, professional", language="en", gender="female",
    ),
    Voice(
        id="edge_davis", name="Davis (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-DavisNeural",
        description="Male, American, calm", language="en", gender="male",
    ),
    Voice(
        id="edge_amber", name="Amber (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-AmberNeural",
        description="Female, American, warm", language="en", gender="female",
    ),
    Voice(
        id="edge_andrew", name="Andrew (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-AndrewNeural",
        description="Male, American, confident", language="en", gender="male",
    ),
    Voice(
        id="edge_ashley", name="Ashley (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-AshleyNeural",
        description="Female, American, cheerful", language="en", gender="female",
    ),
    Voice(
        id="edge_brian", name="Brian (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-BrianNeural",
        description="Male, American, narrator", language="en", gender="male",
    ),
    Voice(
        id="edge_christopher", name="Christopher (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-ChristopherNeural",
        description="Male, American, reliable", language="en", gender="male",
    ),
    Voice(
        id="edge_cora", name="Cora (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-CoraNeural",
        description="Female, American, older", language="en", gender="female",
    ),
    Voice(
        id="edge_elizabeth", name="Elizabeth (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-ElizabethNeural",
        description="Female, American, elegant", language="en", gender="female",
    ),
    Voice(
        id="edge_eric", name="Eric (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-EricNeural",
        description="Male, American, friendly", language="en", gender="male",
    ),
    Voice(
        id="edge_jacob", name="Jacob (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-JacobNeural",
        description="Male, American, young", language="en", gender="male",
    ),
    Voice(
        id="edge_michelle", name="Michelle (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-MichelleNeural",
        description="Female, American, clear", language="en", gender="female",
    ),
    Voice(
        id="edge_monica", name="Monica (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-MonicaNeural",
        description="Female, American, expressive", language="en", gender="female",
    ),
    Voice(
        id="edge_roger", name="Roger (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-RogerNeural",
        description="Male, American, mature", language="en", gender="male",
    ),
    Voice(
        id="edge_steffan", name="Steffan (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-SteffanNeural",
        description="Male, American, formal", language="en", gender="male",
    ),
    Voice(
        id="edge_tony", name="Tony (Edge)",
        provider=TTSProvider.EDGE, provider_voice_id="en-US-TonyNeural",
        description="Male, American, conversational", language="en", gender="male",
    ),
    # --- United Kingdom ---
    Voice(
        id="edge_sonia", name="Sonia (Edge UK)",
        provider=TTSProvider.EDGE, provider_voice_id="en-GB-SoniaNeural",
        description="Female, British, professional", language="en", gender="female",
    ),
    Voice(
        id="edge_ryan", name="Ryan (Edge UK)",
        provider=TTSProvider.EDGE, provider_voice_id="en-GB-RyanNeural",
        description="Male, British, clear", language="en", gender="male",
    ),
    Voice(
        id="edge_libby", name="Libby (Edge UK)",
        provider=TTSProvider.EDGE, provider_voice_id="en-GB-LibbyNeural",
        description="Female, British, warm", language="en", gender="female",
    ),
    Voice(
        id="edge_thomas", name="Thomas (Edge UK)",
        provider=TTSProvider.EDGE, provider_voice_id="en-GB-ThomasNeural",
        description="Male, British, friendly", language="en", gender="male",
    ),
    # --- Australia ---
    Voice(
        id="edge_natasha", name="Natasha (Edge AU)",
        provider=TTSProvider.EDGE, provider_voice_id="en-AU-NatashaNeural",
        description="Female, Australian, friendly", language="en", gender="female",
    ),
    Voice(
        id="edge_william", name="William (Edge AU)",
        provider=TTSProvider.EDGE, provider_voice_id="en-AU-WilliamNeural",
        description="Male, Australian, casual", language="en", gender="male",
    ),
]
|
||||
|
||||
# Piper voices, run locally via sherpa-onnx.
PIPER_VOICES = [
    Voice(
        id="piper_amy", name="Amy (Piper)",
        provider=TTSProvider.PIPER, provider_voice_id="amy",
        description="Female, American, clear", language="en", gender="female",
    ),
    Voice(
        id="piper_joe", name="Joe (Piper)",
        provider=TTSProvider.PIPER, provider_voice_id="joe",
        description="Male, American, natural", language="en", gender="male",
    ),
    Voice(
        id="piper_lessac", name="Lessac (Piper)",
        provider=TTSProvider.PIPER, provider_voice_id="lessac",
        description="Female, American, expressive", language="en", gender="female",
    ),
    Voice(
        id="piper_alan", name="Alan (Piper)",
        provider=TTSProvider.PIPER, provider_voice_id="alan",
        description="Male, British, clear", language="en", gender="male",
    ),
]
|
||||
|
||||
# Every voice from every provider, in provider order:
# ElevenLabs first, then Edge, then Piper.
ALL_VOICES = [*ELEVENLABS_VOICES, *EDGE_VOICES, *PIPER_VOICES]

# Index of the catalogue keyed by Voice.id; if two voices shared an ID the
# later one in ALL_VOICES would replace the earlier one.
VOICES_BY_ID = {voice.id: voice for voice in ALL_VOICES}
|
||||
|
||||
# Starting caller -> voice mapping: each key is a caller key, each value a
# voice ID.
# NOTE(review): the single-character keys look like keyboard number-row keys —
# confirm against the frontend.
DEFAULT_CALLER_VOICES = {
    # Tony from Staten Island
    "1": "el_tony",
    # Jasmine from Atlanta
    "2": "el_jasmine",
    # Rick from Texas
    "3": "el_rick",
    # Megan from Portland
    "4": "el_megan",
    # Dennis from Long Island
    "5": "el_dennis",
    # Tanya from Miami
    "6": "el_tanya",
    # Earl from Tennessee
    "7": "el_earl",
    # Carla from Jersey
    "8": "el_carla",
    # Marcus from Detroit
    "9": "el_marcus",
    # Brenda from Phoenix
    "0": "el_brenda",
    # Jake from Boston
    "-": "el_jake",
    # Diane from Chicago
    "=": "el_diane",
    # Named (non-numbered) entries
    "bobby": "el_bobby",
    "announcer": "el_announcer",
}
|
||||
|
||||
|
||||
class VoiceManager:
    """Manages voice assignments and TTS provider selection.

    Assignments start as a copy of ``DEFAULT_CALLER_VOICES`` and may be changed
    at runtime. Voice IDs are always validated against ``VOICES_BY_ID`` before
    being stored.
    """

    def __init__(self):
        # Per-instance copy so runtime changes never mutate the module defaults.
        self.caller_voices = DEFAULT_CALLER_VOICES.copy()

    def get_voice(self, voice_id: str) -> Optional[Voice]:
        """Return the voice registered under ``voice_id``, or ``None`` if unknown."""
        return VOICES_BY_ID.get(voice_id)

    def get_caller_voice(self, caller_key: str) -> Voice:
        """Return the voice assigned to ``caller_key``.

        A caller with no assignment resolves to ``"el_tony"``. If the resolved
        ID is missing from the catalogue, the first ElevenLabs voice is
        returned instead, so the result is never ``None``.
        """
        voice_id = self.caller_voices.get(caller_key, "el_tony")
        return VOICES_BY_ID.get(voice_id, ELEVENLABS_VOICES[0])

    def set_caller_voice(self, caller_key: str, voice_id: str) -> bool:
        """Assign ``voice_id`` to ``caller_key`` if the voice exists.

        Returns:
            ``True`` when the assignment was stored; ``False`` when
            ``voice_id`` is not a known voice, in which case the existing
            assignment (if any) is left untouched.
        """
        if voice_id not in VOICES_BY_ID:
            # Unknown IDs are still ignored (best-effort), but the caller is
            # now told about it instead of the request vanishing silently.
            return False
        self.caller_voices[caller_key] = voice_id
        return True

    def get_all_voices(self) -> list[dict]:
        """Return every available voice as a plain dict for the API.

        Each dict carries the keys ``id``, ``name``, ``provider`` (the enum's
        string value), ``description`` and ``gender``.
        """
        return [
            {
                "id": v.id,
                "name": v.name,
                "provider": v.provider.value,
                "description": v.description,
                "gender": v.gender,
            }
            for v in ALL_VOICES
        ]

    def get_voices_by_provider(self, provider: TTSProvider) -> list[Voice]:
        """Return all voices whose provider equals ``provider``."""
        return [v for v in ALL_VOICES if v.provider == provider]

    def get_caller_voice_assignments(self) -> dict[str, str]:
        """Return a copy of the current caller -> voice ID assignments."""
        return self.caller_voices.copy()

    def set_caller_voice_assignments(self, assignments: dict[str, str]) -> list[str]:
        """Apply several caller -> voice ID assignments at once.

        Valid entries are applied even when other entries are rejected.

        Returns:
            The caller keys whose voice ID was unknown and therefore skipped,
            in the order they were encountered; empty when everything applied.
        """
        rejected = []
        for caller_key, voice_id in assignments.items():
            # Delegate so single and batch updates share one validation rule.
            if not self.set_caller_voice(caller_key, voice_id):
                rejected.append(caller_key)
        return rejected
|
||||
|
||||
|
||||
# Global instance
|
||||
# Module-level VoiceManager, constructed once when this module is imported.
voice_manager = VoiceManager()
|
||||
Reference in New Issue
Block a user