Fix choppy/distorted audio to live caller

- Mute host mic forwarding while TTS is streaming to prevent interleaving
  both audio sources into the same playback buffer
- Replace nearest-neighbor downsampling with box-filter averaging on both
  server (host mic) and browser (caller mic) for anti-aliased resampling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-05 17:01:33 -07:00
parent d4e25ceb88
commit d583b48af0
5 changed files with 18 additions and 8 deletions

View File

@@ -472,9 +472,10 @@ class AudioService:
if not self._host_send_callback:
return
mono = indata[:, record_channel]
-# Simple decimation to ~16kHz
+# Downsample to ~16kHz with averaging (anti-aliased)
if step > 1:
-mono = mono[::step]
+n = len(mono) // step * step
+mono = mono[:n].reshape(-1, step).mean(axis=1)
host_accum.append(mono.copy())
host_accum_samples[0] += len(mono)

View File

@@ -19,6 +19,7 @@ class CallerService:
self._caller_counter: int = 0
self._lock = threading.Lock()
self._websockets: dict[str, any] = {} # caller_id -> WebSocket
self.streaming_tts: bool = False # True while TTS audio is being streamed
def add_to_queue(self, caller_id: str, name: str):
with self._lock:
@@ -139,6 +140,7 @@ class CallerService:
if not ws:
return
self.streaming_tts = True
try:
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
if sample_rate != 16000:
@@ -160,6 +162,8 @@ class CallerService:
except Exception as e:
print(f"[Caller] Failed to stream audio: {e}")
finally:
self.streaming_tts = False
async def notify_caller(self, caller_id: str, message: dict):
"""Send JSON control message to caller"""