Fix choppy/distorted audio to live caller

- Skip forwarding host mic audio while TTS is streaming, so the two audio
  sources are not interleaved into the same playback buffer
- Replace nearest-neighbor downsampling with box-filter averaging on both
  server (host mic) and browser (caller mic) to reduce aliasing when
  downsampling to ~16kHz

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-05 17:01:33 -07:00
parent d4e25ceb88
commit d583b48af0
5 changed files with 18 additions and 8 deletions

View File

@@ -880,6 +880,9 @@ async def _host_audio_sender():
"""Persistent task that drains audio queue and sends to callers""" """Persistent task that drains audio queue and sends to callers"""
while True: while True:
pcm_bytes = await _host_audio_queue.get() pcm_bytes = await _host_audio_queue.get()
# Skip host mic audio while TTS is streaming to avoid interleaving
if caller_service.streaming_tts:
continue
for caller_id in list(caller_service.active_calls.keys()): for caller_id in list(caller_service.active_calls.keys()):
try: try:
await caller_service.send_audio_to_caller(caller_id, pcm_bytes, 16000) await caller_service.send_audio_to_caller(caller_id, pcm_bytes, 16000)

View File

@@ -472,9 +472,10 @@ class AudioService:
if not self._host_send_callback: if not self._host_send_callback:
return return
mono = indata[:, record_channel] mono = indata[:, record_channel]
# Simple decimation to ~16kHz # Downsample to ~16kHz with averaging (anti-aliased)
if step > 1: if step > 1:
mono = mono[::step] n = len(mono) // step * step
mono = mono[:n].reshape(-1, step).mean(axis=1)
host_accum.append(mono.copy()) host_accum.append(mono.copy())
host_accum_samples[0] += len(mono) host_accum_samples[0] += len(mono)

View File

@@ -19,6 +19,7 @@ class CallerService:
self._caller_counter: int = 0 self._caller_counter: int = 0
self._lock = threading.Lock() self._lock = threading.Lock()
self._websockets: dict[str, any] = {} # caller_id -> WebSocket self._websockets: dict[str, any] = {} # caller_id -> WebSocket
self.streaming_tts: bool = False # True while TTS audio is being streamed
def add_to_queue(self, caller_id: str, name: str): def add_to_queue(self, caller_id: str, name: str):
with self._lock: with self._lock:
@@ -139,6 +140,7 @@ class CallerService:
if not ws: if not ws:
return return
self.streaming_tts = True
try: try:
audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0 audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
if sample_rate != 16000: if sample_rate != 16000:
@@ -160,6 +162,8 @@ class CallerService:
except Exception as e: except Exception as e:
print(f"[Caller] Failed to stream audio: {e}") print(f"[Caller] Failed to stream audio: {e}")
finally:
self.streaming_tts = False
async def notify_caller(self, caller_id: str, message: dict): async def notify_caller(self, caller_id: str, message: dict):
"""Send JSON control message to caller""" """Send JSON control message to caller"""

View File

@@ -150,6 +150,6 @@
</div> </div>
</div> </div>
<script src="/js/call-in.js?v=5"></script> <script src="/js/call-in.js?v=6"></script>
</body> </body>
</html> </html>

View File

@@ -52,12 +52,14 @@ class CallerProcessor extends AudioWorkletProcessor {
const input = inputs[0][0]; const input = inputs[0][0];
if (!input) return true; if (!input) return true;
const ratio = sampleRate / 16000; // Downsample with averaging (anti-aliased)
for (let i = 0; i < input.length; i += ratio) { const step = Math.floor(sampleRate / 16000);
const idx = Math.floor(i); for (let i = 0; i + step <= input.length; i += step) {
if (idx < input.length) { let sum = 0;
this.buffer.push(input[idx]); for (let j = 0; j < step; j++) {
sum += input[i + j];
} }
this.buffer.push(sum / step);
} }
while (this.buffer.length >= this.targetSamples) { while (this.buffer.length >= this.targetSamples) {