Fix choppy/distorted audio to live caller

- Mute host mic forwarding while TTS is streaming to prevent interleaving both audio sources into the same playback buffer
- Replace nearest-neighbor downsampling with box-filter averaging on both server (host mic) and browser (caller mic) for anti-aliased resampling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-05 17:01:33 -07:00
parent d4e25ceb88
commit d583b48af0
5 changed files with 18 additions and 8 deletions
@@ -880,6 +880,9 @@ async def _host_audio_sender():
    """Persistent task that drains audio queue and sends to callers"""
    while True:
        pcm_bytes = await _host_audio_queue.get()
        # Skip host mic audio while TTS is streaming to avoid interleaving
        if caller_service.streaming_tts:
            continue
        for caller_id in list(caller_service.active_calls.keys()):
            try:
                await caller_service.send_audio_to_caller(caller_id, pcm_bytes, 16000)
@@ -472,9 +472,10 @@ class AudioService:
                if not self._host_send_callback:
                    return
                mono = indata[:, record_channel]
-                # Simple decimation to ~16kHz
+                # Downsample to ~16kHz with averaging (anti-aliased)
                if step > 1:
-                    mono = mono[::step]
+                    n = len(mono) // step * step
                    mono = mono[:n].reshape(-1, step).mean(axis=1)
                host_accum.append(mono.copy())
                host_accum_samples[0] += len(mono)
@@ -19,6 +19,7 @@ class CallerService:
        self._caller_counter: int = 0
        self._lock = threading.Lock()
        self._websockets: dict[str, any] = {}  # caller_id -> WebSocket
        self.streaming_tts: bool = False  # True while TTS audio is being streamed
    def add_to_queue(self, caller_id: str, name: str):
        with self._lock:
@@ -139,6 +140,7 @@ class CallerService:
        if not ws:
            return
        self.streaming_tts = True
        try:
            audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0
            if sample_rate != 16000:
@@ -160,6 +162,8 @@ class CallerService:
        except Exception as e:
            print(f"[Caller] Failed to stream audio: {e}")
        finally:
            self.streaming_tts = False
    async def notify_caller(self, caller_id: str, message: dict):
        """Send JSON control message to caller"""
@@ -150,6 +150,6 @@
        </div>
    </div>
-    <script src="/js/call-in.js?v=5"></script>
+    <script src="/js/call-in.js?v=6"></script>
 </body>
 </html>
@@ -52,12 +52,14 @@ class CallerProcessor extends AudioWorkletProcessor {
        const input = inputs[0][0];
        if (!input) return true;
-        const ratio = sampleRate / 16000;
+        // Downsample with averaging (anti-aliased)
-        for (let i = 0; i < input.length; i += ratio) {
+        const step = Math.floor(sampleRate / 16000);
-            const idx = Math.floor(i);
+        for (let i = 0; i + step <= input.length; i += step) {
-            if (idx < input.length) {
+            let sum = 0;
-                this.buffer.push(input[idx]);
+            for (let j = 0; j < step; j++) {
                sum += input[i + j];
            }
            this.buffer.push(sum / step);
        }
        while (this.buffer.length >= this.targetSamples) {