diff --git a/backend/main.py b/backend/main.py index 5d43c9b..e61754f 100644 --- a/backend/main.py +++ b/backend/main.py @@ -880,6 +880,9 @@ async def _host_audio_sender(): """Persistent task that drains audio queue and sends to callers""" while True: pcm_bytes = await _host_audio_queue.get() + # Skip host mic audio while TTS is streaming to avoid interleaving + if caller_service.streaming_tts: + continue for caller_id in list(caller_service.active_calls.keys()): try: await caller_service.send_audio_to_caller(caller_id, pcm_bytes, 16000) diff --git a/backend/services/audio.py b/backend/services/audio.py index 1d37499..02ba4da 100644 --- a/backend/services/audio.py +++ b/backend/services/audio.py @@ -472,9 +472,10 @@ class AudioService: if not self._host_send_callback: return mono = indata[:, record_channel] - # Simple decimation to ~16kHz + # Downsample to ~16kHz with averaging (anti-aliased) if step > 1: - mono = mono[::step] + n = len(mono) // step * step + mono = mono[:n].reshape(-1, step).mean(axis=1) host_accum.append(mono.copy()) host_accum_samples[0] += len(mono) diff --git a/backend/services/caller_service.py b/backend/services/caller_service.py index 6cc12ee..ac06e69 100644 --- a/backend/services/caller_service.py +++ b/backend/services/caller_service.py @@ -19,6 +19,7 @@ class CallerService: self._caller_counter: int = 0 self._lock = threading.Lock() self._websockets: dict[str, any] = {} # caller_id -> WebSocket + self.streaming_tts: bool = False # True while TTS audio is being streamed def add_to_queue(self, caller_id: str, name: str): with self._lock: @@ -139,6 +140,7 @@ class CallerService: if not ws: return + self.streaming_tts = True try: audio = np.frombuffer(pcm_data, dtype=np.int16).astype(np.float32) / 32768.0 if sample_rate != 16000: @@ -160,6 +162,8 @@ class CallerService: except Exception as e: print(f"[Caller] Failed to stream audio: {e}") + finally: + self.streaming_tts = False async def notify_caller(self, caller_id: str, message: dict): """Send JSON control message to caller""" diff --git a/frontend/call-in.html b/frontend/call-in.html index a9dc4f1..d7d6b40 100644 --- a/frontend/call-in.html +++ b/frontend/call-in.html @@ -150,6 +150,6 @@ - + diff --git a/frontend/js/call-in.js b/frontend/js/call-in.js index 0396b68..009c823 100644 --- a/frontend/js/call-in.js +++ b/frontend/js/call-in.js @@ -52,12 +52,14 @@ class CallerProcessor extends AudioWorkletProcessor { const input = inputs[0][0]; if (!input) return true; - const ratio = sampleRate / 16000; - for (let i = 0; i < input.length; i += ratio) { - const idx = Math.floor(i); - if (idx < input.length) { - this.buffer.push(input[idx]); + // Downsample with averaging (anti-aliased) + const step = Math.floor(sampleRate / 16000); + for (let i = 0; i + step <= input.length; i += step) { + let sum = 0; + for (let j = 0; j < step; j++) { + sum += input[i + j]; } + this.buffer.push(sum / step); } while (this.buffer.length >= this.targetSamples) {