Fix live caller audio latency and choppiness

- Reduce capture chunk from 4096 to 640 samples (256ms → 40ms)
- Replace BufferSource scheduling with AudioWorklet playback ring buffer
- Add 80ms jitter buffer with linear interpolation upsampling
- Reduce host mic and live caller stream blocksizes from 4096/2048 to 1024
- Replace librosa.resample with numpy interpolation in send_audio_to_caller

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-05 16:32:27 -07:00
parent ab36ad8d5b
commit 7aed4d9c34
5 changed files with 89 additions and 27 deletions

View File

@@ -150,6 +150,6 @@
</div>
</div>
<script src="/js/call-in.js"></script>
<script src="/js/call-in.js?v=2"></script>
</body>
</html>

View File

@@ -207,6 +207,6 @@
</div>
</div>
<script src="/js/app.js?v=11"></script>
<script src="/js/app.js?v=12"></script>
</body>
</html>

View File

@@ -8,7 +8,7 @@ let ws = null;
let audioCtx = null;
let micStream = null;
let workletNode = null;
let nextPlayTime = 0;
let playbackNode = null;
let callerId = null;
const callBtn = document.getElementById('call-btn');
@@ -39,19 +39,19 @@ async function startCall() {
// Set up AudioContext
audioCtx = new AudioContext({ sampleRate: 48000 });
// Register worklet processor inline via blob
// Register worklet processors inline via blob
const processorCode = `
// --- Capture processor: downsample to 16kHz, emit small chunks ---
class CallerProcessor extends AudioWorkletProcessor {
constructor() {
super();
this.buffer = [];
this.targetSamples = 4096; // ~256ms at 16kHz
this.targetSamples = 640; // 40ms at 16kHz — low latency
}
process(inputs) {
const input = inputs[0][0];
if (!input) return true;
// Downsample from sampleRate to 16000
const ratio = sampleRate / 16000;
for (let i = 0; i < input.length; i += ratio) {
const idx = Math.floor(i);
@@ -60,7 +60,7 @@ class CallerProcessor extends AudioWorkletProcessor {
}
}
if (this.buffer.length >= this.targetSamples) {
while (this.buffer.length >= this.targetSamples) {
const chunk = this.buffer.splice(0, this.targetSamples);
const int16 = new Int16Array(chunk.length);
for (let i = 0; i < chunk.length; i++) {
@@ -73,6 +73,70 @@ class CallerProcessor extends AudioWorkletProcessor {
}
}
registerProcessor('caller-processor', CallerProcessor);
// --- Playback processor: ring buffer with 16kHz->sampleRate upsampling ---
// Receives Float32 PCM at 16kHz via port messages, buffers it in a ring,
// and linearly interpolates up to the AudioContext sampleRate on output.
class PlaybackProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.ringSize = 16000 * 3; // 3s ring buffer at 16kHz
    this.ring = new Float32Array(this.ringSize);
    this.writePos = 0;
    this.readPos = 0;   // whole-sample part of the read head
    this.readFrac = 0;  // fractional part of the read head, in source samples
    this.available = 0; // unread source samples currently in the ring
    this.started = false;
    this.jitterMs = 80; // buffer 80ms before starting playback
    this.jitterSamples = Math.floor(16000 * this.jitterMs / 1000);
    this.port.onmessage = (e) => {
      const data = e.data;
      for (let i = 0; i < data.length; i++) {
        this.ring[this.writePos] = data[i];
        this.writePos = (this.writePos + 1) % this.ringSize;
      }
      this.available += data.length;
      if (this.available > this.ringSize) {
        // Overflow — drop the oldest audio and jump the read head forward.
        // When the ring is full, the oldest unread sample sits at writePos.
        this.available = this.ringSize;
        this.readPos = this.writePos;
        this.readFrac = 0;
      }
    };
  }
  process(inputs, outputs) {
    const output = outputs[0][0];
    if (!output) return true;
    // Wait for jitter buffer to fill before starting
    if (!this.started) {
      if (this.available < this.jitterSamples) {
        output.fill(0);
        return true;
      }
      this.started = true;
      this.readFrac = 0;
    }
    const ratio = 16000 / sampleRate;
    // Exact (fractional) number of source samples spanned by this quantum.
    // Advancing by Math.ceil of this each call would consume input slightly
    // faster than it arrives (e.g. 43 vs 42.667 samples per 128 frames at
    // 48kHz) and steadily drain the buffer into periodic underruns.
    const span = output.length * ratio;
    // +1 sample of lookahead so linear interpolation (p1) never reads
    // a slot that has not been written yet.
    const srcNeeded = Math.ceil(this.readFrac + span) + 1;
    if (this.available >= srcNeeded) {
      for (let i = 0; i < output.length; i++) {
        const srcPos = this.readFrac + i * ratio;
        const idx = Math.floor(srcPos);
        const frac = srcPos - idx;
        const p0 = (this.readPos + idx) % this.ringSize;
        const p1 = (p0 + 1) % this.ringSize;
        output[i] = this.ring[p0] * (1 - frac) + this.ring[p1] * frac;
      }
      // Advance the read head by the exact span; carry the fraction over.
      this.readFrac += span;
      const whole = Math.floor(this.readFrac);
      this.readFrac -= whole;
      this.readPos = (this.readPos + whole) % this.ringSize;
      this.available -= whole;
    } else {
      // Underrun — output silence and re-arm the jitter buffer.
      output.fill(0);
      this.started = false;
    }
    return true;
  }
}
registerProcessor('playback-processor', PlaybackProcessor);
`;
const blob = new Blob([processorCode], { type: 'application/javascript' });
const blobUrl = URL.createObjectURL(blob);
@@ -120,6 +184,10 @@ registerProcessor('caller-processor', CallerProcessor);
source.connect(workletNode);
// Don't connect worklet to destination — we don't want to hear our own mic
// Set up playback worklet for received audio
playbackNode = new AudioWorkletNode(audioCtx, 'playback-processor');
playbackNode.connect(audioCtx.destination);
// Show mic meter
const analyser = audioCtx.createAnalyser();
analyser.fftSize = 256;
@@ -144,7 +212,6 @@ function handleControlMessage(msg) {
setStatus(`Waiting in queue (position ${msg.position})...`, false);
} else if (msg.status === 'on_air') {
setStatus('ON AIR', true);
nextPlayTime = audioCtx.currentTime;
} else if (msg.status === 'disconnected') {
setStatus('Disconnected', false);
cleanup();
@@ -152,27 +219,15 @@ function handleControlMessage(msg) {
}
// Handle a binary audio frame received from the server: convert Int16 PCM
// (little-endian, 16kHz mono) to Float32 in [-1, 1) and hand it to the
// playback worklet's ring buffer. The Float32Array's buffer is transferred
// (zero-copy) to the worklet thread.
function handleAudioData(buffer) {
  if (!audioCtx) return;
  if (!playbackNode) return;
  // Convert Int16 PCM to Float32 and send to playback worklet
  const int16 = new Int16Array(buffer);
  const float32 = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    float32[i] = int16[i] / 32768;
  }
  playbackNode.port.postMessage(float32, [float32.buffer]);
}
function hangUp() {
@@ -188,6 +243,10 @@ function cleanup() {
workletNode.disconnect();
workletNode = null;
}
if (playbackNode) {
playbackNode.disconnect();
playbackNode = null;
}
if (micStream) {
micStream.getTracks().forEach(t => t.stop());
micStream = null;