const { useEffect, useMemo, useRef, useState } = React;
// Timing values below are in milliseconds.
// NOTE(review): the two STOP_CLOSE timeouts are not referenced in the visible part of this file;
// presumably they bound how long a stop waits before the socket is closed — confirm at the use site.
const STOP_CLOSE_TIMEOUT_MS = 150;
const FILE_STOP_CLOSE_TIMEOUT_MS = 10000;
// Period of the JSON "ping" keepalive sent on the active WebSocket.
const WS_KEEPALIVE_INTERVAL_MS = 15000;
// Delay before a new session is started after an unexpected socket close.
const WS_AUTO_RECONNECT_DELAY_MS = 250;
// A transcript box keeps following new text while it is within this many pixels of its bottom edge.
const AUTO_SCROLL_THRESHOLD_PX = 28;
// Buffer size (in frames) of the ScriptProcessor node that forwards file audio.
const FILE_STREAM_CHUNK_SAMPLES = 512;
const FILE_SAMPLE_RATE = 16000;
// Tail padding is expressed as a frame count times a per-frame duration; with
// FILE_TAIL_PAD_FRAMES = 0 the derived FILE_TAIL_PAD_SAMPLES evaluates to 0.
const FILE_TAIL_PAD_FRAMES = 0;
const FILE_TAIL_PAD_FRAME_MS = 10;
const FILE_TAIL_PAD_SAMPLES = Math.round((FILE_SAMPLE_RATE * FILE_TAIL_PAD_FRAMES * FILE_TAIL_PAD_FRAME_MS) / 1000);
// NOTE(review): FILE_TAIL_PAD_SETTLE_MS and FILE_STREAM_SPEED_MULTIPLIER are not used in the
// visible part of this file — verify they are still needed.
const FILE_TAIL_PAD_SETTLE_MS = 0;
const FILE_STREAM_SPEED_MULTIPLIER = 4;
// Number of points kept in the rolling waveform that is drawn on the canvas.
const WAVEFORM_POINT_COUNT = 128;
// Maximum number of waveform points extracted from one audio chunk.
const WAVEFORM_POINTS_PER_CHUNK = 10;
// Multiplier applied to a chunk's peak before it is clamped to [-1, 1] for display.
const WAVEFORM_VISUAL_GAIN = 10;
// Maps a language code to its English display name.
// NOTE(review): the consumer of this table is not in the visible part of the file; the codes
// appear to match the values found in each provider's `supported_languages` list — confirm.
const LANGUAGE_LABELS = {
zh: "Chinese",
en: "English",
ja: "Japanese",
ko: "Korean",
yue: "Cantonese",
ar: "Arabic",
de: "German",
fr: "French",
es: "Spanish",
pt: "Portuguese",
id: "Indonesian",
it: "Italian",
ru: "Russian",
th: "Thai",
vi: "Vietnamese",
tr: "Turkish",
hi: "Hindi",
ms: "Malay",
nl: "Dutch",
sv: "Swedish",
da: "Danish",
fi: "Finnish",
pl: "Polish",
cs: "Czech",
fil: "Filipino",
fa: "Persian",
el: "Greek",
hu: "Hungarian",
mk: "Macedonian",
ro: "Romanian",
};
// View modes. The `value` strings are compared against `selectedMode` when the set of
// providers for a session is computed: "demo" and "default" have dedicated handling,
// every other value falls through to "all providers visible for the language".
const MODE_OPTIONS = [
{ value: "demo", label: "Demo" },
{ value: "default", label: "Default" },
{ value: "vertical", label: "Vertical" },
{ value: "grid", label: "Grid" },
];
// The single provider that demo mode runs (only when it exists in the provider catalog).
const DEMO_PROVIDER_NAME = "zipformer_new_016s_punctuation_beam";
const DEMO_PROVIDER_NAMES = [DEMO_PROVIDER_NAME];
// Audio input sources; "file" is labelled "Static Resource" in the UI.
const INPUT_MODE_OPTIONS = [
{ value: "microphone", label: "Microphone" },
{ value: "file", label: "Static Resource" },
];
/**
 * Builds a fresh, all-zero waveform history.
 *
 * @returns {number[]} A new array of WAVEFORM_POINT_COUNT zeros.
 */
function makeSilentWaveform() {
  const silence = new Array(WAVEFORM_POINT_COUNT);
  return silence.fill(0);
}
/**
 * Reduces one chunk of audio samples to a handful of display points.
 *
 * The chunk is cut into equal-sized segments; each segment contributes the
 * sample with the largest magnitude (sign preserved, first one wins on ties),
 * scaled by WAVEFORM_VISUAL_GAIN and clamped to [-1, 1].
 *
 * @param {ArrayLike<number>|null|undefined} samples Audio samples of one chunk.
 * @returns {number[]} At most WAVEFORM_POINTS_PER_CHUNK points; empty for empty input.
 */
function sampleWaveformPoints(samples) {
  if (!samples || !samples.length) return [];
  const total = samples.length;
  const stride = Math.max(1, Math.floor(total / WAVEFORM_POINTS_PER_CHUNK));

  // Returns the signed sample with the greatest magnitude in [from, to).
  const dominantSample = (from, to) => {
    let dominant = 0;
    for (let idx = from; idx < to; idx += 1) {
      if (Math.abs(samples[idx]) > Math.abs(dominant)) {
        dominant = samples[idx];
      }
    }
    return dominant;
  };

  const peaks = [];
  for (let from = 0; from < total; from += stride) {
    const scaled = dominantSample(from, Math.min(total, from + stride)) * WAVEFORM_VISUAL_GAIN;
    peaks.push(Math.max(-1, Math.min(1, scaled)));
    // Trailing samples beyond the last full point are intentionally ignored.
    if (peaks.length >= WAVEFORM_POINTS_PER_CHUNK) break;
  }
  return peaks;
}
/**
 * Indexes provider catalog entries by their `name`.
 *
 * @param {Array<{name: string}>} providerCatalog Catalog entries.
 * @returns {Object<string, object>} Entry lookup keyed by name; later duplicates win.
 */
function makeProviderMetaMap(providerCatalog) {
  const pairs = [];
  providerCatalog.forEach((entry) => {
    pairs.push([entry.name, entry]);
  });
  return Object.fromEntries(pairs);
}
/**
 * Creates the initial UI state for one provider.
 *
 * Metadata is taken from `providerMetaMap` only when `name` is an own key of the
 * map, so names such as "constructor" or "toString" never resolve to members
 * inherited from Object.prototype. A missing `label` falls back to the provider
 * name and a missing `detail` to "", matching how catalog entries are merged
 * into existing provider state elsewhere in this file.
 *
 * @param {string} name Provider identifier.
 * @param {Object<string, object>} providerMetaMap Catalog entries keyed by provider name.
 * @returns {{name: string, label: string, detail: string, supportedLanguages: string[], partial: string, committedText: string}}
 */
function makeProviderState(name, providerMetaMap) {
  const hasOwnMeta =
    providerMetaMap != null && Object.prototype.hasOwnProperty.call(providerMetaMap, name);
  const meta = (hasOwnMeta && providerMetaMap[name]) || {};
  return {
    name,
    label: meta.label || name,
    detail: meta.detail || "",
    supportedLanguages: Array.isArray(meta.supported_languages) ? meta.supported_languages : [],
    // "..." is the placeholder for "no live hypothesis yet".
    partial: "...",
    committedText: "",
  };
}
/**
 * Produces the text shown for a provider: its committed transcript followed by
 * the current live hypothesis (placeholder values are dropped by normalizeLiveText).
 *
 * @param {{committedText?: string, partial?: string}} provider Provider state.
 * @returns {string} Result of joinSessionText(committed, live).
 */
function getDisplayText(provider) {
  const { committedText, partial } = provider;
  const committed = String(committedText || "").trim();
  return joinSessionText(committed, normalizeLiveText(partial));
}
/**
 * Normalizes a live (partial) hypothesis for display.
 *
 * @param {*} text Raw partial text; falsy values are treated as empty.
 * @returns {string} The trimmed text, or "" when it is empty or the "..." placeholder.
 */
function normalizeLiveText(text) {
  const trimmed = String(text || "").trim();
  return trimmed === "..." ? "" : trimmed;
}
/**
 * Builds the initial provider-state table for a catalog.
 *
 * @param {Array<{name: string}>} providerCatalog Catalog entries.
 * @returns {Object<string, object>} One makeProviderState() result per entry, keyed by name.
 */
function makeInitialProviders(providerCatalog) {
  const metaByName = makeProviderMetaMap(providerCatalog);
  const stateEntries = providerCatalog.map(({ name }) => [name, makeProviderState(name, metaByName)]);
  return Object.fromEntries(stateEntries);
}
// Icon component with two renderings: one when `state` is "playing" and another
// for every other value.
// NOTE(review): the JSX inside both `return ( … )` expressions is missing from this
// copy of the file (the parentheses are empty) — restore the markup from the original source.
function PlaybackIcon({ state }) {
if (state === "playing") {
return (
);
}
return (
);
}
// Draws `samples` (values expected in [-1, 1]; out-of-range values are clamped) as a
// glowing line on a canvas. The drawing is redone whenever `samples` changes.
// NOTE(review): the JSX returned at the end is missing from this copy of the file;
// presumably it renders the <canvas> that `canvasRef` is attached to — restore it.
function AudioWaveform({ samples }) {
const canvasRef = useRef(null);
useEffect(() => {
const canvas = canvasRef.current;
if (!canvas) return;
// Size the backing store in device pixels; 900x104 is the fallback when no layout size is available.
const rect = canvas.getBoundingClientRect();
const cssWidth = Math.max(1, rect.width || canvas.clientWidth || 900);
const cssHeight = Math.max(1, rect.height || canvas.clientHeight || 104);
const dpr = window.devicePixelRatio || 1;
const width = Math.floor(cssWidth * dpr);
const height = Math.floor(cssHeight * dpr);
if (canvas.width !== width || canvas.height !== height) {
canvas.width = width;
canvas.height = height;
}
const ctx = canvas.getContext("2d");
// Scale by dpr so every drawing call below works in CSS pixels.
ctx.setTransform(dpr, 0, 0, dpr, 0, 0);
ctx.clearRect(0, 0, cssWidth, cssHeight);
const centerY = cssHeight / 2;
const waveform = samples && samples.length ? samples : makeSilentWaveform();
// Faint horizontal centre line, brightest in the middle.
const glow = ctx.createLinearGradient(0, 0, cssWidth, 0);
glow.addColorStop(0, "rgba(47, 211, 255, 0.08)");
glow.addColorStop(0.5, "rgba(142, 247, 255, 0.24)");
glow.addColorStop(1, "rgba(47, 211, 255, 0.08)");
ctx.fillStyle = glow;
ctx.fillRect(0, centerY - 1, cssWidth, 2);
// First pass: thick stroke with a shadow blur, giving the glow around the line.
ctx.beginPath();
waveform.forEach((value, index) => {
const x = waveform.length <= 1 ? 0 : (index / (waveform.length - 1)) * cssWidth;
// A full-scale sample reaches 78% of the half-height.
const y = centerY - Math.max(-1, Math.min(1, value)) * centerY * 0.78;
if (index === 0) {
ctx.moveTo(x, y);
} else {
ctx.lineTo(x, y);
}
});
ctx.lineWidth = 2.4;
ctx.lineJoin = "round";
ctx.lineCap = "round";
ctx.shadowColor = "rgba(142, 247, 255, 0.7)";
ctx.shadowBlur = 14;
ctx.strokeStyle = "rgba(142, 247, 255, 0.96)";
ctx.stroke();
// Second pass: the same path again as a thin line without blur, drawn on top.
ctx.shadowBlur = 0;
ctx.beginPath();
waveform.forEach((value, index) => {
const x = waveform.length <= 1 ? 0 : (index / (waveform.length - 1)) * cssWidth;
const y = centerY - Math.max(-1, Math.min(1, value)) * centerY * 0.78;
if (index === 0) {
ctx.moveTo(x, y);
} else {
ctx.lineTo(x, y);
}
});
ctx.lineWidth = 1;
ctx.strokeStyle = "rgba(230, 240, 255, 0.85)";
ctx.stroke();
}, [samples]);
return (
);
}
// Speaker icon component that switches between two renderings based on `muted`.
// NOTE(review): the JSX elements are missing from this copy of the file; only the
// conditional skeleton and a fragment remnant survive — restore the markup from the original source.
function SpeakerIcon({ muted }) {
return (
{muted ? (
) : (
<>
>
)}
);
}
/**
 * Decides whether a picked or dropped file should be accepted as audio.
 *
 * A file is accepted when its MIME type starts with "audio/" or, failing that,
 * when its name ends with a known audio extension (case-insensitive).
 *
 * @param {{type?: string, name?: string}|null|undefined} file File-like object.
 * @returns {boolean} True when the file looks like audio.
 */
function isSupportedAudioFile(file) {
  if (!file) return false;
  const hasAudioMime = String(file.type || "").startsWith("audio/");
  return hasAudioMime || /\.(wav|mp3|m4a|aac|flac|ogg|webm)$/i.test(String(file.name || ""));
}
// Scrollable transcript view that keeps itself scrolled to the newest text while
// the reader is at (or near) the bottom, and stops doing so once they scroll away.
// NOTE(review): the wrapping JSX element is missing from this copy of the file; presumably
// it attaches `boxRef`, `handleScroll` and `boxClassName` to the container — restore it.
function TranscriptBox({ text, className = "" }) {
const boxRef = useRef(null);
// True while the view should follow newly appended text.
const followTailRef = useRef(true);
useEffect(() => {
const element = boxRef.current;
if (!element) return;
if (followTailRef.current) {
element.scrollTop = element.scrollHeight;
}
}, [text]);
// Re-evaluates "follow" from the current scroll position: within
// AUTO_SCROLL_THRESHOLD_PX of the bottom counts as following.
function handleScroll() {
const element = boxRef.current;
if (!element) return;
const distanceFromBottom = element.scrollHeight - element.scrollTop - element.clientHeight;
followTailRef.current = distanceFromBottom <= AUTO_SCROLL_THRESHOLD_PX;
}
const boxClassName = ["text-box", "transcript-box", className].filter(Boolean).join(" ");
return (
{text || "..."}
);
}
function App() {
const [status, setStatus] = useState("idle");
const [providerCatalog, setProviderCatalog] = useState([]);
const [providers, setProviders] = useState(() => makeInitialProviders([]));
const [selectedLanguage, setSelectedLanguage] = useState("zh");
const [selectedMode, setSelectedMode] = useState("default");
const [selectedInputMode, setSelectedInputMode] = useState("microphone");
const [selectedDefaultProviders, setSelectedDefaultProviders] = useState(["zipformer_new_016s_punctuation"]);
const [draftDefaultProviders, setDraftDefaultProviders] = useState(["zipformer_new_016s_punctuation"]);
const [selectedFile, setSelectedFile] = useState(null);
const [isFileDragOver, setIsFileDragOver] = useState(false);
const [selectedFilePlaybackMode, setSelectedFilePlaybackMode] = useState("muted");
const [filePlaybackState, setFilePlaybackState] = useState("stopped");
const [inputNotice, setInputNotice] = useState("");
const [startNeedsAttention, setStartNeedsAttention] = useState(false);
const [catalogLoaded, setCatalogLoaded] = useState(false);
const [sessionId, setSessionId] = useState("");
const [startedAt, setStartedAt] = useState(null);
const [durationTick, setDurationTick] = useState(0);
const [inputLevel, setInputLevel] = useState(0);
const [waveformSamples, setWaveformSamples] = useState(() => makeSilentWaveform());
const wsRef = useRef(null);
const mediaStreamRef = useRef(null);
const audioContextRef = useRef(null);
const processorRef = useRef(null);
const microphoneSourceRef = useRef(null);
const microphoneSinkGainRef = useRef(null);
const microphoneSessionWsRef = useRef(null);
const providerCatalogRef = useRef([]);
const fileInputRef = useRef(null);
const streamAbortRef = useRef({ aborted: false });
const filePlaybackBufferRef = useRef(null);
const filePlaybackFileRef = useRef(null);
const filePlaybackSourceRef = useRef(null);
const filePlaybackGainRef = useRef(null);
const filePlaybackProcessorRef = useRef(null);
const filePlaybackSessionWsRef = useRef(null);
const filePlaybackOffsetRef = useRef(0);
const filePlaybackStartAtRef = useRef(0);
const filePlaybackDurationRef = useRef(0);
const filePlaybackStopReasonRef = useRef("idle");
const activeSessionProviderKeyRef = useRef("");
const latestStartSessionRef = useRef(() => {});
const transitionInProgressRef = useRef(false);
const wsKeepaliveTimerRef = useRef(null);
const autoReconnectTimerRef = useRef(null);
const expectedWsCloseRef = useRef(false);
const inputLevelFrameRef = useRef(null);
const latestInputLevelRef = useRef(0);
const waveformFrameRef = useRef(null);
const latestWaveformSamplesRef = useRef(makeSilentWaveform());
useEffect(() => {
function handleBeforeUnload() {
stopSession({ immediateClose: true });
}
window.addEventListener("beforeunload", handleBeforeUnload);
return () => {
window.removeEventListener("beforeunload", handleBeforeUnload);
stopSession({ immediateClose: true });
};
}, []);
useEffect(() => {
providerCatalogRef.current = providerCatalog;
}, [providerCatalog]);
useEffect(
() => () => {
if (inputLevelFrameRef.current) {
cancelAnimationFrame(inputLevelFrameRef.current);
}
if (waveformFrameRef.current) {
cancelAnimationFrame(waveformFrameRef.current);
}
},
[]
);
useEffect(() => {
if (!startedAt) {
setDurationTick(0);
return;
}
const timer = setInterval(() => setDurationTick((value) => value + 1), 1000);
return () => clearInterval(timer);
}, [startedAt]);
useEffect(() => {
let cancelled = false;
async function loadProviderCatalog() {
try {
const response = await fetch("/api/provider-catalog", { cache: "no-store" });
if (!response.ok) {
throw new Error(`failed to load provider catalog: ${response.status}`);
}
const payload = await response.json();
const nextCatalog = Array.isArray(payload.providers) ? payload.providers : [];
if (cancelled) return;
setProviderCatalog(nextCatalog);
setProviders((prev) => {
const providerMetaMap = makeProviderMetaMap(nextCatalog);
const nextEntries = nextCatalog.map((item) => {
const current = prev[item.name];
if (!current) {
return [item.name, makeProviderState(item.name, providerMetaMap)];
}
return [
item.name,
{
...current,
label: item.label || item.name,
detail: item.detail || "",
supportedLanguages: Array.isArray(item.supported_languages) ? item.supported_languages : [],
},
];
});
return Object.fromEntries(nextEntries);
});
} catch (error) {
console.error(error);
} finally {
if (!cancelled) {
setCatalogLoaded(true);
}
}
}
loadProviderCatalog();
return () => {
cancelled = true;
};
}, []);
const durationText = useMemo(() => {
if (!startedAt) return "--";
return `${Math.max(0, Math.round((Date.now() - startedAt) / 1000))}s`;
}, [startedAt, durationTick]);
const availableLanguages = useMemo(() => {
const values = new Set();
providerCatalog.forEach((item) => {
(item.supported_languages || []).forEach((language) => {
if (String(language || "").trim()) {
values.add(String(language).trim());
}
});
});
const ordered = Array.from(values);
if (ordered.includes("zh")) {
return ["zh", ...ordered.filter((language) => language !== "zh")];
}
return ordered;
}, [providerCatalog]);
useEffect(() => {
if (!availableLanguages.includes(selectedLanguage)) {
setSelectedLanguage(availableLanguages[0] || "zh");
}
}, [availableLanguages, selectedLanguage]);
const visibleProviderCatalog = useMemo(() => {
return providerCatalog.filter((item) => (item.supported_languages || []).includes(selectedLanguage));
}, [providerCatalog, selectedLanguage]);
const visibleProviderMap = useMemo(
() => Object.fromEntries(visibleProviderCatalog.map((item) => [item.name, item])),
[visibleProviderCatalog]
);
const providerCatalogMap = useMemo(
() => Object.fromEntries(providerCatalog.map((item) => [item.name, item])),
[providerCatalog]
);
const providerOrder = useMemo(
() => [
"paraformer_sherpa",
"sensevoice_sherpa",
"qwen3",
"zipformer_new_016s_punctuation",
"zipformer_new_0_5s_punctuation",
"zipformer_new_1s_punctuation",
"zipformer_new_2s_punctuation",
"zipformer_viet_chunk16",
"zipformer_new_016s_punctuation_beam",
],
[]
);
const visibleProviderNames = useMemo(
() => providerOrder.filter((name) => Boolean(visibleProviderMap[name])),
[providerOrder, visibleProviderMap]
);
useEffect(() => {
if (!catalogLoaded) return;
const allowed = new Set(visibleProviderNames);
const normalizeSelection = (selection) =>
selection.filter((name, index) => allowed.has(name) && selection.indexOf(name) === index);
const nextSelected = normalizeSelection(selectedDefaultProviders);
if (nextSelected.join("|") !== selectedDefaultProviders.join("|")) {
setSelectedDefaultProviders(nextSelected);
}
const nextDraft = normalizeSelection(draftDefaultProviders);
if (nextDraft.join("|") !== draftDefaultProviders.join("|")) {
setDraftDefaultProviders(nextDraft);
}
}, [catalogLoaded, visibleProviderNames, selectedDefaultProviders, draftDefaultProviders]);
const gridRows = useMemo(
() => [
{
className: "provider-grid provider-grid--three",
names: ["paraformer_sherpa", "sensevoice_sherpa", "qwen3"],
},
{
className: "provider-grid provider-grid--four",
names: [
"zipformer_new_016s_punctuation",
"zipformer_new_0_5s_punctuation",
"zipformer_new_1s_punctuation",
"zipformer_new_2s_punctuation",
],
},
{
className: "provider-grid provider-grid--four",
names: ["zipformer_viet_chunk16", "zipformer_new_016s_punctuation_beam"],
},
],
[]
);
const activeSessionProviderNames = useMemo(() => {
if (selectedMode === "demo") {
return providerCatalogMap[DEMO_PROVIDER_NAME] ? DEMO_PROVIDER_NAMES : [];
}
if (selectedMode === "default") {
return selectedDefaultProviders.filter((name) => Boolean(visibleProviderMap[name]));
}
return visibleProviderNames;
}, [selectedMode, providerCatalogMap, selectedDefaultProviders, visibleProviderMap, visibleProviderNames]);
const activeSessionProviderKey = useMemo(
() => activeSessionProviderNames.join("|"),
[activeSessionProviderNames]
);
/**
 * Computes which providers are visible for a language.
 *
 * @param {string} language Language code to filter the catalog by.
 * @returns {{mapForLanguage: Object<string, object>, namesForLanguage: string[]}}
 *   Catalog entries supporting the language keyed by name, and their names in
 *   `providerOrder` order (providers absent from `providerOrder` are not listed).
 */
function buildVisibleProvidersForLanguage(language) {
  const supportsLanguage = (entry) => (entry.supported_languages || []).includes(language);
  const mapForLanguage = Object.fromEntries(
    providerCatalog.filter(supportsLanguage).map((entry) => [entry.name, entry])
  );
  const namesForLanguage = providerOrder.filter((providerName) => Boolean(mapForLanguage[providerName]));
  return { mapForLanguage, namesForLanguage };
}
/**
 * Works out which providers a session should run for a prospective
 * language / mode / default-selection combination.
 *
 * @param {string} nextLanguage Language code.
 * @param {string} nextMode View mode ("demo", "default", or any other value).
 * @param {string[]} [nextDefaultProviders] Requested default-mode selection; defaults to the current one.
 * @returns {{targetNames: string[], effectiveDefaultProviders: string[]}}
 *   `effectiveDefaultProviders` is the selection de-duplicated and restricted to
 *   providers supporting the language; `targetNames` is the provider list for the mode.
 */
function computeTargetProviderNames(nextLanguage, nextMode, nextDefaultProviders = selectedDefaultProviders) {
  const { mapForLanguage, namesForLanguage } = buildVisibleProvidersForLanguage(nextLanguage);
  const requested = nextDefaultProviders || [];
  const effectiveDefaultProviders = requested.filter(
    (name, position) => Boolean(mapForLanguage[name]) && requested.indexOf(name) === position
  );

  let targetNames;
  if (nextMode === "demo") {
    // Demo mode ignores the language filter and only needs the demo provider to exist.
    targetNames = providerCatalogMap[DEMO_PROVIDER_NAME] ? DEMO_PROVIDER_NAMES : [];
  } else if (nextMode === "default") {
    targetNames = effectiveDefaultProviders;
  } else {
    targetNames = namesForLanguage;
  }
  return { targetNames, effectiveDefaultProviders };
}
// Applies a change of language, view mode and/or default-provider selection, and
// reconciles a running session with the resulting provider set.
//
// Each field of the argument defaults to the currently selected value, so callers
// pass only what changed. The selection state is always updated; the session is
// touched only while a socket exists and the status is "connecting" or "streaming".
function handleProviderSelectionChange({
nextLanguage = selectedLanguage,
nextMode = selectedMode,
nextDefaultProviders = selectedDefaultProviders,
}) {
const { targetNames, effectiveDefaultProviders } = computeTargetProviderNames(
nextLanguage,
nextMode,
nextDefaultProviders
);
const targetKey = targetNames.join("|");
setSelectedLanguage(nextLanguage);
setSelectedMode(nextMode);
// Demo mode always switches the input back to the microphone.
if (nextMode === "demo") {
setSelectedInputMode("microphone");
}
setSelectedDefaultProviders(effectiveDefaultProviders);
setDraftDefaultProviders(effectiveDefaultProviders);
// Nothing more to do unless a session is live, not mid-transition, and its
// provider set actually differs from the target.
if (!wsRef.current) return;
if (status !== "connecting" && status !== "streaming") return;
if (!activeSessionProviderKeyRef.current) return;
if (transitionInProgressRef.current) return;
if (activeSessionProviderKeyRef.current === targetKey) return;
const currentNames = activeSessionProviderKeyRef.current.split("|").filter(Boolean);
const hasReuse = currentNames.some((name) => targetNames.includes(name));
// When at least one running provider is kept, switch providers over the existing socket.
if (hasReuse) {
try {
wsRef.current.send(JSON.stringify({ event: "switch_providers", providers: targetNames }));
activeSessionProviderKeyRef.current = targetKey;
setInputNotice(targetNames.length ? "" : "No active models are available for the current view.");
setStartNeedsAttention(false);
} catch (error) {
console.error(error);
setInputNotice("Failed to switch models cleanly. Please press Start again.");
setStartNeedsAttention(true);
}
return;
}
// No overlap with the running providers: tear the session down.
transitionInProgressRef.current = true;
activeSessionProviderKeyRef.current = "";
setSessionId("");
// With zero or one target provider the session is not restarted automatically;
// the user is told to press Start (or that nothing is available).
if (targetNames.length <= 1) {
setInputNotice(
targetNames.length
? "Focus model changed. Press Start to run the new model."
: "No active models are available for the current view."
);
setStartNeedsAttention(targetNames.length > 0);
stopSession({ immediateClose: true, detachSocket: true });
transitionInProgressRef.current = false;
return;
}
setInputNotice("");
setStartNeedsAttention(false);
stopSession({ immediateClose: true, detachSocket: true });
transitionInProgressRef.current = false;
// Restart through the ref on a later task so the most recently rendered
// startSession (stored in latestStartSessionRef by an effect) is the one invoked.
window.setTimeout(() => {
latestStartSessionRef.current();
}, 0);
}
const defaultSelectionDirty = useMemo(
() => draftDefaultProviders.join("|") !== selectedDefaultProviders.join("|"),
[draftDefaultProviders, selectedDefaultProviders]
);
/**
 * Toggles a provider in the draft (not yet applied) default-mode selection.
 *
 * @param {string} name Provider to add when absent, or remove when present.
 */
function toggleDraftDefaultProvider(name) {
  setDraftDefaultProviders((current) => {
    if (current.includes(name)) {
      return current.filter((entry) => entry !== name);
    }
    return current.concat([name]);
  });
}
// Commits the draft default-provider selection by routing it through
// handleProviderSelectionChange (language and mode keep their current values).
function applyDefaultProviderSelection() {
handleProviderSelectionChange({ nextDefaultProviders: draftDefaultProviders });
}
/** Stops the WebSocket keepalive interval, if one is running. */
function clearWsKeepalive() {
  const timerId = wsKeepaliveTimerRef.current;
  if (!timerId) return;
  clearInterval(timerId);
  wsKeepaliveTimerRef.current = null;
}
/** Cancels a pending automatic reconnect, if one is scheduled. */
function clearAutoReconnect() {
  const timerId = autoReconnectTimerRef.current;
  if (!timerId) return;
  clearTimeout(timerId);
  autoReconnectTimerRef.current = null;
}
/**
 * (Re)starts the keepalive interval for a socket.
 *
 * Every WS_KEEPALIVE_INTERVAL_MS a JSON `{ event: "ping" }` message is sent, but
 * only while `ws` is still the active socket and is open. Send failures are
 * logged and otherwise ignored.
 *
 * @param {WebSocket} ws Socket to keep alive.
 */
function startWsKeepalive(ws) {
  clearWsKeepalive();
  const sendPing = () => {
    const isActiveOpenSocket = wsRef.current === ws && ws.readyState === WebSocket.OPEN;
    if (!isActiveOpenSocket) return;
    try {
      ws.send(JSON.stringify({ event: "ping" }));
    } catch (error) {
      console.warn("failed to send ws keepalive", error);
    }
  };
  wsKeepaliveTimerRef.current = setInterval(sendPing, WS_KEEPALIVE_INTERVAL_MS);
}
/**
 * Checks, before any permission prompt, whether microphone capture can work here.
 *
 * @returns {string} A user-facing error message, or "" when capture looks possible.
 */
function getMicrophonePreflightError() {
  if (window.isSecureContext) {
    const devices = navigator.mediaDevices;
    const canCapture = Boolean(devices && devices.getUserMedia);
    return canCapture ? "" : "This browser cannot access the microphone in the current context.";
  }
  return "Microphone mode requires HTTPS. Please use the secure domain or switch to Static Resource.";
}
/**
 * Turns an input-start failure into a user-facing message.
 *
 * Known DOMException names map to fixed microphone messages; anything else
 * falls back to a generic message that includes `error.message` when present.
 *
 * @param {{name?: string, message?: string}|null|undefined} error The failure.
 * @returns {string} Message to show to the user.
 */
function formatInputStartError(error) {
  const errorName = String(error?.name || "");
  switch (errorName) {
    case "NotAllowedError":
    case "SecurityError":
      return "Microphone permission was denied.";
    case "NotFoundError":
    case "DevicesNotFoundError":
      return "No microphone device was found.";
    case "NotReadableError":
    case "TrackStartError":
    case "AbortError":
      return "The microphone is busy or unavailable.";
    default:
      break;
  }
  if (error?.message) {
    return `Failed to start input stream: ${error.message}`;
  }
  return "Failed to start input stream.";
}
/**
 * Records a new input level and schedules at most one state update per animation frame.
 *
 * @param {*} nextLevel Level to show; non-numeric values count as 0 and the result is clamped to [0, 1].
 */
function updateInputLevel(nextLevel) {
  const numericLevel = Number(nextLevel) || 0;
  latestInputLevelRef.current = Math.min(1, Math.max(0, numericLevel));
  // A frame is already pending; it will pick up the latest value when it runs.
  if (inputLevelFrameRef.current) return;
  const flush = () => {
    inputLevelFrameRef.current = null;
    setInputLevel(latestInputLevelRef.current);
  };
  inputLevelFrameRef.current = requestAnimationFrame(flush);
}
/**
 * Appends the display points of one audio chunk to the rolling waveform and
 * schedules at most one state update per animation frame.
 *
 * The oldest points are dropped so the history keeps its length.
 *
 * @param {ArrayLike<number>} samples Audio samples of one chunk.
 */
function updateWaveform(samples) {
  const freshPoints = sampleWaveformPoints(samples);
  if (!freshPoints.length) return;
  const history = latestWaveformSamplesRef.current;
  latestWaveformSamplesRef.current = [...history.slice(freshPoints.length), ...freshPoints];
  // A frame is already pending; it will publish the latest history when it runs.
  if (waveformFrameRef.current) return;
  const flush = () => {
    waveformFrameRef.current = null;
    setWaveformSamples(latestWaveformSamplesRef.current);
  };
  waveformFrameRef.current = requestAnimationFrame(flush);
}
/**
 * Resets the level meter and waveform to silence, cancelling any pending
 * animation-frame updates so stale values cannot be published afterwards.
 */
function resetInputLevel() {
  latestInputLevelRef.current = 0;
  for (const frameRef of [inputLevelFrameRef, waveformFrameRef]) {
    if (frameRef.current) {
      cancelAnimationFrame(frameRef.current);
      frameRef.current = null;
    }
  }
  setInputLevel(0);
  const silence = makeSilentWaveform();
  latestWaveformSamplesRef.current = silence;
  setWaveformSamples(silence);
}
/**
 * Measures one chunk of audio and feeds the level meter and waveform.
 *
 * The displayed level is `rms * 24 + peak * 0.35`, capped at 1.
 *
 * @param {ArrayLike<number>|null|undefined} samples Audio samples of one chunk.
 * @returns {{rms: number, peak: number}} Root-mean-square and absolute peak of the chunk
 *   (both 0 for empty input, in which case only the level is reset).
 */
function updateInputLevelFromSamples(samples) {
  if (!samples || !samples.length) {
    updateInputLevel(0);
    return { rms: 0, peak: 0 };
  }
  const count = samples.length;
  let energy = 0;
  let peak = 0;
  let position = 0;
  while (position < count) {
    const sample = samples[position];
    const magnitude = Math.abs(sample);
    energy += sample * sample;
    if (magnitude > peak) peak = magnitude;
    position += 1;
  }
  const rms = Math.sqrt(energy / count);
  const level = Math.min(1, rms * 24 + peak * 0.35);
  updateInputLevel(level);
  updateWaveform(samples);
  return { rms, peak };
}
/**
 * Starts a recognition session: validates the current input selection, prepares
 * the audio source, opens the WebSocket for the active providers and wires its
 * open / message / close handlers.
 *
 * Compared with a naive implementation this function also:
 *  - re-checks `wsRef.current` after every `await`, so two overlapping calls
 *    (e.g. a double click while the permission prompt is open) cannot each open
 *    a socket and leave one of them orphaned;
 *  - ignores text frames that are not valid JSON objects and tolerates messages
 *    without a `data` field instead of throwing inside the message handler.
 *
 * @param {object} [options]
 * @param {boolean} [options.preserveTranscripts=false] Keep existing provider transcripts.
 * @param {boolean} [options.preserveStartedAt=false] Keep the current session start time.
 * @param {boolean} [options.reconnecting=false] Show the reconnect notice instead of clearing it.
 * @returns {Promise<void>}
 */
async function startSession(options = {}) {
  const { preserveTranscripts = false, preserveStartedAt = false, reconnecting = false } = options;
  if (transitionInProgressRef.current) return;
  if (wsRef.current) return;
  clearAutoReconnect();
  expectedWsCloseRef.current = false;
  if (selectedInputMode === "file" && !selectedFile) {
    setInputNotice("Please choose an audio file first.");
    return;
  }
  if (!activeSessionProviderNames.length) {
    setInputNotice("No active models are available for the current view.");
    return;
  }
  const micPreflightError = selectedInputMode === "file" ? "" : getMicrophonePreflightError();
  if (micPreflightError) {
    setInputNotice(micPreflightError);
    setStartNeedsAttention(true);
    setStatus("idle");
    setStartedAt(null);
    return;
  }
  if (selectedInputMode !== "file") {
    try {
      await prepareMicrophone();
    } catch (error) {
      console.error(error);
      stopMicrophoneOnly();
      setInputNotice(formatInputStartError(error));
      setStartNeedsAttention(true);
      setStatus("idle");
      setStartedAt(null);
      return;
    }
    // Another startSession call may have opened the socket while we were awaiting.
    if (wsRef.current) return;
  }
  activeSessionProviderKeyRef.current = activeSessionProviderKey;
  setInputNotice(reconnecting ? "Connection refreshed. Reconnecting..." : "");
  setStartNeedsAttention(false);
  setStatus("connecting");
  setSessionId("");
  if (!preserveStartedAt) {
    setStartedAt(Date.now());
  }
  if (!preserveTranscripts) {
    setProviders(makeInitialProviders(providerCatalog));
  }
  streamAbortRef.current = { aborted: false };
  if (selectedInputMode === "file") {
    try {
      await prepareFilePlaybackEngine(selectedFile);
    } catch (error) {
      // Local playback is optional; the session continues without it.
      console.warn("failed to prepare file playback", error);
      setInputNotice("Playback unavailable, continuing without local audio.");
    }
    // Same re-entrancy guard as above, for the file-mode await.
    if (wsRef.current) return;
  }
  const protocol = location.protocol === "https:" ? "wss" : "ws";
  const sessionQuery = new URLSearchParams();
  sessionQuery.set("providers", activeSessionProviderNames.join(","));
  const sessionUrl = `${protocol}://${location.host}/ws?${sessionQuery.toString()}`;
  const ws = new WebSocket(sessionUrl);
  ws.binaryType = "arraybuffer";
  ws.onopen = async () => {
    startWsKeepalive(ws);
    try {
      if (selectedInputMode === "file") {
        setStatus("streaming");
        await startFileStream(ws, selectedFile, streamAbortRef.current, selectedFilePlaybackMode);
      } else {
        startMicrophone(ws);
        setStatus("streaming");
      }
    } catch (error) {
      console.error(error);
      destroyFilePlayback();
      setInputNotice(formatInputStartError(error));
      setStartNeedsAttention(true);
      if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {
        ws.close(1011, "input-start-failed");
      }
    }
  };
  ws.onmessage = (event) => {
    // Messages from a socket that is no longer the active one are ignored.
    if (wsRef.current !== ws) return;
    if (typeof event.data !== "string") return;
    let payload;
    try {
      payload = JSON.parse(event.data);
    } catch (error) {
      console.warn("ignoring malformed ws message", error);
      return;
    }
    if (!payload || typeof payload !== "object") return;
    if (payload.action === "session_ready") {
      const readyData = payload.data || {};
      setSessionId(readyData.session_id || "");
      const activeProviders = Array.isArray(readyData.providers) ? readyData.providers : [];
      activeSessionProviderKeyRef.current = activeProviders.join("|");
      if (activeProviders.length) {
        setProviders((prev) => {
          const providerMetaMap = makeProviderMetaMap(providerCatalogRef.current);
          const next = { ...prev };
          activeProviders.forEach((name) => {
            if (!next[name]) {
              next[name] = makeProviderState(name, providerMetaMap);
            }
          });
          return next;
        });
      }
    }
    if (payload.action === "providers_updated") {
      const updatedData = payload.data || {};
      const activeProviders = Array.isArray(updatedData.providers) ? updatedData.providers : [];
      activeSessionProviderKeyRef.current = activeProviders.join("|");
    }
    if (payload.action === "asr_update") {
      const data = payload.data || {};
      const provider = String(data.provider || "");
      setProviders((prev) => {
        const providerMetaMap = makeProviderMetaMap(providerCatalogRef.current);
        const current = prev[provider] || makeProviderState(provider, providerMetaMap);
        const text = String(data.text || "");
        // A partial with only whitespace keeps the previous live text.
        const nextPartial =
          data.type === "partial"
            ? text.trim()
              ? text
              : current.partial
            : current.partial;
        const next = {
          ...current,
          partial: nextPartial,
          committedText:
            data.type === "final" || data.type === "stable"
              ? joinSessionText(current.committedText, text)
              : current.committedText,
        };
        // Once text is committed the live slot goes back to the placeholder.
        if (data.type === "final" || data.type === "stable") {
          next.partial = "...";
        }
        if (data.type === "blank") {
          next.partial = text.trim() ? text : current.partial;
        }
        return { ...prev, [provider]: next };
      });
    }
    if (payload.action === "provider_error") {
      const data = payload.data || {};
      const provider = String(data.provider || "");
      setProviders((prev) => {
        const providerMetaMap = makeProviderMetaMap(providerCatalogRef.current);
        const current = prev[provider] || makeProviderState(provider, providerMetaMap);
        return {
          ...prev,
          [provider]: {
            ...current,
            partial: data.message ? `[error] ${String(data.message)}` : "[error]",
          },
        };
      });
    }
  };
  ws.onclose = () => {
    if (wsRef.current !== ws) return;
    const wasExpected = expectedWsCloseRef.current;
    clearWsKeepalive();
    wsRef.current = null;
    activeSessionProviderKeyRef.current = "";
    setSessionId("");
    transitionInProgressRef.current = false;
    // Only microphone sessions reconnect on their own after an unexpected close;
    // the values used here are the ones captured when this session was started.
    const shouldAutoReconnect = !wasExpected && selectedInputMode === "microphone" && activeSessionProviderNames.length > 0;
    if (shouldAutoReconnect) {
      setStatus("connecting");
      clearAutoReconnect();
      autoReconnectTimerRef.current = window.setTimeout(() => {
        latestStartSessionRef.current({
          preserveTranscripts: true,
          preserveStartedAt: true,
          reconnecting: true,
        });
      }, WS_AUTO_RECONNECT_DELAY_MS);
      return;
    }
    destroyFilePlayback();
    setStatus("idle");
    setStartedAt(null);
  };
  wsRef.current = ws;
}
useEffect(() => {
latestStartSessionRef.current = startSession;
});
// Acquires the microphone and builds the capture graph
// (media stream source -> script processor -> zero-gain node -> destination).
// Captured audio is forwarded only while `microphoneSessionWsRef` holds an open socket.
// Rejects with whatever getUserMedia / AudioContext throw; the refs assigned before a
// failure stay set, so the caller is expected to clean up (startSession calls stopMicrophoneOnly).
async function prepareMicrophone() {
// Reuse an existing pipeline, resuming its context if the browser suspended it.
// NOTE(review): this check does not verify that the context is still open, and
// `audioContextRef` is also assigned by prepareFilePlaybackEngine — confirm that a closed
// or replaced context cannot be reused here.
if (mediaStreamRef.current && audioContextRef.current && processorRef.current) {
if (audioContextRef.current.state === "suspended") {
await audioContextRef.current.resume();
}
return;
}
// Mono capture with echo cancellation, auto gain and noise suppression all disabled.
const stream = await navigator.mediaDevices.getUserMedia({
audio: {
channelCount: 1,
echoCancellation: false,
autoGainControl: false,
noiseSuppression: false,
},
});
mediaStreamRef.current = stream;
const audioContext = new AudioContext({ sampleRate: 16000 });
audioContextRef.current = audioContext;
if (audioContext.state === "suspended") {
await audioContext.resume();
}
const source = audioContext.createMediaStreamSource(stream);
// 512-frame buffers, one input channel, one output channel.
const processor = audioContext.createScriptProcessor(512, 1, 1);
// Gain of 0: the chain is connected to the destination without being audible.
const sinkGain = audioContext.createGain();
sinkGain.gain.setValueAtTime(0, audioContext.currentTime);
// Debug counters exposed on `window` for inspecting the first captured chunk.
window.__codexMicDebug = window.__codexMicDebug || { chunkCount: 0, firstChunkAt: null };
processor.onaudioprocess = (event) => {
const ws = microphoneSessionWsRef.current;
if (!ws || ws.readyState !== WebSocket.OPEN) return;
const channelCount = Math.max(1, event.inputBuffer.numberOfChannels || 1);
const frameCount = event.inputBuffer.length || 0;
// Down-mix to mono by averaging all input channels.
const mixed = new Float32Array(frameCount);
for (let channelIndex = 0; channelIndex < channelCount; channelIndex += 1) {
const channel = event.inputBuffer.getChannelData(channelIndex);
for (let i = 0; i < frameCount; i += 1) {
mixed[i] += channel[i] / channelCount;
}
}
const { rms, peak } = updateInputLevelFromSamples(mixed);
// float32ToInt16 is defined outside the visible part of this file; presumably it
// converts to 16-bit PCM — verify. Its underlying buffer is sent as one binary frame.
const pcm = float32ToInt16(mixed);
ws.send(pcm.buffer);
const debug = window.__codexMicDebug || (window.__codexMicDebug = { chunkCount: 0, firstChunkAt: null, firstChunkRms: 0, firstChunkPeak: 0, channelCount: 0 });
debug.chunkCount += 1;
// Log the statistics of the first chunk sent (firstChunkAt is not reset in this function).
if (!debug.firstChunkAt) {
debug.firstChunkAt = Date.now();
debug.firstChunkRms = Number(rms.toFixed(6));
debug.firstChunkPeak = Number(peak.toFixed(6));
debug.channelCount = channelCount;
console.info('microphone first chunk sent', pcm.length, 'channels', channelCount, 'rms', debug.firstChunkRms, 'peak', debug.firstChunkPeak);
}
};
source.connect(processor);
processor.connect(sinkGain);
sinkGain.connect(audioContext.destination);
microphoneSourceRef.current = source;
microphoneSinkGainRef.current = sinkGain;
processorRef.current = processor;
}
/**
 * Tears down the microphone capture pipeline: detaches the session socket,
 * resets the level meter, disconnects the audio nodes, stops the media tracks
 * and closes the audio context.
 *
 * Every step is best-effort. A failure while disconnecting one node is logged
 * and never prevents the media tracks from being stopped. The audio context is
 * closed only when it is not already closed, and a rejected `close()` promise is
 * logged instead of surfacing as an unhandled rejection.
 */
function stopMicrophoneOnly() {
  microphoneSessionWsRef.current = null;
  resetInputLevel();

  const processor = processorRef.current;
  if (processor) {
    processorRef.current = null;
    // Drop the callback first so no further audio is forwarded.
    processor.onaudioprocess = null;
    try {
      processor.disconnect();
    } catch (error) {
      console.warn("failed to disconnect microphone processor", error);
    }
  }

  const sinkGain = microphoneSinkGainRef.current;
  if (sinkGain) {
    microphoneSinkGainRef.current = null;
    try {
      sinkGain.disconnect();
    } catch (error) {
      console.warn("failed to disconnect microphone sink", error);
    }
  }

  const source = microphoneSourceRef.current;
  if (source) {
    microphoneSourceRef.current = null;
    try {
      source.disconnect();
    } catch (error) {
      console.warn("failed to disconnect microphone source", error);
    }
  }

  const stream = mediaStreamRef.current;
  if (stream) {
    mediaStreamRef.current = null;
    stream.getTracks().forEach((track) => track.stop());
  }

  const audioContext = audioContextRef.current;
  if (audioContext) {
    audioContextRef.current = null;
    // The context ref is shared with file playback, so it may already be closed;
    // closing a closed context rejects.
    if (audioContext.state !== "closed") {
      try {
        const closing = audioContext.close();
        if (closing && typeof closing.catch === "function") {
          closing.catch((error) => {
            console.warn("failed to close microphone audio context", error);
          });
        }
      } catch (error) {
        console.warn("failed to close microphone audio context", error);
      }
    }
  }
}
/**
 * Routes captured microphone audio to a session socket.
 *
 * The capture pipeline must already exist (see prepareMicrophone); this only
 * points it at `ws` and resumes the audio context if it is suspended.
 *
 * @param {WebSocket} ws Socket that should receive the audio.
 * @throws {Error} When the processor or audio context has not been prepared.
 */
function startMicrophone(ws) {
  const context = audioContextRef.current;
  if (!processorRef.current || !context) {
    throw new Error("Microphone processor is not ready.");
  }
  microphoneSessionWsRef.current = ws;
  if (context.state !== "suspended") return;
  // Resuming is fire-and-forget; a failure is only logged.
  context.resume().catch((error) => {
    console.warn("failed to resume microphone audio context", error);
  });
}
/**
 * Starts streaming a file to a session socket through the file playback engine.
 *
 * Nothing is started when the stream was aborted, or the socket stopped being
 * open, while the engine was being prepared.
 *
 * @param {WebSocket} ws Session socket that receives the audio.
 * @param {File} file Audio file to stream.
 * @param {{aborted: boolean}} abortState Abort flag for this stream.
 * @param {string} playbackMode Playback mode passed to applyFilePlaybackMode.
 * @returns {Promise<void>}
 */
async function startFileStream(ws, file, abortState, playbackMode) {
  await prepareFilePlaybackEngine(file);
  const canStart = !abortState.aborted && ws.readyState === WebSocket.OPEN;
  if (!canStart) return;
  filePlaybackSessionWsRef.current = ws;
  applyFilePlaybackMode(playbackMode);
  await startFilePlaybackSource({ restart: true });
}
/**
 * Sets the local playback volume for file streaming.
 *
 * @param {string} mode "muted" silences local playback; any other value plays at full gain.
 */
function applyFilePlaybackMode(mode) {
  const gainNode = filePlaybackGainRef.current;
  if (!gainNode) return;
  const targetGain = mode === "muted" ? 0 : 1;
  gainNode.gain.setValueAtTime(targetGain, gainNode.context.currentTime);
}
// Prepares the audio graph used to stream a file: decodes the file, creates a gain node
// (local playback volume) and a script processor that forwards every processed buffer to
// the session socket. Does nothing when `file` is falsy.
// Rejects with whatever AudioContext.resume() or decodeFileTo16kMono throw.
async function prepareFilePlaybackEngine(file) {
if (!file) return;
// The audio context ref is shared with the microphone pipeline; create a new
// context when there is none or the previous one was closed.
let audioContext = audioContextRef.current;
if (!audioContext || audioContext.state === "closed") {
audioContext = new AudioContext({ sampleRate: 16000 });
audioContextRef.current = audioContext;
}
if (audioContext.state === "suspended") {
await audioContext.resume();
}
// The engine is reused when it was already built for this exact file object.
const sameFile =
filePlaybackFileRef.current === file &&
filePlaybackBufferRef.current &&
filePlaybackProcessorRef.current &&
filePlaybackGainRef.current;
if (sameFile) {
applyFilePlaybackMode(selectedFilePlaybackMode);
return;
}
destroyFilePlayback({ closeContext: false, preserveFileSelection: true });
// decodeFileTo16kMono is defined outside the visible part of this file; its result is
// treated as samples at 16000 Hz when the duration is computed below.
const mono16k = await decodeFileTo16kMono(file);
filePlaybackBufferRef.current = mono16k;
filePlaybackFileRef.current = file;
filePlaybackDurationRef.current = mono16k.length / 16000;
filePlaybackOffsetRef.current = 0;
const gain = audioContext.createGain();
gain.connect(audioContext.destination);
filePlaybackGainRef.current = gain;
const processor = audioContext.createScriptProcessor(FILE_STREAM_CHUNK_SAMPLES, 1, 1);
processor.onaudioprocess = (event) => {
const input = event.inputBuffer.getChannelData(0);
const output = event.outputBuffer.getChannelData(0);
// Pass the audio through unchanged so it continues on to the gain node.
if (input && output) {
output.set(input);
}
// Forward the same buffer to the server while a session socket is open.
const ws = filePlaybackSessionWsRef.current;
if (!ws || ws.readyState !== WebSocket.OPEN || !input || input.length === 0) return;
updateInputLevelFromSamples(input);
const pcm = float32ToInt16(input);
ws.send(pcm.buffer);
};
processor.connect(gain);
filePlaybackProcessorRef.current = processor;
applyFilePlaybackMode(selectedFilePlaybackMode);
}
/**
 * Reports the current playback position within the file, in seconds.
 *
 * While a source is playing this is the stored offset plus the audio-context
 * time elapsed since playback started, capped at the file duration; otherwise
 * it is the stored offset.
 *
 * @returns {number} Playback offset in seconds.
 */
function getCurrentFilePlaybackOffset() {
  const context = audioContextRef.current;
  const isPlaying = Boolean(context && filePlaybackSourceRef.current);
  if (!isPlaying) {
    return filePlaybackOffsetRef.current;
  }
  const elapsedSeconds = Math.max(0, context.currentTime - filePlaybackStartAtRef.current);
  const projected = filePlaybackOffsetRef.current + elapsedSeconds;
  return Math.min(filePlaybackDurationRef.current, projected);
}
/**
 * Stop and detach the active buffer source, if there is one.
 *
 * @param {string} reason - "pause" records the current position so playback
 *   can resume from it; "destroy" rewinds the stored offset to 0. The reason
 *   is also written to the shared stop-reason ref.
 */
function stopCurrentFileSource(reason) {
  const activeSource = filePlaybackSourceRef.current;
  if (!activeSource) return;
  switch (reason) {
    case "pause":
      // Must be read before the source ref is cleared below.
      filePlaybackOffsetRef.current = getCurrentFilePlaybackOffset();
      break;
    case "destroy":
      filePlaybackOffsetRef.current = 0;
      break;
    default:
      break;
  }
  filePlaybackStopReasonRef.current = reason;
  filePlaybackSourceRef.current = null;
  // Both teardown steps are best-effort; a failure in one must not skip the other.
  const teardownSteps = [
    ["stop", "failed to stop file source"],
    ["disconnect", "failed to disconnect file source"],
  ];
  for (const [method, message] of teardownSteps) {
    try {
      activeSource[method]();
    } catch (error) {
      console.warn(message, error);
    }
  }
}
/**
 * Create a fresh buffer source for the decoded file and start playing it
 * through the file playback processor.
 *
 * Any source that is still active is stopped first. When the new source runs
 * to completion, the stored offset is moved to the end of the file and, if a
 * session WebSocket is attached and open, tail padding plus a "stop" event are
 * sent and a delayed close is scheduled.
 *
 * @param {{ restart?: boolean }} [options] - `restart: true` starts from the
 *   beginning; otherwise playback starts at the stored offset.
 * @returns {Promise<void>} Resolves once the source has been started (or
 *   immediately when no context / decoded samples are available).
 */
async function startFilePlaybackSource(options = {}) {
  const { restart = false } = options;
  const audioContext = audioContextRef.current;
  const samples = filePlaybackBufferRef.current;
  if (!audioContext || !samples) return;
  if (audioContext.state === "suspended") {
    await audioContext.resume();
  }
  if (filePlaybackSourceRef.current) {
    stopCurrentFileSource("destroy");
  }
  const audioBuffer = audioContext.createBuffer(1, samples.length, 16000);
  audioBuffer.copyToChannel(samples, 0);
  const source = audioContext.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(filePlaybackProcessorRef.current);
  const offset = restart ? 0 : filePlaybackOffsetRef.current;
  filePlaybackOffsetRef.current = offset;
  filePlaybackStartAtRef.current = audioContext.currentTime;
  filePlaybackStopReasonRef.current = "natural";
  filePlaybackSourceRef.current = source;
  source.onended = () => {
    // stopCurrentFileSource() clears the source ref before calling stop(), so
    // a source that is still the active one here ran to completion on its own.
    // The shared stop-reason ref cannot be used for that decision: it is
    // overwritten as soon as a replacement source is started, and the "ended"
    // event of the old source is delivered afterwards.
    const endedNaturally = filePlaybackSourceRef.current === source;
    if (!endedNaturally) {
      const reason = filePlaybackStopReasonRef.current;
      if (reason === "pause") {
        setFilePlaybackState("paused");
      } else if (reason === "destroy") {
        setFilePlaybackState("stopped");
      }
      // Any other reason means this source was superseded or torn down; the
      // code that did so owns the playback state and the session socket.
      return;
    }
    filePlaybackSourceRef.current = null;
    filePlaybackOffsetRef.current = filePlaybackDurationRef.current;
    setFilePlaybackState("stopped");
    const ws = filePlaybackSessionWsRef.current;
    if (ws && ws.readyState === WebSocket.OPEN) {
      // Detach first so the processor stops forwarding audio to this socket.
      filePlaybackSessionWsRef.current = null;
      setStatus("stopping");
      try {
        sendFileTailPadding(ws);
      } catch (error) {
        console.warn("failed to send tail padding after file playback", error);
      }
      window.setTimeout(() => {
        if (ws.readyState !== WebSocket.OPEN) return;
        try {
          ws.send(JSON.stringify({ event: "stop" }));
        } catch (error) {
          console.warn("failed to send stop event after file playback", error);
        }
      }, FILE_TAIL_PAD_SETTLE_MS);
      scheduleSocketClose(ws, FILE_STOP_CLOSE_TIMEOUT_MS);
    }
  };
  source.start(0, offset);
  setFilePlaybackState("playing");
}
/**
 * Send FILE_TAIL_PAD_SAMPLES samples of silence over `ws` as 16-bit PCM, in
 * chunks of at most FILE_STREAM_CHUNK_SAMPLES samples.
 *
 * Does nothing when the socket is missing or not open, or when no padding is
 * configured. Sending stops early if the socket leaves the OPEN state.
 *
 * @param {WebSocket} ws
 */
function sendFileTailPadding(ws) {
  if (!ws || ws.readyState !== WebSocket.OPEN || FILE_TAIL_PAD_SAMPLES <= 0) return;
  for (let pending = FILE_TAIL_PAD_SAMPLES; pending > 0 && ws.readyState === WebSocket.OPEN; ) {
    const size = Math.min(FILE_STREAM_CHUNK_SAMPLES, pending);
    // A freshly allocated Int16Array is zero-filled, i.e. silence.
    ws.send(new Int16Array(size).buffer);
    pending -= size;
  }
}
/** Pause file playback; a no-op when no source is currently active. */
function pauseFilePlayback() {
  if (filePlaybackSourceRef.current) {
    stopCurrentFileSource("pause");
  }
}
/**
 * Tear down the file playback graph and reset all playback bookkeeping.
 *
 * Detaches the session socket from playback, stops the active source,
 * disconnects the processor and gain nodes, clears the decoded buffer and
 * position refs, and reports the "stopped" playback state.
 *
 * @param {object} [options]
 * @param {boolean} [options.closeContext=false] - Also close and forget the
 *   shared AudioContext.
 * @param {boolean} [options.preserveFileSelection=false] - Keep the remembered
 *   file reference instead of clearing it.
 */
function destroyFilePlayback(options = {}) {
  const { closeContext = false, preserveFileSelection = false } = options;
  filePlaybackSessionWsRef.current = null;
  if (filePlaybackSourceRef.current) {
    stopCurrentFileSource("destroy");
  }
  if (filePlaybackProcessorRef.current) {
    try {
      filePlaybackProcessorRef.current.disconnect();
    } catch (error) {
      console.warn("failed to disconnect file processor", error);
    }
    filePlaybackProcessorRef.current.onaudioprocess = null;
    filePlaybackProcessorRef.current = null;
  }
  if (filePlaybackGainRef.current) {
    try {
      filePlaybackGainRef.current.disconnect();
    } catch (error) {
      console.warn("failed to disconnect file gain", error);
    }
    filePlaybackGainRef.current = null;
  }
  filePlaybackBufferRef.current = null;
  filePlaybackFileRef.current = preserveFileSelection ? filePlaybackFileRef.current : null;
  filePlaybackDurationRef.current = 0;
  filePlaybackOffsetRef.current = 0;
  filePlaybackStartAtRef.current = 0;
  filePlaybackStopReasonRef.current = "idle";
  setFilePlaybackState("stopped");
  if (closeContext && audioContextRef.current) {
    const audioContext = audioContextRef.current;
    audioContextRef.current = null;
    // close() returns a promise and rejects on an already-closed context;
    // skip the redundant call and never leave the rejection unhandled.
    if (audioContext.state !== "closed") {
      audioContext.close().catch((error) => {
        console.warn("failed to close audio context", error);
      });
    }
  }
}
/**
 * Play/pause handler for the selected audio file.
 *
 * Ensures the playback engine is prepared for the selected file, then pauses
 * when currently playing or starts otherwise (from the beginning when the
 * state is "stopped"). Failures are logged and surfaced through the input
 * notice rather than thrown.
 *
 * @returns {Promise<void>}
 */
async function handlePlaybackToggle() {
  if (!selectedFile) {
    setInputNotice("Please choose an audio file first.");
    return;
  }
  try {
    await prepareFilePlaybackEngine(selectedFile);
    const isPlaying = filePlaybackState === "playing";
    if (!isPlaying) {
      const restart = filePlaybackState === "stopped";
      await startFilePlaybackSource({ restart });
    } else {
      pauseFilePlayback();
    }
    setInputNotice("");
  } catch (error) {
    console.warn("failed to toggle file playback", error);
    setInputNotice("Playback unavailable in this browser context.");
  }
}
/**
 * Toggle the speaker between "playalong" (audible) and "muted".
 *
 * The next mode is derived from the mode of the current render, stored in
 * state, applied to the gain node, and announced through the input notice.
 * The side effects run in the handler itself rather than inside a state
 * updater function, because updater functions are expected to be pure and may
 * be invoked more than once.
 */
function handlePlaybackModeToggle() {
  const next = selectedFilePlaybackMode === "playalong" ? "muted" : "playalong";
  setSelectedFilePlaybackMode(next);
  applyFilePlaybackMode(next);
  if (next === "muted") {
    setInputNotice(status === "streaming" ? "Speaker off. Recognition continues." : "Speaker off.");
  } else {
    setInputNotice("Speaker on.");
  }
}
/**
 * Stop the current recognition session and release its audio resources.
 *
 * Marks the upcoming socket close as expected, cancels auto-reconnect, aborts
 * any in-flight stream, releases the capture processor / media tracks / audio
 * context, tears down file playback, and then winds down the WebSocket:
 *  - no socket: the UI is reset to "idle" right away;
 *  - default: a "stop" event is sent and the close is scheduled after
 *    STOP_CLOSE_TIMEOUT_MS;
 *  - `immediateClose`: the socket is closed right after the "stop" event;
 *  - `immediateClose` + `detachSocket`: the socket's handlers are removed and
 *    the UI is reset to "idle" before closing, and the transition flag is left
 *    untouched.
 *
 * @param {object} [options]
 * @param {boolean} [options.immediateClose=false]
 * @param {boolean} [options.detachSocket=false]
 */
function stopSession(options = {}) {
  const { immediateClose = false, detachSocket = false } = options;
  expectedWsCloseRef.current = true;
  clearAutoReconnect();
  if (!detachSocket) {
    transitionInProgressRef.current = false;
  }
  const ws = wsRef.current;
  streamAbortRef.current.aborted = true;
  if (processorRef.current) {
    processorRef.current.disconnect();
    processorRef.current = null;
  }
  if (mediaStreamRef.current) {
    mediaStreamRef.current.getTracks().forEach((track) => track.stop());
    mediaStreamRef.current = null;
  }
  if (audioContextRef.current) {
    const audioContext = audioContextRef.current;
    // close() returns a promise and rejects on an already-closed context;
    // skip the redundant call and never leave the rejection unhandled.
    if (audioContext.state !== "closed") {
      audioContext.close().catch((error) => {
        console.warn("failed to close audio context", error);
      });
    }
    audioContextRef.current = null;
  }
  destroyFilePlayback({ closeContext: true });
  if (!ws) {
    clearWsKeepalive();
    activeSessionProviderKeyRef.current = "";
    setSessionId("");
    setStatus("idle");
    setStartedAt(null);
    return;
  }
  setStatus("stopping");
  if (ws.readyState === WebSocket.OPEN) {
    try {
      ws.send(JSON.stringify({ event: "stop" }));
    } catch (error) {
      console.warn("failed to send stop event", error);
    }
  }
  if (immediateClose) {
    if (detachSocket) {
      clearWsKeepalive();
      wsRef.current = null;
      filePlaybackSessionWsRef.current = null;
      // Drop the handlers so the close below cannot trigger socket callbacks.
      ws.onopen = null;
      ws.onmessage = null;
      ws.onerror = null;
      ws.onclose = null;
      activeSessionProviderKeyRef.current = "";
      setSessionId("");
      setStatus("idle");
      setStartedAt(null);
    }
    if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {
      ws.close(1000, "client-stop");
    }
    return;
  }
  scheduleSocketClose(ws, STOP_CLOSE_TIMEOUT_MS);
}
/**
 * Accept a newly chosen (or dropped) audio file.
 *
 * Existing playback is torn down and the drag-over highlight cleared first.
 * A missing file clears the selection; an unsupported file clears the
 * selection and shows `unsupportedMessage`; a supported file is stored and
 * announced as ready.
 *
 * @param {File|null} file
 * @param {{ unsupportedMessage?: string }} [options]
 */
function handleSelectedFile(file, options = {}) {
  const { unsupportedMessage = "Please choose a supported local audio file." } = options;
  destroyFilePlayback({ closeContext: false });
  setIsFileDragOver(false);
  if (file && isSupportedAudioFile(file)) {
    setSelectedFile(file);
    setInputNotice(`Ready: ${file.name}`);
    return;
  }
  setSelectedFile(null);
  setInputNotice(file ? unsupportedMessage : "");
}
// Renders the card for one provider.
// `name` selects the entry in `providers`; when there is none, a state object
// is built with makeProviderState() from the provider catalog.
// `options.cardClassName` is appended to the base "provider-card" class.
// NOTE(review): `articleClassName` and `transcriptClassName` are not referenced
// in the lines visible here; the surrounding markup appears to be missing from
// this view, so confirm how they are used against the original file.
function renderProviderCard(name, options = {}) {
const { cardClassName = "", transcriptClassName = "" } = options;
const provider = providers[name] || makeProviderState(name, makeProviderMetaMap(providerCatalog));
// filter(Boolean) drops the empty default so no trailing space is produced.
const articleClassName = ["provider-card", cardClassName].filter(Boolean).join(" ");
return (
{provider.label}
{provider.detail}
);
}
// Placeholder content for when there are no provider cards to show: a
// "no models match" message once `catalogLoaded` is truthy, otherwise a
// loading message.
function renderEmpty() {
return (
{catalogLoaded
? "No ASR models currently match the selected language."
: "Loading ASR models..."}
);
}
// Default layout: one focus-styled card per entry of `selectedDefaultProviders`
// that is present in `visibleProviderMap`, in selection order. Falls back to
// renderEmpty() when none of the selected providers is visible.
function renderDefaultMode() {
const orderedNames = selectedDefaultProviders.filter((name) => Boolean(visibleProviderMap[name]));
if (!orderedNames.length) return renderEmpty();
return (
{orderedNames.map((name) =>
renderProviderCard(name, {
cardClassName: "provider-card--focus",
transcriptClassName: "text-box--focus",
})
)}
);
}
// Demo layout: a single card for DEMO_PROVIDER_NAME with the focus and demo
// style modifiers. Falls back to renderEmpty() when that provider is not in
// `providerCatalogMap`.
function renderDemoMode() {
if (!providerCatalogMap[DEMO_PROVIDER_NAME]) return renderEmpty();
return (
{renderProviderCard(DEMO_PROVIDER_NAME, {
cardClassName: "provider-card--focus provider-card--demo",
transcriptClassName: "text-box--focus text-box--demo",
})}
);
}
// Vertical layout: one card per visible provider, or renderEmpty() when there
// are none.
function renderVerticalMode() {
if (!visibleProviderNames.length) return renderEmpty();
// map() passes the element index as the second argument, so it arrives as
// renderProviderCard's `options`; destructuring a number yields the default
// (empty) class names.
return {visibleProviderNames.map(renderProviderCard)} ;
}
// Grid layout: each entry of `gridRows` is narrowed to the provider names
// present in `visibleProviderMap`, rows left without names are dropped, and
// the remaining rows are rendered as groups of cards. Falls back to
// renderEmpty() when no row survives.
function renderGridMode() {
const rows = gridRows
.map((row) => ({
...row,
names: row.names.filter((name) => Boolean(visibleProviderMap[name])),
}))
.filter((row) => row.names.length);
if (!rows.length) return renderEmpty();
// NOTE(review): `index` is unused in the lines visible here; presumably it
// served as the row key in markup that is missing from this view.
return rows.map((row, index) => (
{row.names.map(renderProviderCard)}
));
}
// Pick the layout renderer for the selected mode; anything other than the
// three named modes uses the default layout.
const isDemoMode = selectedMode === "demo";
let content = null;
switch (selectedMode) {
  case "demo":
    content = renderDemoMode();
    break;
  case "vertical":
    content = renderVerticalMode();
    break;
  case "grid":
    content = renderGridMode();
    break;
  default:
    content = renderDefaultMode();
    break;
}
return (
{isDemoMode ? "Focused Live Demo" : "Standalone Multi-ASR Comparison"}
{isDemoMode ? "ASR Live Demo" : "ASR Demo"}
{isDemoMode
? "X-ASR demo: A low latency streaming model"
: "Browser microphone input or uploaded audio file, side-by-side comparison of multiple ASR pipelines, with language-based filtering so users can quickly focus on supported models."}
Status
{status}
{!isDemoMode && (
Session
{sessionId || "--"}
)}
Duration
{durationText}
{!isDemoMode && (
Language
handleProviderSelectionChange({ nextLanguage: event.target.value })}
>
{availableLanguages.map((language) => (
{LANGUAGE_LABELS[language] || language.toUpperCase()}
))}
)}
Mode
handleProviderSelectionChange({ nextMode: event.target.value })}
>
{MODE_OPTIONS.map((mode) => (
{mode.label}
))}
{!isDemoMode && (
Input
{
setSelectedInputMode(event.target.value);
setInputNotice("");
}}
>
{INPUT_MODE_OPTIONS.map((mode) => (
{mode.label}
))}
)}
{!isDemoMode && selectedMode === "default" && (
Default Models
{visibleProviderNames.map((name) => {
const selectedIndex = draftDefaultProviders.indexOf(name);
return (
= 0 ? "default-model-chip--selected" : ""}`.trim()}
onClick={() => toggleDraftDefaultProvider(name)}
>
{(visibleProviderMap[name] && visibleProviderMap[name].label) || name}
{selectedIndex >= 0 ? selectedIndex + 1 : ""}
);
})}
✓
)}
{!isDemoMode && selectedInputMode === "file" && (
Audio File
{
event.preventDefault();
setIsFileDragOver(true);
}}
onDragOver={(event) => {
event.preventDefault();
event.dataTransfer.dropEffect = "copy";
setIsFileDragOver(true);
}}
onDragLeave={(event) => {
event.preventDefault();
if (event.currentTarget.contains(event.relatedTarget)) return;
setIsFileDragOver(false);
}}
onDrop={(event) => {
event.preventDefault();
const file = event.dataTransfer.files && event.dataTransfer.files[0] ? event.dataTransfer.files[0] : null;
handleSelectedFile(file, { unsupportedMessage: "Please drop a supported local audio file." });
}}
>
Drop a local audio file here
or use the picker below
{
const file = event.target.files && event.target.files[0] ? event.target.files[0] : null;
handleSelectedFile(file);
}}
/>
fileInputRef.current && fileInputRef.current.click()}
>
Choose Local File
{selectedFile ? selectedFile.name : "No file selected"}
Playback
{selectedFilePlaybackMode === "playalong" ? "Speaker On" : "Speaker Off"}
)}
Start
Exit
{inputNotice ? : null}
{content}
);
}
// Creates a React root on the element with id "root" and renders into it.
// NOTE(review): the argument to render() is empty in this view; the root
// element markup appears to be missing, so confirm against the original file.
ReactDOM.createRoot(document.getElementById("root")).render( );
/**
 * Merge a new transcript fragment into the text accumulated so far.
 *
 * Both sides are stringified and trimmed. If one side already contains the
 * other (as a suffix of the old text or a prefix of the new text) that side
 * wins; otherwise the longest overlap between the end of the old text and the
 * start of the new text is collapsed. Without any overlap the two are
 * concatenated, separated by a space only when needsSpace() asks for one.
 *
 * @param {*} previous - Text accumulated so far (falsy is treated as empty).
 * @param {*} next - Newly received text (falsy is treated as empty).
 * @returns {string}
 */
function joinSessionText(previous, next) {
  const head = String(previous || "").trim();
  const tail = String(next || "").trim();
  if (!tail) return head;
  if (!head) return tail;
  if (head.endsWith(tail)) return head;
  if (tail.startsWith(head)) return tail;
  // Shrink the candidate overlap until the end of `head` matches the start of `tail`.
  let overlap = Math.min(head.length, tail.length);
  while (overlap > 0 && !head.endsWith(tail.slice(0, overlap))) {
    overlap -= 1;
  }
  if (overlap > 0) {
    return `${head}${tail.slice(overlap)}`.trim();
  }
  const separator = needsSpace(head, tail) ? " " : "";
  return `${head}${separator}${tail}`;
}
/**
 * Decide whether a space belongs between two text fragments: only when the
 * left one ends with, and the right one starts with, an ASCII letter or digit.
 *
 * @param {string} left
 * @param {string} right
 * @returns {boolean}
 */
function needsSpace(left, right) {
  const endsWithAlnum = /[A-Za-z0-9]$/.test(left);
  return endsWithAlnum && /^[A-Za-z0-9]/.test(right);
}
/**
 * Close `ws` with code 1000 / reason "client-stop" after `timeoutMs`, unless
 * it is no longer open or connecting by then.
 *
 * @param {WebSocket} ws
 * @param {number} timeoutMs
 */
function scheduleSocketClose(ws, timeoutMs) {
  const closeIfStillActive = () => {
    const stillActive = ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING;
    if (stillActive) {
      ws.close(1000, "client-stop");
    }
  };
  window.setTimeout(closeIfStillActive, timeoutMs);
}
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    window.setTimeout(wake, ms);
  });
}
/**
 * Convert floating-point samples to signed 16-bit PCM.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7fff, and the typed-array store drops the
 * fractional part.
 *
 * @param {Float32Array|number[]} input
 * @returns {Int16Array} A new array with the same length as `input`.
 */
function float32ToInt16(input) {
  const out = new Int16Array(input.length);
  for (let index = 0; index < out.length; index += 1) {
    const clamped = Math.min(1, Math.max(-1, input[index]));
    const scale = clamped < 0 ? 0x8000 : 0x7fff;
    out[index] = clamped * scale;
  }
  return out;
}
/**
 * Decode an audio file and return it as mono samples at 16 kHz.
 *
 * The file is decoded with a temporary AudioContext, mixed down with
 * mixToMono(), and resampled by rendering it through a one-channel 16 kHz
 * OfflineAudioContext. The temporary context is always closed, including when
 * decoding fails.
 *
 * @param {Blob} file - Anything exposing `arrayBuffer()`.
 * @returns {Promise<Float32Array>} A copy of the rendered channel data.
 */
async function decodeFileTo16kMono(file) {
  const ContextClass = window.AudioContext || window.webkitAudioContext;
  const encodedBytes = await file.arrayBuffer();
  const decodeContext = new ContextClass();
  try {
    const decoded = await decodeContext.decodeAudioData(encodedBytes.slice(0));
    const monoSamples = mixToMono(decoded);
    const sourceRate = decoded.sampleRate;
    // At least one frame is requested for the offline render.
    const resampledLength = Math.max(1, Math.ceil((monoSamples.length * 16000) / sourceRate));
    const renderContext = new OfflineAudioContext(1, resampledLength, 16000);
    const monoBuffer = renderContext.createBuffer(1, monoSamples.length, sourceRate);
    monoBuffer.copyToChannel(monoSamples, 0);
    const player = renderContext.createBufferSource();
    player.buffer = monoBuffer;
    player.connect(renderContext.destination);
    player.start(0);
    const rendered = await renderContext.startRendering();
    return rendered.getChannelData(0).slice();
  } finally {
    await decodeContext.close().catch(() => {});
  }
}
/**
 * Mix all channels of an audio buffer down to one by averaging them.
 *
 * @param {AudioBuffer} audioBuffer
 * @returns {Float32Array} For single-channel input, the buffer's own channel
 *   data (not a copy); otherwise a new array with the per-frame average.
 */
function mixToMono(audioBuffer) {
  const channelCount = audioBuffer.numberOfChannels;
  if (channelCount === 1) {
    return audioBuffer.getChannelData(0);
  }
  const frameCount = audioBuffer.length;
  const mixed = new Float32Array(frameCount);
  for (let ch = 0; ch < channelCount; ch += 1) {
    const channelData = audioBuffer.getChannelData(ch);
    let frame = 0;
    while (frame < frameCount) {
      mixed[frame] += channelData[frame] / channelCount;
      frame += 1;
    }
  }
  return mixed;
}