web-index/doubao-asr-demo/main.js

974 lines
37 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ---- DOM handles and module state -------------------------------------------
// Elements are looked up once at script load; optional-chained uses below show
// that several of them may be absent from the page.
const formElement = null; // The old form was removed; the new UI triggers submission through buttons
const submitButton = document.getElementById("submitButton");
// `let` because it is looked up again once DOMContentLoaded fires (see below).
let debugLogElement = document.getElementById("debugLog") || null;
const transcriptElement = document.getElementById("transcriptOutput") || null;
const downloadTxtBtn = document.getElementById("downloadTxtBtn");
const downloadSrtBtn = document.getElementById("downloadSrtBtn");
const copyBtn = document.getElementById("copyBtn");
const viewModeSelect = document.getElementById("viewMode");
const exportRangeTxtBtn = document.getElementById("exportRangeTxt");
const exportRangeSrtBtn = document.getElementById("exportRangeSrt");
const exportCsvBtn = document.getElementById("exportCsv");
const rangeStartInput = document.getElementById("rangeStart");
const rangeEndInput = document.getElementById("rangeEnd");
const exportToolbar = document.getElementById("exportToolbar");
// Most recent recognition result; assigned after a successful conversion and
// read by the export/copy handlers.
let lastResultJson = null;
const editedMap = new Map(); // key: `${start}-${end}` -> edited text
// Speaker display-name and color mappings
const speakerNameMap = new Map(); // key: speakerId -> display name (editable)
const speakerColorMap = new Map(); // key: speakerId -> 1..3, used to build the CSS classes tag-1..tag-3
let editMode = false; // whether "edit speaker names" mode is currently on
// Startup marker in the debug log, making it easy to confirm this script actually ran
appendLog('前端脚本已加载');
// Once the DOM is ready: look up the debug log element again, then call /health and log the outcome.
window.addEventListener('DOMContentLoaded', () => { debugLogElement = document.getElementById('debugLog'); appendLog('DOM 已就绪'); fetch('/health').then(r=>r.json()).then(j=>appendLog('健康检查:'+JSON.stringify(j))).catch(e=>appendLog('健康检查失败:'+e.message)); });
// Proxy endpoints used when no direct API base address is configured.
const proxySubmitEndpoint = "/api/asr/submit";
const proxyResultEndpoint = "/api/asr/result";
// Polling: delay between result queries and the maximum number of queries.
const pollIntervalMs = 2000;
const maxAttempts = 60;
// Status strings (compared upper-cased by the poller) that end polling.
const statusSuccessSet = new Set(["SUCCEEDED", "SUCCESS", "FINISHED"]);
const statusFailureSet = new Set(["FAILED", "FAIL", "ERROR"]);
/**
 * Append one ISO-timestamped line to the on-page debug log.
 *
 * Uses the cached `debugLogElement` when available and otherwise looks up
 * `#debugLog`; does nothing when neither exists.
 *
 * @param {string} message - Text to log.
 * @returns {void}
 */
function appendLog(message) {
  const target = debugLogElement || document.getElementById("debugLog");
  if (!target) {
    return;
  }
  const entry = `[${new Date().toISOString()}] ${message}`;
  // The placeholder text is replaced outright by the first real entry.
  const showsPlaceholder = target.textContent === "等待提交…";
  target.textContent = showsPlaceholder ? entry : `${target.textContent}\n${entry}`;
}
/**
 * Clear everything a previous conversion put on screen: the transcript
 * field, the debug log (reset to its placeholder text), and the
 * `#transcriptLines` / `#speakerTags` containers. Missing elements are skipped.
 *
 * @returns {void}
 */
function resetOutputs() {
  if (transcriptElement) {
    transcriptElement.value = "";
  }
  if (debugLogElement) {
    debugLogElement.textContent = "等待提交…";
  }
  for (const containerId of ["transcriptLines", "speakerTags"]) {
    const container = document.getElementById(containerId);
    if (container) {
      container.innerHTML = "";
    }
  }
}
/**
 * Remove a leading `data:<...>;base64,` header from a data-URI string,
 * leaving only the base64 payload. Strings without the header are returned
 * unchanged.
 *
 * @param {string} base64String - Data URI or bare base64 text.
 * @returns {string} The text with any data-URI header removed.
 */
function stripDataUriPrefix(base64String) {
  const dataUriHeader = /^data:.*;base64,/;
  return base64String.replace(dataUriHeader, "");
}
/**
 * Guess an audio format from a file name or URL by looking at its extension.
 *
 * For URLs only the pathname is considered; query strings and fragments are
 * ignored. The extension is lower-cased and a few aliases are normalized
 * (`m4a`/`mp4a`/`mp4` -> `m4a`, `oga` -> `ogg`).
 *
 * @param {string} nameOrUrl - File name, path, or absolute URL.
 * @returns {string} The detected format, or `"wav"` when the input is not a
 *   non-empty string or has no usable extension (including a trailing dot).
 */
function detectAudioFormat(nameOrUrl) {
  const fallbackFormat = "wav";
  if (!nameOrUrl || typeof nameOrUrl !== "string") return fallbackFormat;
  let candidate = nameOrUrl.trim();
  try {
    // For an absolute URL keep only the path component.
    candidate = new URL(candidate).pathname;
  } catch (_) {
    // Not a URL: keep working with the raw string.
  }
  // Drop any query/fragment remnants, then reduce to the last path segment.
  candidate = candidate.split("?")[0].split("#")[0];
  const lastSlash = candidate.lastIndexOf("/");
  if (lastSlash !== -1) candidate = candidate.slice(lastSlash + 1);
  const lastDot = candidate.lastIndexOf(".");
  if (lastDot === -1) return fallbackFormat;
  const ext = candidate.slice(lastDot + 1).toLowerCase();
  // A name ending in "." has no extension; never report an empty format.
  if (ext === "") return fallbackFormat;
  // Normalize a few aliases.
  if (["m4a", "mp4a", "mp4"].includes(ext)) return "m4a";
  if (ext === "oga") return "ogg";
  return ext;
}
/**
 * Read a file through `FileReader.readAsDataURL`.
 *
 * @param {Blob} file - The file to read (passed straight to FileReader).
 * @returns {Promise<*>} Resolves with `reader.result` once loading finishes;
 *   rejects with an Error when the reader reports an error.
 */
function readFileAsBase64(file) {
  const startRead = (fulfil, fail) => {
    const fileReader = new FileReader();
    fileReader.onerror = () => {
      fail(new Error("读取文件失败"));
    };
    fileReader.onload = () => {
      fulfil(fileReader.result);
    };
    fileReader.readAsDataURL(file);
  };
  return new Promise(startRead);
}
/**
 * Build the request headers for a submit or query call.
 *
 * Proxy mode sends only `Content-Type: application/json`. Direct mode also
 * adds the API key, the optional resource id, the request id and a fixed
 * `X-Api-Sequence` of `-1`.
 *
 * @param {object} params
 * @param {boolean} params.useProxy - True when the request goes through the proxy.
 * @param {string} [params.apiKey] - Required in direct mode.
 * @param {string} [params.resourceId] - Added only when truthy.
 * @param {string} [params.requestId] - Value for `X-Api-Request-Id`.
 * @returns {Headers}
 * @throws {Error} In direct mode when `apiKey` is empty.
 */
function createHeaders({ useProxy, apiKey, resourceId, requestId }) {
  const result = new Headers({ "Content-Type": "application/json" });
  if (useProxy) {
    return result;
  }
  if (!apiKey) {
    throw new Error("当直连豆包接口时必须填写 API Key");
  }
  const directHeaders = [
    ["x-api-key", apiKey],
    ...(resourceId ? [["X-Api-Resource-Id", resourceId]] : []),
    ["X-Api-Request-Id", requestId],
    ["X-Api-Sequence", "-1"],
  ];
  for (const [headerName, headerValue] of directHeaders) {
    result.set(headerName, headerValue);
  }
  return result;
}
/**
 * Assemble the JSON body for a submit request that carries inline audio data.
 *
 * The codec is `"raw"` for the formats `mp3`, `wav` and `pcm`, and equal to
 * the format otherwise.
 *
 * @param {object} params
 * @param {string} params.base64Data - Audio content, base64 encoded.
 * @param {string} params.format - Audio format name.
 * @param {number} params.sampleRate - Stored as `audio.rate`.
 * @param {number} params.bits - Stored as `audio.bits`.
 * @param {number} params.channels - Stored as `audio.channel`.
 * @param {string} params.uid - Stored as `user.uid`.
 * @param {string} params.modelName - Stored as `request.model_name`.
 * @param {object} params.options - Recognition switches (see `request` below).
 * @returns {{user: object, audio: object, request: object}}
 */
function buildSubmitPayload({
  base64Data,
  format,
  sampleRate,
  bits,
  channels,
  uid,
  modelName,
  options
}) {
  const rawCodecFormats = ["mp3", "wav", "pcm"];
  const codec = rawCodecFormats.includes(format) ? "raw" : format;
  const user = { uid };
  const audio = {
    data: base64Data,
    format,
    codec,
    rate: sampleRate,
    bits,
    channel: channels
  };
  const request = {
    model_name: modelName,
    enable_itn: options.enableItn,
    enable_punc: options.enablePunc,
    enable_ddc: options.enableDdc,
    enable_speaker_info: options.enableSpeaker,
    enable_channel_split: options.enableChannelSplit,
    show_utterances: options.showUtterances,
    vad_segment: options.vadSegment,
    sensitive_words_filter: options.sensitiveWords
  };
  return { user, audio, request };
}
/**
 * Find the job/task identifier in a submit response.
 *
 * Several field names and nesting levels are probed in a fixed priority
 * order; the first non-empty string wins.
 *
 * @param {object|null|undefined} result - Parsed submit response.
 * @returns {string|null} The identifier, or null when none is present.
 */
function extractJobKey(result) {
  const nested = result?.result;
  const data = result?.data;
  const possibleKeys = [
    nested?.job_key,
    nested?.task_id,
    nested?.taskId,
    data?.job_key,
    data?.taskId,
    data?.task_id,
    result?.job_key,
    result?.task_id,
    result?.taskId,
    result?.RequestId,
    result?.request_id
  ];
  for (const key of possibleKeys) {
    if (typeof key === "string" && key.length > 0) {
      return key;
    }
  }
  return null;
}
/**
 * Find the task status string in a query response.
 *
 * Probes `result.*`, then `data.*`, then the top level; the first non-empty
 * string wins.
 *
 * @param {object|null|undefined} result - Parsed query response.
 * @returns {string|null} The status as reported, or null when absent.
 */
function deriveStatus(result) {
  const nested = result?.result;
  const data = result?.data;
  const possibleStatuses = [
    nested?.status,
    nested?.task_status,
    data?.status,
    data?.task_status,
    result?.status
  ];
  for (const status of possibleStatuses) {
    if (typeof status === "string" && status.length > 0) {
      return status;
    }
  }
  return null;
}
/**
 * Join the text of a list of segments into one newline-separated string.
 *
 * For each segment the trimmed `text` is used, falling back to the trimmed
 * `transcript`; segments with neither are skipped.
 *
 * @param {Array<object>|*} segments - Segment list; non-arrays give null.
 * @returns {string|null} Joined text, or null when nothing usable was found.
 */
function flattenSegmentsText(segments) {
  if (!Array.isArray(segments) || segments.length === 0) {
    return null;
  }
  const collected = [];
  for (const segment of segments) {
    for (const field of [segment?.text, segment?.transcript]) {
      if (typeof field !== "string") continue;
      const trimmed = field.trim();
      if (trimmed.length > 0) {
        collected.push(trimmed);
        break;
      }
    }
  }
  return collected.length > 0 ? collected.join("\n") : null;
}
/**
 * Pull a plain-text transcript out of a recognition response.
 *
 * Text flattened from `data.result.segments` (then `result.segments`) takes
 * priority; otherwise the first non-blank string among several known text
 * fields is returned.
 *
 * @param {object|null|undefined} result - Parsed response.
 * @returns {string|null} Transcript text, or null when none is found.
 */
function extractTranscript(result) {
  const fromSegments =
    flattenSegmentsText(result?.data?.result?.segments) ?? flattenSegmentsText(result?.result?.segments);
  if (fromSegments) {
    return fromSegments;
  }
  const fallbackFields = [
    result?.data?.result?.text,
    result?.result?.text,
    result?.result?.output?.choices?.[0]?.text,
    result?.result?.output?.transcript,
    result?.result?.transcript,
    result?.data?.output?.choices?.[0]?.text,
    result?.data?.transcript
  ];
  for (const field of fallbackFields) {
    if (typeof field === "string" && field.trim().length > 0) {
      return field;
    }
  }
  return null;
}
/**
 * Format a millisecond count as `HH:MM:SS.mmm` (with a leading `-` for
 * negative values). Hours are not capped at two digits.
 *
 * @param {number} ms - Duration or offset in milliseconds.
 * @returns {string|null} The formatted timestamp, or null when `ms` is not a
 *   number or is NaN.
 */
function msToTimestamp(ms) {
  if (typeof ms !== "number" || Number.isNaN(ms)) return null;
  const magnitude = Math.abs(ms);
  const pad = (value, width) => value.toString().padStart(width, "0");
  const clock = [
    Math.floor(magnitude / 3600000),
    Math.floor((magnitude % 3600000) / 60000),
    Math.floor((magnitude % 60000) / 1000)
  ]
    .map((part) => pad(part, 2))
    .join(":");
  const fraction = pad(Math.floor(magnitude % 1000), 3);
  const prefix = ms < 0 ? "-" : "";
  return `${prefix}${clock}.${fraction}`;
}
/**
 * Convert a loosely-typed time value to milliseconds.
 *
 * Accepted inputs:
 *  - a number: taken as milliseconds already (NaN yields null);
 *  - a plain numeric string such as `"1.23"`: taken as SECONDS;
 *  - a clock string `H:MM:SS`, optionally followed by `.` or `,` and 1-3
 *    fraction digits, e.g. `"00:00:01.230"`, `"00:01:02"`, `"01:02:03,004"`.
 * Surrounding whitespace in strings is ignored.
 *
 * @param {*} value - Value to convert.
 * @returns {number|null} Milliseconds, or null when the value is not understood.
 */
function anyToMs(value) {
  if (typeof value === "number") {
    // NaN is "no usable time"; report it the same way as unparseable input.
    return Number.isNaN(value) ? null : value;
  }
  if (typeof value !== "string") return null;
  const text = value.trim();
  // Bare numeric strings are seconds, e.g. "1.23".
  if (/^\d+(\.\d+)?$/.test(text)) {
    return Number(text) * 1000;
  }
  const clock = text.match(/^(\d+):(\d{2}):(\d{2})(?:[.,](\d{1,3}))?$/);
  if (!clock) return null;
  const [, hours, minutes, seconds, fraction = ""] = clock;
  // Right-pad the fraction so that ".5" means 500 ms, not 5 ms.
  const millis = Number(fraction.padEnd(3, "0"));
  return ((Number(hours) * 60 + Number(minutes)) * 60 + Number(seconds)) * 1000 + millis;
}
/**
 * Locate the list of timed segments in a recognition response.
 *
 * A non-empty `segments` array is returned as-is. Otherwise a non-empty
 * `utterances` array is converted into the same shape
 * (`start_ms`, `end_ms`, `text`, `channel`, `speaker`). Both fields are
 * looked up under `data.result`, then `result`, then the top level.
 *
 * @param {object|null|undefined} obj - Parsed response.
 * @returns {Array<object>|null} Segment list, or null when none is found.
 */
function extractSegments(obj) {
  const pickNonEmptyArray = (field) => {
    const found = obj?.data?.result?.[field] || obj?.result?.[field] || obj?.[field] || null;
    return Array.isArray(found) && found.length > 0 ? found : null;
  };
  const segments = pickNonEmptyArray("segments");
  if (segments) {
    return segments;
  }
  // Responses may carry `utterances` instead; map them to the segment shape.
  const utterances = pickNonEmptyArray("utterances");
  if (!utterances) {
    return null;
  }
  return utterances.map((utterance) => {
    const { additions } = utterance;
    return {
      start_ms: utterance.start_ms ?? utterance.start_time ?? utterance.start,
      end_ms: utterance.end_ms ?? utterance.end_time ?? utterance.end,
      text: utterance.text,
      channel: utterance.channel ?? utterance.channel_index ?? utterance.ch ?? additions?.channel_id,
      speaker: utterance.speaker ?? utterance.spk ?? utterance.speaker_id ?? additions?.speaker
    };
  });
}
/**
 * Render every segment of a response as one line of the form
 * `[start-end] (ch=…, spk=…) text`, omitting whichever parts are unknown.
 *
 * @param {object} obj - Parsed response passed to `extractSegments`.
 * @returns {string|null} Newline-joined lines, or null when there are no segments.
 */
function formatSegmentsDetailed(obj) {
  const segments = extractSegments(obj);
  if (!Array.isArray(segments) || segments.length === 0) return null;

  const trimmedOrFalsy = (field) => typeof field === "string" && field.trim();
  const stampOrNull = (rawTime) => {
    const ms = anyToMs(rawTime);
    return ms != null ? msToTimestamp(ms) : null;
  };

  const rendered = [];
  for (const seg of segments) {
    const startTs = stampOrNull(seg.start_ms ?? seg.start_time ?? seg.start ?? seg.begin_ms ?? seg.begin_time);
    const endTs = stampOrNull(seg.end_ms ?? seg.end_time ?? seg.end ?? seg.finish_ms ?? seg.finish_time);
    const channel = seg.channel ?? seg.channel_index ?? seg.ch ?? null;
    const speaker = seg.speaker ?? seg.spk ?? seg.speaker_id ?? null;
    const text = trimmedOrFalsy(seg.text) || trimmedOrFalsy(seg.transcript) || "";

    const attrs = [];
    if (channel != null) attrs.push(`ch=${channel}`);
    if (speaker != null) attrs.push(`spk=${speaker}`);

    // An unknown side of the time range is shown as "?".
    let range = "";
    if (startTs && endTs) {
      range = `[${startTs}-${endTs}]`;
    } else if (startTs) {
      range = `[${startTs}-?]`;
    } else if (endTs) {
      range = `[?-${endTs}]`;
    }

    const attrText = attrs.length ? `(${attrs.join(", ")})` : "";
    const header = [range, attrText].filter(Boolean).join(" ");
    rendered.push(header ? `${header} ${text}` : text);
  }
  return rendered.join("\n");
}
/**
 * Promise-based delay built on `setTimeout`.
 *
 * @param {number} durationMs - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(durationMs) {
  return new Promise((wake) => {
    setTimeout(wake, durationMs);
  });
}
/**
 * Return the response's segments with numeric `start_ms`/`end_ms` (0 when
 * unknown) and with `text` replaced by the user's edit when `editedMap`
 * holds one under the key `${start}-${end}`.
 *
 * @param {object} obj - Parsed response passed to `extractSegments`.
 * @returns {Array<object>} New segment objects; empty when there are none.
 */
function buildSegmentsFromEdited(obj) {
  const sourceSegments = extractSegments(obj) || [];
  const normalize = (seg) => {
    const startMs = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start) ?? 0;
    const endMs = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end) ?? 0;
    const edited = editedMap.get(`${startMs}-${endMs}`);
    return {
      ...seg,
      start_ms: startMs,
      end_ms: endMs,
      text: edited ?? seg.text ?? seg.transcript ?? ""
    };
  };
  return sourceSegments.map(normalize);
}
/**
 * Build SRT subtitle text from the response's segments.
 *
 * Each cue is numbered from 1, uses `HH:MM:SS,mmm` times (falling back to
 * `00:00:00,000` when a time is unknown) and is prefixed with `[speaker] `
 * when the segment names a speaker.
 *
 * @param {object} obj - Parsed response passed to `extractSegments`.
 * @returns {string|null} SRT text, or null when there are no segments.
 */
function buildSrtFromSegments(obj) {
  const segments = extractSegments(obj);
  if (!Array.isArray(segments) || segments.length === 0) return null;

  const zeroStamp = "00:00:00,000";
  const toSrtStamp = (rawTime) => msToTimestamp(anyToMs(rawTime))?.replace(".", ",") ?? zeroStamp;
  const trimmedOrFalsy = (field) => typeof field === "string" && field.trim();

  const cues = [];
  for (const [position, seg] of segments.entries()) {
    const startStamp = toSrtStamp(seg.start_ms ?? seg.start_time ?? seg.start);
    const endStamp = toSrtStamp(seg.end_ms ?? seg.end_time ?? seg.end);
    const speakerId = seg.speaker ?? seg.spk ?? seg.speaker_id;
    let speakerName = "";
    if (speakerId != null) {
      speakerName = speakerNameMap.get(speakerId) || `说话人 ${speakerId}`;
    }
    const body = trimmedOrFalsy(seg.text) || trimmedOrFalsy(seg.transcript) || "";
    const speakerPrefix = speakerName ? `[${speakerName}] ` : "";
    cues.push(`${position + 1}\n${startStamp} --> ${endStamp}\n${speakerPrefix}${body}\n`);
  }
  return cues.join("\n");
}
/**
 * Bucket segments first by channel and then by speaker, preserving input
 * order inside each bucket. Segments lacking a channel or speaker go under
 * the key `"unknown"`.
 *
 * @param {Iterable<object>} segs - Segments to group.
 * @returns {Map<*, Map<*, Array<object>>>} channel -> speaker -> segments.
 */
function groupBySpeakerAndChannel(segs) {
  const byChannel = new Map();
  for (const seg of segs) {
    const channelKey = seg.channel ?? seg.channel_index ?? seg.ch ?? seg.additions?.channel_id ?? "unknown";
    const speakerKey = seg.speaker ?? seg.spk ?? seg.speaker_id ?? seg.additions?.speaker ?? "unknown";

    let bySpeaker = byChannel.get(channelKey);
    if (bySpeaker === undefined) {
      bySpeaker = new Map();
      byChannel.set(channelKey, bySpeaker);
    }

    const bucket = bySpeaker.get(speakerKey);
    if (bucket === undefined) {
      bySpeaker.set(speakerKey, [seg]);
    } else {
      bucket.push(seg);
    }
  }
  return byChannel;
}
/**
 * (Re)build the clickable timeline ruler.
 *
 * Clears `rulerEl`, adds a hover tooltip plus evenly spaced tick marks and
 * labels, and reports clicks as a time offset through `onJump`.
 * The tooltip previews the segment under the pointer, taken from
 * `buildTimeline._segs` (set by the caller before invoking this function).
 *
 * Fixes relative to the earlier version:
 *  - listeners from a previous call on the same ruler are removed, so
 *    re-rendering no longer stacks handlers and fires `onJump` repeatedly;
 *  - the pointer position is always measured against the ruler itself
 *    (`offsetX` is relative to whichever child the pointer is over);
 *  - clicking a tick label jumps to that tick only, instead of also
 *    triggering the ruler's own click handler.
 *
 * @param {number} durationMs - Total duration; nothing is drawn when <= 0 or falsy.
 * @param {Element|null} rulerEl - Container element; no-op when missing.
 * @param {(ms: number) => void} [onJump] - Called with the chosen offset in ms.
 * @returns {void}
 */
function buildTimeline(durationMs, rulerEl, onJump) {
  if (!rulerEl) return;

  // One AbortController per ruler lets the next call detach these listeners.
  if (!buildTimeline._controllers) buildTimeline._controllers = new WeakMap();
  buildTimeline._controllers.get(rulerEl)?.abort();
  buildTimeline._controllers.delete(rulerEl);

  rulerEl.innerHTML = "";
  if (!durationMs || durationMs <= 0) return;

  const controller = new AbortController();
  buildTimeline._controllers.set(rulerEl, controller);
  const listenerOptions = { signal: controller.signal };

  const step = Math.max(10000, Math.floor(durationMs / 10)); // at least 10 s between ticks

  // Tooltip element
  const tooltip = document.createElement("div");
  tooltip.className = "tooltip";
  tooltip.style.left = "0%";
  rulerEl.appendChild(tooltip);

  // Horizontal pointer position relative to the ruler's left edge.
  const pointerX = (e) => e.clientX - rulerEl.getBoundingClientRect().left;
  // Clamp a pixel offset to [0, 1] of the ruler width.
  const toPercent = (x) => Math.max(0, Math.min(1, x / (rulerEl.clientWidth || 1)));

  const updateTooltip = (x) => {
    const percent = toPercent(x);
    const t = Math.round(durationMs * percent);
    tooltip.style.left = `${percent * 100}%`;
    let label = msToTimestamp(t)?.slice(0, 12) || "";
    const segs = Array.isArray(buildTimeline._segs) ? buildTimeline._segs : [];
    const near = segs.find((s) => {
      const start = anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
      const end = anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;
      return t >= start && t <= end;
    });
    if (near) {
      const ch = near.channel ?? near.channel_index ?? near.ch;
      const spk = near.speaker ?? near.spk ?? near.speaker_id;
      const previewRaw = (near.text || near.transcript || "").replace(/\n/g, " ");
      const preview = previewRaw.length > 36 ? (previewRaw.slice(0, 36) + "…") : previewRaw;
      label = `${msToTimestamp(t)?.slice(0, 12)} • ch=${ch ?? "-"} spk=${spk ?? "-"}${preview}`;
    }
    tooltip.textContent = label;
  };

  rulerEl.addEventListener("mousemove", (e) => updateTooltip(pointerX(e)), listenerOptions);
  rulerEl.addEventListener("mouseenter", (e) => updateTooltip(pointerX(e)), listenerOptions);
  rulerEl.addEventListener("click", (e) => {
    const t = Math.round(durationMs * toPercent(pointerX(e)));
    onJump?.(t);
  }, listenerOptions);

  for (let t = 0; t <= durationMs; t += step) {
    const p = (t / durationMs) * 100;
    const tick = document.createElement("div");
    tick.className = "tick";
    tick.style.left = `${p}%`;
    const label = document.createElement("div");
    label.className = "label";
    label.style.left = `${p}%`;
    label.textContent = msToTimestamp(t)?.slice(0, 8) || "";
    label.style.cursor = "pointer";
    label.addEventListener("click", (e) => {
      // Keep the ruler's click handler from issuing a second, slightly different jump.
      e.stopPropagation();
      onJump?.(t);
    });
    rulerEl.appendChild(tick);
    rulerEl.appendChild(label);
  }
}
/**
 * Turn plain text into an HTML string in which every keyword occurrence is
 * wrapped in `<span class="highlight">…</span>`.
 *
 * The result is meant to be assigned to `innerHTML`, so all text (matched or
 * not) is HTML-escaped. Matching is case-insensitive and done in a single
 * pass over the original text, so one keyword can never match inside the
 * markup produced for another (e.g. a keyword "span" or "class").
 *
 * @param {string} text - Plain text to render.
 * @param {Array<string>} [keywords] - Keywords to highlight; empty entries are ignored.
 * @returns {string} HTML-safe markup.
 */
function highlightKeywords(text, keywords) {
  const escapeHtml = (raw) =>
    raw
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  const escapeRegExp = (raw) => raw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const source = String(text ?? "");
  const usable = (keywords || []).filter(Boolean).map(String);
  if (usable.length === 0) return escapeHtml(source);

  // Longest first so that "abc" wins over "ab" inside the alternation.
  const alternatives = [...usable].sort((a, b) => b.length - a.length).map(escapeRegExp);
  const matcher = new RegExp(`(${alternatives.join("|")})`, "gi");

  // With one capture group, split() alternates: plain, match, plain, match, …
  return source
    .split(matcher)
    .map((part, index) =>
      index % 2 === 1 ? `<span class="highlight">${escapeHtml(part)}</span>` : escapeHtml(part)
    )
    .join("");
}
/**
 * Legacy structured view (kept, but not called by the new UI): renders the
 * segments grouped by channel and speaker into `#channelsContainer`, with a
 * clickable timeline in `#timelineRuler` and inline-editable segment text.
 *
 * Fixes relative to the earlier version:
 *  - a missing `#sensitiveWords` input no longer throws;
 *  - segment times are normalized with `?? 0` everywhere, so the
 *    `data-start`/`data-end` attributes, the timeline jump selector and the
 *    `editedMap` key (`${start}-${end}`) all agree for segments without
 *    timestamps;
 *  - the channel header is built from text nodes instead of an HTML string
 *    containing response data.
 *
 * @param {object} json - Parsed recognition response.
 * @returns {void}
 */
function renderStructuredView(json) {
  const segs = extractSegments(json) || [];
  const container = document.getElementById("channelsContainer");
  const ruler = document.getElementById("timelineRuler");
  if (!container || !ruler) { appendLog('结构化容器缺失,跳过渲染'); return; }
  const duration = anyToMs(json?.data?.additions?.duration || json?.result?.additions?.duration) || json?.audio_info?.duration || 0;
  container.innerHTML = "";

  // Normalized segment bounds in ms; unknown times count as 0.
  const startOf = (s) => anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
  const endOf = (s) => anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;

  buildTimeline._segs = segs;
  buildTimeline(duration, ruler, (jumpMs) => {
    // Timeline click: highlight the segments that cover the clicked time.
    document.querySelectorAll('.segment-item.active').forEach((el) => el.classList.remove('active'));
    for (const s of segs) {
      const start = startOf(s);
      const end = endOf(s);
      if (jumpMs < start || jumpMs > end) continue;
      const el = container.querySelector(`.segment-item[data-start="${start}"][data-end="${end}"]`);
      if (el) {
        el.classList.add('active');
        el.scrollIntoView({ behavior: 'smooth', block: 'center' });
      }
    }
  });

  const channelMap = groupBySpeakerAndChannel(segs);
  // The keyword input may not exist on the page; treat that as "no keywords".
  const keywordsInput = (document.getElementById("sensitiveWords")?.value ?? "").trim();
  const keywords = keywordsInput ? keywordsInput.split(/[,\s]+/).filter(Boolean) : [];

  for (const [ch, speakerMap] of channelMap.entries()) {
    const column = document.createElement("div");
    column.className = "channel-column";
    const header = document.createElement("div");
    header.className = "channel-header";
    const channelLabel = document.createElement("span");
    channelLabel.textContent = `声道 ch=${ch}`;
    const groupingLabel = document.createElement("span");
    groupingLabel.textContent = "分组:说话人";
    header.appendChild(channelLabel);
    header.appendChild(groupingLabel);
    column.appendChild(header);

    for (const [spk, items] of speakerMap.entries()) {
      const group = document.createElement("div");
      group.className = "speaker-group";
      const title = document.createElement("div");
      title.className = "speaker-title";
      const avatar = document.createElement("span");
      avatar.className = "speaker-avatar";
      // Maps the speaker id onto the CSS variables --speaker-1 … --speaker-5
      // (non-numeric ids and multiples of 6 use index 1).
      const colorIdx = (Number(spk) || 0) % 6 || 1;
      avatar.style.background = getComputedStyle(document.documentElement).getPropertyValue(`--speaker-${colorIdx}`).trim() || "var(--accent)";
      // Badge text: first letter / first CJK character for strings, the value itself for numbers.
      let badgeText = "";
      if (typeof spk === "string") {
        const m = spk.match(/[A-Za-z\u4e00-\u9fa5]/);
        badgeText = (m ? m[0] : spk[0] || "?").toUpperCase();
      } else {
        badgeText = String(spk ?? "?");
      }
      avatar.textContent = badgeText;
      const label = document.createElement("span");
      label.textContent = `说话人 ${spk}`;
      title.appendChild(avatar);
      title.appendChild(label);
      group.appendChild(title);

      for (const s of items) {
        const item = document.createElement("div");
        item.className = "segment-item";
        const start = startOf(s);
        const end = endOf(s);
        item.dataset.start = String(start);
        item.dataset.end = String(end);
        const meta = document.createElement("div");
        meta.className = "segment-meta";
        meta.textContent = `[${msToTimestamp(start)} - ${msToTimestamp(end)}]`;
        const text = document.createElement("div");
        text.contentEditable = "true";
        text.className = "editable";
        const raw = (typeof s.text === "string" && s.text.trim()) || (typeof s.transcript === "string" && s.transcript.trim()) || "";
        // Same key format that buildSegmentsFromEdited uses to look edits up.
        const key = `${start}-${end}`;
        if (!editedMap.has(key)) editedMap.set(key, raw);
        // highlightKeywords returns an HTML string, hence innerHTML here.
        text.innerHTML = highlightKeywords(editedMap.get(key), keywords);
        text.addEventListener("input", () => {
          editedMap.set(key, text.textContent || "");
        });
        item.appendChild(meta);
        item.appendChild(text);
        group.appendChild(item);
      }
      column.appendChild(group);
    }
    container.appendChild(column);
  }
}
/**
 * No-op stub for the retired view-mode switch.
 *
 * @returns {void}
 */
function renderByViewMode() {
  // Intentionally empty: the legacy view switching is no longer used.
}
// Legacy view-mode switching has been removed.
/**
 * Tell whether a segment overlaps the time window `[fromMs, toMs]`.
 * A null/undefined bound leaves that side of the window open; unknown
 * segment times count as 0.
 *
 * @param {object} s - Segment with start/end fields.
 * @param {number|null|undefined} fromMs - Window start in ms.
 * @param {number|null|undefined} toMs - Window end in ms.
 * @returns {boolean}
 */
function withinRange(s, fromMs, toMs) {
  const segStart = anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
  const segEnd = anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;
  const endsBeforeWindow = fromMs != null && !(segEnd >= fromMs);
  if (endsBeforeWindow) {
    return false;
  }
  const startsAfterWindow = toMs != null && !(segStart <= toMs);
  return !startsAfterWindow;
}
/**
 * Parse a user-typed time.
 *
 * - Digits only (e.g. `"1500"`) are returned as that number unchanged.
 * - `M:SS` / `MM:SS` and `H:MM:SS` / `HH:MM:SS` are converted to milliseconds.
 *
 * @param {string|null|undefined} v - Raw input value.
 * @returns {number|null} Parsed value, or null for empty/unrecognized input.
 */
function parseTimeInput(v) {
  if (!v) return null;
  const typed = v.trim();
  if (/^\d+$/.test(typed)) {
    return Number(typed);
  }
  const clock = typed.match(/^(\d{1,2}):(\d{2})(?::(\d{2}))?$/);
  if (!clock) {
    return null;
  }
  // Read the fields right-to-left so the optional hours default to 0.
  const fields = clock.slice(1).filter((part) => part !== undefined).map(Number);
  const [seconds, minutes, hours = 0] = fields.reverse();
  return ((hours * 60 + minutes) * 60 + seconds) * 1000;
}
// Export the text of all segments overlapping the typed time range as a .txt download.
exportRangeTxtBtn?.addEventListener("click", () => {
  if (!lastResultJson) {
    return;
  }
  const fromMs = parseTimeInput(rangeStartInput?.value);
  const toMs = parseTimeInput(rangeEndInput?.value);
  const selectedLines = [];
  for (const seg of buildSegmentsFromEdited(lastResultJson)) {
    if (withinRange(seg, fromMs, toMs)) {
      selectedLines.push(seg.text || seg.transcript || "");
    }
  }
  triggerDownload(selectedLines.join("\n"), "transcript_range.txt");
});
// Export the segments overlapping the typed time range as an .srt download
// (numbered cues, comma-separated milliseconds, no speaker prefix).
exportRangeSrtBtn?.addEventListener("click", () => {
  if (!lastResultJson) {
    return;
  }
  const fromMs = parseTimeInput(rangeStartInput?.value);
  const toMs = parseTimeInput(rangeEndInput?.value);
  const selected = buildSegmentsFromEdited(lastResultJson).filter((seg) => withinRange(seg, fromMs, toMs));

  const zeroStamp = "00:00:00,000";
  const toSrtStamp = (rawTime) => msToTimestamp(anyToMs(rawTime) ?? 0)?.replace(".", ",") ?? zeroStamp;

  const cues = selected.map((seg, position) => {
    const startStamp = toSrtStamp(seg.start_ms ?? seg.start_time ?? seg.start);
    const endStamp = toSrtStamp(seg.end_ms ?? seg.end_time ?? seg.end);
    const body = seg.text || seg.transcript || "";
    return `${position + 1}\n${startStamp} --> ${endStamp}\n${body}\n`;
  });
  triggerDownload(cues.join("\n"), "transcript_range.srt");
});
// Export every segment as CSV: start_ms, end_ms, channel, speaker, text.
// Every cell goes through the same quoting rule: a cell containing a quote,
// comma, CR or LF is wrapped in double quotes with embedded quotes doubled.
// (Previously only the text column had its quotes doubled, and CR did not
// trigger quoting.)
exportCsvBtn?.addEventListener("click", () => {
  if (!lastResultJson) return;

  const toCsvCell = (value) => {
    const cell = String(value);
    return /[",\r\n]/.test(cell) ? `"${cell.replace(/"/g, '""')}"` : cell;
  };

  const header = ["start_ms", "end_ms", "channel", "speaker", "text"];
  const rows = buildSegmentsFromEdited(lastResultJson).map((s) => [
    anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0,
    anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0,
    s.channel ?? "",
    s.speaker ?? "",
    s.text || s.transcript || ""
  ]);
  const csv = [header.join(","), ...rows.map((row) => row.map(toCsvCell).join(","))].join("\n");
  triggerDownload(csv, "transcript.csv");
});
const exportBtn = document.getElementById('exportBtn');
const copyAllBtn = document.getElementById('copyAllBtn');
// Copy the text of all segments (one per line) to the clipboard.
copyAllBtn?.addEventListener("click", async () => {
  try {
    const segs = buildSegmentsFromEdited(lastResultJson || {});
    const text = segs.map((s) => s.text || s.transcript || "").join("\n");
    await navigator.clipboard.writeText(text);
  } catch (error) {
    // Clipboard access can be denied (permissions, insecure context); report
    // it in the debug log instead of failing silently.
    const reason = error instanceof Error ? error.message : String(error);
    appendLog(`复制失败:${reason}`);
  }
});
/**
 * Offer `content` to the user as a UTF-8 text file download by clicking a
 * temporary `<a download>` element that points at a Blob object URL.
 *
 * @param {string} content - File contents.
 * @param {string} filename - Suggested file name.
 * @returns {void}
 */
function triggerDownload(content, filename) {
  const objectUrl = URL.createObjectURL(new Blob([content], { type: "text/plain;charset=utf-8" }));
  const anchor = Object.assign(document.createElement("a"), {
    href: objectUrl,
    download: filename
  });
  document.body.appendChild(anchor);
  anchor.click();
  anchor.remove();
  URL.revokeObjectURL(objectUrl);
}
// Download the transcript as SRT, falling back to plain text when the result
// has no timed segments.
downloadSrtBtn?.addEventListener("click", () => {
  if (!lastResultJson) { return; }
  // Pass ONLY the edited segments. Spreading lastResultJson alongside them
  // let extractSegments pick the response's own data.result.segments first,
  // so the edited list was never the one rendered.
  const editedSegments = buildSegmentsFromEdited(lastResultJson);
  const srt = buildSrtFromSegments({ segments: editedSegments });
  const text = srt || (lastResultJson?.transcript || extractTranscript(lastResultJson) || "");
  triggerDownload(text, srt ? "transcript.srt" : "transcript.txt");
});
// All exports go through the export button.
exportBtn?.addEventListener('click', () => {
  if (!lastResultJson) {
    appendLog('暂无结果可下载');
    return;
  }
  // Export a readable TXT: "[start - end] speaker: text" per segment.
  const rendered = [];
  for (const seg of buildSegmentsFromEdited(lastResultJson)) {
    const startMs = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start) ?? 0;
    const endMs = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end) ?? 0;
    const timeRange = `[${msToTimestamp(startMs)} - ${msToTimestamp(endMs)}]`;

    const speakerId = seg.speaker ?? seg.spk ?? seg.speaker_id;
    let speakerPrefix = '';
    if (speakerId != null) {
      const speakerLabel = speakerNameMap.get(speakerId) || `说话人 ${speakerId}`;
      speakerPrefix = `${speakerLabel}: `;
    }

    const body = (seg.text || seg.transcript || '').trim();
    rendered.push(`${timeRange} ${speakerPrefix}${body}`);
  }
  triggerDownload(rendered.join('\n'), 'transcript.txt');
});
/**
 * Render a recognition result into the new UI.
 *
 * Fills `#speakerTags` with one tag per distinct speaker and
 * `#transcriptLines` with one row per segment (timestamp, speaker badge,
 * text), registering each new speaker in `speakerColorMap` and
 * `speakerNameMap` along the way. Also (re)assigns the click handlers of
 * `#editToggle` and `#saveSpkBtn`. Any exception is caught and written to the
 * debug log instead of propagating.
 *
 * @param {object} json - Parsed recognition response.
 * @returns {void}
 */
function renderInNewUI(json){
try{
const segs = extractSegments(json) || [];
const tagsEl = document.getElementById('speakerTags');
const linesEl = document.getElementById('transcriptLines');
const editToggle = document.getElementById('editToggle');
const saveSpkBtn = document.getElementById('saveSpkBtn');
if (tagsEl) tagsEl.innerHTML = '';
if (linesEl) linesEl.innerHTML = '';
// Collect the speakers in first-seen order and give each a color index; the
// maps are module-level, so assignments persist across renders.
const speakers = [];
const spkSet = new Set();
for (const s of segs){
const spk = s.speaker ?? s.spk ?? s.speaker_id ?? null;
if (spk == null) continue;
if (!spkSet.has(spk)) { spkSet.add(spk); speakers.push(spk); }
if (!speakerColorMap.has(spk)) {
// Prefer a color index (1..3) nobody uses yet; once all are taken, cycle by map size.
const used = new Set([...speakerColorMap.values()]);
const pool = [1,2,3,1,2,3];
const pick = pool.find(i => !used.has(i)) || ((speakerColorMap.size % 3) + 1);
speakerColorMap.set(spk, pick);
}
if (!speakerNameMap.has(spk)) {
speakerNameMap.set(spk, `说话人 ${spk}`);
}
}
// Render the editable speaker tags at the top
if (tagsEl){
speakers.forEach(spk => {
const colorIdx = speakerColorMap.get(spk) || 1;
const tag = document.createElement('span');
tag.className = `speaker-tag tag-${colorIdx}`;
tag.contentEditable = editMode ? 'true' : 'false';
tag.dataset.spk = String(spk);
tag.textContent = speakerNameMap.get(spk) || `说话人 ${spk}`;
tag.addEventListener('input', () => {
speakerNameMap.set(spk, tag.textContent || `说话人 ${spk}`);
// Keep the badge text of every transcript row for this speaker in sync
linesEl?.querySelectorAll(`[data-spk="${CSS.escape(String(spk))}"]`).forEach(el => {
el.textContent = speakerNameMap.get(spk);
});
});
tagsEl.appendChild(tag);
});
}
// Render one row per segment
for (const s of segs){
if (!linesEl) break;
const spk = s.speaker ?? s.spk ?? s.speaker_id ?? null;
const item = document.createElement('div');
item.className = 'transcript-line';
const ts = document.createElement('span');
ts.className = 'timestamp';
const start = anyToMs(s.start_ms ?? s.start_time ?? s.start);
const end = anyToMs(s.end_ms ?? s.end_time ?? s.end);
ts.textContent = `[${msToTimestamp(start) || '00:00:00.000'} - ${msToTimestamp(end) || '00:00:00.000'}]`;
const sp = document.createElement('span');
const colorIdx = speakerColorMap.get(spk) || 1;
sp.className = `speaker tag-${colorIdx}`;
sp.dataset.spk = spk != null ? String(spk) : '';
if (spk != null) sp.textContent = speakerNameMap.get(spk) || `说话人 ${spk}`;
const text = document.createElement('div');
text.className = 'text-content';
text.textContent = (s.text || s.transcript || '').trim();
item.appendChild(ts);
// The speaker badge is only attached when the segment names a speaker.
if (spk != null) item.appendChild(sp);
item.appendChild(text);
linesEl.appendChild(item);
}
// Wire up the edit toggle (assigned via onclick, so re-rendering replaces the previous handler)
if (editToggle){
editToggle.onclick = () => {
editMode = !editMode;
if (saveSpkBtn) saveSpkBtn.style.display = editMode ? 'inline-flex' : 'none';
tagsEl?.querySelectorAll('.speaker-tag').forEach(el => { el.contentEditable = editMode ? 'true' : 'false'; });
};
}
if (saveSpkBtn){
saveSpkBtn.onclick = () => {
// NOTE(review): dataset values are strings, so names are stored here under
// the stringified id, whereas the tag `input` handler above uses the
// original id value — confirm this is intended for numeric speaker ids.
tagsEl?.querySelectorAll('.speaker-tag').forEach(el => {
const spk = el.dataset.spk;
speakerNameMap.set(spk, el.textContent || `说话人 ${spk}`);
});
// Leave edit mode and refresh the badges in the transcript rows
editMode = false;
saveSpkBtn.style.display = 'none';
tagsEl?.querySelectorAll('.speaker-tag').forEach(el => { el.contentEditable = 'false'; });
linesEl?.querySelectorAll('.speaker').forEach(el => {
const spk = el.dataset.spk;
el.textContent = speakerNameMap.get(spk) || `说话人 ${spk}`;
});
};
}
}catch(e){ appendLog('渲染失败:' + (e?.message || String(e))); }
}
/**
 * Query the result endpoint repeatedly until the task finishes.
 *
 * Each round POSTs `body` as JSON. Polling ends as soon as the response
 * carries timed segments, or when its status is in `statusSuccessSet`
 * (resolve) or `statusFailureSet` (reject). Between rounds it waits
 * `pollIntervalMs`, for at most `maxAttempts` rounds. In direct (non-proxy)
 * mode the `X-Api-Request-Id` header is refreshed before every request.
 *
 * @param {object} params
 * @param {string} params.url - Query endpoint.
 * @param {Headers} params.headers - Request headers (mutated in direct mode).
 * @param {object} params.body - JSON body identifying the task.
 * @param {boolean} params.useProxy - True when going through the proxy.
 * @returns {Promise<object>} The final response JSON; when segments are
 *   present a `transcript` field (string or null) is added.
 * @throws {Error} On HTTP errors, unparsable JSON, a failure status, or timeout.
 */
async function pollJobResult({ url, headers, body, useProxy }) {
  let attempt = 0;
  while (attempt < maxAttempts) {
    attempt += 1;
    appendLog(`第 ${attempt} 次查询任务状态…`);

    const needsFreshRequestId = !useProxy && headers.has("X-Api-Request-Id");
    if (needsFreshRequestId) {
      const freshId = self.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`;
      headers.set("X-Api-Request-Id", freshId);
    }

    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body)
    });
    if (!response.ok) {
      const errorBody = await response.text();
      throw new Error(`查询失败HTTP ${response.status}${errorBody}`);
    }

    let json;
    try {
      json = await response.json();
    } catch (_) {
      throw new Error("解析查询结果时发生错误");
    }

    // Best-effort summary line for the debug log.
    try {
      const summarySegments = extractSegments(json) || [];
      const summaryText = extractTranscript(json) || '';
      appendLog(`查询结果摘要segments=${summarySegments.length}, transcriptLen=${summaryText.length}`);
    } catch (_) {
      appendLog('查询结果已更新');
    }

    // Structured segments/utterances finish the poll right away; a text-only
    // response does not, so the result is not just one big block of text.
    const segments = extractSegments(json);
    const hasSegments = Array.isArray(segments) && segments.length > 0;
    const immediateTranscript = json?.transcript || extractTranscript(json);
    if (hasSegments) {
      return { ...json, transcript: immediateTranscript || null };
    }

    const status = deriveStatus(json);
    if (!status) {
      appendLog("未返回任务状态继续轮询");
    } else {
      const normalizedStatus = status.toUpperCase();
      if (statusSuccessSet.has(normalizedStatus)) {
        return json;
      }
      if (statusFailureSet.has(normalizedStatus)) {
        throw new Error(`任务失败,状态:${status}`);
      }
    }
    await sleep(pollIntervalMs);
  }
  throw new Error("轮询超时请稍后重试");
}
/**
 * Recognition options used by the new UI, which exposes no option controls.
 *
 * Everything is switched on except channel splitting, and no sensitive-word
 * filter is sent. (The object literal used to list `enableChannelSplit`
 * twice — `true`, then `false`; only the last value ever took effect, so the
 * dead duplicate has been removed.)
 *
 * @returns {{enableItn: boolean, enablePunc: boolean, enableDdc: boolean,
 *   enableSpeaker: boolean, enableChannelSplit: boolean,
 *   showUtterances: boolean, vadSegment: boolean, sensitiveWords: string}}
 */
function readOptions() {
  return {
    enableItn: true,
    enablePunc: true,
    enableDdc: true,
    enableSpeaker: true,
    // Channel splitting stays off so the same sentence is not reported once
    // per (left/right) channel.
    enableChannelSplit: false,
    showUtterances: true,
    vadSegment: true,
    sensitiveWords: ""
  };
}
// Submission is triggered through the new UI's "convert again" button.
const convertBtn = document.getElementById('convertBtn');
const fileInputNew = document.getElementById('fileInput');
// Choosing a file starts the conversion automatically; binding it here avoids
// losing the click when an inline script runs before main.js.
let converting = false;
const triggerConvert = () => {
  if (!converting) {
    convertBtn?.click();
    return;
  }
  appendLog('已忽略:上一次转换仍在进行中');
};
fileInputNew?.addEventListener('change', () => {
  appendLog('文件已选择,自动触发转换…');
  // Simple debounce: restart the short timer on every change event.
  clearTimeout(window.__conv_debounce);
  window.__conv_debounce = setTimeout(triggerConvert, 50);
});
/*
 * Convert button: submit the selected audio for recognition, poll for the
 * result, de-duplicate segments, store the result in `lastResultJson` and
 * render it.
 *
 * Fixes relative to the earlier version:
 *  - everything after `converting = true` runs inside try/finally, so the
 *    "no file selected" early return or an unexpected exception can no longer
 *    leave `converting` stuck at true (which made every later click a no-op);
 *  - a missing file input no longer throws;
 *  - the submit button, when present, is re-enabled once the run ends;
 *  - the debug log shows the request body with the base64 audio replaced by
 *    its length instead of dumping the whole file into the page;
 *  - the codec/sample-rate fallback and request flags are computed once for
 *    both the URL and the file path.
 */
convertBtn?.addEventListener('click', async (event) => {
  if (converting) { appendLog('已忽略:转换进行中'); return; }
  converting = true;
  event.preventDefault();
  try {
    resetOutputs();
    if (location.protocol === 'file:') {
      appendLog('当前以 file:// 打开页面,无法向本地代理提交;请访问 http://localhost:6174');
    }

    // The new UI has no inputs for these settings; fixed values stand in for
    // the old form fields.
    const audioUrlInput = { value: "" }; // the new UI does not use a URL
    const uidInput = { value: "AudioToText" };
    const modelNameInput = { value: "bigmodel" };
    const sampleRateInput = { value: 16000 };
    const bitsInput = { value: 16 };
    const channelInput = { value: 1 };
    const apiBaseInput = { value: "" };
    const apiKeyInput = { value: "" };
    const resourceIdInput = { value: "" };

    const audioUrl = audioUrlInput.value.trim();
    const audioFile = fileInputNew?.files?.[0] ?? null;
    if (!audioUrl && !audioFile) {
      appendLog("未选择文件等待提交");
      return; // `finally` below releases the `converting` flag
    }
    appendLog("检测到文件选择开始提交识别任务");

    const apiBase = apiBaseInput.value.trim();
    const useProxy = apiBase.length === 0;
    const submitUrl = useProxy ? proxySubmitEndpoint : `${apiBase}/submit`;
    const resultUrl = useProxy ? proxyResultEndpoint : `${apiBase}/query`;
    const uid = uidInput.value.trim() || "anonymous";
    const modelName = modelNameInput.value.trim() || "bigmodel";
    const sampleRate = Number(sampleRateInput.value) || 16000;
    const bits = Number(bitsInput.value) || 16;
    const channels = Number(channelInput.value) || 1;
    const format = audioUrl ? detectAudioFormat(audioUrl) : detectAudioFormat(audioFile.name);
    const newRequestId = () => self.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`;
    const requestId = newRequestId();
    const apiKey = apiKeyInput.value.trim();
    const resourceId = resourceIdInput.value.trim();

    if (submitButton) submitButton.disabled = true;
    if (audioUrl) {
      appendLog(`使用音频 URL${audioUrl}`);
    } else {
      appendLog(`读取文件:${audioFile.name}`);
    }

    const submitHeaders = createHeaders({ useProxy, apiKey, resourceId, requestId });
    const options = readOptions();

    // Codec mapping, plus a sample-rate fallback for compressed formats.
    const mappedCodec = (["wav","pcm","wave"].includes(format) ? "raw" : (format === "mp3" ? "mp3" : (["m4a","aac"].includes(format) ? "aac" : format)));
    const finalRate = (["m4a","aac","mp3"].includes(format) && (!sampleRate || sampleRate < 22050)) ? 44100 : sampleRate;
    const requestFlags = {
      enable_itn: !!options.enableItn,
      enable_punc: !!options.enablePunc,
      enable_ddc: !!options.enableDdc,
      enable_speaker_info: !!options.enableSpeaker,
      enable_channel_split: !!options.enableChannelSplit,
      show_utterances: !!options.showUtterances,
      vad_segment: !!options.vadSegment,
      sensitive_words_filter: options.sensitiveWords || ""
    };

    let payload;
    if (audioUrl) {
      appendLog(`提交参数source=url, format=${format}, codec=${mappedCodec}, rate=${finalRate}, ch=${channels}`);
      payload = {
        user: { uid },
        audio: { url: audioUrl, format, codec: mappedCodec, rate: finalRate, bits, channel: channels },
        request: { model_name: modelName, ...requestFlags }
      };
    } else {
      const base64WithPrefix = await readFileAsBase64(audioFile);
      const base64Data = stripDataUriPrefix(base64WithPrefix);
      appendLog(`文件编码完成,长度约 ${Math.round(base64Data.length / 1024)} KB`);
      payload = buildSubmitPayload({
        base64Data,
        format,
        sampleRate,
        bits,
        channels,
        uid,
        modelName,
        options
      });
      // Override the helper's codec/rate and coerce the flags to booleans.
      payload.audio.codec = mappedCodec;
      payload.audio.rate = finalRate;
      Object.assign(payload.request, requestFlags);
      appendLog(`提交参数source=file, format=${format}, codec=${mappedCodec}, rate=${finalRate}, ch=${channels}`);
    }

    // Log the request body without the (potentially multi-megabyte) audio data.
    const loggablePayload = typeof payload.audio?.data === "string"
      ? { ...payload, audio: { ...payload.audio, data: `<base64 ${payload.audio.data.length} chars>` } }
      : payload;
    appendLog(`提交请求体:${JSON.stringify(loggablePayload)}`);

    let submitResponse;
    try {
      submitResponse = await fetch(submitUrl, {
        method: "POST",
        headers: submitHeaders,
        body: JSON.stringify(payload)
      });
    } catch (err) {
      appendLog("提交请求未发出可能没有后端代理或跨域受限");
      throw err;
    }
    if (!submitResponse.ok) {
      const errorBody = await submitResponse.text();
      appendLog("提交失败请在本地运行带代理版本或在代码里配置直连 API 基础地址");
      throw new Error(`提交失败HTTP ${submitResponse.status}${errorBody}`);
    }
    const submitJson = await submitResponse.json();
    appendLog(`提交成功:${JSON.stringify(submitJson)}`);

    const jobKey = extractJobKey(submitJson);
    if (!jobKey) {
      throw new Error("未能从提交响应中解析到任务 ID");
    }
    appendLog(`任务 ID${jobKey}`);

    const pollHeaders = createHeaders({ useProxy, apiKey, resourceId, requestId: newRequestId() });
    const pollBody = { job_key: jobKey, taskId: jobKey };
    const resultJson = await pollJobResult({ url: resultUrl, headers: pollHeaders, body: pollBody, useProxy });
    appendLog("任务完成处理结果如下");
    appendLog(JSON.stringify(resultJson));

    // De-duplicate: segments whose start and end fall into the same 300 ms
    // buckets and whose text is identical are merged (cross-channel duplicates).
    const bucketOf = (ms) => Math.round((anyToMs(ms) || 0) / 300);
    const seenKeys = new Set();
    const uniqueSegments = [];
    for (const s of extractSegments(resultJson) || []) {
      const start = anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
      const end = anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;
      const key = `${bucketOf(start)}|${bucketOf(end)}|${(s.text || s.transcript || '').trim()}`;
      if (seenKeys.has(key)) continue;
      seenKeys.add(key);
      uniqueSegments.push(s);
    }
    if (!resultJson.data) resultJson.data = {};
    if (!resultJson.data.result) resultJson.data.result = {};
    resultJson.data.result.segments = uniqueSegments;

    lastResultJson = resultJson;
    renderInNewUI(lastResultJson);
    // Stop the elapsed-time timer, if the page started one.
    if (window.startAnalysisTimer && window.startAnalysisTimer._timer) {
      clearInterval(window.startAnalysisTimer._timer);
      window.startAnalysisTimer._timer = null;
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    appendLog(`发生错误:${message}`);
    if (transcriptElement) transcriptElement.value = message;
  } finally {
    converting = false;
    // Undo the disable applied above so another run can be started.
    if (submitButton) submitButton.disabled = false;
  }
});