// The legacy <form> has been removed; the new UI triggers submission via buttons.
const formElement = null;

// Cached DOM references. Each one is null when the element is not on the page.
const submitButton = document.getElementById("submitButton");
let debugLogElement = document.getElementById("debugLog") || null;
const transcriptElement = document.getElementById("transcriptOutput") || null;
const downloadTxtBtn = document.getElementById("downloadTxtBtn");
const downloadSrtBtn = document.getElementById("downloadSrtBtn");
const copyBtn = document.getElementById("copyBtn");
const viewModeSelect = document.getElementById("viewMode");
const exportRangeTxtBtn = document.getElementById("exportRangeTxt");
const exportRangeSrtBtn = document.getElementById("exportRangeSrt");
const exportCsvBtn = document.getElementById("exportCsv");
const rangeStartInput = document.getElementById("rangeStart");
const rangeEndInput = document.getElementById("rangeEnd");
const exportToolbar = document.getElementById("exportToolbar");

// Most recent recognition result, or null before the first successful run.
let lastResultJson = null;

// Edited segment text, keyed by `${start}-${end}` (both in milliseconds).
const editedMap = new Map();

// Speaker display names and colour assignments.
// speakerNameMap: speakerId -> display name (editable by the user).
const speakerNameMap = new Map();
// speakerColorMap: speakerId -> 1..3, mapped to the CSS classes tag-1..tag-3.
const speakerColorMap = new Map();

// Whether "edit speaker names" mode is currently on.
let editMode = false;

// Marker line so it is easy to confirm the script actually loaded.
appendLog('前端脚本已加载');

window.addEventListener('DOMContentLoaded', () => {
  // Re-resolve the log element in case it did not exist when the script first ran.
  debugLogElement = document.getElementById('debugLog');
  appendLog('DOM 已就绪');
  fetch('/health')
    .then((response) => response.json())
    .then((health) => appendLog('健康检查:' + JSON.stringify(health)))
    .catch((error) => appendLog('健康检查失败:' + error.message));
});
// Same-origin endpoints used for submit / result when no direct API base URL is set.
const proxySubmitEndpoint = "/api/asr/submit";
const proxyResultEndpoint = "/api/asr/result";
// Delay between two result polls, in milliseconds.
const pollIntervalMs = 2000;
// Maximum number of result polls before the job is reported as timed out.
const maxAttempts = 60;
// Status strings (compared upper-cased) that the poller treats as terminal success.
const statusSuccessSet = new Set(["SUCCEEDED", "SUCCESS", "FINISHED"]);
// Status strings (compared upper-cased) that the poller treats as terminal failure.
const statusFailureSet = new Set(["FAILED", "FAIL", "ERROR"]);
/**
 * Append one timestamped line to the on-page debug log.
 *
 * The first real entry replaces the "waiting" placeholder text; later entries
 * are appended on a new line. Does nothing when no log element exists.
 *
 * @param {string} message - Text to log.
 * @returns {void}
 */
function appendLog(message) {
  const target = debugLogElement || document.getElementById("debugLog");
  if (!target) {
    return;
  }
  const entry = `[${new Date().toISOString()}] ${message}`;
  const showsPlaceholder = target.textContent === "等待提交…";
  target.textContent = showsPlaceholder ? entry : `${target.textContent}\n${entry}`;
}
/**
 * Clear every output area before a new conversion starts: the transcript
 * field, the debug log (reset to its placeholder), the transcript lines and
 * the speaker tags. Missing elements are skipped.
 *
 * @returns {void}
 */
function resetOutputs() {
  if (transcriptElement) {
    transcriptElement.value = "";
  }
  if (debugLogElement) {
    debugLogElement.textContent = "等待提交…";
  }
  for (const containerId of ['transcriptLines', 'speakerTags']) {
    const container = document.getElementById(containerId);
    if (container) {
      container.innerHTML = '';
    }
  }
}
/**
 * Remove a leading `data:<mime>;base64,` header from a data URI, leaving only
 * the base64 payload. Strings without such a header are returned unchanged.
 *
 * @param {string} base64String - Data URI or bare base64 text.
 * @returns {string} The payload without the data-URI header.
 */
function stripDataUriPrefix(base64String) {
  const dataUriHeader = /^data:.*;base64,/;
  return base64String.replace(dataUriHeader, "");
}
/**
 * Guess the audio format from a file name or URL by looking at its extension.
 *
 * For URLs only the pathname is considered; query strings and fragments are
 * dropped. A few aliases are normalised (m4a/mp4a/mp4 -> "m4a", oga -> "ogg").
 *
 * @param {string} nameOrUrl - File name, path or absolute URL.
 * @returns {string} Lower-cased format; "wav" when no usable extension exists
 *   (non-string input, no dot, or a trailing dot with nothing after it).
 */
function detectAudioFormat(nameOrUrl) {
  const fallbackFormat = "wav";
  if (!nameOrUrl || typeof nameOrUrl !== "string") return fallbackFormat;

  let candidate = nameOrUrl.trim();
  try {
    // For a URL keep only the pathname (drops query and fragment).
    candidate = new URL(candidate).pathname;
  } catch (_) {
    // Not an absolute URL: keep working with the raw string.
  }

  // Strip any query / fragment left over on non-URL input.
  candidate = candidate.split("?")[0].split("#")[0];

  const lastSlash = candidate.lastIndexOf("/");
  if (lastSlash !== -1) candidate = candidate.slice(lastSlash + 1);

  const lastDot = candidate.lastIndexOf(".");
  if (lastDot === -1) return fallbackFormat;

  const ext = candidate.slice(lastDot + 1).toLowerCase();
  // A trailing dot ("clip.") has no extension; use the default instead of "".
  if (ext.length === 0) return fallbackFormat;

  // Normalise a few aliases.
  if (["m4a", "mp4a", "mp4"].includes(ext)) return "m4a";
  if (ext === "oga") return "ogg";
  return ext;
}
/**
 * Read a File/Blob with FileReader and resolve with its data-URI string.
 *
 * @param {Blob} file - File selected by the user.
 * @returns {Promise<string>} Resolves with `reader.result` (a data URI);
 *   rejects with an Error when the reader reports a failure.
 */
function readFileAsBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = Object.assign(new FileReader(), {
      onload() {
        resolve(reader.result);
      },
      onerror() {
        reject(new Error("读取文件失败"));
      },
    });
    reader.readAsDataURL(file);
  });
}
/**
 * Build the request headers for a submit or query call.
 *
 * Through the proxy only `Content-Type` is sent. For a direct call the API
 * key, optional resource id, request id and a fixed sequence header are added.
 *
 * @param {object} params
 * @param {boolean} params.useProxy - True when the request goes through the proxy.
 * @param {string} [params.apiKey] - API key; required when not using the proxy.
 * @param {string} [params.resourceId] - Optional resource id header value.
 * @param {string} [params.requestId] - Request id header value.
 * @returns {Headers}
 * @throws {Error} When calling directly without an API key.
 */
function createHeaders({ useProxy, apiKey, resourceId, requestId }) {
  const headers = new Headers({ "Content-Type": "application/json" });
  if (useProxy) {
    return headers;
  }
  if (!apiKey) {
    throw new Error("当直连豆包接口时必须填写 API Key");
  }

  const directHeaders = [["x-api-key", apiKey]];
  if (resourceId) {
    directHeaders.push(["X-Api-Resource-Id", resourceId]);
  }
  directHeaders.push(["X-Api-Request-Id", requestId], ["X-Api-Sequence", "-1"]);
  for (const [name, value] of directHeaders) {
    headers.set(name, value);
  }
  return headers;
}
/**
 * Build the JSON body for a recognition submit request from inline base64 audio.
 *
 * Codec mapping: wav / wave / pcm -> "raw", mp3 -> "mp3", m4a / aac -> "aac";
 * any other format is passed through as its own codec name. This is the same
 * mapping the submit handler in this file applies to the payload.
 *
 * @param {object} params
 * @param {string} params.base64Data - Audio bytes, base64-encoded (no data-URI header).
 * @param {string} params.format - Container/format name, e.g. "wav" or "mp3".
 * @param {number} params.sampleRate - Sample rate sent as `audio.rate`.
 * @param {number} params.bits - Bit depth sent as `audio.bits`.
 * @param {number} params.channels - Channel count sent as `audio.channel`.
 * @param {string} params.uid - User id sent as `user.uid`.
 * @param {string} params.modelName - Model name sent as `request.model_name`.
 * @param {object} [params.options] - Recognition switches as returned by readOptions().
 * @returns {{user: object, audio: object, request: object}} Submit payload.
 */
function buildSubmitPayload({
  base64Data,
  format,
  sampleRate,
  bits,
  channels,
  uid,
  modelName,
  options = {}
}) {
  let codec = format;
  if (["wav", "wave", "pcm"].includes(format)) {
    codec = "raw";
  } else if (["m4a", "aac"].includes(format)) {
    codec = "aac";
  }

  return {
    user: {
      uid
    },
    audio: {
      data: base64Data,
      format,
      codec,
      rate: sampleRate,
      bits,
      channel: channels
    },
    request: {
      model_name: modelName,
      enable_itn: options.enableItn,
      enable_punc: options.enablePunc,
      enable_ddc: options.enableDdc,
      enable_speaker_info: options.enableSpeaker,
      enable_channel_split: options.enableChannelSplit,
      show_utterances: options.showUtterances,
      vad_segment: options.vadSegment,
      sensitive_words_filter: options.sensitiveWords
    }
  };
}
/**
 * Find the job/task identifier in a submit response.
 *
 * Several response layouts are probed in a fixed order (`result.*`, then
 * `data.*`, then top-level fields); the first non-empty string wins.
 *
 * @param {object} result - Parsed submit response.
 * @returns {string|null} The job key, or null when none is present.
 */
function extractJobKey(result) {
  const inner = result?.result;
  const data = result?.data;
  const probes = [
    inner?.job_key,
    inner?.task_id,
    inner?.taskId,
    data?.job_key,
    data?.taskId,
    data?.task_id,
    result?.job_key,
    result?.task_id,
    result?.taskId,
    result?.RequestId,
    result?.request_id
  ];
  for (const probe of probes) {
    if (typeof probe === "string" && probe.length > 0) {
      return probe;
    }
  }
  return null;
}
/**
 * Read the job status string from a query response.
 *
 * Probes `result.status`, `result.task_status`, `data.status`,
 * `data.task_status` and top-level `status`, in that order.
 *
 * @param {object} result - Parsed query response.
 * @returns {string|null} First non-empty status string, or null.
 */
function deriveStatus(result) {
  const probes = [
    result?.result?.status,
    result?.result?.task_status,
    result?.data?.status,
    result?.data?.task_status,
    result?.status
  ];
  for (const probe of probes) {
    if (typeof probe === "string" && probe.length > 0) {
      return probe;
    }
  }
  return null;
}
/**
 * Join the text of every segment into one newline-separated string.
 *
 * For each segment the trimmed `text` is used when non-empty, otherwise the
 * trimmed `transcript`; segments with neither are skipped.
 *
 * @param {Array<object>} segments - Segment objects from a result.
 * @returns {string|null} Joined text, or null for a non-array, an empty array,
 *   or when no segment carries text.
 */
function flattenSegmentsText(segments) {
  if (!Array.isArray(segments) || segments.length === 0) {
    return null;
  }

  const collected = [];
  for (const segment of segments) {
    for (const field of [segment?.text, segment?.transcript]) {
      if (typeof field !== "string") continue;
      const trimmed = field.trim();
      if (trimmed.length > 0) {
        collected.push(trimmed);
        break;
      }
    }
  }

  return collected.length > 0 ? collected.join("\n") : null;
}
/**
 * Extract the full transcript text from a result object.
 *
 * Segment text (from `data.result.segments`, then `result.segments`) is
 * preferred; otherwise the first non-blank string among a fixed list of
 * known text fields is returned.
 *
 * @param {object} result - Parsed query response.
 * @returns {string|null} Transcript text, or null when none is found.
 */
function extractTranscript(result) {
  const fromSegments =
    flattenSegmentsText(result?.data?.result?.segments) ?? flattenSegmentsText(result?.result?.segments);
  if (fromSegments) {
    return fromSegments;
  }

  const inner = result?.result;
  const data = result?.data;
  const textFields = [
    data?.result?.text,
    inner?.text,
    inner?.output?.choices?.[0]?.text,
    inner?.output?.transcript,
    inner?.transcript,
    data?.output?.choices?.[0]?.text,
    data?.transcript
  ];
  for (const field of textFields) {
    if (typeof field === "string" && field.trim().length > 0) {
      return field;
    }
  }
  return null;
}
/**
 * Format a millisecond offset as `HH:MM:SS.mmm`.
 *
 * Negative values get a leading "-". Hours are not wrapped, so they may use
 * more than two digits.
 *
 * @param {number} ms - Offset in milliseconds.
 * @returns {string|null} Formatted timestamp, or null when `ms` is not a
 *   number or is NaN.
 */
function msToTimestamp(ms) {
  if (typeof ms !== "number" || Number.isNaN(ms)) return null;

  const magnitude = Math.abs(ms);
  const zeroPad = (value, width) => value.toString().padStart(width, "0");
  const clock = [
    Math.floor(magnitude / 3600000),
    Math.floor((magnitude % 3600000) / 60000),
    Math.floor((magnitude % 60000) / 1000)
  ]
    .map((part) => zeroPad(part, 2))
    .join(":");
  const fraction = zeroPad(Math.floor(magnitude % 1000), 3);

  return `${ms < 0 ? "-" : ""}${clock}.${fraction}`;
}
/**
 * Convert a time value of varying shape to milliseconds.
 *
 * - number: returned as-is (treated as already being milliseconds);
 * - numeric string such as "1.23": treated as seconds and multiplied by 1000;
 * - "HH:MM:SS.f" string with 1-3 fractional digits: parsed as a clock value.
 *
 * @param {*} value - Raw time value.
 * @returns {number|null} Milliseconds, or null for anything unrecognised.
 */
function anyToMs(value) {
  if (typeof value === "number") return value;
  if (typeof value !== "string") return null;

  // Plain decimal seconds, e.g. "1.23".
  if (/^\d+(\.\d+)?$/.test(value)) {
    return Number(value) * 1000;
  }

  // Clock form, e.g. "00:00:01.230".
  const clock = /^(\d{2}):(\d{2}):(\d{2})\.(\d{1,3})$/.exec(value);
  if (!clock) return null;

  const [, hours, minutes, seconds, fraction] = clock;
  const wholeSeconds = (Number(hours) * 60 + Number(minutes)) * 60 + Number(seconds);
  return wholeSeconds * 1000 + Number(fraction.padEnd(3, "0"));
}
/**
 * Return the list of timed segments contained in a result object.
 *
 * A non-empty `segments` array (under `data.result`, `result`, or top level)
 * is returned as-is. Otherwise a non-empty `utterances` array from the same
 * places is converted to the same segment shape.
 *
 * @param {object} obj - Parsed result (or any object carrying segments).
 * @returns {Array<object>|null} Segments, or null when none are available.
 */
function extractSegments(obj) {
  const isFilledArray = (value) => Array.isArray(value) && value.length > 0;

  const segmentList = obj?.data?.result?.segments || obj?.result?.segments || obj?.segments || null;
  if (isFilledArray(segmentList)) return segmentList;

  // Also accept an `utterances` field and convert it to the unified segment shape.
  const utteranceList = obj?.data?.result?.utterances || obj?.result?.utterances || obj?.utterances || null;
  if (!isFilledArray(utteranceList)) return null;

  const converted = [];
  for (const utterance of utteranceList) {
    converted.push({
      start_ms: utterance.start_ms ?? utterance.start_time ?? utterance.start,
      end_ms: utterance.end_ms ?? utterance.end_time ?? utterance.end,
      text: utterance.text,
      channel: utterance.channel ?? utterance.channel_index ?? utterance.ch ?? utterance.additions?.channel_id,
      speaker: utterance.speaker ?? utterance.spk ?? utterance.speaker_id ?? utterance.additions?.speaker
    });
  }
  return converted;
}
/**
 * Render every segment as one line of the form
 * `[start-end] (ch=…, spk=…) text`, omitting whichever parts are unknown.
 *
 * @param {object} obj - Result object passed to extractSegments().
 * @returns {string|null} Newline-joined lines, or null when there are no segments.
 */
function formatSegmentsDetailed(obj) {
  const segments = extractSegments(obj);
  if (!Array.isArray(segments) || segments.length === 0) return null;

  const trimmedOrFalsy = (value) => typeof value === "string" && value.trim();

  return segments
    .map((seg) => {
      const startMs = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start ?? seg.begin_ms ?? seg.begin_time);
      const endMs = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end ?? seg.finish_ms ?? seg.finish_time);
      const startTs = startMs != null ? msToTimestamp(startMs) : null;
      const endTs = endMs != null ? msToTimestamp(endMs) : null;

      let range = "";
      if (startTs && endTs) {
        range = `[${startTs}-${endTs}]`;
      } else if (startTs) {
        range = `[${startTs}-?]`;
      } else if (endTs) {
        range = `[?-${endTs}]`;
      }

      const channel = seg.channel ?? seg.channel_index ?? seg.ch ?? null;
      const speaker = seg.speaker ?? seg.spk ?? seg.speaker_id ?? null;
      const attrs = [];
      if (channel != null) attrs.push(`ch=${channel}`);
      if (speaker != null) attrs.push(`spk=${speaker}`);
      const attrText = attrs.length ? `(${attrs.join(", ")})` : "";

      const header = [range, attrText].filter(Boolean).join(" ");
      const text = trimmedOrFalsy(seg.text) || trimmedOrFalsy(seg.transcript) || "";
      return header ? `${header} ${text}` : text;
    })
    .join("\n");
}
/**
 * Resolve after the given delay.
 *
 * @param {number} durationMs - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(durationMs) {
  return new Promise((wake) => {
    setTimeout(wake, durationMs);
  });
}
/**
 * Return the result's segments with normalised `start_ms` / `end_ms` and with
 * any user-edited text from `editedMap` applied.
 *
 * @param {object} obj - Result object passed to extractSegments().
 * @returns {Array<object>} Copies of the segments; empty when there are none.
 */
function buildSegmentsFromEdited(obj) {
  const output = [];
  for (const segment of extractSegments(obj) || []) {
    const startMs = anyToMs(segment.start_ms ?? segment.start_time ?? segment.start) ?? 0;
    const endMs = anyToMs(segment.end_ms ?? segment.end_time ?? segment.end) ?? 0;
    // Edits are stored under the same `${start}-${end}` key.
    const edited = editedMap.get(`${startMs}-${endMs}`);
    const text = edited ?? segment.text ?? segment.transcript ?? "";
    output.push({ ...segment, start_ms: startMs, end_ms: endMs, text });
  }
  return output;
}
/**
 * Build SRT subtitle text from a result's segments.
 *
 * Each cue is numbered from 1, uses `HH:MM:SS,mmm --> HH:MM:SS,mmm` (a missing
 * time becomes 00:00:00,000) and is prefixed with `[speaker name]` when the
 * segment has a speaker id.
 *
 * @param {object} obj - Result object passed to extractSegments().
 * @returns {string|null} SRT text, or null when there are no segments.
 */
function buildSrtFromSegments(obj) {
  const segs = extractSegments(obj);
  if (!Array.isArray(segs) || segs.length === 0) return null;

  const toSrtTime = (ms) => msToTimestamp(ms)?.replace(".", ",") ?? "00:00:00,000";
  const trimmedOrFalsy = (value) => typeof value === "string" && value.trim();

  const cues = segs.map((seg, position) => {
    const startTs = toSrtTime(anyToMs(seg.start_ms ?? seg.start_time ?? seg.start));
    const endTs = toSrtTime(anyToMs(seg.end_ms ?? seg.end_time ?? seg.end));

    const speakerId = seg.speaker ?? seg.spk ?? seg.speaker_id;
    let speakerName = "";
    if (speakerId != null) {
      speakerName = speakerNameMap.get(speakerId) || `说话人 ${speakerId}`;
    }
    const prefix = speakerName ? `[${speakerName}] ` : "";

    const text = trimmedOrFalsy(seg.text) || trimmedOrFalsy(seg.transcript) || "";
    return `${position + 1}\n${startTs} --> ${endTs}\n${prefix}${text}\n`;
  });

  return cues.join("\n");
}
/**
 * Group segments first by channel, then by speaker.
 *
 * Segments without a channel or speaker are filed under the key "unknown".
 *
 * @param {Iterable<object>} segs - Segments to group.
 * @returns {Map<*, Map<*, Array<object>>>} channel -> (speaker -> segments),
 *   in first-seen order.
 */
function groupBySpeakerAndChannel(segs) {
  const byChannel = new Map();
  for (const seg of segs) {
    const channel = seg.channel ?? seg.channel_index ?? seg.ch ?? seg.additions?.channel_id ?? "unknown";
    const speaker = seg.speaker ?? seg.spk ?? seg.speaker_id ?? seg.additions?.speaker ?? "unknown";

    let bySpeaker = byChannel.get(channel);
    if (!byChannel.has(channel)) {
      bySpeaker = new Map();
      byChannel.set(channel, bySpeaker);
    }

    if (bySpeaker.has(speaker)) {
      bySpeaker.get(speaker).push(seg);
    } else {
      bySpeaker.set(speaker, [seg]);
    }
  }
  return byChannel;
}
/**
 * (Re)build the timeline ruler: tick marks, clickable time labels and a hover
 * tooltip that previews the segment under the pointer.
 *
 * The segments used for the tooltip preview are read from `buildTimeline._segs`,
 * which the caller sets before invoking this function.
 *
 * Pointer handlers are installed as `on*` properties rather than with
 * addEventListener, so rebuilding the same ruler replaces the previous
 * handlers instead of stacking another set on top (which would fire onJump
 * once per earlier build, with stale durations).
 *
 * @param {number} durationMs - Total audio duration in milliseconds.
 * @param {HTMLElement|null} rulerEl - Ruler container; nothing happens when missing.
 * @param {(ms: number) => void} [onJump] - Called with the chosen time on click.
 * @returns {void}
 */
function buildTimeline(durationMs, rulerEl, onJump) {
  if (!rulerEl) return;
  rulerEl.innerHTML = "";
  // Drop handlers from any previous build, including when we bail out below.
  rulerEl.onmousemove = null;
  rulerEl.onmouseenter = null;
  rulerEl.onclick = null;
  if (!durationMs || durationMs <= 0) return;

  // One tick every 10 s, or every tenth of the duration when that is larger.
  const step = Math.max(10000, Math.floor(durationMs / 10));

  // Tooltip element
  const tooltip = document.createElement("div");
  tooltip.className = "tooltip";
  tooltip.style.left = "0%";
  rulerEl.appendChild(tooltip);

  // Pointer position as a 0..1 fraction of the ruler width. clientX minus the
  // ruler's left edge is used because offsetX is relative to whichever child
  // (tick, label, tooltip) happens to be under the pointer.
  const pointerFraction = (e) => {
    const x = e.clientX - rulerEl.getBoundingClientRect().left;
    return Math.max(0, Math.min(1, x / (rulerEl.clientWidth || 1)));
  };

  const updateTooltip = (fraction) => {
    const t = Math.round(durationMs * fraction);
    tooltip.style.left = `${fraction * 100}%`;
    let label = msToTimestamp(t)?.slice(0, 12) || "";
    const segs = Array.isArray(buildTimeline._segs) ? buildTimeline._segs : [];
    const near = segs.find((s) => {
      const start = anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
      const end = anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;
      return t >= start && t <= end;
    });
    if (near) {
      const ch = near.channel ?? near.channel_index ?? near.ch;
      const spk = near.speaker ?? near.spk ?? near.speaker_id;
      const previewRaw = (near.text || near.transcript || "").replace(/\n/g, " ");
      const preview = previewRaw.length > 36 ? (previewRaw.slice(0, 36) + "…") : previewRaw;
      label = `${msToTimestamp(t)?.slice(0, 12)} • ch=${ch ?? "-"} spk=${spk ?? "-"} • ${preview}`;
    }
    tooltip.textContent = label;
  };

  rulerEl.onmousemove = (e) => updateTooltip(pointerFraction(e));
  rulerEl.onmouseenter = (e) => updateTooltip(pointerFraction(e));
  rulerEl.onclick = (e) => {
    onJump?.(Math.round(durationMs * pointerFraction(e)));
  };

  for (let t = 0; t <= durationMs; t += step) {
    const p = (t / durationMs) * 100;
    const tick = document.createElement("div");
    tick.className = "tick";
    tick.style.left = `${p}%`;
    const label = document.createElement("div");
    label.className = "label";
    label.style.left = `${p}%`;
    label.textContent = msToTimestamp(t)?.slice(0, 8) || "";
    label.style.cursor = "pointer";
    label.addEventListener("click", (e) => {
      // Keep the click from also reaching the ruler handler (a second jump).
      e?.stopPropagation?.();
      onJump?.(t);
    });
    rulerEl.appendChild(tick);
    rulerEl.appendChild(label);
  }
}
/**
 * Return `text` as an HTML string with every keyword occurrence wrapped in
 * `<mark>…</mark>` (case-insensitive).
 *
 * The result is meant to be assigned to `innerHTML`, so all text, matched or
 * not, is HTML-escaped first. Previously each match was replaced by itself
 * (`$1`), which highlighted nothing and let raw markup from the transcript
 * reach the DOM.
 *
 * @param {string} text - Plain text to decorate.
 * @param {Array<string>} [keywords] - Keywords to highlight; empty or
 *   non-string entries are ignored.
 * @returns {string} Escaped HTML with matches wrapped in `<mark>`.
 */
function highlightKeywords(text, keywords) {
  const source = typeof text === "string" ? text : String(text ?? "");

  const htmlEntities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const escapeHtml = (value) => value.replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

  const terms = (Array.isArray(keywords) ? keywords : []).filter(
    (kw) => typeof kw === "string" && kw.length > 0
  );
  if (terms.length === 0) return escapeHtml(source);

  // Longest keyword first so "abc" wins over "ab" at the same position.
  const alternation = [...terms]
    .sort((a, b) => b.length - a.length)
    .map((kw) => kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  const matcher = new RegExp(`(${alternation})`, "gi");

  // split() with one capture group yields [plain, match, plain, match, …],
  // so odd indices are always the matched keywords.
  return source
    .split(matcher)
    .map((part, index) => (index % 2 === 1 ? `<mark>${escapeHtml(part)}</mark>` : escapeHtml(part)))
    .join("");
}
/**
 * Legacy structured view (kept but not used by the new UI): renders one column
 * per channel, one group per speaker, and one editable item per segment, plus
 * the timeline ruler.
 *
 * Edits typed into a segment are stored in `editedMap` under `${start}-${end}`.
 * Does nothing (apart from logging) when the containers are missing.
 *
 * @param {object} json - Recognition result.
 * @returns {void}
 */
function renderStructuredView(json) {
  const segs = extractSegments(json) || [];
  const container = document.getElementById("channelsContainer");
  const ruler = document.getElementById("timelineRuler");
  if (!container || !ruler) { appendLog('结构化容器缺失,跳过渲染'); return; }
  const duration = anyToMs(json?.data?.additions?.duration || json?.result?.additions?.duration) || json?.audio_info?.duration || 0;
  container.innerHTML = "";
  buildTimeline._segs = segs;
  buildTimeline(duration, ruler, (jumpMs) => {
    // Timeline click -> highlight the segments that contain the clicked time.
    const near = segs.filter(s => {
      const start = anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
      const end = anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;
      return jumpMs >= start && jumpMs <= end;
    });
    document.querySelectorAll('.segment-item.active').forEach(el => el.classList.remove('active'));
    near.forEach(s => {
      const selector = `.segment-item[data-start="${anyToMs(s.start_ms ?? s.start_time ?? s.start)}"][data-end="${anyToMs(s.end_ms ?? s.end_time ?? s.end)}"]`;
      const el = container.querySelector(selector);
      if (el) {
        el.classList.add('active');
        el.scrollIntoView({ behavior: 'smooth', block: 'center' });
      }
    });
  });
  const channelMap = groupBySpeakerAndChannel(segs);
  // The keyword input may be absent from the page; treat that as "no keywords"
  // instead of throwing on `.value` of null.
  const keywordsInput = (document.getElementById("sensitiveWords")?.value ?? "").trim();
  // Split on ASCII commas, full-width commas (U+FF0C) and whitespace.
  const keywords = keywordsInput ? keywordsInput.split(/[,\uFF0C\s]+/).filter(Boolean) : [];
  for (const [ch, speakerMap] of channelMap.entries()) {
    const column = document.createElement("div");
    column.className = "channel-column";
    const header = document.createElement("div");
    header.className = "channel-header";
    // Plain text only, so textContent avoids parsing the server-supplied
    // channel value as HTML.
    header.textContent = `声道 ch=${ch}分组:说话人`;
    column.appendChild(header);
    for (const [spk, items] of speakerMap.entries()) {
      const group = document.createElement("div");
      group.className = "speaker-group";
      const title = document.createElement("div");
      title.className = "speaker-title";
      const avatar = document.createElement("span");
      avatar.className = "speaker-avatar";
      const colorIdx = (Number(spk) || 0) % 6 || 1;
      avatar.style.background = getComputedStyle(document.documentElement).getPropertyValue(`--speaker-${colorIdx}`).trim() || "var(--accent)";
      // Badge text: for a string id use its first letter / CJK character,
      // otherwise show the value itself.
      let badgeText = "";
      if (typeof spk === "string") {
        const m = spk.match(/[A-Za-z\u4e00-\u9fa5]/);
        badgeText = (m ? m[0] : spk[0] || "?").toUpperCase();
      } else {
        badgeText = String(spk ?? "?");
      }
      avatar.textContent = badgeText;
      const label = document.createElement("span");
      label.textContent = `说话人 ${spk}`;
      title.appendChild(avatar);
      title.appendChild(label);
      group.appendChild(title);
      for (const s of items) {
        const item = document.createElement("div");
        item.className = "segment-item";
        const start = anyToMs(s.start_ms ?? s.start_time ?? s.start);
        const end = anyToMs(s.end_ms ?? s.end_time ?? s.end);
        item.dataset.start = String(start ?? 0);
        item.dataset.end = String(end ?? 0);
        const meta = document.createElement("div");
        meta.className = "segment-meta";
        meta.textContent = `[${msToTimestamp(start)} - ${msToTimestamp(end)}]`;
        const text = document.createElement("div");
        text.contentEditable = "true";
        text.className = "editable";
        const raw = (typeof s.text === "string" && s.text.trim()) || (typeof s.transcript === "string" && s.transcript.trim()) || "";
        const key = `${start}-${end}`;
        // Seed the edit store with the original text the first time it is seen.
        if (!editedMap.has(key)) editedMap.set(key, raw);
        // highlightKeywords() returns markup, hence innerHTML here.
        text.innerHTML = highlightKeywords(editedMap.get(key), keywords);
        text.addEventListener("input", () => {
          editedMap.set(key, text.textContent || "");
        });
        item.appendChild(meta);
        item.appendChild(text);
        group.appendChild(item);
      }
      column.appendChild(group);
    }
    container.appendChild(column);
  }
}
// Intentionally empty: the legacy view-mode switch is no longer used.
function renderByViewMode() { /* legacy view switching is no longer used */ }
// The legacy view-switch wiring has been removed.
/**
 * Tell whether a segment overlaps the inclusive range [fromMs, toMs].
 *
 * A null/undefined bound means "unbounded" on that side. Missing segment
 * times count as 0.
 *
 * @param {object} s - Segment with start/end fields.
 * @param {number|null} fromMs - Range start in milliseconds, or null.
 * @param {number|null} toMs - Range end in milliseconds, or null.
 * @returns {boolean}
 */
function withinRange(s, fromMs, toMs) {
  const segStart = anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
  const segEnd = anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;

  // Segment ends before the range begins.
  if (fromMs != null && !(segEnd >= fromMs)) return false;
  // Segment starts after the range ends.
  if (toMs != null && !(segStart <= toMs)) return false;
  return true;
}
/**
 * Parse the text of a range input into a number.
 *
 * - digits only ("1500"): returned as that number, unscaled;
 * - "MM:SS" or "H:MM:SS": converted to milliseconds.
 *
 * @param {string} v - Raw input value.
 * @returns {number|null} Parsed value, or null for empty / unrecognised input.
 */
function parseTimeInput(v) {
  if (!v) return null;

  const raw = v.trim();
  if (/^\d+$/.test(raw)) return Number(raw);

  const clock = /^(\d{1,2}):(\d{2})(?::(\d{2}))?$/.exec(raw);
  if (!clock) return null;

  const [, first, second, third] = clock;
  // With a third group the value is H:MM:SS, otherwise it is MM:SS.
  const [hours, minutes, seconds] = third
    ? [Number(first), Number(second), Number(third)]
    : [0, Number(first), Number(second)];
  return ((hours * 60 + minutes) * 60 + seconds) * 1000;
}
// "Export range as TXT": download the (edited) text of every segment that
// overlaps the time range typed into the range inputs.
exportRangeTxtBtn?.addEventListener("click", () => {
  if (!lastResultJson) {
    return;
  }
  const fromMs = parseTimeInput(rangeStartInput?.value);
  const toMs = parseTimeInput(rangeEndInput?.value);

  const textLines = [];
  for (const seg of buildSegmentsFromEdited(lastResultJson)) {
    if (withinRange(seg, fromMs, toMs)) {
      textLines.push(seg.text || seg.transcript || "");
    }
  }
  triggerDownload(textLines.join("\n"), "transcript_range.txt");
});
// "Export range as SRT": download SRT cues for every (edited) segment that
// overlaps the time range typed into the range inputs.
exportRangeSrtBtn?.addEventListener("click", () => {
  if (!lastResultJson) {
    return;
  }
  const fromMs = parseTimeInput(rangeStartInput?.value);
  const toMs = parseTimeInput(rangeEndInput?.value);
  const selected = buildSegmentsFromEdited(lastResultJson).filter((seg) => withinRange(seg, fromMs, toMs));

  // SRT uses a comma before the milliseconds; unknown times become zero.
  const toSrtTime = (ms) => msToTimestamp(ms)?.replace(".", ",") ?? "00:00:00,000";

  const cues = selected.map((seg, position) => {
    const startMs = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start) ?? 0;
    const endMs = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end) ?? 0;
    const body = seg.text || seg.transcript || "";
    return `${position + 1}\n${toSrtTime(startMs)} --> ${toSrtTime(endMs)}\n${body}\n`;
  });

  triggerDownload(cues.join("\n"), "transcript_range.srt");
});
// "Export CSV": download all (edited) segments as
// start_ms,end_ms,channel,speaker,text rows.
exportCsvBtn?.addEventListener("click", () => {
  if (!lastResultJson) return;

  // Quote a field when it contains a quote, comma, CR or LF, doubling any
  // embedded quotes. Applied to every column so that a speaker or channel
  // value containing `"` cannot produce a malformed row.
  const toCsvField = (value) => {
    const raw = String(value ?? "");
    return /[",\r\n]/.test(raw) ? `"${raw.replace(/"/g, '""')}"` : raw;
  };

  const header = ["start_ms", "end_ms", "channel", "speaker", "text"];
  const rows = buildSegmentsFromEdited(lastResultJson).map((s) => [
    anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0,
    anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0,
    s.channel ?? "",
    s.speaker ?? "",
    s.text || s.transcript || ""
  ]);

  const csv = [header, ...rows].map((row) => row.map(toCsvField).join(",")).join("\n");
  triggerDownload(csv, "transcript.csv");
});
const exportBtn = document.getElementById('exportBtn');
const copyAllBtn = document.getElementById('copyAllBtn');

// "Copy all": put the (edited) text of every segment on the clipboard.
copyAllBtn?.addEventListener("click", async () => {
  try {
    const segs = buildSegmentsFromEdited(lastResultJson || {});
    const text = segs.map((s) => s.text || s.transcript || "").join("\n");
    await navigator.clipboard.writeText(text);
  } catch (error) {
    // Clipboard access can be denied (permissions, insecure context); report
    // it in the debug log instead of failing silently.
    appendLog(`复制失败:${error instanceof Error ? error.message : String(error)}`);
  }
});
/**
 * Offer `content` to the user as a file download.
 *
 * A temporary object URL is created for a UTF-8 text Blob and "clicked"
 * through a throw-away anchor element.
 *
 * @param {string} content - File contents.
 * @param {string} filename - Suggested file name.
 * @returns {void}
 */
function triggerDownload(content, filename) {
  const blob = new Blob([content], { type: "text/plain;charset=utf-8" });
  const url = URL.createObjectURL(blob);
  const anchor = document.createElement("a");
  anchor.href = url;
  anchor.download = filename;
  document.body.appendChild(anchor);
  try {
    anchor.click();
  } finally {
    // Always detach the helper element, even if click() throws.
    anchor.remove();
    // Revoke on a later task: revoking synchronously right after click() can
    // release the URL before the browser has started reading the Blob.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
}
// "Download SRT": export the (edited) segments as SRT, or fall back to a plain
// text transcript when the result has no segments.
downloadSrtBtn?.addEventListener("click", () => {
  if (!lastResultJson) { return; }
  // Pass ONLY the edited segments. Spreading lastResultJson alongside them
  // would let extractSegments() pick `data.result.segments` first, so the
  // edited list would be ignored.
  const srt = buildSrtFromSegments({ segments: buildSegmentsFromEdited(lastResultJson) });
  const text = srt || (lastResultJson?.transcript || extractTranscript(lastResultJson) || "");
  triggerDownload(text, srt ? "transcript.srt" : "transcript.txt");
});
// Single export entry point: download a readable TXT in which every line
// carries the time range and the speaker name.
exportBtn?.addEventListener('click', () => {
  if (!lastResultJson) {
    appendLog('暂无结果可下载');
    return;
  }

  const rendered = [];
  for (const seg of buildSegmentsFromEdited(lastResultJson)) {
    const startMs = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start) ?? 0;
    const endMs = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end) ?? 0;
    const timeRange = `[${msToTimestamp(startMs)} - ${msToTimestamp(endMs)}]`;

    const speakerId = seg.speaker ?? seg.spk ?? seg.speaker_id;
    let speakerLabel = '';
    if (speakerId != null) {
      speakerLabel = speakerNameMap.get(speakerId) || `说话人 ${speakerId}`;
    }
    const namePrefix = speakerLabel ? `${speakerLabel}: ` : '';

    const body = (seg.text || seg.transcript || '').trim();
    rendered.push(`${timeRange} ${namePrefix}${body}`);
  }

  triggerDownload(rendered.join('\n'), 'transcript.txt');
});
/**
 * Render a recognition result in the new UI: editable speaker tags at the top
 * and one line per segment (timestamp, speaker badge, text) below.
 *
 * Speaker names and colours are kept in `speakerNameMap` / `speakerColorMap`
 * so they persist across renders. Any error is caught and written to the
 * debug log rather than thrown.
 *
 * @param {object} json - Recognition result.
 * @returns {void}
 */
function renderInNewUI(json){
  try{
    const segs = extractSegments(json) || [];
    const tagsEl = document.getElementById('speakerTags');
    const linesEl = document.getElementById('transcriptLines');
    const editToggle = document.getElementById('editToggle');
    const saveSpkBtn = document.getElementById('saveSpkBtn');
    if (tagsEl) tagsEl.innerHTML = '';
    if (linesEl) linesEl.innerHTML = '';

    // Collect speakers and give each a stable colour (kept across renders).
    const speakers = [];
    const spkSet = new Set();
    for (const s of segs){
      const spk = s.speaker ?? s.spk ?? s.speaker_id ?? null;
      if (spk == null) continue;
      if (!spkSet.has(spk)) { spkSet.add(spk); speakers.push(spk); }
      if (!speakerColorMap.has(spk)) {
        const used = new Set([...speakerColorMap.values()]);
        const pool = [1,2,3,1,2,3];
        // First unused colour, otherwise cycle through 1..3.
        const pick = pool.find(i => !used.has(i)) || ((speakerColorMap.size % 3) + 1);
        speakerColorMap.set(spk, pick);
      }
      if (!speakerNameMap.has(spk)) {
        speakerNameMap.set(spk, `说话人 ${spk}`);
      }
    }

    // dataset values are always strings, while speaker ids may be numbers.
    // Map the string form back to the original id so that names are stored
    // and looked up under the same key the export functions use.
    const keyByDatasetValue = new Map(speakers.map((spk) => [String(spk), spk]));
    const resolveSpeakerKey = (raw) => (keyByDatasetValue.has(raw) ? keyByDatasetValue.get(raw) : raw);

    // Render the editable tags at the top.
    if (tagsEl){
      speakers.forEach(spk => {
        const colorIdx = speakerColorMap.get(spk) || 1;
        const tag = document.createElement('span');
        tag.className = `speaker-tag tag-${colorIdx}`;
        tag.contentEditable = editMode ? 'true' : 'false';
        tag.dataset.spk = String(spk);
        tag.textContent = speakerNameMap.get(spk) || `说话人 ${spk}`;
        tag.addEventListener('input', () => {
          speakerNameMap.set(spk, tag.textContent || `说话人 ${spk}`);
          // Keep every badge of the same speaker in the lines below in sync.
          linesEl?.querySelectorAll(`[data-spk="${CSS.escape(String(spk))}"]`).forEach(el => {
            el.textContent = speakerNameMap.get(spk);
          });
        });
        tagsEl.appendChild(tag);
      });
    }

    // Render one transcript line per segment.
    for (const s of segs){
      if (!linesEl) break;
      const spk = s.speaker ?? s.spk ?? s.speaker_id ?? null;
      const item = document.createElement('div');
      item.className = 'transcript-line';
      const ts = document.createElement('span');
      ts.className = 'timestamp';
      const start = anyToMs(s.start_ms ?? s.start_time ?? s.start);
      const end = anyToMs(s.end_ms ?? s.end_time ?? s.end);
      ts.textContent = `[${msToTimestamp(start) || '00:00:00.000'} - ${msToTimestamp(end) || '00:00:00.000'}]`;
      const sp = document.createElement('span');
      const colorIdx = speakerColorMap.get(spk) || 1;
      sp.className = `speaker tag-${colorIdx}`;
      sp.dataset.spk = spk != null ? String(spk) : '';
      if (spk != null) sp.textContent = speakerNameMap.get(spk) || `说话人 ${spk}`;
      const text = document.createElement('div');
      text.className = 'text-content';
      text.textContent = (s.text || s.transcript || '').trim();
      item.appendChild(ts);
      // The speaker badge is only shown for segments that have a speaker.
      if (spk != null) item.appendChild(sp);
      item.appendChild(text);
      linesEl.appendChild(item);
    }

    // Wire the edit toggle.
    if (editToggle){
      editToggle.onclick = () => {
        editMode = !editMode;
        if (saveSpkBtn) saveSpkBtn.style.display = editMode ? 'inline-flex' : 'none';
        tagsEl?.querySelectorAll('.speaker-tag').forEach(el => { el.contentEditable = editMode ? 'true' : 'false'; });
      };
    }
    if (saveSpkBtn){
      saveSpkBtn.onclick = () => {
        tagsEl?.querySelectorAll('.speaker-tag').forEach(el => {
          const spk = resolveSpeakerKey(el.dataset.spk);
          speakerNameMap.set(spk, el.textContent || `说话人 ${spk}`);
        });
        // Leave edit mode and sync the badges in the transcript lines.
        editMode = false;
        saveSpkBtn.style.display = 'none';
        tagsEl?.querySelectorAll('.speaker-tag').forEach(el => { el.contentEditable = 'false'; });
        linesEl?.querySelectorAll('.speaker').forEach(el => {
          const spk = resolveSpeakerKey(el.dataset.spk);
          el.textContent = speakerNameMap.get(spk) || `说话人 ${spk}`;
        });
      };
    }
  }catch(e){ appendLog('渲染失败:' + (e?.message || String(e))); }
}
/**
 * Poll the result endpoint until the job produces segments, reports a terminal
 * status, or `maxAttempts` polls have been made.
 *
 * A response with structured segments/utterances is returned immediately (with
 * a `transcript` field added). A text-only response is not returned early, so
 * the caller does not end up with one big block of text; it is returned only
 * once the status is a success status.
 *
 * Note: for direct (non-proxy) calls the `X-Api-Request-Id` entry of the
 * passed-in `headers` object is replaced with a fresh id on every attempt.
 *
 * @param {object} params
 * @param {string} params.url - Result endpoint.
 * @param {Headers} params.headers - Request headers (mutated as described above).
 * @param {object} params.body - JSON body identifying the job.
 * @param {boolean} params.useProxy - True when the request goes through the proxy.
 * @returns {Promise<object>} The final response JSON.
 * @throws {Error} On a non-OK HTTP response, unparsable JSON, a failure status,
 *   or when all attempts are used up.
 */
async function pollJobResult({ url, headers, body, useProxy }) {
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    appendLog(`第 ${attempt} 次查询任务状态…`);
    if (!useProxy && headers.has("X-Api-Request-Id")) {
      headers.set("X-Api-Request-Id", (self.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`));
    }
    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body)
    });
    if (!response.ok) {
      const errorBody = await response.text();
      throw new Error(`查询失败,HTTP ${response.status}:${errorBody}`);
    }
    const json = await response.json().catch(() => {
      throw new Error("解析查询结果时发生错误");
    });

    // Extract once and reuse for both the log summary and the decision below.
    const segs = extractSegments(json);
    const hasSeg = Array.isArray(segs) && segs.length > 0;
    const extractedText = extractTranscript(json);
    appendLog(`查询结果摘要:segments=${hasSeg ? segs.length : 0}, transcriptLen=${(extractedText || '').length}`);

    // Prefer structured segments; text alone does not end the polling early.
    if (hasSeg) {
      const immediateTranscript = json?.transcript || extractedText;
      return { ...json, transcript: immediateTranscript || null };
    }

    const status = deriveStatus(json);
    if (!status) {
      appendLog("未返回任务状态,继续轮询…");
    } else if (statusSuccessSet.has(status.toUpperCase())) {
      return json;
    } else if (statusFailureSet.has(status.toUpperCase())) {
      throw new Error(`任务失败,状态:${status}`);
    }

    // Only wait when another attempt follows; waiting after the last one
    // would just delay the timeout error.
    if (attempt < maxAttempts) {
      await sleep(pollIntervalMs);
    }
  }
  throw new Error("轮询超时,请稍后重试");
}
/**
 * Return the recognition switches sent with every submit request.
 *
 * The new UI exposes no controls for these: every switch is on except channel
 * splitting, and no sensitive-word filter is sent. Channel splitting stays off
 * so the same sentence is not returned once per stereo channel.
 *
 * @returns {{enableItn: boolean, enablePunc: boolean, enableDdc: boolean,
 *   enableSpeaker: boolean, enableChannelSplit: boolean,
 *   showUtterances: boolean, vadSegment: boolean, sensitiveWords: string}}
 */
function readOptions() {
  return {
    enableItn: true,
    enablePunc: true,
    enableDdc: true,
    enableSpeaker: true,
    // Listed once: the object literal previously declared this key twice
    // (true, then false) and the later `false` silently won.
    enableChannelSplit: false,
    showUtterances: true,
    vadSegment: true,
    sensitiveWords: ""
  };
}
// Submission is triggered through the new UI's "convert again" button.
const convertBtn = document.getElementById('convertBtn');
const fileInputNew = document.getElementById('fileInput');

// Picking a file starts a conversion automatically, so the click is not lost
// when an inline script runs before this file has loaded.
let converting = false;

const triggerConvert = () => {
  if (converting) {
    appendLog('已忽略:上一次转换仍在进行中');
    return;
  }
  convertBtn?.click();
};

fileInputNew?.addEventListener('change', () => {
  appendLog('文件已选择,自动触发转换…');
  // Simple debounce: only the last change within 50 ms triggers a conversion.
  clearTimeout(window.__conv_debounce);
  window.__conv_debounce = setTimeout(triggerConvert, 50);
});
// Convert button: read the selected file, submit it for recognition, poll for
// the result, de-duplicate segments and render them.
//
// Everything after the re-entrancy check runs inside one try/finally so the
// `converting` flag and the submit button are always restored. Previously the
// early return for "no file selected" and any exception thrown before the try
// left `converting` stuck at true, and every later click was ignored.
convertBtn?.addEventListener('click', async (event) => {
  if (converting) { appendLog('已忽略:转换进行中'); return; }
  converting = true;
  event.preventDefault();
  try {
    resetOutputs();
    if (location.protocol === 'file:') { appendLog('当前以 file:// 打开页面,无法向本地代理提交;请访问 http://localhost:6174'); }

    // The new UI has no form fields for these; fixed stand-ins keep the
    // value-reading code below unchanged.
    const audioUrlInput = { value: "" }; // the new UI does not use a URL
    const fileInput = fileInputNew;
    const uidInput = { value: "AudioToText" };
    const modelNameInput = { value: "bigmodel" };
    const sampleRateInput = { value: 16000 };
    const bitsInput = { value: 16 };
    const channelInput = { value: 1 };
    const apiBaseInput = { value: "" };
    const apiKeyInput = { value: "" };
    const resourceIdInput = { value: "" };

    const audioUrl = audioUrlInput.value.trim();
    // The file input itself may be missing from the page.
    const audioFile = fileInput?.files?.[0] ?? null;
    if (!audioUrl && !audioFile) {
      appendLog("未选择文件,等待提交…");
      return;
    }
    appendLog("检测到文件选择,开始提交识别任务…");

    const apiBase = apiBaseInput.value.trim();
    const useProxy = apiBase.length === 0;
    const submitUrl = useProxy ? proxySubmitEndpoint : `${apiBase}/submit`;
    const resultUrl = useProxy ? proxyResultEndpoint : `${apiBase}/query`;
    const uid = uidInput.value.trim() || "anonymous";
    const modelName = modelNameInput.value.trim() || "bigmodel";
    const sampleRate = Number(sampleRateInput.value) || 16000;
    const bits = Number(bitsInput.value) || 16;
    const channels = Number(channelInput.value) || 1;
    const format = audioUrl ? detectAudioFormat(audioUrl) : detectAudioFormat(audioFile.name);
    const requestId = (self.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`);
    const apiKey = apiKeyInput.value.trim();
    const resourceId = resourceIdInput.value.trim();
    if (submitButton) submitButton.disabled = true;
    if (audioUrl) {
      appendLog(`使用音频 URL:${audioUrl}`);
    } else {
      appendLog(`读取文件:${audioFile.name}`);
    }

    const submitHeaders = createHeaders({ useProxy, apiKey, resourceId, requestId });
    const options = readOptions();

    // Codec mapping and a sample-rate fallback for compressed formats; the
    // same values apply to both the URL and the file branch.
    const mappedCodec = (["wav","pcm","wave"].includes(format) ? "raw" : (format === "mp3" ? "mp3" : (["m4a","aac"].includes(format) ? "aac" : format)));
    const finalRate = (["m4a","aac","mp3"].includes(format) && (!sampleRate || sampleRate < 22050)) ? 44100 : sampleRate;

    let payload;
    if (audioUrl) {
      appendLog(`提交参数:source=url, format=${format}, codec=${mappedCodec}, rate=${finalRate}, ch=${channels}`);
      payload = {
        user: { uid },
        audio: { url: audioUrl, format, codec: mappedCodec, rate: finalRate, bits, channel: channels },
        request: {
          model_name: modelName,
          enable_itn: !!options.enableItn,
          enable_punc: !!options.enablePunc,
          enable_ddc: !!options.enableDdc,
          enable_speaker_info: !!options.enableSpeaker,
          enable_channel_split: !!options.enableChannelSplit,
          show_utterances: !!options.showUtterances,
          vad_segment: !!options.vadSegment,
          sensitive_words_filter: options.sensitiveWords || ""
        }
      };
    } else {
      const base64WithPrefix = await readFileAsBase64(audioFile);
      const base64Data = stripDataUriPrefix(base64WithPrefix);
      appendLog(`文件编码完成,长度约 ${Math.round(base64Data.length / 1024)} KB`);
      payload = buildSubmitPayload({
        base64Data,
        format,
        sampleRate,
        bits,
        channels,
        uid,
        modelName,
        options
      });
      // Override codec / rate and coerce the switches to booleans.
      payload.audio.codec = mappedCodec;
      payload.audio.rate = finalRate;
      payload.request.enable_itn = !!options.enableItn;
      payload.request.enable_punc = !!options.enablePunc;
      payload.request.enable_ddc = !!options.enableDdc;
      payload.request.enable_speaker_info = !!options.enableSpeaker;
      payload.request.enable_channel_split = !!options.enableChannelSplit;
      payload.request.show_utterances = !!options.showUtterances;
      payload.request.vad_segment = !!options.vadSegment;
      payload.request.sensitive_words_filter = options.sensitiveWords || "";
      appendLog(`提交参数:source=file, format=${format}, codec=${mappedCodec}, rate=${finalRate}, ch=${channels}`);
    }

    // Log the request without the base64 audio: writing the whole file into
    // the log element's text can be many megabytes.
    const loggablePayload = typeof payload.audio?.data === "string"
      ? { ...payload, audio: { ...payload.audio, data: `<base64 ${payload.audio.data.length} chars>` } }
      : payload;
    appendLog(`提交请求体:${JSON.stringify(loggablePayload)}`);

    const submitResponse = await fetch(submitUrl, {
      method: "POST",
      headers: submitHeaders,
      body: JSON.stringify(payload)
    }).catch(err => {
      appendLog("提交请求未发出,可能没有后端代理或跨域受限");
      throw err;
    });
    if (!submitResponse.ok) {
      const errorBody = await submitResponse.text();
      appendLog("提交失败:请在本地运行带代理版本,或在代码里配置直连 API 基础地址");
      throw new Error(`提交失败,HTTP ${submitResponse.status}:${errorBody}`);
    }
    const submitJson = await submitResponse.json();
    appendLog(`提交成功:${JSON.stringify(submitJson)}`);
    const jobKey = extractJobKey(submitJson);
    if (!jobKey) {
      throw new Error("未能从提交响应中解析到任务 ID");
    }
    appendLog(`任务 ID:${jobKey}`);

    const pollHeaders = createHeaders({ useProxy, apiKey, resourceId, requestId: (self.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`) });
    const pollBody = { job_key: jobKey, taskId: jobKey };
    const resultJson = await pollJobResult({ url: resultUrl, headers: pollHeaders, body: pollBody, useProxy });
    appendLog("任务完成,处理结果如下:");
    appendLog(JSON.stringify(resultJson));

    // De-duplicate: segments whose start and end fall in the same 300 ms
    // bucket and whose text is identical are merged (cross-channel duplicates).
    (function dedup(){
      const segs = extractSegments(resultJson) || [];
      const norm = (ms)=>Math.round((anyToMs(ms)||0)/300);
      const seen = new Set();
      const filtered = [];
      for (const s of segs){
        const start = anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
        const end = anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;
        const key = `${norm(start)}|${norm(end)}|${(s.text||s.transcript||'').trim()}`;
        if (seen.has(key)) continue;
        seen.add(key);
        filtered.push(s);
      }
      // Store the result where extractSegments() looks first.
      if (!resultJson.data) resultJson.data = {};
      if (!resultJson.data.result) resultJson.data.result = {};
      resultJson.data.result.segments = filtered;
    })();

    lastResultJson = resultJson;
    renderInNewUI(lastResultJson);
    // Stop the elapsed-time timer, if the page started one.
    if (window.startAnalysisTimer && window.startAnalysisTimer._timer) { clearInterval(window.startAnalysisTimer._timer); window.startAnalysisTimer._timer=null; }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    appendLog(`发生错误:${message}`);
    if (transcriptElement) transcriptElement.value = message;
  } finally {
    converting = false;
    // Re-enable the legacy submit button if it exists and was disabled above.
    if (submitButton) submitButton.disabled = false;
  }
});