// Front-end controller for the audio-to-text page: submits audio to the ASR
// endpoint (through the local proxy by default), polls for the result, renders
// the transcript and provides TXT / SRT / CSV export.

const formElement = null; // Legacy form was removed; the new UI buttons trigger submission instead.
const submitButton = document.getElementById("submitButton");
let debugLogElement = document.getElementById("debugLog") || null;
const transcriptElement = document.getElementById("transcriptOutput") || null;
const downloadTxtBtn = document.getElementById("downloadTxtBtn");
const downloadSrtBtn = document.getElementById("downloadSrtBtn");
const copyBtn = document.getElementById("copyBtn");
const viewModeSelect = document.getElementById("viewMode");
const exportRangeTxtBtn = document.getElementById("exportRangeTxt");
const exportRangeSrtBtn = document.getElementById("exportRangeSrt");
const exportCsvBtn = document.getElementById("exportCsv");
const rangeStartInput = document.getElementById("rangeStart");
const rangeEndInput = document.getElementById("rangeEnd");
const exportToolbar = document.getElementById("exportToolbar");
let lastResultJson = null;
const editedMap = new Map(); // key: `${start}-${end}` -> edited text

// Speaker display-name and color mappings (kept across renders).
const speakerNameMap = new Map(); // key: speakerId -> display name (editable)
const speakerColorMap = new Map(); // key: speakerId -> 1..3, mapped to CSS classes tag-1..tag-3
let editMode = false; // whether "edit speaker names" mode is currently on

// Per-ruler AbortController so that rebuilding a timeline drops the listeners
// registered by the previous build instead of stacking them.
const timelineListenerControllers = new WeakMap();

// Load marker: makes it easy to confirm in the debug log that this script ran.
appendLog('前端脚本已加载');
window.addEventListener('DOMContentLoaded', () => {
  debugLogElement = document.getElementById('debugLog');
  appendLog('DOM 已就绪');
  fetch('/health')
    .then((r) => r.json())
    .then((j) => appendLog('健康检查:' + JSON.stringify(j)))
    .catch((e) => appendLog('健康检查失败:' + e.message));
});

const proxySubmitEndpoint = "/api/asr/submit";
const proxyResultEndpoint = "/api/asr/result";
const pollIntervalMs = 2000;
const maxAttempts = 60;

const statusSuccessSet = new Set(["SUCCEEDED", "SUCCESS", "FINISHED"]);
const statusFailureSet = new Set(["FAILED", "FAIL", "ERROR"]);

/**
 * Appends a timestamped line to the debug log element (no-op if it is absent).
 * The placeholder text "等待提交…" is replaced rather than appended to.
 * @param {string} message
 */
function appendLog(message) {
  const el = debugLogElement || document.getElementById("debugLog");
  if (!el) return;
  const timestamp = new Date().toISOString();
  const nextLine = `[${timestamp}] ${message}`;
  if (el.textContent === "等待提交…") {
    el.textContent = nextLine;
  } else {
    el.textContent = `${el.textContent}\n${nextLine}`;
  }
}

/** Clears the transcript, debug log, transcript lines and speaker tags. */
function resetOutputs() {
  if (transcriptElement) transcriptElement.value = "";
  if (debugLogElement) debugLogElement.textContent = "等待提交…";
  const lines = document.getElementById('transcriptLines');
  if (lines) lines.innerHTML = '';
  const tags = document.getElementById('speakerTags');
  if (tags) tags.innerHTML = '';
}

/** Removes a leading `data:...;base64,` prefix from a data-URI string. */
function stripDataUriPrefix(base64String) {
  return base64String.replace(/^data:.*;base64,/, "");
}

/**
 * Derives an audio format from a file name or URL by looking at its extension.
 * @param {string} nameOrUrl
 * @returns {string} lower-case extension ("wav" when none can be found)
 */
function detectAudioFormat(nameOrUrl) {
  if (!nameOrUrl || typeof nameOrUrl !== "string") return "wav";
  let s = nameOrUrl.trim();
  try {
    // If this parses as a URL, only the pathname is relevant.
    const u = new URL(s);
    s = u.pathname;
  } catch (_) {
    // Not a URL: keep working with the raw string.
  }
  // Strip any remaining query string / fragment.
  s = s.split("?")[0].split("#")[0];
  const lastSlash = s.lastIndexOf("/");
  if (lastSlash !== -1) s = s.slice(lastSlash + 1);
  const lastDot = s.lastIndexOf(".");
  if (lastDot === -1) return "wav";
  const ext = s.slice(lastDot + 1).toLowerCase();
  // Normalize a few aliases.
  if (["m4a", "mp4a", "mp4"].includes(ext)) return "m4a";
  if (ext === "oga") return "ogg";
  return ext;
}

/**
 * Reads a File/Blob as a data URL.
 * @returns {Promise<string>} resolves with the `data:` URL; rejects on read error
 */
function readFileAsBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(new Error("读取文件失败"));
    reader.readAsDataURL(file);
  });
}

/**
 * Builds request headers. In proxy mode only Content-Type is sent; in direct
 * mode the API key and request-tracking headers are added.
 * @throws {Error} when direct mode is used without an API key
 */
function createHeaders({ useProxy, apiKey, resourceId, requestId }) {
  const headers = new Headers();
  headers.set("Content-Type", "application/json");

  if (useProxy) {
    return headers;
  }

  if (!apiKey) {
    throw new Error("当直连豆包接口时必须填写 API Key");
  }

  headers.set("x-api-key", apiKey);
  if (resourceId) {
    headers.set("X-Api-Resource-Id", resourceId);
  }
  headers.set("X-Api-Request-Id", requestId);
  headers.set("X-Api-Sequence", "-1");
  return headers;
}

/** Returns a fresh request id (UUID when available, otherwise a time/random string). */
function newRequestId() {
  return self.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`;
}

/** Maps an audio format to the codec value sent in the submit payload. */
function mapAudioCodec(format) {
  if (["wav", "pcm", "wave"].includes(format)) return "raw";
  if (["m4a", "aac"].includes(format)) return "aac";
  return format;
}

/**
 * For compressed formats (m4a/aac/mp3) a missing or low (< 22050) sample rate
 * is replaced by 44100; otherwise the given rate is returned unchanged.
 */
function resolveSampleRate(format, sampleRate) {
  const isCompressed = ["m4a", "aac", "mp3"].includes(format);
  return isCompressed && (!sampleRate || sampleRate < 22050) ? 44100 : sampleRate;
}

/** Builds the `request` section of the submit payload with boolean-coerced flags. */
function buildRequestSection(modelName, options) {
  return {
    model_name: modelName,
    enable_itn: !!options.enableItn,
    enable_punc: !!options.enablePunc,
    enable_ddc: !!options.enableDdc,
    enable_speaker_info: !!options.enableSpeaker,
    enable_channel_split: !!options.enableChannelSplit,
    show_utterances: !!options.showUtterances,
    vad_segment: !!options.vadSegment,
    sensitive_words_filter: options.sensitiveWords || ""
  };
}

/**
 * Returns a copy of the payload that is safe to log: inline base64 audio is
 * replaced by a short placeholder so the debug log does not grow by megabytes.
 */
function redactPayloadForLog(payload) {
  const data = payload?.audio?.data;
  if (typeof data !== "string") return payload;
  return {
    ...payload,
    audio: { ...payload.audio, data: `[base64 已省略,共 ${data.length} 字符]` }
  };
}

/** Builds the submit payload for inline (base64) audio. */
function buildSubmitPayload({ base64Data, format, sampleRate, bits, channels, uid, modelName, options }) {
  return {
    user: { uid },
    audio: {
      data: base64Data,
      format,
      codec: ["mp3", "wav", "pcm"].includes(format) ? "raw" : format,
      rate: sampleRate,
      bits,
      channel: channels
    },
    request: {
      model_name: modelName,
      enable_itn: options.enableItn,
      enable_punc: options.enablePunc,
      enable_ddc: options.enableDdc,
      enable_speaker_info: options.enableSpeaker,
      enable_channel_split: options.enableChannelSplit,
      show_utterances: options.showUtterances,
      vad_segment: options.vadSegment,
      sensitive_words_filter: options.sensitiveWords
    }
  };
}

/** Returns the first non-empty string job/task id found in a submit response, or null. */
function extractJobKey(result) {
  const candidates = [
    result?.result?.job_key,
    result?.result?.task_id,
    result?.result?.taskId,
    result?.data?.job_key,
    result?.data?.taskId,
    result?.data?.task_id,
    result?.job_key,
    result?.task_id,
    result?.taskId,
    result?.RequestId,
    result?.request_id
  ];
  return candidates.find((value) => typeof value === "string" && value.length > 0) ?? null;
}

/** Returns the first non-empty string status found in a query response, or null. */
function deriveStatus(result) {
  const candidates = [
    result?.result?.status,
    result?.result?.task_status,
    result?.data?.status,
    result?.data?.task_status,
    result?.status
  ];
  return candidates.find((value) => typeof value === "string" && value.length > 0) ?? null;
}

/** Joins the trimmed `text`/`transcript` of each segment with newlines; null when nothing usable. */
function flattenSegmentsText(segments) {
  if (!Array.isArray(segments) || segments.length === 0) {
    return null;
  }
  const texts = segments
    .map((segment) => {
      if (typeof segment?.text === "string" && segment.text.trim().length > 0) {
        return segment.text.trim();
      }
      if (typeof segment?.transcript === "string" && segment.transcript.trim().length > 0) {
        return segment.transcript.trim();
      }
      return null;
    })
    .filter((text) => typeof text === "string" && text.length > 0);
  if (texts.length === 0) {
    return null;
  }
  return texts.join("\n");
}

/** Extracts the plain transcript text from a result, preferring segment text. */
function extractTranscript(result) {
  const segmentsTranscript =
    flattenSegmentsText(result?.data?.result?.segments) ??
    flattenSegmentsText(result?.result?.segments);
  if (segmentsTranscript) {
    return segmentsTranscript;
  }
  const candidates = [
    result?.data?.result?.text,
    result?.result?.text,
    result?.result?.output?.choices?.[0]?.text,
    result?.result?.output?.transcript,
    result?.result?.transcript,
    result?.data?.output?.choices?.[0]?.text,
    result?.data?.transcript
  ];
  return candidates.find((value) => typeof value === "string" && value.trim().length > 0) ?? null;
}

/**
 * Formats milliseconds as `HH:MM:SS.mmm` (with a leading "-" for negatives).
 * @returns {string|null} null when `ms` is not a valid number
 */
function msToTimestamp(ms) {
  if (typeof ms !== "number" || Number.isNaN(ms)) return null;
  const sign = ms < 0 ? "-" : "";
  const t = Math.abs(ms);
  const hours = Math.floor(t / 3600000);
  const minutes = Math.floor((t % 3600000) / 60000);
  const seconds = Math.floor((t % 60000) / 1000);
  const millis = Math.floor(t % 1000);
  const hh = hours.toString().padStart(2, "0");
  const mm = minutes.toString().padStart(2, "0");
  const ss = seconds.toString().padStart(2, "0");
  const mmm = millis.toString().padStart(3, "0");
  return `${sign}${hh}:${mm}:${ss}.${mmm}`;
}

/**
 * Converts a time value to milliseconds. Numbers are returned unchanged;
 * strings may be decimal seconds ("1.23") or "HH:MM:SS.mmm".
 * @returns {number|null} null for anything unrecognized
 */
function anyToMs(value) {
  if (typeof value === "number") return value;
  if (typeof value === "string") {
    if (/^\d+(\.\d+)?$/.test(value)) {
      return Number(value) * 1000;
    }
    const m = value.match(/^(\d{2}):(\d{2}):(\d{2})\.(\d{1,3})$/);
    if (m) {
      const h = Number(m[1]);
      const min = Number(m[2]);
      const s = Number(m[3]);
      const ms = Number(m[4].padEnd(3, "0"));
      return ((h * 60 + min) * 60 + s) * 1000 + ms;
    }
  }
  return null;
}

/** Start of a segment in ms (null when absent/unparseable). */
function segmentStartMs(seg) {
  return anyToMs(seg.start_ms ?? seg.start_time ?? seg.start);
}

/** End of a segment in ms (null when absent/unparseable). */
function segmentEndMs(seg) {
  return anyToMs(seg.end_ms ?? seg.end_time ?? seg.end);
}

/** Returns the first candidate that is a non-empty array, or null. */
function firstNonEmptyArray(...candidates) {
  return candidates.find((c) => Array.isArray(c) && c.length > 0) ?? null;
}

/**
 * Returns the segment list of a result. Falls back to `utterances`, which are
 * converted to the segment shape ({start_ms, end_ms, text, channel, speaker}).
 * An empty list at one location no longer hides a populated one elsewhere.
 * @returns {Array<object>|null}
 */
function extractSegments(obj) {
  const segs = firstNonEmptyArray(
    obj?.data?.result?.segments,
    obj?.result?.segments,
    obj?.segments
  );
  if (segs) return segs;

  const utts = firstNonEmptyArray(
    obj?.data?.result?.utterances,
    obj?.result?.utterances,
    obj?.utterances
  );
  if (utts) {
    return utts.map((u) => ({
      start_ms: u.start_ms ?? u.start_time ?? u.start,
      end_ms: u.end_ms ?? u.end_time ?? u.end,
      text: u.text,
      channel: u.channel ?? u.channel_index ?? u.ch ?? u.additions?.channel_id,
      speaker: u.speaker ?? u.spk ?? u.speaker_id ?? u.additions?.speaker
    }));
  }
  return null;
}

/** Formats segments as lines of `[start-end] (ch=…, spk=…) text`; null when there are none. */
function formatSegmentsDetailed(obj) {
  const segments = extractSegments(obj);
  if (!Array.isArray(segments) || segments.length === 0) return null;
  const lines = [];
  for (const seg of segments) {
    const start = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start ?? seg.begin_ms ?? seg.begin_time);
    const end = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end ?? seg.finish_ms ?? seg.finish_time);
    const startTs = start != null ? msToTimestamp(start) : null;
    const endTs = end != null ? msToTimestamp(end) : null;
    const ch = seg.channel ?? seg.channel_index ?? seg.ch ?? null;
    const spk = seg.speaker ?? seg.spk ?? seg.speaker_id ?? null;
    const text =
      (typeof seg.text === "string" && seg.text.trim()) ||
      (typeof seg.transcript === "string" && seg.transcript.trim()) ||
      "";
    const attrs = [];
    if (ch != null) attrs.push(`ch=${ch}`);
    if (spk != null) attrs.push(`spk=${spk}`);
    const range = startTs && endTs
      ? `[${startTs}-${endTs}]`
      : (startTs ? `[${startTs}-?]` : (endTs ? `[?-${endTs}]` : ""));
    const header = [range, attrs.length ? `(${attrs.join(", ")})` : ""].filter(Boolean).join(" ");
    lines.push(header ? `${header} ${text}` : text);
  }
  return lines.join("\n");
}

/** Resolves after `durationMs` milliseconds. */
function sleep(durationMs) {
  return new Promise((resolve) => setTimeout(resolve, durationMs));
}

/** Returns the segments with numeric start/end and with user edits from `editedMap` applied. */
function buildSegmentsFromEdited(obj) {
  const segs = extractSegments(obj) || [];
  return segs.map((s) => {
    const start = segmentStartMs(s) ?? 0;
    const end = segmentEndMs(s) ?? 0;
    const key = `${start}-${end}`;
    const text = editedMap.get(key) ?? s.text ?? s.transcript ?? "";
    return { ...s, start_ms: start, end_ms: end, text };
  });
}

/** Builds SRT text (with `[speaker] ` prefixes) from a result's segments; null when there are none. */
function buildSrtFromSegments(obj) {
  const segs = extractSegments(obj);
  if (!Array.isArray(segs) || segs.length === 0) return null;
  const blocks = [];
  let idx = 1;
  for (const s of segs) {
    const startTs = msToTimestamp(segmentStartMs(s))?.replace(".", ",") ?? "00:00:00,000";
    const endTs = msToTimestamp(segmentEndMs(s))?.replace(".", ",") ?? "00:00:00,000";
    const spkId = s.speaker ?? s.spk ?? s.speaker_id;
    const spkName = spkId != null ? (speakerNameMap.get(spkId) || `说话人 ${spkId}`) : "";
    const text =
      (typeof s.text === "string" && s.text.trim()) ||
      (typeof s.transcript === "string" && s.transcript.trim()) ||
      "";
    const prefix = spkName ? `[${spkName}] ` : "";
    blocks.push(`${idx}\n${startTs} --> ${endTs}\n${prefix}${text}\n`);
    idx += 1;
  }
  return blocks.join("\n");
}

/** Groups segments as Map(channel -> Map(speaker -> segments[])); missing ids become "unknown". */
function groupBySpeakerAndChannel(segs) {
  const map = new Map();
  for (const s of segs) {
    const ch = s.channel ?? s.channel_index ?? s.ch ?? s.additions?.channel_id ?? "unknown";
    const spk = s.speaker ?? s.spk ?? s.speaker_id ?? s.additions?.speaker ?? "unknown";
    if (!map.has(ch)) map.set(ch, new Map());
    const inner = map.get(ch);
    if (!inner.has(spk)) inner.set(spk, []);
    inner.get(spk).push(s);
  }
  return map;
}

/**
 * Renders a clickable time ruler with a hover tooltip into `rulerEl`.
 * Segments for the tooltip preview are read from `buildTimeline._segs`.
 * @param {number} durationMs total duration; nothing is rendered when <= 0
 * @param {HTMLElement} rulerEl
 * @param {(ms: number) => void} [onJump] called with the clicked time in ms
 */
function buildTimeline(durationMs, rulerEl, onJump) {
  if (!rulerEl) return;

  // Remove listeners left over from an earlier build on the same ruler;
  // otherwise every re-render would add another set of handlers.
  timelineListenerControllers.get(rulerEl)?.abort();
  timelineListenerControllers.delete(rulerEl);

  rulerEl.innerHTML = "";
  if (!durationMs || durationMs <= 0) return;

  const controller = new AbortController();
  timelineListenerControllers.set(rulerEl, controller);
  const { signal } = controller;

  const step = Math.max(10000, Math.floor(durationMs / 10)); // at least 10 s per tick

  const tooltip = document.createElement("div");
  tooltip.className = "tooltip";
  tooltip.style.left = "0%";
  rulerEl.appendChild(tooltip);

  // Pointer position relative to the ruler itself. `offsetX` is relative to
  // the event target, which may be a child (tick/label/tooltip).
  const pointerX = (e) => e.clientX - rulerEl.getBoundingClientRect().left;
  const toPercent = (x) => Math.max(0, Math.min(1, x / (rulerEl.clientWidth || 1)));

  const updateTooltip = (x) => {
    const percent = toPercent(x);
    const t = Math.round(durationMs * percent);
    tooltip.style.left = `${percent * 100}%`;
    let label = msToTimestamp(t)?.slice(0, 12) || "";
    const segs = Array.isArray(buildTimeline._segs) ? buildTimeline._segs : [];
    const near = segs.find((s) => {
      const start = segmentStartMs(s) ?? 0;
      const end = segmentEndMs(s) ?? 0;
      return t >= start && t <= end;
    });
    if (near) {
      const ch = near.channel ?? near.channel_index ?? near.ch;
      const spk = near.speaker ?? near.spk ?? near.speaker_id;
      const previewRaw = (near.text || near.transcript || "").replace(/\n/g, " ");
      const preview = previewRaw.length > 36 ? (previewRaw.slice(0, 36) + "…") : previewRaw;
      label = `${msToTimestamp(t)?.slice(0, 12)} • ch=${ch ?? "-"} spk=${spk ?? "-"} • ${preview}`;
    }
    tooltip.textContent = label;
  };

  rulerEl.addEventListener("mousemove", (e) => updateTooltip(pointerX(e)), { signal });
  rulerEl.addEventListener("mouseenter", (e) => updateTooltip(pointerX(e)), { signal });
  rulerEl.addEventListener("click", (e) => {
    const t = Math.round(durationMs * toPercent(pointerX(e)));
    onJump?.(t);
  }, { signal });

  for (let t = 0; t <= durationMs; t += step) {
    const p = (t / durationMs) * 100;
    const tick = document.createElement("div");
    tick.className = "tick";
    tick.style.left = `${p}%`;
    const label = document.createElement("div");
    label.className = "label";
    label.style.left = `${p}%`;
    label.textContent = msToTimestamp(t)?.slice(0, 8) || "";
    label.style.cursor = "pointer";
    label.addEventListener("click", () => onJump?.(t));
    rulerEl.appendChild(tick);
    rulerEl.appendChild(label);
  }
}

/** Escapes the five HTML-significant characters so text can be used in markup. */
function escapeHtml(value) {
  return String(value ?? "")
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Returns an HTML string in which `text` is escaped and every case-insensitive
 * occurrence of a keyword is wrapped in `<mark>`. The result is meant to be
 * assigned to `innerHTML`.
 * NOTE(review): the `<mark>` wrapper is assumed to be the intended highlight
 * markup (the previous replacement was a no-op) — confirm against the page CSS.
 * @param {string} text raw (unescaped) text
 * @param {string[]} keywords
 * @returns {string}
 */
function highlightKeywords(text, keywords) {
  const source = String(text ?? "");
  const patterns = (Array.isArray(keywords) ? keywords : [])
    .filter(Boolean)
    .map((kw) => String(kw))
    // Longer keywords first so they win over their own prefixes in the alternation.
    .sort((a, b) => b.length - a.length)
    .map((kw) => kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
  if (patterns.length === 0) return escapeHtml(source);

  // Split on the raw text, then escape each piece: this way keywords can never
  // match inside an HTML entity produced by escaping.
  const re = new RegExp(`(${patterns.join("|")})`, "gi");
  return source
    .split(re)
    .map((part, i) => (i % 2 === 1 ? `<mark>${escapeHtml(part)}</mark>` : escapeHtml(part)))
    .join("");
}

/**
 * Legacy structured view (kept but not used by the new UI): renders segments
 * grouped by channel and speaker, with a timeline and editable text.
 */
function renderStructuredView(json) {
  const segs = extractSegments(json) || [];
  const container = document.getElementById("channelsContainer");
  const ruler = document.getElementById("timelineRuler");
  if (!container || !ruler) {
    appendLog('结构化容器缺失,跳过渲染');
    return;
  }
  const duration =
    anyToMs(json?.data?.additions?.duration || json?.result?.additions?.duration) ||
    json?.audio_info?.duration ||
    0;
  container.innerHTML = "";
  buildTimeline._segs = segs;
  buildTimeline(duration, ruler, (jumpMs) => {
    // Timeline click: highlight the segments that contain the clicked time.
    const near = segs.filter((s) => {
      const start = segmentStartMs(s) ?? 0;
      const end = segmentEndMs(s) ?? 0;
      return jumpMs >= start && jumpMs <= end;
    });
    document.querySelectorAll('.segment-item.active').forEach((el) => el.classList.remove('active'));
    near.forEach((s) => {
      // Use the same `?? 0` fallback as the data attributes written below.
      const selector = `.segment-item[data-start="${segmentStartMs(s) ?? 0}"][data-end="${segmentEndMs(s) ?? 0}"]`;
      const el = container.querySelector(selector);
      if (el) {
        el.classList.add('active');
        el.scrollIntoView({ behavior: 'smooth', block: 'center' });
      }
    });
  });

  const channelMap = groupBySpeakerAndChannel(segs);
  // The keyword input may not exist in the current UI.
  const keywordsInput = (document.getElementById("sensitiveWords")?.value ?? "").trim();
  // Accept ASCII comma, full-width comma and whitespace as separators.
  const keywords = keywordsInput ? keywordsInput.split(/[,,\s]+/).filter(Boolean) : [];

  for (const [ch, speakerMap] of channelMap.entries()) {
    const column = document.createElement("div");
    column.className = "channel-column";
    const header = document.createElement("div");
    header.className = "channel-header";
    // Plain text only, so avoid innerHTML with server-provided values.
    header.textContent = `声道 ch=${ch}分组:说话人`;
    column.appendChild(header);

    for (const [spk, items] of speakerMap.entries()) {
      const group = document.createElement("div");
      group.className = "speaker-group";
      const title = document.createElement("div");
      title.className = "speaker-title";
      const avatar = document.createElement("span");
      avatar.className = "speaker-avatar";
      const colorIdx = (Number(spk) || 0) % 6 || 1;
      avatar.style.background =
        getComputedStyle(document.documentElement).getPropertyValue(`--speaker-${colorIdx}`).trim() ||
        "var(--accent)";
      // Badge text: first letter / CJK character for string ids, the value itself otherwise.
      let badgeText = "";
      if (typeof spk === "string") {
        const m = spk.match(/[A-Za-z\u4e00-\u9fa5]/);
        badgeText = (m ? m[0] : spk[0] || "?").toUpperCase();
      } else {
        badgeText = String(spk ?? "?");
      }
      avatar.textContent = badgeText;
      const label = document.createElement("span");
      label.textContent = `说话人 ${spk}`;
      title.appendChild(avatar);
      title.appendChild(label);
      group.appendChild(title);

      for (const s of items) {
        const item = document.createElement("div");
        item.className = "segment-item";
        const start = segmentStartMs(s);
        const end = segmentEndMs(s);
        item.dataset.start = String(start ?? 0);
        item.dataset.end = String(end ?? 0);
        const meta = document.createElement("div");
        meta.className = "segment-meta";
        meta.textContent = `[${msToTimestamp(start) ?? "00:00:00.000"} - ${msToTimestamp(end) ?? "00:00:00.000"}]`;
        const text = document.createElement("div");
        text.contentEditable = "true";
        text.className = "editable";
        const raw =
          (typeof s.text === "string" && s.text.trim()) ||
          (typeof s.transcript === "string" && s.transcript.trim()) ||
          "";
        const key = `${start}-${end}`;
        if (!editedMap.has(key)) editedMap.set(key, raw);
        // highlightKeywords returns escaped HTML, so this assignment is safe.
        text.innerHTML = highlightKeywords(editedMap.get(key), keywords);
        text.addEventListener("input", () => {
          editedMap.set(key, text.textContent || "");
        });
        item.appendChild(meta);
        item.appendChild(text);
        group.appendChild(item);
      }
      column.appendChild(group);
    }
    container.appendChild(column);
  }
}

function renderByViewMode() { /* Legacy view switching is no longer used. */ }

/** True when the segment overlaps [fromMs, toMs]; a null bound is open-ended. */
function withinRange(s, fromMs, toMs) {
  const start = segmentStartMs(s) ?? 0;
  const end = segmentEndMs(s) ?? 0;
  return (fromMs == null || end >= fromMs) && (toMs == null || start <= toMs);
}

/**
 * Parses a range input. "MM:SS" / "HH:MM:SS" are converted to milliseconds; a
 * bare integer is returned unchanged. Returns null for empty/unrecognized input.
 */
function parseTimeInput(v) {
  if (!v) return null;
  const s = v.trim();
  if (/^\d+$/.test(s)) return Number(s);
  const m = s.match(/^(\d{1,2}):(\d{2})(?::(\d{2}))?$/);
  if (m) {
    const h = m[3] ? Number(m[1]) : 0;
    const min = m[3] ? Number(m[2]) : Number(m[1]);
    const sec = m[3] ? Number(m[3]) : Number(m[2]);
    return ((h * 60 + min) * 60 + sec) * 1000;
  }
  return null;
}

/** Quotes a CSV cell when needed, doubling embedded quotes. */
function csvEscape(cell) {
  const s = String(cell);
  return /[",\r\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
}

exportRangeTxtBtn?.addEventListener("click", () => {
  if (!lastResultJson) return;
  const fromMs = parseTimeInput(rangeStartInput?.value);
  const toMs = parseTimeInput(rangeEndInput?.value);
  const segs = buildSegmentsFromEdited(lastResultJson).filter((s) => withinRange(s, fromMs, toMs));
  const text = segs.map((s) => s.text || s.transcript || "").join("\n");
  triggerDownload(text, "transcript_range.txt");
});

exportRangeSrtBtn?.addEventListener("click", () => {
  if (!lastResultJson) return;
  const fromMs = parseTimeInput(rangeStartInput?.value);
  const toMs = parseTimeInput(rangeEndInput?.value);
  const segs = buildSegmentsFromEdited(lastResultJson).filter((s) => withinRange(s, fromMs, toMs));
  const blocks = segs.map((s, i) => {
    const start = segmentStartMs(s) ?? 0;
    const end = segmentEndMs(s) ?? 0;
    const startTs = msToTimestamp(start)?.replace(".", ",") ?? "00:00:00,000";
    const endTs = msToTimestamp(end)?.replace(".", ",") ?? "00:00:00,000";
    return `${i + 1}\n${startTs} --> ${endTs}\n${s.text || s.transcript || ""}\n`;
  });
  triggerDownload(blocks.join("\n"), "transcript_range.srt");
});

exportCsvBtn?.addEventListener("click", () => {
  if (!lastResultJson) return;
  const segs = buildSegmentsFromEdited(lastResultJson);
  const header = ["start_ms", "end_ms", "channel", "speaker", "text"];
  const rows = segs.map((s) => [
    segmentStartMs(s) ?? 0,
    segmentEndMs(s) ?? 0,
    s.channel ?? "",
    s.speaker ?? "",
    s.text || s.transcript || ""
  ]);
  // Every column goes through the same escaping.
  const csv = [header.join(","), ...rows.map((r) => r.map(csvEscape).join(","))].join("\n");
  triggerDownload(csv, "transcript.csv");
});

const exportBtn = document.getElementById('exportBtn');
const copyAllBtn = document.getElementById('copyAllBtn');

copyAllBtn?.addEventListener("click", async () => {
  try {
    const segs = buildSegmentsFromEdited(lastResultJson || {});
    const text = segs.map((s) => s.text || s.transcript || "").join("\n");
    await navigator.clipboard.writeText(text);
  } catch (e) {
    // Clipboard access can be denied; report it instead of failing silently.
    appendLog('复制失败:' + (e?.message || String(e)));
  }
});

/** Downloads `content` as a UTF-8 text file named `filename`. */
function triggerDownload(content, filename) {
  const blob = new Blob([content], { type: "text/plain;charset=utf-8" });
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);
  a.click();
  a.remove();
  URL.revokeObjectURL(url);
}

downloadSrtBtn?.addEventListener("click", () => {
  if (!lastResultJson) {
    return;
  }
  // Pass only the edited segments: extractSegments prefers data.result.segments,
  // so spreading lastResultJson here would make the edits invisible.
  const srt = buildSrtFromSegments({ segments: buildSegmentsFromEdited(lastResultJson) });
  const text = srt || (lastResultJson?.transcript || extractTranscript(lastResultJson) || "");
  triggerDownload(text, srt ? "transcript.srt" : "transcript.txt");
});

// Unified export button: readable TXT with time range and speaker name per line.
exportBtn?.addEventListener('click', () => {
  if (!lastResultJson) {
    appendLog('暂无结果可下载');
    return;
  }
  const segs = buildSegmentsFromEdited(lastResultJson);
  const lines = segs.map((s) => {
    const start = segmentStartMs(s) ?? 0;
    const end = segmentEndMs(s) ?? 0;
    const time = `[${msToTimestamp(start)} - ${msToTimestamp(end)}]`;
    const spkId = s.speaker ?? s.spk ?? s.speaker_id;
    const spk = spkId != null ? (speakerNameMap.get(spkId) || `说话人 ${spkId}`) : '';
    const name = spk ? `${spk}: ` : '';
    const text = (s.text || s.transcript || '').trim();
    return `${time} ${name}${text}`;
  });
  const content = lines.join('\n');
  triggerDownload(content, 'transcript.txt');
});

/**
 * Renders a result into the new UI: editable speaker tags on top and one
 * line per segment (timestamp, speaker badge, text). Errors are logged.
 */
function renderInNewUI(json) {
  try {
    const segs = extractSegments(json) || [];
    const tagsEl = document.getElementById('speakerTags');
    const linesEl = document.getElementById('transcriptLines');
    const editToggle = document.getElementById('editToggle');
    const saveSpkBtn = document.getElementById('saveSpkBtn');
    if (tagsEl) tagsEl.innerHTML = '';
    if (linesEl) linesEl.innerHTML = '';

    // Collect speakers in order of appearance; color/name maps persist across renders.
    const speakers = [];
    const spkSet = new Set();
    for (const s of segs) {
      const spk = s.speaker ?? s.spk ?? s.speaker_id ?? null;
      if (spk == null) continue;
      if (!spkSet.has(spk)) {
        spkSet.add(spk);
        speakers.push(spk);
      }
      if (!speakerColorMap.has(spk)) {
        const used = new Set([...speakerColorMap.values()]);
        const pool = [1, 2, 3, 1, 2, 3];
        const pick = pool.find((i) => !used.has(i)) || ((speakerColorMap.size % 3) + 1);
        speakerColorMap.set(spk, pick);
      }
      if (!speakerNameMap.has(spk)) {
        speakerNameMap.set(spk, `说话人 ${spk}`);
      }
    }

    // `dataset` values are always strings, while speaker ids may be numbers.
    // This lookup recovers the original id so the maps keep a single key type.
    const speakerByDatasetKey = new Map(speakers.map((spk) => [String(spk), spk]));
    const resolveSpeaker = (datasetValue) => speakerByDatasetKey.get(datasetValue) ?? datasetValue;

    // Editable tags at the top.
    if (tagsEl) {
      speakers.forEach((spk) => {
        const colorIdx = speakerColorMap.get(spk) || 1;
        const tag = document.createElement('span');
        tag.className = `speaker-tag tag-${colorIdx}`;
        tag.contentEditable = editMode ? 'true' : 'false';
        tag.dataset.spk = String(spk);
        tag.textContent = speakerNameMap.get(spk) || `说话人 ${spk}`;
        tag.addEventListener('input', () => {
          speakerNameMap.set(spk, tag.textContent || `说话人 ${spk}`);
          // Keep the badges of the same speaker in the transcript in sync.
          linesEl?.querySelectorAll(`[data-spk="${CSS.escape(String(spk))}"]`).forEach((el) => {
            el.textContent = speakerNameMap.get(spk);
          });
        });
        tagsEl.appendChild(tag);
      });
    }

    // One line per segment.
    for (const s of segs) {
      if (!linesEl) break;
      const spk = s.speaker ?? s.spk ?? s.speaker_id ?? null;
      const item = document.createElement('div');
      item.className = 'transcript-line';
      const ts = document.createElement('span');
      ts.className = 'timestamp';
      const start = segmentStartMs(s);
      const end = segmentEndMs(s);
      ts.textContent = `[${msToTimestamp(start) || '00:00:00.000'} - ${msToTimestamp(end) || '00:00:00.000'}]`;
      const sp = document.createElement('span');
      const colorIdx = speakerColorMap.get(spk) || 1;
      sp.className = `speaker tag-${colorIdx}`;
      sp.dataset.spk = spk != null ? String(spk) : '';
      if (spk != null) sp.textContent = speakerNameMap.get(spk) || `说话人 ${spk}`;
      const text = document.createElement('div');
      text.className = 'text-content';
      text.textContent = (s.text || s.transcript || '').trim();
      item.appendChild(ts);
      if (spk != null) item.appendChild(sp);
      item.appendChild(text);
      linesEl.appendChild(item);
    }

    // Edit toggle: `onclick` is assigned (not added) so re-renders do not stack handlers.
    if (editToggle) {
      editToggle.onclick = () => {
        editMode = !editMode;
        if (saveSpkBtn) saveSpkBtn.style.display = editMode ? 'inline-flex' : 'none';
        tagsEl?.querySelectorAll('.speaker-tag').forEach((el) => {
          el.contentEditable = editMode ? 'true' : 'false';
        });
      };
    }
    if (saveSpkBtn) {
      saveSpkBtn.onclick = () => {
        tagsEl?.querySelectorAll('.speaker-tag').forEach((el) => {
          const spk = resolveSpeaker(el.dataset.spk);
          speakerNameMap.set(spk, el.textContent || `说话人 ${spk}`);
        });
        // Leave edit mode and sync the badges in the transcript.
        editMode = false;
        saveSpkBtn.style.display = 'none';
        tagsEl?.querySelectorAll('.speaker-tag').forEach((el) => {
          el.contentEditable = 'false';
        });
        linesEl?.querySelectorAll('.speaker').forEach((el) => {
          const spk = resolveSpeaker(el.dataset.spk);
          el.textContent = speakerNameMap.get(spk) || `说话人 ${spk}`;
        });
      };
    }
  } catch (e) {
    appendLog('渲染失败:' + (e?.message || String(e)));
  }
}

/**
 * Polls the result endpoint until segments arrive, a success status is
 * reported, or a failure status / HTTP error / timeout occurs.
 * In direct mode the X-Api-Request-Id header is refreshed on every attempt.
 * @returns {Promise<object>} the result JSON (with `transcript` added when segments are present)
 * @throws {Error} on HTTP error, unparseable body, failure status, or after `maxAttempts`
 */
async function pollJobResult({ url, headers, body, useProxy }) {
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    appendLog(`第 ${attempt} 次查询任务状态…`);
    if (!useProxy && headers.has("X-Api-Request-Id")) {
      headers.set("X-Api-Request-Id", newRequestId());
    }
    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body)
    });

    if (!response.ok) {
      const errorBody = await response.text();
      throw new Error(`查询失败,HTTP ${response.status}:${errorBody}`);
    }

    const json = await response.json().catch(() => {
      throw new Error("解析查询结果时发生错误");
    });

    try {
      const summarySegs = extractSegments(json) || [];
      const txt = extractTranscript(json) || '';
      appendLog(`查询结果摘要:segments=${summarySegs.length}, transcriptLen=${txt.length}`);
    } catch (_) {
      appendLog('查询结果已更新');
    }

    // Prefer structured segments (segments/utterances). Text alone does not end
    // polling early, to avoid ending up with one undivided block of text.
    const segs = extractSegments(json);
    const hasSeg = Array.isArray(segs) && segs.length > 0;
    const immediateTranscript = json?.transcript || extractTranscript(json);
    if (hasSeg) {
      return { ...json, transcript: immediateTranscript || null };
    }

    const status = deriveStatus(json);
    if (!status) {
      appendLog("未返回任务状态,继续轮询…");
    } else if (statusSuccessSet.has(status.toUpperCase())) {
      return json;
    } else if (statusFailureSet.has(status.toUpperCase())) {
      throw new Error(`任务失败,状态:${status}`);
    }

    await sleep(pollIntervalMs);
  }

  throw new Error("轮询超时,请稍后重试");
}

/**
 * Recognition options for the new UI: everything on, no sensitive-word filter.
 * Channel splitting is off so the same sentence is not returned once per channel.
 */
function readOptions() {
  return {
    enableItn: true,
    enablePunc: true,
    enableDdc: true,
    enableSpeaker: true,
    enableChannelSplit: false,
    showUtterances: true,
    vadSegment: true,
    sensitiveWords: ""
  };
}

/**
 * Removes duplicate segments in place (same text and start/end falling into
 * the same 300 ms bucket, e.g. one sentence reported on both channels) and
 * stores the filtered list at `resultJson.data.result.segments`.
 */
function dedupeSegments(resultJson) {
  const segs = extractSegments(resultJson) || [];
  const norm = (ms) => Math.round((anyToMs(ms) || 0) / 300);
  const seen = new Set();
  const filtered = [];
  for (const s of segs) {
    const start = segmentStartMs(s) ?? 0;
    const end = segmentEndMs(s) ?? 0;
    const key = `${norm(start)}|${norm(end)}|${(s.text || s.transcript || '').trim()}`;
    if (seen.has(key)) continue;
    seen.add(key);
    filtered.push(s);
  }
  if (!resultJson.data) resultJson.data = {};
  if (!resultJson.data.result) resultJson.data.result = {};
  resultJson.data.result.segments = filtered;
}

/** Stops the elapsed-time ticker if the page registered one on window.startAnalysisTimer. */
function stopAnalysisTimer() {
  const timerHost = window.startAnalysisTimer;
  if (timerHost && timerHost._timer) {
    clearInterval(timerHost._timer);
    timerHost._timer = null;
  }
}

// The new UI's "convert" button triggers submission.
const convertBtn = document.getElementById('convertBtn');
const fileInputNew = document.getElementById('fileInput');

// Selecting a file triggers conversion automatically, so a click is not lost
// when an inline script runs before this file.
let converting = false;
const triggerConvert = () => {
  if (converting) {
    appendLog('已忽略:上一次转换仍在进行中');
    return;
  }
  convertBtn?.click();
};

fileInputNew?.addEventListener('change', () => {
  appendLog('文件已选择,自动触发转换…');
  // Simple debounce.
  clearTimeout(window.__conv_debounce);
  window.__conv_debounce = setTimeout(triggerConvert, 50);
});

convertBtn?.addEventListener('click', async (event) => {
  if (converting) {
    appendLog('已忽略:转换进行中');
    return;
  }
  converting = true;
  event.preventDefault();

  // Everything after the flag is set runs inside try/finally, so an early
  // return or an exception can never leave `converting` stuck at true.
  try {
    resetOutputs();
    if (location.protocol === 'file:') {
      appendLog('当前以 file:// 打开页面,无法向本地代理提交;请访问 http://localhost:6174');
    }

    // The new UI exposes none of these as inputs; edit here to change them
    // (e.g. set apiBase/apiKey to call the API directly instead of the proxy).
    const settings = {
      audioUrl: "",
      uid: "AudioToText",
      modelName: "bigmodel",
      sampleRate: 16000,
      bits: 16,
      channels: 1,
      apiBase: "",
      apiKey: "",
      resourceId: ""
    };

    const audioUrl = settings.audioUrl.trim();
    const audioFile = fileInputNew?.files?.[0] ?? null;

    if (!audioUrl && !audioFile) {
      appendLog("未选择文件,等待提交…");
      return;
    }
    appendLog("检测到文件选择,开始提交识别任务…");

    const apiBase = settings.apiBase.trim();
    const useProxy = apiBase.length === 0;
    const submitUrl = useProxy ? proxySubmitEndpoint : `${apiBase}/submit`;
    const resultUrl = useProxy ? proxyResultEndpoint : `${apiBase}/query`;

    const uid = settings.uid.trim() || "anonymous";
    const modelName = settings.modelName.trim() || "bigmodel";
    const sampleRate = Number(settings.sampleRate) || 16000;
    const bits = Number(settings.bits) || 16;
    const channels = Number(settings.channels) || 1;
    const format = audioUrl ? detectAudioFormat(audioUrl) : detectAudioFormat(audioFile.name);
    const requestId = newRequestId();
    const apiKey = settings.apiKey.trim();
    const resourceId = settings.resourceId.trim();

    if (submitButton) submitButton.disabled = true;

    if (audioUrl) {
      appendLog(`使用音频 URL:${audioUrl}`);
    } else {
      appendLog(`读取文件:${audioFile.name}`);
    }

    const submitHeaders = createHeaders({ useProxy, apiKey, resourceId, requestId });
    const options = readOptions();
    const mappedCodec = mapAudioCodec(format);
    const finalRate = resolveSampleRate(format, sampleRate);

    let payload;
    if (audioUrl) {
      appendLog(`提交参数:source=url, format=${format}, codec=${mappedCodec}, rate=${finalRate}, ch=${channels}`);
      payload = {
        user: { uid },
        audio: { url: audioUrl, format, codec: mappedCodec, rate: finalRate, bits, channel: channels },
        request: buildRequestSection(modelName, options)
      };
    } else {
      const base64WithPrefix = await readFileAsBase64(audioFile);
      const base64Data = stripDataUriPrefix(base64WithPrefix);
      appendLog(`文件编码完成,长度约 ${Math.round(base64Data.length / 1024)} KB`);
      payload = buildSubmitPayload({ base64Data, format, sampleRate, bits, channels, uid, modelName, options });
      // Apply the corrected codec mapping and the sample-rate fallback for compressed formats.
      payload.audio.codec = mappedCodec;
      payload.audio.rate = finalRate;
      payload.request = buildRequestSection(modelName, options);
      appendLog(`提交参数:source=file, format=${format}, codec=${mappedCodec}, rate=${finalRate}, ch=${channels}`);
    }

    // Log the payload without the inline audio bytes.
    appendLog(`提交请求体:${JSON.stringify(redactPayloadForLog(payload))}`);
    const submitResponse = await fetch(submitUrl, {
      method: "POST",
      headers: submitHeaders,
      body: JSON.stringify(payload)
    }).catch((err) => {
      appendLog("提交请求未发出,可能没有后端代理或跨域受限");
      throw err;
    });

    if (!submitResponse.ok) {
      const errorBody = await submitResponse.text();
      appendLog("提交失败:请在本地运行带代理版本,或在代码里配置直连 API 基础地址");
      throw new Error(`提交失败,HTTP ${submitResponse.status}:${errorBody}`);
    }

    const submitJson = await submitResponse.json();
    appendLog(`提交成功:${JSON.stringify(submitJson)}`);

    const jobKey = extractJobKey(submitJson);
    if (!jobKey) {
      throw new Error("未能从提交响应中解析到任务 ID");
    }

    appendLog(`任务 ID:${jobKey}`);

    const pollHeaders = createHeaders({ useProxy, apiKey, resourceId, requestId: newRequestId() });
    const pollBody = { job_key: jobKey, taskId: jobKey };

    const resultJson = await pollJobResult({
      url: resultUrl,
      headers: pollHeaders,
      body: pollBody,
      useProxy
    });

    appendLog("任务完成,处理结果如下:");
    appendLog(JSON.stringify(resultJson));

    dedupeSegments(resultJson);
    lastResultJson = resultJson;
    renderInNewUI(lastResultJson);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    appendLog(`发生错误:${message}`);
    if (transcriptElement) transcriptElement.value = message;
  } finally {
    converting = false;
    if (submitButton) submitButton.disabled = false;
    // Stop the elapsed-time ticker on success, failure and early exit alike.
    stopAnalysisTimer();
  }
});