// Listing metadata from the code viewer: 974 lines, 37 KiB, JavaScript.
// The legacy form was removed; the new UI triggers submission through buttons.
const formElement = null;
const submitButton = document.getElementById("submitButton");
// `let` because the DOMContentLoaded handler looks this element up again.
let debugLogElement = document.getElementById("debugLog") || null;
const transcriptElement = document.getElementById("transcriptOutput") || null;
const downloadTxtBtn = document.getElementById("downloadTxtBtn");
const downloadSrtBtn = document.getElementById("downloadSrtBtn");
const copyBtn = document.getElementById("copyBtn");
const viewModeSelect = document.getElementById("viewMode");
const exportRangeTxtBtn = document.getElementById("exportRangeTxt");
const exportRangeSrtBtn = document.getElementById("exportRangeSrt");
const exportCsvBtn = document.getElementById("exportCsv");
const rangeStartInput = document.getElementById("rangeStart");
const rangeEndInput = document.getElementById("rangeEnd");
const exportToolbar = document.getElementById("exportToolbar");

// Result JSON read by the export / copy handlers; they do nothing while it is null.
let lastResultJson = null;
// Edited segment text, keyed by `${start}-${end}`.
const editedMap = new Map();

// Speaker display names and colour assignments.
const speakerNameMap = new Map(); // speakerId -> display name (editable)
const speakerColorMap = new Map(); // speakerId -> 1..3, used as CSS classes tag-1..tag-3
let editMode = false; // whether "edit speaker names" mode is switched on
|
||
|
||
// Load marker: makes it easy to confirm that this script actually ran.
appendLog('前端脚本已加载');

// Once the DOM is parsed, look the log element up again and probe the
// backend's /health endpoint, logging either the JSON reply or the failure.
window.addEventListener('DOMContentLoaded', () => {
  debugLogElement = document.getElementById('debugLog');
  appendLog('DOM 已就绪');
  fetch('/health')
    .then((response) => response.json())
    .then((payload) => appendLog('健康检查:' + JSON.stringify(payload)))
    .catch((error) => appendLog('健康检查失败:' + error.message));
});
|
||
// Endpoints used when requests go through the proxy instead of a direct API base.
const proxySubmitEndpoint = "/api/asr/submit";
const proxyResultEndpoint = "/api/asr/result";

// Result polling: wait this long between queries, and give up after this many.
const pollIntervalMs = 2000;
const maxAttempts = 60;

// Task status values (compared upper-cased) that end polling.
const statusSuccessSet = new Set(["SUCCEEDED", "SUCCESS", "FINISHED"]);
const statusFailureSet = new Set(["FAILED", "FAIL", "ERROR"]);
|
||
|
||
/**
 * Append a timestamped line to the on-page debug log.
 *
 * Uses the cached `debugLogElement` when set, otherwise looks up `#debugLog`.
 * Does nothing when the element cannot be found. The placeholder text
 * "等待提交…" is replaced by the first line rather than appended to.
 *
 * @param {string} message - Text to log.
 */
function appendLog(message) {
  const el = debugLogElement || document.getElementById("debugLog");
  if (!el) return;
  const nextLine = `[${new Date().toISOString()}] ${message}`;
  el.textContent = el.textContent === "等待提交…" ? nextLine : `${el.textContent}\n${nextLine}`;
}
|
||
|
||
/**
 * Clear the transcript output, reset the debug log to its placeholder text
 * and empty the `#transcriptLines` / `#speakerTags` containers.
 *
 * The log element is resolved the same way appendLog() resolves it (cached
 * reference first, then a DOM lookup), so the log is also reset when the
 * cached reference has not been populated.
 */
function resetOutputs() {
  if (transcriptElement) transcriptElement.value = "";

  const logEl = debugLogElement || document.getElementById("debugLog");
  if (logEl) logEl.textContent = "等待提交…";

  for (const id of ["transcriptLines", "speakerTags"]) {
    const el = document.getElementById(id);
    if (el) el.innerHTML = "";
  }
}
|
||
|
||
/**
 * Remove a leading `data:<mediatype>;base64,` header from a data URL.
 *
 * The header is matched only up to the first comma, so the (possibly very
 * large) base64 payload is never scanned by the pattern.
 *
 * @param {string} base64String - Data URL or bare base64 text.
 * @returns {string} The text without the header; unchanged if there was none.
 */
function stripDataUriPrefix(base64String) {
  return base64String.replace(/^data:[^,]*;base64,/, "");
}
|
||
|
||
/**
 * Derive an audio format name from a file name or URL by its extension.
 *
 * @param {string} nameOrUrl - File name, path or URL.
 * @returns {string} Lower-cased extension, with "m4a"/"mp4a"/"mp4" normalised
 *   to "m4a" and "oga" to "ogg". Falls back to "wav" when the input is not a
 *   non-empty string or has no (or an empty) extension.
 */
function detectAudioFormat(nameOrUrl) {
  const fallbackFormat = "wav";
  if (!nameOrUrl || typeof nameOrUrl !== "string") return fallbackFormat;

  let path = nameOrUrl.trim();
  try {
    // For an absolute URL keep only the pathname.
    path = new URL(path).pathname;
  } catch (_) {
    // Not an absolute URL: keep working on the raw string.
  }

  // Drop any query string / fragment that is still attached.
  path = path.split("?")[0].split("#")[0];

  const baseName = path.slice(path.lastIndexOf("/") + 1);
  const lastDot = baseName.lastIndexOf(".");
  if (lastDot === -1) return fallbackFormat;

  const ext = baseName.slice(lastDot + 1).toLowerCase();
  // A trailing dot ("name.") carries no extension; do not return "".
  if (ext === "") return fallbackFormat;

  // Normalise a few aliases.
  if (["m4a", "mp4a", "mp4"].includes(ext)) return "m4a";
  if (ext === "oga") return "ogg";
  return ext;
}
|
||
|
||
/**
 * Read a File/Blob with FileReader.readAsDataURL.
 *
 * @param {Blob} file - The file to read.
 * @returns {Promise<string>} Resolves with `reader.result`, i.e. the complete
 *   data URL including its `data:…;base64,` header.
 * @throws {Error} Rejects with "读取文件失败"; the underlying `reader.error`
 *   is attached as `cause` so the real reason is not lost.
 */
function readFileAsBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(new Error("读取文件失败", { cause: reader.error }));
    reader.readAsDataURL(file);
  });
}
|
||
|
||
/**
 * Build the request headers for a submit/query call.
 *
 * @param {object} params
 * @param {boolean} params.useProxy - When true only Content-Type is set.
 * @param {string} [params.apiKey] - Required when not using the proxy.
 * @param {string} [params.resourceId] - Sent as X-Api-Resource-Id when non-empty.
 * @param {string} [params.requestId] - Sent as X-Api-Request-Id when provided.
 * @returns {Headers}
 * @throws {Error} When calling the API directly without an API key.
 */
function createHeaders({ useProxy, apiKey, resourceId, requestId }) {
  const headers = new Headers();
  headers.set("Content-Type", "application/json");
  if (useProxy) return headers;

  if (!apiKey) {
    throw new Error("当直连豆包接口时必须填写 API Key");
  }

  headers.set("x-api-key", apiKey);
  if (resourceId) headers.set("X-Api-Resource-Id", resourceId);
  // Headers.set() would stringify a missing id to the literal "undefined".
  if (requestId != null) headers.set("X-Api-Request-Id", requestId);
  headers.set("X-Api-Sequence", "-1");
  return headers;
}
|
||
|
||
/**
 * Assemble the JSON body for a submit request that carries inline audio data.
 *
 * @param {object} params
 * @param {string} params.base64Data - Audio content, sent as `audio.data`.
 * @param {string} params.format - Audio format; "mp3", "wav" and "pcm" are
 *   sent with codec "raw", any other format is reused as the codec.
 * @param {number} params.sampleRate - Sent as `audio.rate`.
 * @param {number} params.bits - Sent as `audio.bits`.
 * @param {number} params.channels - Sent as `audio.channel`.
 * @param {string} params.uid - Sent as `user.uid`.
 * @param {string} params.modelName - Sent as `request.model_name`.
 * @param {object} [params.options] - Recognition switches (see readOptions);
 *   defaults to an empty object so a missing value does not throw.
 * @returns {{user: object, audio: object, request: object}}
 */
function buildSubmitPayload({
  base64Data,
  format,
  sampleRate,
  bits,
  channels,
  uid,
  modelName,
  options = {}
}) {
  const codec = ["mp3", "wav", "pcm"].includes(format) ? "raw" : format;
  return {
    user: { uid },
    audio: {
      data: base64Data,
      format,
      codec,
      rate: sampleRate,
      bits,
      channel: channels
    },
    request: {
      model_name: modelName,
      enable_itn: options.enableItn,
      enable_punc: options.enablePunc,
      enable_ddc: options.enableDdc,
      enable_speaker_info: options.enableSpeaker,
      enable_channel_split: options.enableChannelSplit,
      show_utterances: options.showUtterances,
      vad_segment: options.vadSegment,
      sensitive_words_filter: options.sensitiveWords
    }
  };
}
|
||
|
||
/**
 * Find the job/task identifier in a submit response.
 *
 * Checks, in order: `result.{job_key,task_id,taskId}`,
 * `data.{job_key,taskId,task_id}`, top-level `{job_key,task_id,taskId}`,
 * then `RequestId` and `request_id`.
 *
 * @param {object} result - Parsed response JSON (may be null/undefined).
 * @returns {string|null} The first non-empty string found, else null.
 */
function extractJobKey(result) {
  const candidates = [
    result?.result?.job_key,
    result?.result?.task_id,
    result?.result?.taskId,
    result?.data?.job_key,
    result?.data?.taskId,
    result?.data?.task_id,
    result?.job_key,
    result?.task_id,
    result?.taskId,
    result?.RequestId,
    result?.request_id
  ];
  const isNonEmptyString = (value) => typeof value === "string" && value.length > 0;
  return candidates.find(isNonEmptyString) ?? null;
}
|
||
|
||
/**
 * Find the task status string in a query response.
 *
 * Checks, in order: `result.status`, `result.task_status`, `data.status`,
 * `data.task_status`, then top-level `status`.
 *
 * @param {object} result - Parsed response JSON (may be null/undefined).
 * @returns {string|null} The first non-empty string found, else null.
 */
function deriveStatus(result) {
  const candidates = [
    result?.result?.status,
    result?.result?.task_status,
    result?.data?.status,
    result?.data?.task_status,
    result?.status
  ];
  const isNonEmptyString = (value) => typeof value === "string" && value.length > 0;
  return candidates.find(isNonEmptyString) ?? null;
}
|
||
|
||
/**
 * Join the text of a list of segments into one newline-separated string.
 *
 * For each segment the trimmed `text` is used, or the trimmed `transcript`
 * when `text` is missing/blank; segments with neither are skipped.
 *
 * @param {Array<object>} segments
 * @returns {string|null} Joined text, or null when the input is not a
 *   non-empty array or no segment carries any text.
 */
function flattenSegmentsText(segments) {
  if (!Array.isArray(segments) || segments.length === 0) return null;

  const pickText = (segment) => {
    for (const field of [segment?.text, segment?.transcript]) {
      if (typeof field === "string" && field.trim().length > 0) return field.trim();
    }
    return null;
  };

  const texts = segments.map(pickText).filter((text) => text !== null);
  return texts.length > 0 ? texts.join("\n") : null;
}
|
||
|
||
/**
 * Extract the transcript text from a result object.
 *
 * Segment text wins: `data.result.segments`, then `result.segments`, are
 * flattened with flattenSegmentsText(). Otherwise the first non-blank string
 * among several known text fields is returned as-is (not trimmed).
 *
 * @param {object} result - Parsed response JSON (may be null/undefined).
 * @returns {string|null}
 */
function extractTranscript(result) {
  const fromSegments =
    flattenSegmentsText(result?.data?.result?.segments) ?? flattenSegmentsText(result?.result?.segments);
  if (fromSegments) return fromSegments;

  const candidates = [
    result?.data?.result?.text,
    result?.result?.text,
    result?.result?.output?.choices?.[0]?.text,
    result?.result?.output?.transcript,
    result?.result?.transcript,
    result?.data?.output?.choices?.[0]?.text,
    result?.data?.transcript
  ];
  const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
  return candidates.find(isNonBlankString) ?? null;
}
|
||
|
||
/**
 * Format a millisecond offset as `HH:MM:SS.mmm`.
 *
 * Negative values get a leading "-"; fractional milliseconds are truncated.
 * Hours are not capped at 24 and may exceed two digits.
 *
 * @param {number} ms
 * @returns {string|null} The timestamp, or null when `ms` is not a finite
 *   number (non-number, NaN, ±Infinity).
 */
function msToTimestamp(ms) {
  if (typeof ms !== "number" || !Number.isFinite(ms)) return null;

  const sign = ms < 0 ? "-" : "";
  const total = Math.floor(Math.abs(ms));
  const pad = (value, width) => String(value).padStart(width, "0");

  const hours = Math.floor(total / 3600000);
  const minutes = Math.floor((total % 3600000) / 60000);
  const seconds = Math.floor((total % 60000) / 1000);
  const millis = total % 1000;

  return `${sign}${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(millis, 3)}`;
}
|
||
|
||
/**
 * Normalise a time value to milliseconds.
 *
 * - number: treated as milliseconds already (non-finite numbers give null);
 * - "1.23": a plain decimal string is treated as SECONDS and converted,
 *   rounded to the nearest millisecond to absorb floating-point error
 *   (1.005 * 1000 is 1004.999… in IEEE-754);
 * - "HH:MM:SS.f": clock format with 1–3 fractional digits.
 *
 * @param {number|string|*} value
 * @returns {number|null} Milliseconds, or null when the value is not understood.
 */
function anyToMs(value) {
  if (typeof value === "number") return Number.isFinite(value) ? value : null;
  if (typeof value !== "string") return null;

  const text = value.trim();
  if (/^\d+(\.\d+)?$/.test(text)) {
    return Math.round(Number(text) * 1000);
  }

  const match = text.match(/^(\d{2}):(\d{2}):(\d{2})\.(\d{1,3})$/);
  if (!match) return null;

  const [, hours, minutes, seconds, fraction] = match;
  const wholeSeconds = (Number(hours) * 60 + Number(minutes)) * 60 + Number(seconds);
  // "5" means 500 ms, "05" means 50 ms: right-pad the fraction to 3 digits.
  return wholeSeconds * 1000 + Number(fraction.padEnd(3, "0"));
}
|
||
|
||
/**
 * Locate the time-coded segments in a result object.
 *
 * Looks for a non-empty `segments` array at `data.result`, `result`, then the
 * top level, returning the first one found unchanged. If none exists, the
 * first non-empty `utterances` array (same three locations) is converted to
 * the `{start_ms, end_ms, text, channel, speaker}` shape.
 *
 * An EMPTY array at an earlier location no longer hides a populated array at
 * a later one.
 *
 * @param {object} obj - Parsed response JSON (may be null/undefined).
 * @returns {Array<object>|null}
 */
function extractSegments(obj) {
  const firstNonEmptyArray = (...lists) =>
    lists.find((list) => Array.isArray(list) && list.length > 0) ?? null;

  const segments = firstNonEmptyArray(
    obj?.data?.result?.segments,
    obj?.result?.segments,
    obj?.segments
  );
  if (segments) return segments;

  // Fall back to `utterances`, converted to the unified segment shape.
  const utterances = firstNonEmptyArray(
    obj?.data?.result?.utterances,
    obj?.result?.utterances,
    obj?.utterances
  );
  if (!utterances) return null;

  return utterances.map((u) => ({
    start_ms: u?.start_ms ?? u?.start_time ?? u?.start,
    end_ms: u?.end_ms ?? u?.end_time ?? u?.end,
    text: u?.text,
    channel: u?.channel ?? u?.channel_index ?? u?.ch ?? u?.additions?.channel_id,
    speaker: u?.speaker ?? u?.spk ?? u?.speaker_id ?? u?.additions?.speaker
  }));
}
|
||
|
||
/**
 * Render every segment of a result as one text line of the form
 * `[start-end] (ch=…, spk=…) text`.
 *
 * A missing start or end is shown as "?"; the range and the attribute list
 * are each left out when empty. Channel and speaker also fall back to
 * `additions.channel_id` / `additions.speaker`, the same aliases
 * groupBySpeakerAndChannel() reads.
 *
 * @param {object} obj - Result JSON passed to extractSegments().
 * @returns {string|null} Newline-joined lines, or null when there are no segments.
 */
function formatSegmentsDetailed(obj) {
  const segments = extractSegments(obj);
  if (!Array.isArray(segments) || segments.length === 0) return null;

  const nonBlank = (value) => (typeof value === "string" && value.trim()) || "";

  return segments
    .map((seg) => {
      const start = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start ?? seg.begin_ms ?? seg.begin_time);
      const end = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end ?? seg.finish_ms ?? seg.finish_time);
      const startTs = start != null ? msToTimestamp(start) : null;
      const endTs = end != null ? msToTimestamp(end) : null;

      const ch = seg.channel ?? seg.channel_index ?? seg.ch ?? seg.additions?.channel_id ?? null;
      const spk = seg.speaker ?? seg.spk ?? seg.speaker_id ?? seg.additions?.speaker ?? null;
      const text = nonBlank(seg.text) || nonBlank(seg.transcript);

      const attrs = [];
      if (ch != null) attrs.push(`ch=${ch}`);
      if (spk != null) attrs.push(`spk=${spk}`);

      const range = startTs || endTs ? `[${startTs || "?"}-${endTs || "?"}]` : "";
      const header = [range, attrs.length ? `(${attrs.join(", ")})` : ""].filter(Boolean).join(" ");
      return header ? `${header} ${text}` : text;
    })
    .join("\n");
}
|
||
|
||
/**
 * Promise-based delay.
 *
 * @param {number} durationMs - Delay passed to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(durationMs) {
  return new Promise((resolve) => {
    setTimeout(resolve, durationMs);
  });
}
|
||
|
||
/**
 * Return the result's segments with normalised times and edited text applied.
 *
 * Each segment is copied with `start_ms` / `end_ms` converted by anyToMs()
 * (0 when missing) and `text` taken from `editedMap` under the key
 * `${start}-${end}`, falling back to the segment's own `text`/`transcript`.
 *
 * @param {object} obj - Result JSON passed to extractSegments().
 * @returns {Array<object>} New segment objects; empty when there are none.
 */
function buildSegmentsFromEdited(obj) {
  const segments = extractSegments(obj) || [];
  return segments.map((seg) => {
    const start = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start) ?? 0;
    const end = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end) ?? 0;
    const text = editedMap.get(`${start}-${end}`) ?? seg.text ?? seg.transcript ?? "";
    return { ...seg, start_ms: start, end_ms: end, text };
  });
}
|
||
|
||
/**
 * Build SubRip (.srt) text from a result's segments.
 *
 * Cues are numbered from 1. Times use `HH:MM:SS,mmm` ("00:00:00,000" when a
 * time is missing). When a segment has a speaker id, the cue text is
 * prefixed with `[name] ` using `speakerNameMap`, or "说话人 <id>" when no
 * name is stored.
 *
 * @param {object} obj - Result JSON passed to extractSegments().
 * @returns {string|null} The SRT document, or null when there are no segments.
 */
function buildSrtFromSegments(obj) {
  const segments = extractSegments(obj);
  if (!Array.isArray(segments) || segments.length === 0) return null;

  const toSrtTime = (value) => msToTimestamp(anyToMs(value))?.replace(".", ",") ?? "00:00:00,000";
  const nonBlank = (value) => (typeof value === "string" && value.trim()) || "";

  const cues = segments.map((seg, position) => {
    const startTs = toSrtTime(seg.start_ms ?? seg.start_time ?? seg.start);
    const endTs = toSrtTime(seg.end_ms ?? seg.end_time ?? seg.end);

    const spkId = seg.speaker ?? seg.spk ?? seg.speaker_id;
    const spkName = spkId != null ? (speakerNameMap.get(spkId) || `说话人 ${spkId}`) : "";
    const prefix = spkName ? `[${spkName}] ` : "";
    const text = nonBlank(seg.text) || nonBlank(seg.transcript);

    return `${position + 1}\n${startTs} --> ${endTs}\n${prefix}${text}\n`;
  });

  // Every cue already ends with "\n"; joining with "\n" yields the blank
  // separator line between cues.
  return cues.join("\n");
}
|
||
|
||
/**
 * Group segments by channel, then by speaker, preserving encounter order.
 *
 * Channel is read from `channel`, `channel_index`, `ch` or
 * `additions.channel_id`; speaker from `speaker`, `spk`, `speaker_id` or
 * `additions.speaker`. A missing value is grouped under "unknown".
 *
 * @param {Iterable<object>|null|undefined} segs - Segments; null/undefined is
 *   treated as empty.
 * @returns {Map<*, Map<*, Array<object>>>} channel -> (speaker -> segments).
 */
function groupBySpeakerAndChannel(segs) {
  const byChannel = new Map();
  for (const seg of segs ?? []) {
    const channel = seg.channel ?? seg.channel_index ?? seg.ch ?? seg.additions?.channel_id ?? "unknown";
    const speaker = seg.speaker ?? seg.spk ?? seg.speaker_id ?? seg.additions?.speaker ?? "unknown";

    let bySpeaker = byChannel.get(channel);
    if (!bySpeaker) {
      bySpeaker = new Map();
      byChannel.set(channel, bySpeaker);
    }

    const bucket = bySpeaker.get(speaker);
    if (bucket) {
      bucket.push(seg);
    } else {
      bySpeaker.set(speaker, [seg]);
    }
  }
  return byChannel;
}
|
||
|
||
/**
 * (Re)build the clickable timeline ruler: a hover tooltip plus tick marks
 * and labels every max(10 s, duration / 10).
 *
 * Fixes over the previous version:
 * - Listeners from an earlier build on the same ruler are removed first (via
 *   an AbortController remembered per element); before, every rebuild stacked
 *   another mousemove/mouseenter/click listener bound to a stale duration.
 * - Pointer position is measured from the ruler's own bounding box.
 *   `event.offsetX` is relative to the event TARGET, so hovering a tick,
 *   label or the tooltip used to give a position relative to that child.
 * - Clicking a tick label no longer bubbles into the ruler's click handler,
 *   which used to fire `onJump` a second time.
 *
 * The tooltip previews the first segment of `buildTimeline._segs` (set by
 * the caller) that contains the hovered time.
 *
 * @param {number} durationMs - Total duration; nothing is drawn when <= 0.
 * @param {HTMLElement|null} rulerEl - Ruler container; no-op when missing.
 * @param {(timeMs: number) => void} [onJump] - Called with the clicked time.
 */
function buildTimeline(durationMs, rulerEl, onJump) {
  if (!rulerEl) return;

  // Detach the listeners installed by a previous build on this ruler.
  if (!buildTimeline._controllers) buildTimeline._controllers = new WeakMap();
  const controllers = buildTimeline._controllers;
  controllers.get(rulerEl)?.abort();
  controllers.delete(rulerEl);

  rulerEl.innerHTML = "";
  if (!durationMs || durationMs <= 0) return;

  const controller = new AbortController();
  controllers.set(rulerEl, controller);
  const listenerOptions = { signal: controller.signal };

  const step = Math.max(10000, Math.floor(durationMs / 10)); // 10 s or more

  // Tooltip element
  const tooltip = document.createElement("div");
  tooltip.className = "tooltip";
  tooltip.style.left = "0%";
  rulerEl.appendChild(tooltip);

  // Horizontal pointer position as a 0..1 fraction of the ruler width.
  const pointerFraction = (event) => {
    const x = event.clientX - rulerEl.getBoundingClientRect().left;
    return Math.max(0, Math.min(1, x / (rulerEl.clientWidth || 1)));
  };

  const updateTooltip = (event) => {
    const fraction = pointerFraction(event);
    const timeMs = Math.round(durationMs * fraction);
    tooltip.style.left = `${fraction * 100}%`;

    const stamp = msToTimestamp(timeMs)?.slice(0, 12);
    let label = stamp || "";

    const segs = Array.isArray(buildTimeline._segs) ? buildTimeline._segs : [];
    const near = segs.find((s) => {
      const start = anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
      const end = anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;
      return timeMs >= start && timeMs <= end;
    });
    if (near) {
      const ch = near.channel ?? near.channel_index ?? near.ch;
      const spk = near.speaker ?? near.spk ?? near.speaker_id;
      const previewRaw = (near.text || near.transcript || "").replace(/\n/g, " ");
      const preview = previewRaw.length > 36 ? (previewRaw.slice(0, 36) + "…") : previewRaw;
      label = `${stamp} • ch=${ch ?? "-"} spk=${spk ?? "-"} • ${preview}`;
    }
    tooltip.textContent = label;
  };

  rulerEl.addEventListener("mousemove", updateTooltip, listenerOptions);
  rulerEl.addEventListener("mouseenter", updateTooltip, listenerOptions);
  rulerEl.addEventListener("click", (event) => {
    onJump?.(Math.round(durationMs * pointerFraction(event)));
  }, listenerOptions);

  for (let t = 0; t <= durationMs; t += step) {
    const percent = (t / durationMs) * 100;

    const tick = document.createElement("div");
    tick.className = "tick";
    tick.style.left = `${percent}%`;

    const label = document.createElement("div");
    label.className = "label";
    label.style.left = `${percent}%`;
    label.textContent = msToTimestamp(t)?.slice(0, 8) || "";
    label.style.cursor = "pointer";
    label.addEventListener("click", (event) => {
      // Jump to the exact tick time only; do not also trigger the ruler click.
      event.stopPropagation();
      onJump?.(t);
    });

    rulerEl.appendChild(tick);
    rulerEl.appendChild(label);
  }
}
|
||
|
||
/**
 * Return `text` as HTML with every keyword occurrence wrapped in
 * `<span class="highlight">…</span>` (case-insensitive).
 *
 * The result is meant for `innerHTML`, so all text is HTML-escaped,
 * including when there are no keywords. All keywords are matched in one
 * pass over the RAW text; the previous keyword-by-keyword replacement could
 * match inside markup it had just inserted (e.g. the keywords "span" or
 * "class") and produce broken HTML.
 *
 * @param {string} text - Plain text to render.
 * @param {Array<string>|null|undefined} keywords - Literal keywords; empty
 *   entries are ignored.
 * @returns {string} Escaped HTML.
 */
function highlightKeywords(text, keywords) {
  const htmlEntities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const escapeHtml = (value) => value.replace(/[&<>"']/g, (ch) => htmlEntities[ch]);
  const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const source = String(text ?? "");
  const patterns = (Array.isArray(keywords) ? keywords : [])
    .filter((kw) => typeof kw === "string" && kw.length > 0)
    // Longest first so the alternation prefers the longest keyword.
    .sort((a, b) => b.length - a.length)
    .map(escapeRegExp);
  if (patterns.length === 0) return escapeHtml(source);

  // With a single capture group, split() alternates plain text (even
  // indexes) and matched keywords (odd indexes).
  const matcher = new RegExp(`(${patterns.join("|")})`, "gi");
  return source
    .split(matcher)
    .map((part, index) =>
      index % 2 === 1 ? `<span class="highlight">${escapeHtml(part)}</span>` : escapeHtml(part)
    )
    .join("");
}
|
||
|
||
/**
 * Legacy structured view (kept but not used by the new UI): one column per
 * channel, one group per speaker, one editable item per segment, plus the
 * timeline ruler.
 *
 * Fixes over the previous version:
 * - no longer throws when the `#sensitiveWords` input is absent;
 * - the channel header is built with textContent instead of interpolating
 *   the server-supplied channel id into innerHTML;
 * - the timeline jump selector uses the same `?? 0` fallback as the
 *   `data-start` / `data-end` attributes, so segments without a time can
 *   still be matched;
 * - a missing time is shown as "00:00:00.000" instead of "null".
 *
 * Segment text is assigned through innerHTML because highlightKeywords()
 * returns HTML markup.
 *
 * @param {object} json - Result JSON.
 */
function renderStructuredView(json) {
  const segs = extractSegments(json) || [];
  const container = document.getElementById("channelsContainer");
  const ruler = document.getElementById("timelineRuler");
  if (!container || !ruler) { appendLog('结构化容器缺失,跳过渲染'); return; }
  const duration = anyToMs(json?.data?.additions?.duration || json?.result?.additions?.duration) || json?.audio_info?.duration || 0;

  const startOf = (s) => anyToMs(s.start_ms ?? s.start_time ?? s.start);
  const endOf = (s) => anyToMs(s.end_ms ?? s.end_time ?? s.end);

  container.innerHTML = "";
  buildTimeline._segs = segs;
  buildTimeline(duration, ruler, (jumpMs) => {
    // Timeline click: highlight the segment(s) covering the clicked time.
    const near = segs.filter((s) => jumpMs >= (startOf(s) ?? 0) && jumpMs <= (endOf(s) ?? 0));
    document.querySelectorAll('.segment-item.active').forEach((el) => el.classList.remove('active'));
    near.forEach((s) => {
      const selector = `.segment-item[data-start="${startOf(s) ?? 0}"][data-end="${endOf(s) ?? 0}"]`;
      const el = container.querySelector(selector);
      if (el) {
        el.classList.add('active');
        el.scrollIntoView({ behavior: 'smooth', block: 'center' });
      }
    });
  });

  const channelMap = groupBySpeakerAndChannel(segs);
  const keywordsInput = document.getElementById("sensitiveWords")?.value?.trim() ?? "";
  const keywords = keywordsInput ? keywordsInput.split(/[,,\s]+/).filter(Boolean) : [];

  for (const [ch, speakerMap] of channelMap.entries()) {
    const column = document.createElement("div");
    column.className = "channel-column";

    const header = document.createElement("div");
    header.className = "channel-header";
    const channelLabel = document.createElement("span");
    channelLabel.textContent = `声道 ch=${ch}`;
    const groupingLabel = document.createElement("span");
    groupingLabel.textContent = "分组:说话人";
    header.appendChild(channelLabel);
    header.appendChild(groupingLabel);
    column.appendChild(header);

    for (const [spk, items] of speakerMap.entries()) {
      const group = document.createElement("div");
      group.className = "speaker-group";

      const title = document.createElement("div");
      title.className = "speaker-title";

      const avatar = document.createElement("span");
      avatar.className = "speaker-avatar";
      const colorIdx = (Number(spk) || 0) % 6 || 1;
      avatar.style.background = getComputedStyle(document.documentElement).getPropertyValue(`--speaker-${colorIdx}`).trim() || "var(--accent)";

      // Badge text: for a string id use its first letter / CJK character
      // (upper-cased); any other id is shown as-is.
      let badgeText = "";
      if (typeof spk === "string") {
        const m = spk.match(/[A-Za-z\u4e00-\u9fa5]/);
        badgeText = (m ? m[0] : spk[0] || "?").toUpperCase();
      } else {
        badgeText = String(spk ?? "?");
      }
      avatar.textContent = badgeText;

      const label = document.createElement("span");
      label.textContent = `说话人 ${spk}`;
      title.appendChild(avatar);
      title.appendChild(label);
      group.appendChild(title);

      for (const s of items) {
        const item = document.createElement("div");
        item.className = "segment-item";
        const start = startOf(s);
        const end = endOf(s);
        item.dataset.start = String(start ?? 0);
        item.dataset.end = String(end ?? 0);

        const meta = document.createElement("div");
        meta.className = "segment-meta";
        meta.textContent = `[${msToTimestamp(start) || '00:00:00.000'} - ${msToTimestamp(end) || '00:00:00.000'}]`;

        const text = document.createElement("div");
        text.contentEditable = "true";
        text.className = "editable";
        const raw = (typeof s.text === "string" && s.text.trim()) || (typeof s.transcript === "string" && s.transcript.trim()) || "";
        const key = `${start}-${end}`;
        // Seed the edit store once; later renders show the edited text.
        if (!editedMap.has(key)) editedMap.set(key, raw);
        text.innerHTML = highlightKeywords(editedMap.get(key), keywords);
        text.addEventListener("input", () => {
          editedMap.set(key, text.textContent || "");
        });

        item.appendChild(meta);
        item.appendChild(text);
        group.appendChild(item);
      }

      column.appendChild(group);
    }

    container.appendChild(column);
  }
}
|
||
|
||
/** Intentional no-op: the legacy view-mode switch is no longer used. */
function renderByViewMode() {
  return undefined;
}
|
||
|
||
|
||
// The legacy view-mode switching code was removed.
|
||
|
||
|
||
/**
 * Tell whether a segment overlaps the closed range [fromMs, toMs].
 *
 * A null/undefined bound is open on that side. Missing segment times count
 * as 0.
 *
 * @param {object} s - Segment with start/end fields.
 * @param {number|null} fromMs - Lower bound in ms, or null.
 * @param {number|null} toMs - Upper bound in ms, or null.
 * @returns {boolean}
 */
function withinRange(s, fromMs, toMs) {
  const start = anyToMs(s.start_ms ?? s.start_time ?? s.start) ?? 0;
  const end = anyToMs(s.end_ms ?? s.end_time ?? s.end) ?? 0;
  const endsBeforeRange = fromMs != null && end < fromMs;
  const startsAfterRange = toMs != null && start > toMs;
  return !endsBeforeRange && !startsAfterRange;
}
|
||
|
||
/**
 * Parse a user-entered time into milliseconds.
 *
 * Accepted forms:
 * - digits only ("1500"): taken as milliseconds;
 * - "M:SS" / "MM:SS" or "H:MM:SS" / "HH:MM:SS", optionally followed by
 *   a fraction of 1–3 digits after "." or "," (e.g. "01:02.5").
 *
 * Non-string input is converted with String() instead of throwing.
 *
 * @param {string|number|null|undefined} v
 * @returns {number|null} Milliseconds, or null for empty/unrecognised input.
 */
function parseTimeInput(v) {
  if (v == null) return null;
  const text = String(v).trim();
  if (text === "") return null;
  if (/^\d+$/.test(text)) return Number(text);

  const match = text.match(/^(\d{1,2}):(\d{2})(?::(\d{2}))?(?:[.,](\d{1,3}))?$/);
  if (!match) return null;

  const [, first, second, third, fraction] = match;
  // Two fields mean minutes:seconds; three mean hours:minutes:seconds.
  const hasHours = third !== undefined;
  const hours = hasHours ? Number(first) : 0;
  const minutes = hasHours ? Number(second) : Number(first);
  const seconds = hasHours ? Number(third) : Number(second);
  const millis = Number((fraction ?? "").padEnd(3, "0"));

  return ((hours * 60 + minutes) * 60 + seconds) * 1000 + millis;
}
|
||
|
||
// Export the text of the segments overlapping the entered time range as a
// .txt file. An empty bound leaves that side of the range open.
exportRangeTxtBtn?.addEventListener("click", () => {
  // Same feedback as the main export button when there is nothing to export.
  if (!lastResultJson) { appendLog('暂无结果可下载'); return; }

  const fromMs = parseTimeInput(rangeStartInput?.value);
  const toMs = parseTimeInput(rangeEndInput?.value);
  const text = buildSegmentsFromEdited(lastResultJson)
    .filter((seg) => withinRange(seg, fromMs, toMs))
    .map((seg) => seg.text || seg.transcript || "")
    .join("\n");
  triggerDownload(text, "transcript_range.txt");
});
|
||
|
||
// Export the segments overlapping the entered time range as an .srt file.
// Cues are numbered from 1 and carry no speaker prefix; the file is empty
// when no segment falls inside the range.
exportRangeSrtBtn?.addEventListener("click", () => {
  // Same feedback as the main export button when there is nothing to export.
  if (!lastResultJson) { appendLog('暂无结果可下载'); return; }

  const fromMs = parseTimeInput(rangeStartInput?.value);
  const toMs = parseTimeInput(rangeEndInput?.value);
  const toSrtTime = (value) => msToTimestamp(anyToMs(value) ?? 0)?.replace(".", ",") ?? "00:00:00,000";

  const srt = buildSegmentsFromEdited(lastResultJson)
    .filter((seg) => withinRange(seg, fromMs, toMs))
    .map((seg, position) => {
      const startTs = toSrtTime(seg.start_ms ?? seg.start_time ?? seg.start);
      const endTs = toSrtTime(seg.end_ms ?? seg.end_time ?? seg.end);
      return `${position + 1}\n${startTs} --> ${endTs}\n${seg.text || seg.transcript || ""}\n`;
    })
    .join("\n");
  triggerDownload(srt, "transcript_range.srt");
});
|
||
|
||
// Export all segments (with edits applied) as CSV with the columns
// start_ms, end_ms, channel, speaker, text.
exportCsvBtn?.addEventListener("click", () => {
  // Same feedback as the main export button when there is nothing to export.
  if (!lastResultJson) { appendLog('暂无结果可下载'); return; }

  // Quote a cell when it contains a quote, comma or line break, doubling
  // embedded quotes. Applied to EVERY column: previously only the text
  // column had its quotes doubled, and "\r" did not trigger quoting.
  const toCell = (value) => {
    const cell = String(value ?? "");
    return /[",\r\n]/.test(cell) ? `"${cell.replace(/"/g, '""')}"` : cell;
  };

  const header = ["start_ms", "end_ms", "channel", "speaker", "text"];
  const rows = buildSegmentsFromEdited(lastResultJson).map((seg) => [
    anyToMs(seg.start_ms ?? seg.start_time ?? seg.start) ?? 0,
    anyToMs(seg.end_ms ?? seg.end_time ?? seg.end) ?? 0,
    // Same field aliases the other exporters and renderers accept.
    seg.channel ?? seg.channel_index ?? seg.ch ?? "",
    seg.speaker ?? seg.spk ?? seg.speaker_id ?? "",
    seg.text || seg.transcript || ""
  ]);

  const csv = [header, ...rows].map((row) => row.map(toCell).join(",")).join("\n");
  triggerDownload(csv, "transcript.csv");
});
|
||
|
||
const exportBtn = document.getElementById('exportBtn');
const copyAllBtn = document.getElementById('copyAllBtn');

// Copy the text of all segments (with edits applied) to the clipboard.
copyAllBtn?.addEventListener("click", async () => {
  try {
    const text = buildSegmentsFromEdited(lastResultJson || {})
      .map((seg) => seg.text || seg.transcript || "")
      .join("\n");
    await navigator.clipboard.writeText(text);
  } catch (e) {
    // Report the failure in the debug log instead of swallowing it.
    appendLog('复制失败:' + (e?.message || String(e)));
  }
});
|
||
|
||
/**
 * Offer `content` to the user as a file download via a temporary object URL
 * and a synthetic click on an `<a download>` element.
 *
 * @param {string} content - File content.
 * @param {string} filename - Suggested file name.
 * @param {string} [mimeType="text/plain;charset=utf-8"] - Blob MIME type.
 */
function triggerDownload(content, filename, mimeType = "text/plain;charset=utf-8") {
  const blob = new Blob([content], { type: mimeType });
  const url = URL.createObjectURL(blob);

  const anchor = document.createElement("a");
  anchor.href = url;
  anchor.download = filename;
  document.body.appendChild(anchor);
  try {
    anchor.click();
  } finally {
    anchor.remove();
    // Release the object URL on a later task rather than synchronously, so
    // the URL is still valid while the browser starts the download.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
}
|
||
|
||
// Download the whole transcript as .srt, or as plain text when the result
// has no time-coded segments.
downloadSrtBtn?.addEventListener("click", () => {
  if (!lastResultJson) { appendLog('暂无结果可下载'); return; }

  // Pass ONLY the edited segments. extractSegments() checks
  // `data.result.segments` and `result.segments` before the top-level
  // `segments`, so spreading the original result alongside the edited list
  // made the export ignore edits whenever the result had nested segments.
  const srt = buildSrtFromSegments({ segments: buildSegmentsFromEdited(lastResultJson) });
  const text = srt || (lastResultJson?.transcript || extractTranscript(lastResultJson) || "");
  triggerDownload(text, srt ? "transcript.srt" : "transcript.txt");
});
|
||
|
||
// Main export button: download a readable TXT in which every line carries
// the time range and, when known, the speaker name.
exportBtn?.addEventListener('click', () => {
  if (!lastResultJson) { appendLog('暂无结果可下载'); return; }

  const content = buildSegmentsFromEdited(lastResultJson)
    .map((seg) => {
      const start = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start) ?? 0;
      const end = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end) ?? 0;
      const time = `[${msToTimestamp(start)} - ${msToTimestamp(end)}]`;

      const spkId = seg.speaker ?? seg.spk ?? seg.speaker_id;
      const spkName = spkId != null ? (speakerNameMap.get(spkId) || `说话人 ${spkId}`) : '';
      const namePrefix = spkName ? `${spkName}: ` : '';

      const text = (seg.text || seg.transcript || '').trim();
      return `${time} ${namePrefix}${text}`;
    })
    .join('\n');
  triggerDownload(content, 'transcript.txt');
});
|
||
|
||
/**
 * Render a result in the new UI: editable speaker tags in `#speakerTags`
 * and one line per segment in `#transcriptLines`, and wire the
 * `#editToggle` / `#saveSpkBtn` buttons.
 *
 * Speaker names and colours live in the module-level `speakerNameMap` /
 * `speakerColorMap`, so they survive re-renders. Any error is caught and
 * written to the debug log.
 *
 * Fix: DOM `data-spk` values are always strings, while speaker ids in the
 * data may be numbers. The save handler used the string as the map key and
 * so created a second entry ("1") beside the real one (1). Ids read back
 * from the DOM are now translated to the original key first.
 *
 * @param {object} json - Result JSON.
 */
function renderInNewUI(json){
  try {
    const segs = extractSegments(json) || [];
    const tagsEl = document.getElementById('speakerTags');
    const linesEl = document.getElementById('transcriptLines');
    const editToggle = document.getElementById('editToggle');
    const saveSpkBtn = document.getElementById('saveSpkBtn');
    if (tagsEl) tagsEl.innerHTML = '';
    if (linesEl) linesEl.innerHTML = '';

    const speakerOf = (seg) => seg.speaker ?? seg.spk ?? seg.speaker_id ?? null;
    const displayName = (spk) => speakerNameMap.get(spk) || `说话人 ${spk}`;

    // data-spk string -> original speaker id (first id seen wins).
    const spkByDomKey = new Map();
    const resolveSpk = (domKey) => (spkByDomKey.has(domKey) ? spkByDomKey.get(domKey) : domKey);

    // Collect speakers in order of appearance and give each new one a
    // colour and a default name.
    const speakers = [];
    const seen = new Set();
    for (const seg of segs) {
      const spk = speakerOf(seg);
      if (spk == null) continue;
      if (!seen.has(spk)) {
        seen.add(spk);
        speakers.push(spk);
        if (!spkByDomKey.has(String(spk))) spkByDomKey.set(String(spk), spk);
      }
      if (!speakerColorMap.has(spk)) {
        // Prefer an unused colour 1..3; once all are taken, cycle by count.
        const used = new Set(speakerColorMap.values());
        const pick = [1, 2, 3].find((i) => !used.has(i)) || ((speakerColorMap.size % 3) + 1);
        speakerColorMap.set(spk, pick);
      }
      if (!speakerNameMap.has(spk)) {
        speakerNameMap.set(spk, `说话人 ${spk}`);
      }
    }

    // Editable speaker tags at the top.
    if (tagsEl) {
      speakers.forEach((spk) => {
        const tag = document.createElement('span');
        tag.className = `speaker-tag tag-${speakerColorMap.get(spk) || 1}`;
        tag.contentEditable = editMode ? 'true' : 'false';
        tag.dataset.spk = String(spk);
        tag.textContent = displayName(spk);
        tag.addEventListener('input', () => {
          speakerNameMap.set(spk, tag.textContent || `说话人 ${spk}`);
          // Keep the badges of this speaker in the transcript in sync.
          linesEl?.querySelectorAll(`[data-spk="${CSS.escape(String(spk))}"]`).forEach((el) => {
            el.textContent = speakerNameMap.get(spk);
          });
        });
        tagsEl.appendChild(tag);
      });
    }

    // One transcript line per segment: timestamp, optional speaker badge, text.
    if (linesEl) {
      for (const seg of segs) {
        const spk = speakerOf(seg);

        const item = document.createElement('div');
        item.className = 'transcript-line';

        const ts = document.createElement('span');
        ts.className = 'timestamp';
        const start = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start);
        const end = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end);
        ts.textContent = `[${msToTimestamp(start) || '00:00:00.000'} - ${msToTimestamp(end) || '00:00:00.000'}]`;
        item.appendChild(ts);

        if (spk != null) {
          const badge = document.createElement('span');
          badge.className = `speaker tag-${speakerColorMap.get(spk) || 1}`;
          badge.dataset.spk = String(spk);
          badge.textContent = displayName(spk);
          item.appendChild(badge);
        }

        const text = document.createElement('div');
        text.className = 'text-content';
        text.textContent = (seg.text || seg.transcript || '').trim();
        item.appendChild(text);

        linesEl.appendChild(item);
      }
    }

    // Edit toggle. Assigned through `onclick` so a re-render replaces the
    // previous handler instead of adding another one.
    if (editToggle) {
      editToggle.onclick = () => {
        editMode = !editMode;
        if (saveSpkBtn) saveSpkBtn.style.display = editMode ? 'inline-flex' : 'none';
        tagsEl?.querySelectorAll('.speaker-tag').forEach((el) => { el.contentEditable = editMode ? 'true' : 'false'; });
      };
    }

    if (saveSpkBtn) {
      saveSpkBtn.onclick = () => {
        tagsEl?.querySelectorAll('.speaker-tag').forEach((el) => {
          const spk = resolveSpk(el.dataset.spk);
          speakerNameMap.set(spk, el.textContent || `说话人 ${spk}`);
        });
        // Leave edit mode and refresh the badges in the transcript.
        editMode = false;
        saveSpkBtn.style.display = 'none';
        tagsEl?.querySelectorAll('.speaker-tag').forEach((el) => { el.contentEditable = 'false'; });
        linesEl?.querySelectorAll('.speaker').forEach((el) => {
          el.textContent = displayName(resolveSpk(el.dataset.spk));
        });
      };
    }
  } catch (e) {
    appendLog('渲染失败:' + (e?.message || String(e)));
  }
}
|
||
|
||
/**
 * Poll the result endpoint until the job produces segments, reports a final
 * status, or `maxAttempts` queries have been made.
 *
 * Per attempt:
 * - when calling the API directly and the headers carry X-Api-Request-Id,
 *   that header is replaced with a fresh id (this mutates `headers`);
 * - a response containing segments/utterances is returned immediately with
 *   a `transcript` field added (string or null);
 * - otherwise a success status returns the JSON as-is, a failure status
 *   throws, and anything else waits `pollIntervalMs` before the next query.
 *   Text alone does not end polling, so the structured result is not missed.
 *
 * Changes: the segment/transcript extraction is done once (it used to run
 * twice, once inside a try/catch that could not prevent the second call
 * from throwing); no sleep after the final attempt; the JSON parse failure
 * keeps its original error as `cause`.
 *
 * @param {object} params
 * @param {string} params.url - Query endpoint.
 * @param {Headers} params.headers - Request headers.
 * @param {object} params.body - JSON-serialisable request body.
 * @param {boolean} params.useProxy - True when going through the proxy.
 * @returns {Promise<object>} The result JSON.
 * @throws {Error} On HTTP errors, unparsable JSON, a failure status, or timeout.
 */
async function pollJobResult({ url, headers, body, useProxy }) {
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    appendLog(`第 ${attempt} 次查询任务状态…`);

    if (!useProxy && headers.has("X-Api-Request-Id")) {
      headers.set("X-Api-Request-Id", (self.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`));
    }

    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body)
    });

    if (!response.ok) {
      const errorBody = await response.text();
      throw new Error(`查询失败,HTTP ${response.status}:${errorBody}`);
    }

    let json;
    try {
      json = await response.json();
    } catch (cause) {
      throw new Error("解析查询结果时发生错误", { cause });
    }

    const segs = extractSegments(json);
    const hasSeg = Array.isArray(segs) && segs.length > 0;
    const extractedTranscript = extractTranscript(json);
    appendLog(`查询结果摘要:segments=${hasSeg ? segs.length : 0}, transcriptLen=${(extractedTranscript || '').length}`);

    // Wait for structured segments; text alone does not end polling.
    if (hasSeg) {
      return { ...json, transcript: json?.transcript || extractedTranscript || null };
    }

    const status = deriveStatus(json);
    if (!status) {
      appendLog("未返回任务状态,继续轮询…");
    } else if (statusSuccessSet.has(status.toUpperCase())) {
      return json;
    } else if (statusFailureSet.has(status.toUpperCase())) {
      throw new Error(`任务失败,状态:${status}`);
    }

    // No point waiting when no further query will be made.
    if (attempt < maxAttempts) {
      await sleep(pollIntervalMs);
    }
  }
  throw new Error("轮询超时,请稍后重试");
}
|
||
|
||
/**
 * Recognition options sent with a submit request.
 *
 * The new UI exposes no controls for these: everything is enabled except
 * channel splitting, and no sensitive-word filter is set.
 *
 * The literal used to list `enableChannelSplit` twice (true, then false);
 * the later value won, so the single `false` below keeps the same result.
 *
 * @returns {object} Option flags.
 */
function readOptions() {
  return {
    enableItn: true,
    enablePunc: true,
    enableDdc: true,
    enableSpeaker: true,
    // Off by default so the same sentence is not transcribed once for the
    // left channel and once for the right.
    enableChannelSplit: false,
    showUtterances: true,
    vadSegment: true,
    sensitiveWords: ""
  };
}
|
||
|
||
// Submission is triggered by the new UI's "convert again" button.
const convertBtn = document.getElementById('convertBtn');
const fileInputNew = document.getElementById('fileInput');

// Choosing a file starts a conversion automatically. This is wired up here
// so the click is not lost when an inline script runs before main.js.
let converting = false;
const triggerConvert = () => {
  if (converting) {
    appendLog('已忽略:上一次转换仍在进行中');
    return;
  }
  convertBtn?.click();
};

fileInputNew?.addEventListener('change', () => {
  appendLog('文件已选择,自动触发转换…');
  // Simple debounce: repeated change events within 50 ms start one conversion.
  clearTimeout(window.__conv_debounce);
  window.__conv_debounce = setTimeout(triggerConvert, 50);
});
|
||
// Click handler for the convert button. Validates the input, submits the
// recognition job, polls for its result, removes duplicated segments and
// renders the outcome. The module-level `converting` flag makes it
// non-reentrant; the flag is always cleared in `finally`, including on the
// "no file selected" early exit.
convertBtn?.addEventListener('click', async (event) => {
  // Suppress the default action for every click, including the ones rejected
  // by the re-entrancy guard below.
  event.preventDefault();
  if (converting) { appendLog('已忽略:转换进行中'); return; }
  converting = true;

  // Set once this handler has disabled the submit button, so that `finally`
  // only re-enables a button it disabled itself.
  let submitDisabledHere = false;

  // Request id: a UUID when the Web Crypto API offers one, otherwise a timestamp/random fallback.
  const newRequestId = () => (self.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`);

  // Maps the detected container/format to the codec name sent to the service.
  const resolveCodec = (format) => {
    if (["wav", "pcm", "wave"].includes(format)) return "raw";
    if (format === "mp3") return "mp3";
    if (["m4a", "aac"].includes(format)) return "aac";
    return format;
  };

  // Compressed formats fall back to 44100 Hz when the configured rate is missing or below 22050 Hz.
  const resolveRate = (format, sampleRate) => {
    const isCompressed = ["m4a", "aac", "mp3"].includes(format);
    return (isCompressed && (!sampleRate || sampleRate < 22050)) ? 44100 : sampleRate;
  };

  // JSON.stringify replacer used for logging only: shortens very long strings
  // (such as base64 audio) so the debug log is not flooded.
  const LOG_STRING_LIMIT = 200;
  const truncateForLog = (_key, value) => (
    typeof value === 'string' && value.length > LOG_STRING_LIMIT
      ? `${value.slice(0, LOG_STRING_LIMIT)}…(${value.length} chars)`
      : value
  );

  // Stops the analysis timer if one is attached to window.startAnalysisTimer.
  const stopAnalysisTimer = () => {
    if (window.startAnalysisTimer && window.startAnalysisTimer._timer) {
      clearInterval(window.startAnalysisTimer._timer);
      window.startAnalysisTimer._timer = null;
    }
  };

  // Drops segments whose start/end fall into the same 300 ms buckets and whose
  // trimmed text is identical (cross-channel duplicates), then stores the
  // filtered list on resultJson.data.result.segments.
  const dedupeSegments = (resultJson) => {
    // NOTE(review): the bucket helper runs anyToMs on values that already went
    // through anyToMs; kept as-is — confirm anyToMs is idempotent.
    const bucket = (ms) => Math.round((anyToMs(ms) || 0) / 300);
    const seen = new Set();
    const unique = [];
    for (const seg of (extractSegments(resultJson) || [])) {
      const start = anyToMs(seg.start_ms ?? seg.start_time ?? seg.start) ?? 0;
      const end = anyToMs(seg.end_ms ?? seg.end_time ?? seg.end) ?? 0;
      const key = `${bucket(start)}|${bucket(end)}|${(seg.text || seg.transcript || '').trim()}`;
      if (seen.has(key)) continue;
      seen.add(key);
      unique.push(seg);
    }
    if (!resultJson.data) resultJson.data = {};
    if (!resultJson.data.result) resultJson.data.result = {};
    resultJson.data.result.segments = unique;
  };

  try {
    resetOutputs();
    if (location.protocol === 'file:') { appendLog('当前以 file:// 打开页面,无法向本地代理提交;请访问 http://localhost:6174'); }

    // The new UI has no inputs for these settings; fixed stand-ins keep the
    // rest of the flow identical to the old form-driven version.
    const audioUrlInput = { value: "" }; // the new UI does not use an audio URL
    const uidInput = { value: "AudioToText" };
    const modelNameInput = { value: "bigmodel" };
    const sampleRateInput = { value: 16000 };
    const bitsInput = { value: 16 };
    const channelInput = { value: 1 };
    const apiBaseInput = { value: "" };
    const apiKeyInput = { value: "" };
    const resourceIdInput = { value: "" };

    const audioUrl = audioUrlInput.value.trim();
    // The file input may be missing from the page, hence the optional chaining.
    const audioFile = fileInputNew?.files?.[0] ?? null;
    if (!audioUrl && !audioFile) {
      appendLog("未选择文件,等待提交…");
      return; // `finally` still clears the `converting` flag
    }
    appendLog("检测到文件选择,开始提交识别任务…");

    // An empty API base means requests go through the local proxy endpoints.
    const apiBase = apiBaseInput.value.trim();
    const useProxy = apiBase.length === 0;
    const submitUrl = useProxy ? proxySubmitEndpoint : `${apiBase}/submit`;
    const resultUrl = useProxy ? proxyResultEndpoint : `${apiBase}/query`;

    const uid = uidInput.value.trim() || "anonymous";
    const modelName = modelNameInput.value.trim() || "bigmodel";
    const sampleRate = Number(sampleRateInput.value) || 16000;
    const bits = Number(bitsInput.value) || 16;
    const channels = Number(channelInput.value) || 1;
    const format = audioUrl ? detectAudioFormat(audioUrl) : detectAudioFormat(audioFile.name);

    const requestId = newRequestId();
    const apiKey = apiKeyInput.value.trim();
    const resourceId = resourceIdInput.value.trim();

    if (submitButton) {
      submitButton.disabled = true;
      submitDisabledHere = true;
    }
    if (audioUrl) {
      appendLog(`使用音频 URL:${audioUrl}`);
    } else {
      appendLog(`读取文件:${audioFile.name}`);
    }

    const submitHeaders = createHeaders({ useProxy, apiKey, resourceId, requestId });
    const options = readOptions();

    const mappedCodec = resolveCodec(format);
    const finalRate = resolveRate(format, sampleRate);
    // Request switches shared by the URL and the file payloads.
    const requestFlags = {
      enable_itn: !!options.enableItn,
      enable_punc: !!options.enablePunc,
      enable_ddc: !!options.enableDdc,
      enable_speaker_info: !!options.enableSpeaker,
      enable_channel_split: !!options.enableChannelSplit,
      show_utterances: !!options.showUtterances,
      vad_segment: !!options.vadSegment,
      sensitive_words_filter: options.sensitiveWords || "",
    };

    let payload;
    if (audioUrl) {
      payload = {
        user: { uid },
        audio: { url: audioUrl, format, codec: mappedCodec, rate: finalRate, bits, channel: channels },
        request: { model_name: modelName, ...requestFlags },
      };
    } else {
      const base64WithPrefix = await readFileAsBase64(audioFile);
      const base64Data = stripDataUriPrefix(base64WithPrefix);
      appendLog(`文件编码完成,长度约 ${Math.round(base64Data.length / 1024)} KB`);
      payload = buildSubmitPayload({
        base64Data,
        format,
        sampleRate,
        bits,
        channels,
        uid,
        modelName,
        options,
      });
      // Override the codec mapping, the sample-rate fallback and the request
      // switches on the payload built by the shared helper.
      payload.audio.codec = mappedCodec;
      payload.audio.rate = finalRate;
      Object.assign(payload.request, requestFlags);
    }
    appendLog(`提交参数:source=${audioUrl ? 'url' : 'file'}, format=${format}, codec=${mappedCodec}, rate=${finalRate}, ch=${channels}`);

    // Long strings are truncated in the log only; the body sent below is complete.
    appendLog(`提交请求体:${JSON.stringify(payload, truncateForLog)}`);

    let submitResponse;
    try {
      submitResponse = await fetch(submitUrl, {
        method: "POST",
        headers: submitHeaders,
        body: JSON.stringify(payload),
      });
    } catch (networkError) {
      // fetch rejects when the request could not be sent at all.
      appendLog("提交请求未发出,可能没有后端代理或跨域受限");
      throw networkError;
    }

    if (!submitResponse.ok) {
      const errorBody = await submitResponse.text();
      appendLog("提交失败:请在本地运行带代理版本,或在代码里配置直连 API 基础地址");
      throw new Error(`提交失败,HTTP ${submitResponse.status}:${errorBody}`);
    }

    const submitJson = await submitResponse.json();
    appendLog(`提交成功:${JSON.stringify(submitJson)}`);

    const jobKey = extractJobKey(submitJson);
    if (!jobKey) {
      throw new Error("未能从提交响应中解析到任务 ID");
    }
    appendLog(`任务 ID:${jobKey}`);

    // Polling uses its own request id, separate from the submit request.
    const pollHeaders = createHeaders({ useProxy, apiKey, resourceId, requestId: newRequestId() });
    const pollBody = { job_key: jobKey, taskId: jobKey };
    const resultJson = await pollJobResult({ url: resultUrl, headers: pollHeaders, body: pollBody, useProxy });

    appendLog("任务完成,处理结果如下:");
    appendLog(JSON.stringify(resultJson));

    dedupeSegments(resultJson);
    lastResultJson = resultJson;
    renderInNewUI(lastResultJson);

    stopAnalysisTimer();
  } catch (error) {
    // Stop the timer on failure too, so it does not keep running after an error.
    stopAnalysisTimer();
    const message = error instanceof Error ? error.message : String(error);
    appendLog(`发生错误:${message}`);
    if (transcriptElement) transcriptElement.value = message;
  } finally {
    converting = false;
    if (submitDisabledHere && submitButton) submitButton.disabled = false;
  }
});
|