/**
 * Mutable application state shared by the handlers in this script.
 */
const state = {
// Model catalog; replaced wholesale with the /api/config/models response in fetchConfig().
config: { moonshine: {}, sensevoice: {}, llms: {} },
// Selected transcription backend; this file compares it against 'moonshine' and 'sensevoice'.
backend: 'sensevoice',
// Utterances currently shown in the transcript list (start, end, text, optional speaker are read here).
utterances: [],
// Utterances from the 'complete' event's diarization payload, or null when none was received.
diarizedUtterances: null,
// Diarization statistics from the 'complete' event, or null; rendered by renderDiarizationStats().
diarizationStats: null,
// NOTE(review): not read or written anywhere in the visible code — confirm whether still needed.
summary: '',
// URL of the server-side audio source (set by the stream 'ready' event, YouTube fetch, or episode download).
audioUrl: null,
// NOTE(review): not read or written anywhere in the visible code — confirm whether still needed.
sourcePath: null,
// File chosen through the file input; takes precedence over audioUrl when transcribing.
uploadedFile: null,
// Re-entrancy guards for the two streaming requests.
transcribing: false,
summarizing: false,
};
/**
 * Cached DOM references, looked up once by element id when the script loads.
 * Any id missing from the page yields null for that entry.
 */
const elements = {
// Sidebar: backend/model/LLM selection and transcription options.
backendSelect: document.getElementById('backend-select'),
modelSelect: document.getElementById('model-select'),
llmSelect: document.getElementById('llm-select'),
promptInput: document.getElementById('prompt-input'),
vadSlider: document.getElementById('vad-threshold'),
vadValue: document.getElementById('vad-value'),
diarizationToggle: document.getElementById('diarization-toggle'),
diarizationSettings: document.getElementById('diarization-settings'),
numSpeakers: document.getElementById('num-speakers'),
clusterSlider: document.getElementById('cluster-threshold'),
clusterValue: document.getElementById('cluster-value'),
sensevoiceOptions: document.getElementById('sensevoice-options'),
sensevoiceLanguage: document.getElementById('sensevoice-language'),
// Primary actions and status line.
transcribeBtn: document.getElementById('transcribe-btn'),
summaryBtn: document.getElementById('summary-btn'),
statusText: document.getElementById('status-text'),
// Playback and transcript rendering.
audioPlayer: document.getElementById('audio-player'),
transcriptList: document.getElementById('transcript-list'),
transcriptTemplate: document.getElementById('utterance-template'),
utteranceCount: document.getElementById('utterance-count'),
summaryOutput: document.getElementById('summary-output'),
// Diarization statistics panel.
diarizationPanel: document.getElementById('diarization-summary'),
diarizationMetrics: document.getElementById('diarization-metrics'),
speakerBreakdown: document.getElementById('speaker-breakdown'),
// Export controls.
transcriptFormat: document.getElementById('transcript-format'),
summaryFormat: document.getElementById('summary-format'),
exportTranscriptBtn: document.getElementById('export-transcript'),
exportSummaryBtn: document.getElementById('export-summary'),
includeTimestamps: document.getElementById('include-timestamps'),
// Audio sources: local file, YouTube URL, podcast search.
fileInput: document.getElementById('file-input'),
youtubeUrl: document.getElementById('youtube-url'),
youtubeFetch: document.getElementById('youtube-fetch'),
podcastQuery: document.getElementById('podcast-query'),
podcastSearch: document.getElementById('podcast-search'),
podcastResults: document.getElementById('podcast-results'),
episodeResults: document.getElementById('episode-results'),
};
// Transcript export formats offered in the transcript-format select; the chosen
// string is sent verbatim as `format` to /api/export/transcript.
const TRANSCRIPT_FORMATS = [
'SRT (SubRip)',
'VTT (WebVTT)',
'ASS (Advanced SubStation Alpha)',
'Plain Text',
'JSON',
'ELAN (EAF)',
];
// Summary export formats offered in the summary-format select; the chosen
// string is sent verbatim as `format` to /api/export/summary.
const SUMMARY_FORMATS = ['Markdown', 'Plain Text'];
// Element id of the tab panel currently shown; updated by the tab click handler in initTabs().
let activeTab = 'podcast-tab';
// Index into state.utterances of the highlighted utterance, or -1 when none is highlighted.
let activeUtteranceIndex = -1;
/**
 * Show a message in the status line and tag it with a tone.
 *
 * @param {string} message - Text written to the status element.
 * @param {string} [tone='info'] - Stored in the element's `data-tone` attribute.
 */
function setStatus(message, tone = 'info') {
  const { statusText } = elements;
  statusText.textContent = message;
  statusText.dataset.tone = tone;
}
/**
 * Format a duration in seconds as `m:ss` (minutes are not capped at 59).
 *
 * Missing, non-numeric, non-finite, or negative input is rendered as `0:00`
 * instead of producing strings such as "NaN:NaN" or "-1:-5".
 *
 * @param {number} seconds - Offset in seconds; fractional parts are dropped.
 * @returns {string} The formatted timestamp.
 */
function formatTime(seconds) {
  const numeric = Number(seconds);
  const total = Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0;
  const mins = Math.floor(total / 60);
  const secs = String(total % 60).padStart(2, '0');
  return `${mins}:${secs}`;
}
/**
 * Replace a list container's content with a placeholder message.
 * Does nothing when the container is missing.
 *
 * @param {?Element} container - List element to overwrite.
 * @param {string} message - Placeholder text, written through innerHTML.
 */
function setListEmpty(container, message) {
  if (container) {
    container.innerHTML = `\n${message}\n`;
  }
}
/**
 * Load the model catalog from /api/config/models into `state.config`, then
 * rebuild the model, LLM, and export selects. Failures are logged and shown
 * in the status line; nothing is rethrown.
 *
 * @returns {Promise<void>}
 */
async function fetchConfig() {
  try {
    const response = await fetch('/api/config/models');
    if (!response.ok) {
      throw new Error('Failed to fetch model catalog');
    }
    const catalog = await response.json();
    state.config = catalog;
    populateModelSelect();
    populateLLMSelect();
    populateExportSelects();
  } catch (error) {
    console.error(error);
    setStatus(error.message, 'error');
  }
}
/**
 * Rebuild the model select from the catalog of the active backend and show
 * the SenseVoice-only options only while that backend is selected.
 *
 * Catalog entries map a display label to the option value; any backend other
 * than 'moonshine' uses the SenseVoice catalog.
 */
function populateModelSelect() {
  const { backend } = state;
  const select = elements.modelSelect;
  select.innerHTML = '';

  const catalog = backend === 'moonshine' ? state.config.moonshine : state.config.sensevoice;
  for (const [label, value] of Object.entries(catalog)) {
    const option = document.createElement('option');
    option.value = value;
    option.textContent = label;
    select.appendChild(option);
  }

  if (select.options.length > 0) {
    select.selectedIndex = 0;
  }
  elements.sensevoiceOptions.classList.toggle('hidden', backend !== 'sensevoice');
}
/**
 * Rebuild the LLM select with one option per key of `state.config.llms`;
 * the key serves as both option value and label.
 */
function populateLLMSelect() {
  const select = elements.llmSelect;
  select.innerHTML = '';
  for (const name of Object.keys(state.config.llms)) {
    const option = document.createElement('option');
    option.value = name;
    option.textContent = name;
    select.appendChild(option);
  }
}
/**
 * Rebuild the transcript-format and summary-format selects from
 * TRANSCRIPT_FORMATS and SUMMARY_FORMATS. Each format string is used as both
 * the option value and its label.
 */
function populateExportSelects() {
  const fillSelect = (select, formats) => {
    select.innerHTML = '';
    for (const format of formats) {
      const option = document.createElement('option');
      option.value = format;
      option.textContent = format;
      select.appendChild(option);
    }
  };

  fillSelect(elements.transcriptFormat, TRANSCRIPT_FORMATS);
  fillSelect(elements.summaryFormat, SUMMARY_FORMATS);
}
/**
 * Wire up tab switching: clicking a `.tab` activates it and the panel whose
 * id is in the tab's `data-target`, and deactivates every other tab and panel.
 * Clicking the tab that is already active does nothing.
 */
function initTabs() {
  const clearActive = (selector) => {
    document.querySelectorAll(selector).forEach((node) => node.classList.remove('active'));
  };

  document.querySelectorAll('.tab').forEach((tab) => {
    tab.addEventListener('click', () => {
      const target = tab.dataset.target;
      if (target === activeTab) {
        return;
      }
      clearActive('.tab');
      clearActive('.tab-panel');
      tab.classList.add('active');
      document.getElementById(target).classList.add('active');
      activeTab = tab.dataset.target;
    });
  });
}
/**
 * Attach listeners for the sidebar controls:
 * - backend select: stores the choice in `state.backend` and rebuilds the model list;
 * - VAD and cluster sliders: mirror the slider value, to two decimals, in their labels;
 * - diarization toggle: shows the diarization settings only while checked.
 */
function initSidebarInteractions() {
  const mirrorSlider = (slider, output) => {
    slider.addEventListener('input', () => {
      output.textContent = Number(slider.value).toFixed(2);
    });
  };

  const { backendSelect, diarizationToggle } = elements;

  backendSelect.addEventListener('change', () => {
    state.backend = backendSelect.value;
    populateModelSelect();
  });

  mirrorSlider(elements.vadSlider, elements.vadValue);

  diarizationToggle.addEventListener('change', () => {
    const collapsed = !diarizationToggle.checked;
    elements.diarizationSettings.classList.toggle('hidden', collapsed);
  });

  mirrorSlider(elements.clusterSlider, elements.clusterValue);
}
/**
 * Discard the previous transcription: clear the utterance and diarization
 * state, drop the active highlight index, empty the transcript list and
 * counter, and hide the diarization panel.
 */
function resetTranscriptionState() {
  Object.assign(state, {
    utterances: [],
    diarizedUtterances: null,
    diarizationStats: null,
  });
  activeUtteranceIndex = -1;

  const { transcriptList, utteranceCount, diarizationPanel } = elements;
  transcriptList.innerHTML = '';
  utteranceCount.textContent = '';
  diarizationPanel.classList.add('hidden');
}
/**
 * Collect the transcription options from the sidebar controls.
 *
 * - `language` comes from the SenseVoice language select only while that
 *   backend is active; otherwise it is 'auto'.
 * - `textnorm` is the checked `textnorm` radio value, defaulting to 'withitn'.
 * - `num_speakers` is -1 when the field is empty.
 *
 * @returns {object} Options object serialized into the /api/transcribe request.
 */
function prepareTranscriptionOptions() {
  const checkedTextnorm = document.querySelector('input[name="textnorm"]:checked');
  const usesSenseVoice = state.backend === 'sensevoice';

  const diarization = {
    enable: elements.diarizationToggle.checked,
    num_speakers: Number(elements.numSpeakers.value || -1),
    cluster_threshold: Number(elements.clusterSlider.value),
  };

  return {
    backend: state.backend,
    model_name: elements.modelSelect.value,
    vad_threshold: Number(elements.vadSlider.value),
    language: usesSenseVoice ? elements.sensevoiceLanguage.value : 'auto',
    textnorm: checkedTextnorm?.value || 'withitn',
    diarization,
  };
}
/**
 * Send the current audio source to /api/transcribe and consume the
 * newline-delimited JSON event stream it returns.
 *
 * The uploaded file takes precedence over `state.audioUrl`. Each complete line
 * of the response is parsed and passed to handleTranscriptionEvent(). The
 * call is ignored while another transcription is running, and a warning is
 * shown when no audio source is available.
 *
 * "Transcription complete" is reported only when the stream carried no
 * `error` event, so a server-reported failure stays visible in the status line.
 *
 * @returns {Promise<void>} Settles once the stream has ended or failed;
 *   errors are logged and shown in the status line, never rethrown.
 */
async function handleTranscription() {
  if (state.transcribing) return;
  if (!state.uploadedFile && !state.audioUrl) {
    setStatus('Upload or select an audio source first', 'warning');
    return;
  }

  resetTranscriptionState();
  state.transcribing = true;
  setStatus('Starting transcription...', 'info');

  const formData = new FormData();
  if (state.uploadedFile) {
    formData.append('audio', state.uploadedFile, state.uploadedFile.name);
  } else if (state.audioUrl) {
    formData.append('source', state.audioUrl);
  }
  formData.append('options', JSON.stringify(prepareTranscriptionOptions()));

  let streamReportedError = false;
  // Parse one NDJSON line and forward it, remembering whether the server
  // signalled a failure so the final status does not overwrite it.
  const dispatchLine = (line) => {
    if (!line.trim()) return;
    const event = JSON.parse(line);
    if (event.type === 'error') streamReportedError = true;
    handleTranscriptionEvent(event);
  };

  try {
    const response = await fetch('/api/transcribe', {
      method: 'POST',
      body: formData,
    });
    if (!response.ok || !response.body) {
      throw new Error('Transcription request failed');
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    setStatus('Processing audio...', 'info');

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last piece may be an incomplete line; keep it for the next chunk.
      buffer = lines.pop();
      lines.forEach((line) => dispatchLine(line));
    }

    // Flush bytes the streaming decoder may still hold, then handle a final
    // line that was not newline-terminated.
    buffer += decoder.decode();
    dispatchLine(buffer);

    if (!streamReportedError) {
      setStatus('Transcription complete', 'success');
    }
  } catch (err) {
    console.error(err);
    setStatus(err.message, 'error');
  } finally {
    state.transcribing = false;
  }
}
/**
 * Apply one event from the transcription stream to the UI state.
 *
 * - `ready`: adopt the server's audio URL and rewind the player.
 * - `utterance`: append the utterance and re-render the transcript.
 * - `error`: show the message (or a generic one) in the status line.
 * - `complete`: store diarization results if present, then settle the final
 *   utterance list. Diarized utterances win when non-empty, each merged over
 *   the plain utterance at the same index. The transcript and diarization
 *   panel are then re-rendered.
 *
 * Unknown event types are ignored.
 *
 * @param {object} event - Parsed event object with a `type` field.
 */
function handleTranscriptionEvent(event) {
  const { type } = event;

  if (type === 'ready') {
    const { audioUrl } = event;
    if (audioUrl) {
      state.audioUrl = audioUrl;
      elements.audioPlayer.src = audioUrl;
      elements.audioPlayer.currentTime = 0;
    }
    return;
  }

  if (type === 'utterance') {
    if (event.utterance) {
      state.utterances.push(event.utterance);
      renderTranscript();
    }
    return;
  }

  if (type === 'error') {
    setStatus(event.message || 'Transcription error', 'error');
    return;
  }

  if (type !== 'complete') {
    return;
  }

  const { diarization, utterances: plainUtterances } = event;
  if (diarization) {
    state.diarizedUtterances = diarization.utterances || [];
    state.diarizationStats = diarization.stats || null;
  }

  // Only a non-empty diarized list overrides the plain utterances.
  const diarized = state.diarizedUtterances?.length ? state.diarizedUtterances : null;
  if (plainUtterances) {
    if (diarized) {
      state.utterances = diarized.map((utt, position) => ({
        ...(plainUtterances[position] || {}),
        ...utt,
      }));
    } else {
      state.utterances = plainUtterances;
    }
  } else if (diarized) {
    state.utterances = diarized;
  }

  renderTranscript();
  renderDiarizationStats();
}
/**
 * Re-render the whole transcript list from `state.utterances`.
 *
 * Each utterance is cloned from the utterance template, tagged with its index
 * and start/end times as data attributes, and given a timestamp and text.
 * The speaker tag is revealed only when `speaker` is a number (shown 1-based).
 * The segment counter is updated afterwards.
 */
function renderTranscript() {
  const list = elements.transcriptList;
  list.innerHTML = '';

  const fragment = document.createDocumentFragment();
  for (const [position, utterance] of state.utterances.entries()) {
    const clone = elements.transcriptTemplate.content.cloneNode(true);

    const row = clone.querySelector('.utterance-item');
    row.dataset.index = String(position);
    row.dataset.start = utterance.start;
    row.dataset.end = utterance.end;

    clone.querySelector('.timestamp').textContent = `[${formatTime(utterance.start)}]`;
    clone.querySelector('.utterance-text').textContent = utterance.text;

    const speakerTag = clone.querySelector('.speaker-tag');
    if (typeof utterance.speaker === 'number') {
      speakerTag.textContent = `Speaker ${utterance.speaker + 1}`;
      speakerTag.classList.remove('hidden');
    }

    fragment.appendChild(clone);
  }

  list.appendChild(fragment);
  elements.utteranceCount.textContent = `${state.utterances.length} segments`;
}
/**
 * Render the diarization panel from `state.diarizationStats`, or hide the
 * panel when there are no stats.
 *
 * Shows one summary card (speaker count and total duration) and one card per
 * entry of `stats.speakers`, with speaker ids displayed 1-based.
 */
function renderDiarizationStats() {
  const panel = elements.diarizationPanel;
  const stats = state.diarizationStats;
  if (!stats) {
    panel.classList.add('hidden');
    return;
  }
  panel.classList.remove('hidden');

  const buildCard = (markup) => {
    const card = document.createElement('div');
    card.className = 'metric-card';
    card.innerHTML = markup;
    return card;
  };

  elements.diarizationMetrics.innerHTML = '';
  const summaryFragment = document.createDocumentFragment();
  summaryFragment.appendChild(
    buildCard(
      [
        `Total speakers: ${stats.total_speakers || 0}`,
        `Duration: ${stats.total_duration?.toFixed(1) || 0}s`,
      ].join('\n')
    )
  );
  elements.diarizationMetrics.appendChild(summaryFragment);

  elements.speakerBreakdown.innerHTML = '';
  const perSpeakerFragment = document.createDocumentFragment();
  for (const [speakerId, info] of Object.entries(stats.speakers || {})) {
    // Leading and trailing empty entries keep the surrounding newlines of the markup.
    const markup = [
      '',
      `Speaker ${Number(speakerId) + 1}`,
      `Speaking time: ${info.speaking_time.toFixed(1)}s`,
      `Percentage: ${info.percentage.toFixed(1)}%`,
      `Utterances: ${info.utterances}`,
      `Avg length: ${info.avg_utterance_length.toFixed(1)}s`,
      '',
    ].join('\n');
    perSpeakerFragment.appendChild(buildCard(markup));
  }
  elements.speakerBreakdown.appendChild(perSpeakerFragment);
}
/**
 * Binary-search `state.utterances` for the utterance matching a playback time.
 *
 * Returns the index whose [start, end) interval contains `currentTime`. When
 * the time falls in a gap or past the end, the most recent utterance that
 * started at or before it is returned. Returns -1 when the time precedes every
 * utterance or the list is empty.
 *
 * NOTE(review): relies on utterances being ordered by start time — confirm.
 *
 * @param {number} currentTime - Playback position in seconds.
 * @returns {number} Utterance index, or -1.
 */
function findActiveUtterance(currentTime) {
  const { utterances } = state;
  let low = 0;
  let high = utterances.length - 1;
  let lastStarted = -1;

  while (low <= high) {
    const middle = (low + high) >> 1;
    const { start, end } = utterances[middle];

    if (currentTime >= start && currentTime < end) {
      return middle;
    }
    if (currentTime < start) {
      high = middle - 1;
      continue;
    }
    lastStarted = middle;
    low = middle + 1;
  }

  return lastStarted;
}
/**
 * Move the "active" highlight to the utterance with the given index and
 * scroll it into view. Does nothing when that index is already active.
 * An index with no matching row simply clears the previous highlight.
 *
 * @param {number} index - Index of the utterance to highlight.
 */
function updateActiveUtterance(index) {
  if (index === activeUtteranceIndex) {
    return;
  }

  const list = elements.transcriptList;
  list.querySelector('.utterance-item.active')?.classList.remove('active');

  const target = list.querySelector(`.utterance-item[data-index="${index}"]`);
  if (target) {
    target.classList.add('active');
    target.scrollIntoView({ behavior: 'smooth', block: 'center' });
  }

  activeUtteranceIndex = index;
}
/**
 * Connect the audio player and the transcript list.
 *
 * - While audio plays, the utterance matching the playback position is
 *   highlighted.
 * - Clicks inside a transcript row are delegated from the list. Edit, save,
 *   and cancel buttons drive inline editing; save ignores empty text. Any
 *   other click seeks playback to the row's start time.
 */
function initAudioInteractions() {
  const player = elements.audioPlayer;

  player.addEventListener('timeupdate', () => {
    if (!state.utterances.length) return;
    const position = findActiveUtterance(player.currentTime);
    if (position >= 0) {
      updateActiveUtterance(position);
    }
  });

  // Persist the edited text into state and the row, then leave edit mode.
  const commitEdit = (row, position) => {
    const edited = row.querySelector('textarea').value.trim();
    if (edited.length === 0) return;
    state.utterances[position].text = edited;
    row.querySelector('.utterance-text').textContent = edited;
    toggleEdit(row, false);
  };

  elements.transcriptList.addEventListener('click', (event) => {
    const { target } = event;
    const row = target.closest('.utterance-item');
    if (!row) return;

    const hitEdit = target.closest('.edit-btn');
    const hitSave = target.closest('.save-edit');
    const hitCancel = target.closest('.cancel-edit');
    const position = Number(row.dataset.index);

    if (hitEdit) {
      toggleEdit(row, true);
    } else if (hitSave) {
      commitEdit(row, position);
    } else if (hitCancel) {
      toggleEdit(row, false);
    } else {
      seekToTime(Number(row.dataset.start));
    }
  });
}
/**
 * Switch a transcript row between display and inline-edit mode.
 *
 * Entering edit mode copies the displayed text into the row's textarea.
 * Rows without a text block or an edit area are left untouched.
 *
 * @param {Element} item - The `.utterance-item` row.
 * @param {boolean} editing - True to show the editor, false to show the text.
 */
function toggleEdit(item, editing) {
  const textBlock = item.querySelector('.utterance-text');
  const editArea = item.querySelector('.edit-area');
  if (!textBlock || !editArea) {
    return;
  }

  if (editing) {
    editArea.querySelector('textarea').value = textBlock.textContent;
  }

  const [concealed, revealed] = editing ? [textBlock, editArea] : [editArea, textBlock];
  concealed.classList.add('hidden');
  revealed.classList.remove('hidden');
}
/**
 * Jump playback to the given time (clamped to 0), highlight the matching
 * utterance, and start playing. Non-finite times are ignored.
 *
 * If the player has no metadata yet, the seek is deferred until
 * `loadedmetadata` fires and a load is triggered. Rejections from `play()`
 * are deliberately ignored.
 *
 * @param {number} timeInSeconds - Target playback position.
 */
function seekToTime(timeInSeconds) {
  if (!Number.isFinite(timeInSeconds)) {
    return;
  }

  const player = elements.audioPlayer;

  function performSeek() {
    player.currentTime = Math.max(0, timeInSeconds);
    updateActiveUtterance(findActiveUtterance(player.currentTime));
    player.play().catch(() => {});
  }

  const metadataReady = player.readyState >= 1;
  if (metadataReady) {
    performSeek();
    return;
  }

  const handleMetadata = () => {
    performSeek();
    player.removeEventListener('loadedmetadata', handleMetadata);
  };
  player.addEventListener('loadedmetadata', handleMetadata);
  player.load();
}
/**
 * Request a summary of the current transcript from /api/summarize and show
 * it as it streams in.
 *
 * The response is newline-delimited JSON. Every `partial` event with content
 * replaces the summary output with that content. A final line that is not
 * newline-terminated is processed as well, so the last update is not lost.
 * The call is ignored while a summary is in progress or when there are no
 * utterances.
 *
 * @returns {Promise<void>} Settles when the stream ends or fails; errors are
 *   logged and shown in the status line, never rethrown.
 */
async function handleSummaryGeneration() {
  if (state.summarizing || !state.utterances.length) return;
  state.summarizing = true;
  setStatus('Generating summary...', 'info');
  elements.summaryOutput.textContent = '';

  const payload = {
    transcript: state.utterances.map((u) => u.text).join('\n'),
    llm_model: elements.llmSelect.value,
    prompt: elements.promptInput.value || 'Summarize the transcript below.',
  };

  // Parse one NDJSON line and apply it to the summary output.
  const applyLine = (line) => {
    if (!line.trim()) return;
    const event = JSON.parse(line);
    if (event.type === 'partial' && event.content) {
      elements.summaryOutput.textContent = event.content;
    }
  };

  try {
    const response = await fetch('/api/summarize', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    if (!response.ok || !response.body) throw new Error('Failed to generate summary');

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last piece may be an incomplete line; keep it for the next chunk.
      buffer = lines.pop();
      lines.forEach((line) => applyLine(line));
    }

    // Flush the decoder and handle a trailing line without a final newline.
    buffer += decoder.decode();
    applyLine(buffer);

    setStatus('Summary ready', 'success');
  } catch (err) {
    console.error(err);
    setStatus(err.message, 'error');
  } finally {
    state.summarizing = false;
  }
}
/**
 * Export the current transcript in the selected format via
 * /api/export/transcript. Does nothing when there are no utterances.
 *
 * @returns {Promise<void>}
 */
async function handleExportTranscript() {
  const { utterances } = state;
  if (utterances.length === 0) {
    return;
  }

  const format = elements.transcriptFormat.value;
  const includeTimestamps = elements.includeTimestamps.checked;
  await downloadFile(
    '/api/export/transcript',
    { format, include_timestamps: includeTimestamps, utterances },
    'transcript'
  );
}
/**
 * Export the summary shown in the output panel in the selected format via
 * /api/export/summary. Does nothing when the summary is blank.
 *
 * @returns {Promise<void>}
 */
async function handleExportSummary() {
  const summary = elements.summaryOutput.textContent;
  if (summary.trim() === '') {
    return;
  }

  const format = elements.summaryFormat.value;
  await downloadFile('/api/export/summary', { format, summary, metadata: {} }, 'summary');
}
/**
 * POST a JSON payload to an export endpoint and save the response as a file.
 *
 * The file name comes from the response's Content-Disposition header, falling
 * back to `<prefix>.txt`. Failures are logged and shown in the status line.
 *
 * @param {string} url - Export endpoint.
 * @param {object} payload - Request body, serialized as JSON.
 * @param {string} prefix - Base name for the fallback file name.
 * @returns {Promise<void>}
 */
async function downloadFile(url, payload, prefix) {
  // Delay before releasing the object URL, so the browser has started reading
  // the blob before it is revoked.
  const REVOKE_DELAY_MS = 1000;

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    if (!response.ok) throw new Error('Export failed');

    const blob = await response.blob();
    const filename = getFilenameFromDisposition(response.headers.get('Content-Disposition')) || `${prefix}.txt`;

    const objectUrl = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = filename;
    link.click();
    // Revoking synchronously right after click() can race the download.
    setTimeout(() => URL.revokeObjectURL(objectUrl), REVOKE_DELAY_MS);

    setStatus('Export complete', 'success');
  } catch (err) {
    console.error(err);
    setStatus(err.message, 'error');
  }
}
/**
 * Extract the file name from a Content-Disposition header value.
 *
 * Preference order:
 *  1. the extended `filename*=UTF-8''...` parameter, percent-decoded;
 *  2. a quoted `filename="..."` parameter;
 *  3. an unquoted `filename=token` parameter, which ends at the next `;`.
 *
 * @param {?string} disposition - Header value, or null/undefined when absent.
 * @returns {?string} The file name, or null when none can be determined.
 */
function getFilenameFromDisposition(disposition) {
  if (!disposition) return null;

  const extended = disposition.match(/filename\*\s*=\s*([^']*)'[^']*'([^;]+)/i);
  if (extended && extended[1].trim().toLowerCase() === 'utf-8') {
    try {
      return decodeURIComponent(extended[2].trim());
    } catch {
      // Malformed percent-encoding: fall back to the plain filename parameter.
    }
  }

  const quoted = disposition.match(/filename\s*=\s*"([^"]*)"/i);
  if (quoted && quoted[1]) return quoted[1];

  const bare = disposition.match(/filename\s*=\s*([^";]+)/i);
  const name = bare ? bare[1].trim() : '';
  return name || null;
}
/**
 * Adopt a locally selected audio file as the transcription source and load
 * it into the player through an object URL.
 *
 * The object URL of a previously selected file is revoked so repeated
 * selections do not keep earlier files alive.
 *
 * @param {Event} event - `change` event from the file input.
 */
function handleFileUpload(event) {
  const file = event.target.files?.[0];
  if (!file) return;

  state.uploadedFile = file;
  state.audioUrl = null;

  const player = elements.audioPlayer;
  const previousSrc = player.src;
  player.src = URL.createObjectURL(file);
  // Revoke only after the player has switched away from the old source.
  if (typeof previousSrc === 'string' && previousSrc.startsWith('blob:')) {
    URL.revokeObjectURL(previousSrc);
  }

  setStatus(`Loaded ${file.name}`, 'info');
}
/**
 * Ask the server to download the audio for the YouTube URL in the input and
 * use the result as the transcription source. Does nothing when the input is
 * blank; failures are logged and shown in the status line.
 *
 * @returns {Promise<void>}
 */
async function handleYoutubeFetch() {
  const videoUrl = elements.youtubeUrl.value.trim();
  if (!videoUrl) {
    return;
  }

  setStatus('Downloading audio from YouTube...', 'info');
  try {
    const response = await fetch('/api/youtube/fetch', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url: videoUrl }),
    });
    if (!response.ok) {
      throw new Error('YouTube download failed');
    }

    const { audioUrl } = await response.json();
    state.audioUrl = audioUrl;
    state.uploadedFile = null;
    elements.audioPlayer.src = audioUrl;
    setStatus('YouTube audio ready', 'success');
  } catch (error) {
    console.error(error);
    setStatus(error.message, 'error');
  }
}
/**
 * Search podcasts for the query in the input and render the matches.
 *
 * Titles and artist names come from the search API and are HTML-escaped
 * before insertion, so markup in remote metadata is shown as text. The
 * status line is updated once the search settles.
 *
 * NOTE(review): the click handler in initPodcastInteractions() looks for
 * `button[data-feed]` inside each result, but the markup built here contains
 * no such button — confirm how results become selectable.
 *
 * @returns {Promise<void>} Failures are logged and shown in the status line.
 */
async function handlePodcastSearch() {
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  const query = elements.podcastQuery.value.trim();
  if (!query) return;

  setStatus('Searching podcasts...', 'info');
  setListEmpty(elements.podcastResults, 'Searching podcasts...');
  setListEmpty(elements.episodeResults, 'Select a podcast to view episodes.');

  try {
    const res = await fetch(`/api/podcast/search?query=${encodeURIComponent(query)}`);
    if (!res.ok) throw new Error('Podcast search failed');

    const series = await res.json();
    if (!series.length) {
      setListEmpty(elements.podcastResults, 'No podcasts match your search yet.');
      setStatus('No podcasts found', 'info');
      return;
    }

    elements.podcastResults.innerHTML = '';
    const fragment = document.createDocumentFragment();
    series.forEach((item) => {
      const div = document.createElement('div');
      div.className = 'list-item';
      div.innerHTML = `\n${escapeHtml(item.title)}\n${escapeHtml(item.artist || 'Unknown artist')}\n`;
      fragment.appendChild(div);
    });
    elements.podcastResults.appendChild(fragment);
    setListEmpty(elements.episodeResults, 'Select a podcast to view episodes.');
    setStatus('Podcast results ready', 'success');
  } catch (err) {
    console.error(err);
    setStatus(err.message, 'error');
    setListEmpty(elements.podcastResults, 'Unable to load podcasts right now.');
  }
}
/**
 * Load and render the episodes of a podcast feed (at most the first 15).
 *
 * Episode titles and publication strings come from the remote feed and are
 * HTML-escaped before insertion, so markup in feed metadata is shown as text.
 *
 * NOTE(review): the click handler in initPodcastInteractions() looks for
 * `button[data-url]` inside each episode, but the markup built here contains
 * no such button — confirm how episodes become downloadable.
 *
 * @param {string} feedUrl - Feed URL passed to /api/podcast/episodes.
 * @param {?Element} [sourceItem=null] - Result row to mark as selected.
 * @returns {Promise<void>} Failures are logged and shown in the status line.
 */
async function loadEpisodes(feedUrl, sourceItem = null) {
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  setStatus('Loading episodes...', 'info');
  if (sourceItem) {
    elements.podcastResults.querySelectorAll('.list-item').forEach((item) => item.classList.remove('selected'));
    sourceItem.classList.add('selected');
  }
  setListEmpty(elements.episodeResults, 'Loading episodes...');

  try {
    const res = await fetch(`/api/podcast/episodes?feed_url=${encodeURIComponent(feedUrl)}`);
    if (!res.ok) throw new Error('Failed to load episodes');

    const episodes = await res.json();
    if (!episodes.length) {
      setListEmpty(elements.episodeResults, 'No episodes available for this podcast.');
      return;
    }

    elements.episodeResults.innerHTML = '';
    const fragment = document.createDocumentFragment();
    episodes.slice(0, 15).forEach((ep) => {
      const div = document.createElement('div');
      div.className = 'list-item';
      div.innerHTML = `\n${escapeHtml(ep.title)}\n${escapeHtml(ep.published || '')}\n`;
      fragment.appendChild(div);
    });
    elements.episodeResults.appendChild(fragment);
    setStatus('Episodes ready', 'success');
  } catch (err) {
    console.error(err);
    setStatus(err.message, 'error');
    setListEmpty(elements.episodeResults, 'Unable to load episodes right now.');
  }
}
/**
 * Ask the server to download a podcast episode and use the result as the
 * transcription source, reflecting progress on the triggering button.
 *
 * The button is disabled and marked loading during the request, then shows
 * "Ready ✓" or "Retry". Two seconds after the request settles, its original
 * label is restored. The label is captured and restored as markup, so icons
 * or entities in it survive the round trip.
 *
 * @param {string} audioUrl - Remote episode audio URL.
 * @param {string} title - Episode title sent along with the request.
 * @param {?HTMLButtonElement} [triggerButton=null] - Button that started the download.
 * @returns {Promise<void>} Failures are logged and shown in the status line.
 */
async function downloadEpisode(audioUrl, title, triggerButton = null) {
  setStatus('Downloading episode...', 'info');

  let originalLabel = null;
  if (triggerButton) {
    originalLabel = triggerButton.innerHTML;
    triggerButton.disabled = true;
    triggerButton.classList.add('loading');
    triggerButton.textContent = 'Downloading…';
  }

  try {
    const res = await fetch('/api/podcast/download', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ audioUrl, title }),
    });
    if (!res.ok) throw new Error('Episode download failed');

    const data = await res.json();
    state.audioUrl = data.audioUrl;
    state.uploadedFile = null;
    elements.audioPlayer.src = data.audioUrl;
    setStatus('Episode ready', 'success');
    if (triggerButton) {
      triggerButton.textContent = 'Ready ✓';
      triggerButton.classList.add('success');
    }
  } catch (err) {
    console.error(err);
    setStatus(err.message, 'error');
    if (triggerButton) {
      triggerButton.textContent = 'Retry';
      triggerButton.classList.add('error');
    }
  } finally {
    if (triggerButton) {
      triggerButton.disabled = false;
      triggerButton.classList.remove('loading');
      setTimeout(() => {
        triggerButton.classList.remove('success', 'error');
        if (originalLabel) {
          // Restore the markup read earlier; textContent would show it as literal text.
          triggerButton.innerHTML = originalLabel;
        } else {
          triggerButton.textContent = 'Download';
        }
      }, 2000);
    }
  }
}
/**
 * Delegate clicks inside the podcast and episode result lists.
 *
 * - A `button[data-feed]` in the podcast list loads that feed's episodes and
 *   marks the enclosing `.list-item` as the source row.
 * - A `button[data-url]` in the episode list downloads that episode, passing
 *   the button so it can show progress.
 */
function initPodcastInteractions() {
  const { podcastResults, episodeResults } = elements;

  podcastResults.addEventListener('click', ({ target }) => {
    const feedButton = target.closest('button[data-feed]');
    if (feedButton) {
      const row = feedButton.closest('.list-item');
      loadEpisodes(feedButton.dataset.feed, row);
    }
  });

  episodeResults.addEventListener('click', ({ target }) => {
    const episodeButton = target.closest('button[data-url]');
    if (episodeButton) {
      const { url, title } = episodeButton.dataset;
      downloadEpisode(url, title, episodeButton);
    }
  });
}
/**
 * Bind the top-level controls to their handlers: transcribe, summarize, both
 * export buttons, the file input, the YouTube fetch button, and the podcast
 * search button. Pressing Enter in the podcast query field also runs the search.
 */
function initEventBindings() {
  const bindings = [
    [elements.transcribeBtn, 'click', handleTranscription],
    [elements.summaryBtn, 'click', handleSummaryGeneration],
    [elements.exportTranscriptBtn, 'click', handleExportTranscript],
    [elements.exportSummaryBtn, 'click', handleExportSummary],
    [elements.fileInput, 'change', handleFileUpload],
    [elements.youtubeFetch, 'click', handleYoutubeFetch],
    [elements.podcastSearch, 'click', handlePodcastSearch],
  ];
  for (const [node, eventName, handler] of bindings) {
    node.addEventListener(eventName, handler);
  }

  elements.podcastQuery.addEventListener('keydown', (event) => {
    if (event.key !== 'Enter') {
      return;
    }
    event.preventDefault();
    handlePodcastSearch();
  });
}
/**
 * Bootstrap the page: attach all event listeners, reset the backend select
 * and the podcast lists, load the model catalog, and report readiness.
 *
 * The backend falls back to the default already in `state.backend` when the
 * select has no value, so an empty select cannot blank out the backend.
 *
 * @returns {Promise<void>}
 */
async function init() {
  initTabs();
  initSidebarInteractions();
  initAudioInteractions();
  initEventBindings();
  initPodcastInteractions();

  // NOTE(review): this markup contains no <option> elements, so the select is
  // left empty — confirm whether backend options were meant to be rendered here.
  elements.backendSelect.innerHTML = '\n';
  state.backend = elements.backendSelect.value || state.backend;

  setListEmpty(elements.podcastResults, 'Search to discover podcasts.');
  setListEmpty(elements.episodeResults, 'Select a podcast to view episodes.');
  await fetchConfig();
  setStatus('Ready');
}

// Surface start-up failures (for example a missing element) instead of
// leaving an unhandled promise rejection.
init().catch((err) => {
  console.error(err);
});