Spaces:
Running
Running
ping98k
Refactor K-Means clustering implementation; modularize embedding and clustering logic, enhance heatmap and scatter plot functions, and improve cluster naming process.
12c4198
| import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]'; | |
// Shared feature-extraction pipeline used by every embedding helper in this module.
// It is created once, at module load, through top-level await, so importing this
// module does not complete until `pipeline(...)` has resolved.
const EMBEDDING_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX";

// Options handed to `pipeline` unchanged: run on the "webgpu" device with the "q4f16" dtype.
const EMBEDDING_PIPELINE_OPTIONS = { device: "webgpu", dtype: "q4f16" };

const embed = await pipeline(
  "feature-extraction",
  EMBEDDING_MODEL_ID,
  EMBEDDING_PIPELINE_OPTIONS,
);
/**
 * Turns the result of `embed(prompts, …)` into an array with one vector per prompt.
 *
 * Uses `out.tolist()` when available. Otherwise the flat `out.data` buffer is cut
 * into `rowCount` equally sized rows, so callers never receive a flat buffer where
 * they expect a list of vectors.
 *
 * @param {{tolist?: Function, data?: ArrayLike<number>}} out - Value returned by `embed`.
 * @param {number} rowCount - Number of prompts that were embedded.
 * @returns {number[][]} One vector per prompt.
 * @throws {Error} If the flat buffer cannot be divided evenly into `rowCount` rows.
 */
function splitPooledVectors(out, rowCount) {
  if (typeof out.tolist === 'function') {
    return out.tolist();
  }
  const flat = out.data;
  const dim = flat.length / rowCount;
  if (!Number.isInteger(dim)) {
    throw new Error(
      `getGroupEmbeddings: cannot split ${flat.length} values into ${rowCount} embeddings`,
    );
  }
  return Array.from({ length: rowCount }, (_, row) =>
    Array.from(flat.slice(row * dim, (row + 1) * dim)),
  );
}

/**
 * Computes the element-wise mean of a non-empty list of equally sized vectors.
 *
 * @param {ArrayLike<number>[]} vectors - Vectors to average; must contain at least one.
 * @returns {Float32Array} The mean vector.
 */
function averageVectors(vectors) {
  const dim = vectors[0].length;
  const mean = new Float32Array(dim);
  for (const vector of vectors) {
    for (let i = 0; i < dim; i++) mean[i] += vector[i];
  }
  for (let i = 0; i < dim; i++) mean[i] /= vectors.length;
  return mean;
}

/**
 * Computes one embedding per text group by embedding every usable line of the
 * group and averaging the resulting vectors.
 *
 * A line is usable when, after trimming, it is non-empty and does not start
 * with `##`. Groups are processed one after another, each with a single
 * `embed` call.
 *
 * NOTE(review): the Qwen3-Embedding model card appears to use last-token pooling;
 * mean pooling is kept here exactly as in the original — confirm it is intentional.
 *
 * @param {Iterable<string>} groups - Newline-separated blocks of text.
 * @param {string} task - Instruction text placed in the `Instruct:` part of each prompt.
 * @returns {Promise<Float32Array[]>} One averaged vector per group, in input order.
 * @throws {Error} If a group contains no usable lines (there is nothing to average).
 */
export async function getGroupEmbeddings(groups, task) {
  const groupEmbeddings = [];
  let groupIndex = 0;
  for (const g of groups) {
    // Drop blank lines and lines starting with ## before embedding.
    const lines = g.split(/\n/)
      .map(x => x.trim())
      .filter(x => x && !x.startsWith('##'));
    if (lines.length === 0) {
      // Without this guard the model would be called with no input and the
      // averaging step would fail on a missing first vector.
      throw new Error(`getGroupEmbeddings: group ${groupIndex} has no embeddable lines`);
    }
    const prompts = lines.map(s => `Instruct: ${task}\nQuery:${s}`);
    const out = await embed(prompts, { pooling: "mean", normalize: true });
    const vectors = splitPooledVectors(out, prompts.length);
    groupEmbeddings.push(averageVectors(vectors));
    groupIndex += 1;
  }
  return groupEmbeddings;
}
/**
 * Embeds each line separately and returns one vector per line.
 *
 * Every line is wrapped in the `Instruct: <task>\nQuery:<line>` prompt and all
 * prompts are sent to `embed` in a single call. Lines are used as given; no
 * trimming or filtering is applied.
 *
 * NOTE(review): the Qwen3-Embedding model card appears to use last-token pooling;
 * mean pooling is kept here exactly as in the original — confirm it is intentional.
 *
 * @param {string[]} lines - Texts to embed.
 * @param {string} task - Instruction text placed in the `Instruct:` part of each prompt.
 * @returns {Promise<number[][]>} One vector per input line, in input order
 *   (an empty array when `lines` is empty).
 * @throws {Error} If `embed` returns a flat buffer that cannot be divided evenly
 *   into one row per line.
 */
export async function getLineEmbeddings(lines, task) {
  // Nothing to embed: skip the model call entirely.
  if (lines.length === 0) {
    return [];
  }

  const prompts = lines.map(s => `Instruct: ${task}\nQuery:${s}`);
  const out = await embed(prompts, { pooling: "mean", normalize: true });

  if (typeof out.tolist === 'function') {
    return out.tolist();
  }

  // Fallback: `out.data` is a flat buffer, so cut it into one row per prompt
  // instead of handing callers a 1-D array.
  const flat = out.data;
  const dim = flat.length / prompts.length;
  if (!Number.isInteger(dim)) {
    throw new Error(
      `getLineEmbeddings: cannot split ${flat.length} values into ${prompts.length} embeddings`,
    );
  }
  return Array.from({ length: prompts.length }, (_, row) =>
    Array.from(flat.slice(row * dim, (row + 1) * dim)),
  );
}