// Some checks failed
// Security Scan / security (pull_request) Successful in 44s
// Security Scan / dependency-check (pull_request) Successful in 49s
// Test Suite / lint (pull_request) Failing after 40s
// Test Suite / test (3.11) (pull_request) Successful in 1m39s
// Test Suite / build (pull_request) Has been skipped
// 172 lines
// 6.2 KiB
// JavaScript
// 172 lines
// 6.2 KiB
// JavaScript
// Simple script to load Transformers.js from CDN and initialize embedding functionality
|
|
// This approach uses traditional script loading instead of ES6 modules
|
|
|
|
console.log('🔧 Transformers.js loader starting...');
|
|
|
|
// Global state
|
|
window.transformersLibraryLoaded = false;
|
|
window.transformersLibraryLoading = false;
|
|
|
|
// Function to dynamically load a script
|
|
function loadScript(src) {
|
|
return new Promise((resolve, reject) => {
|
|
const script = document.createElement('script');
|
|
script.src = src;
|
|
script.type = 'module';
|
|
script.onload = () => resolve();
|
|
script.onerror = () => reject(new Error(`Failed to load script: ${src}`));
|
|
document.head.appendChild(script);
|
|
});
|
|
}
|
|
|
|
// Function to initialize Transformers.js
|
|
async function initializeTransformers() {
|
|
if (window.transformersLibraryLoaded) {
|
|
console.log('✅ Transformers.js already loaded');
|
|
return true;
|
|
}
|
|
|
|
if (window.transformersLibraryLoading) {
|
|
console.log('⏳ Transformers.js already loading, waiting...');
|
|
// Wait for loading to complete
|
|
while (window.transformersLibraryLoading) {
|
|
await new Promise(resolve => setTimeout(resolve, 100));
|
|
}
|
|
return window.transformersLibraryLoaded;
|
|
}
|
|
|
|
window.transformersLibraryLoading = true;
|
|
|
|
try {
|
|
console.log('📦 Loading Transformers.js from CDN...');
|
|
|
|
// Use dynamic import since this is more reliable with ES modules
|
|
const transformers = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0');
|
|
window.transformersLibrary = transformers;
|
|
window.transformersLibraryLoaded = true;
|
|
|
|
console.log('✅ Transformers.js loaded successfully');
|
|
return true;
|
|
} catch (error) {
|
|
console.error('❌ Failed to load Transformers.js:', error);
|
|
return false;
|
|
} finally {
|
|
window.transformersLibraryLoading = false;
|
|
}
|
|
}
|
|
|
|
// Simple embeddings class
|
|
class SimpleEmbedder {
|
|
constructor() {
|
|
this.pipeline = null;
|
|
this.modelCache = new Map();
|
|
}
|
|
|
|
async generateEmbeddings(texts, modelName = 'Xenova/all-MiniLM-L6-v2') {
|
|
console.log('🔄 Generating embeddings for', texts.length, 'texts with model', modelName);
|
|
|
|
// Ensure Transformers.js is loaded
|
|
if (!window.transformersLibraryLoaded) {
|
|
const loaded = await initializeTransformers();
|
|
if (!loaded) {
|
|
throw new Error('Failed to load Transformers.js');
|
|
}
|
|
}
|
|
|
|
// Create pipeline if not cached
|
|
if (!this.modelCache.has(modelName)) {
|
|
console.log('🏗️ Creating pipeline for', modelName);
|
|
const { pipeline } = window.transformersLibrary;
|
|
this.pipeline = await pipeline('feature-extraction', modelName);
|
|
this.modelCache.set(modelName, this.pipeline);
|
|
} else {
|
|
this.pipeline = this.modelCache.get(modelName);
|
|
}
|
|
|
|
// Generate embeddings
|
|
const embeddings = [];
|
|
for (let i = 0; i < texts.length; i++) {
|
|
console.log(`Processing text ${i + 1}/${texts.length}...`);
|
|
const result = await this.pipeline(texts[i], { pooling: 'mean', normalize: true });
|
|
embeddings.push(Array.from(result.data));
|
|
}
|
|
|
|
console.log('✅ Generated', embeddings.length, 'embeddings');
|
|
return embeddings;
|
|
}
|
|
}
|
|
|
|
// Create global instance
|
|
window.simpleEmbedder = new SimpleEmbedder();
|
|
|
|
// Set up Dash clientside callbacks
|
|
window.dash_clientside = window.dash_clientside || {};
|
|
window.dash_clientside.transformers = {
|
|
generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
|
|
console.log('🚀 Client-side generateEmbeddings called');
|
|
|
|
if (!nClicks || !textContent || textContent.trim().length === 0) {
|
|
console.log('⚠️ Missing required parameters');
|
|
return window.dash_clientside.no_update;
|
|
}
|
|
|
|
try {
|
|
// Tokenize text
|
|
let textChunks;
|
|
const trimmedText = textContent.trim();
|
|
|
|
switch (tokenizationMethod) {
|
|
case 'sentence':
|
|
textChunks = trimmedText.split(/[.!?]+/).map(s => s.trim()).filter(s => s.length > 0);
|
|
break;
|
|
case 'paragraph':
|
|
textChunks = trimmedText.split(/\n\s*\n/).map(s => s.trim()).filter(s => s.length > 0);
|
|
break;
|
|
case 'manual':
|
|
textChunks = trimmedText.split('\n').map(s => s.trim()).filter(s => s.length > 0);
|
|
break;
|
|
default:
|
|
textChunks = [trimmedText];
|
|
}
|
|
|
|
if (textChunks.length === 0) {
|
|
throw new Error('No valid text chunks after tokenization');
|
|
}
|
|
|
|
// Generate embeddings
|
|
const embeddings = await window.simpleEmbedder.generateEmbeddings(textChunks, modelName);
|
|
|
|
// Create documents
|
|
const documents = textChunks.map((text, i) => ({
|
|
id: `text_input_${Date.now()}_${i}`,
|
|
text: text,
|
|
embedding: embeddings[i],
|
|
category: category || "Text Input",
|
|
subcategory: subcategory || "Generated",
|
|
tags: []
|
|
}));
|
|
|
|
return [
|
|
{
|
|
documents: documents,
|
|
embeddings: embeddings
|
|
},
|
|
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
|
|
"success",
|
|
false
|
|
];
|
|
|
|
} catch (error) {
|
|
console.error('❌ Error generating embeddings:', error);
|
|
return [
|
|
{ error: error.message },
|
|
`❌ Error: ${error.message}`,
|
|
"danger",
|
|
false
|
|
];
|
|
}
|
|
}
|
|
};
|
|
|
|
console.log('✅ Simple Transformers.js setup complete');
|
|
console.log('Available functions:', Object.keys(window.dash_clientside.transformers)); |