Compare commits
6 commits: v0.2.0...add-browse

SHA1
bced5e07ce
cdaaffd735
14abc446b7
1b6845774b
09e3c86f0a
9cf2f0e6fa
@@ -3,7 +3,8 @@
     "allow": [
       "Bash(mkdir:*)",
       "Bash(uv run:*)",
-      "Bash(uv add:*)"
+      "Bash(uv add:*)",
+      "Bash(uv sync:*)"
     ],
     "deny": [],
     "ask": [],
5  .gitignore  (vendored)
@@ -81,4 +81,7 @@ safety-report.json
 pip-audit-report.json
 
 # Temporary files
 *.tmp
+
+
+examples/extra
278  assets/embeddings.js  (new file)
@@ -0,0 +1,278 @@
// Text input embedding generation using Transformers.js
// This module runs entirely in the browser for privacy and performance

// Global flag to track initialization
window.transformersLoading = false;
window.transformersLoaded = false;

class TransformersEmbedder {
  constructor() {
    this.extractor = null;
    this.currentModel = null;
    this.modelCache = new Map();
    this.isLoading = false;
  }

  async initializeModel(modelName = 'Xenova/all-MiniLM-L6-v2') {
    try {
      if (this.modelCache.has(modelName)) {
        this.extractor = this.modelCache.get(modelName);
        this.currentModel = modelName;
        return { success: true, model: modelName };
      }

      if (this.isLoading) {
        return { success: false, error: 'Model loading already in progress' };
      }

      this.isLoading = true;

      // Use globally loaded Transformers.js pipeline
      if (!window.transformers) {
        if (!window.transformersPipeline) {
          // Wait for the pipeline to load
          let attempts = 0;
          while (!window.transformersPipeline && attempts < 50) { // Wait up to 5 seconds
            await new Promise(resolve => setTimeout(resolve, 100));
            attempts++;
          }
          if (!window.transformersPipeline) {
            throw new Error('Transformers.js pipeline not available. Please refresh the page.');
          }
        }
        window.transformers = { pipeline: window.transformersPipeline };
        window.transformersLoaded = true;
        console.log('✅ Using globally loaded Transformers.js pipeline');
      }

      // Show loading progress to user
      if (window.updateModelLoadingProgress) {
        window.updateModelLoadingProgress(0, `Loading ${modelName}...`);
      }

      this.extractor = await window.transformers.pipeline('feature-extraction', modelName, {
        progress_callback: (data) => {
          if (window.updateModelLoadingProgress && data.progress !== undefined) {
            const progress = Math.round(data.progress);
            window.updateModelLoadingProgress(progress, data.status || 'Loading...');
          }
        }
      });

      this.modelCache.set(modelName, this.extractor);
      this.currentModel = modelName;
      this.isLoading = false;

      if (window.updateModelLoadingProgress) {
        window.updateModelLoadingProgress(100, 'Model loaded successfully');
      }

      return { success: true, model: modelName };
    } catch (error) {
      this.isLoading = false;
      console.error('Model initialization error:', error);
      return { success: false, error: error.message };
    }
  }

  async generateEmbeddings(texts, options = {}) {
    if (!this.extractor) {
      throw new Error('Model not initialized. Call initializeModel() first.');
    }

    if (!texts || texts.length === 0) {
      throw new Error('No texts provided for embedding generation.');
    }

    const embeddings = [];
    const defaultOptions = {
      pooling: 'mean',
      normalize: true,
      ...options
    };

    // Process in batches to avoid memory issues
    const batchSize = options.batchSize || 8;

    try {
      for (let i = 0; i < texts.length; i += batchSize) {
        const batch = texts.slice(i, i + batchSize);

        const batchResults = await Promise.all(
          batch.map(text => {
            if (!text || text.trim().length === 0) {
              throw new Error('Empty text found in batch');
            }
            return this.extractor(text.trim(), defaultOptions);
          })
        );

        // Convert tensor output to arrays
        batchResults.forEach((result, idx) => {
          if (result && result.data) {
            embeddings.push(Array.from(result.data));
          } else {
            throw new Error(`Invalid embedding result for text: ${batch[idx]}`);
          }
        });

        // Update progress
        const progress = Math.min(100, ((i + batch.length) / texts.length) * 100);
        if (window.updateEmbeddingProgress) {
          window.updateEmbeddingProgress(progress, `Processing ${i + batch.length}/${texts.length} texts`);
        }
      }

      if (window.updateEmbeddingProgress) {
        window.updateEmbeddingProgress(100, `Generated ${embeddings.length} embeddings successfully`);
      }

      return embeddings;
    } catch (error) {
      console.error('Embedding generation error:', error);
      throw error;
    }
  }
}

// Global instance
window.transformersEmbedder = new TransformersEmbedder();
console.log('📦 TransformersEmbedder instance created');

// Global progress update functions
window.updateModelLoadingProgress = function(progress, status) {
  const progressBar = document.getElementById('model-loading-progress');
  const statusText = document.getElementById('model-loading-status');
  if (progressBar) {
    progressBar.style.width = progress + '%';
    progressBar.setAttribute('aria-valuenow', progress);
  }
  if (statusText) {
    statusText.textContent = status;
  }
};

window.updateEmbeddingProgress = function(progress, status) {
  const progressBar = document.getElementById('embedding-progress');
  const statusText = document.getElementById('embedding-status');
  if (progressBar) {
    progressBar.style.width = progress + '%';
    progressBar.setAttribute('aria-valuenow', progress);
  }
  if (statusText) {
    statusText.textContent = status;
  }
};

// Dash clientside callback functions
window.dash_clientside = window.dash_clientside || {};
console.log('🔧 Setting up window.dash_clientside.transformers');
window.dash_clientside.transformers = {
  generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
    console.log('🚀 generateEmbeddings called with:', { nClicks, modelName, tokenizationMethod, textLength: textContent?.length });

    if (!nClicks || !textContent || textContent.trim().length === 0) {
      console.log('⚠️ Early return - missing required parameters');
      return window.dash_clientside.no_update;
    }

    try {
      // Initialize model if needed
      const initResult = await window.transformersEmbedder.initializeModel(modelName);
      if (!initResult.success) {
        return [
          { error: initResult.error },
          `❌ Model loading error: ${initResult.error}`,
          "danger",
          false
        ];
      }

      // Tokenize text based on method
      let textChunks;
      const trimmedText = textContent.trim();

      switch (tokenizationMethod) {
        case 'sentence':
          // Simple sentence splitting - can be enhanced with proper NLP
          textChunks = trimmedText
            .split(/[.!?]+/)
            .map(s => s.trim())
            .filter(s => s.length > 0);
          break;
        case 'paragraph':
          textChunks = trimmedText
            .split(/\n\s*\n/)
            .map(s => s.trim())
            .filter(s => s.length > 0);
          break;
        case 'manual':
          textChunks = trimmedText
            .split('\n')
            .map(s => s.trim())
            .filter(s => s.length > 0);
          break;
        default:
          textChunks = [trimmedText];
      }

      if (textChunks.length === 0) {
        return [
          { error: 'No valid text chunks found after tokenization' },
          '❌ Error: No valid text chunks found after tokenization',
          "danger",
          false
        ];
      }

      // Generate embeddings
      const embeddings = await window.transformersEmbedder.generateEmbeddings(textChunks);

      if (!embeddings || embeddings.length !== textChunks.length) {
        return [
          { error: 'Embedding generation failed - mismatch in text chunks and embeddings' },
          '❌ Error: Embedding generation failed',
          "danger",
          false
        ];
      }

      // Create documents structure
      const documents = textChunks.map((text, i) => ({
        id: `text_input_${Date.now()}_${i}`,
        text: text,
        embedding: embeddings[i],
        category: category || "Text Input",
        subcategory: subcategory || "Generated",
        tags: []
      }));

      return [
        {
          documents: documents,
          embeddings: embeddings
        },
        `✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
        "success",
        false
      ];

    } catch (error) {
      console.error('Client-side embedding error:', error);
      return [
        { error: error.message },
        `❌ Error: ${error.message}`,
        "danger",
        false
      ];
    }
  }
};

console.log('✅ Transformers.js client-side setup complete');
console.log('Available:', {
  transformersEmbedder: !!window.transformersEmbedder,
  dashClientside: !!window.dash_clientside,
  transformersModule: !!window.dash_clientside?.transformers,
  generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings
});
9  assets/package.json  (new file)
@@ -0,0 +1,9 @@
{
  "name": "embeddingbuddy-assets",
  "version": "1.0.0",
  "description": "JavaScript dependencies for EmbeddingBuddy text input functionality",
  "dependencies": {
    "@huggingface/transformers": "^3.0.0"
  },
  "type": "module"
}
106  assets/sample-txt.md  (new file)
@@ -0,0 +1,106 @@
The sun peeked through the clouds after a drizzly morning.
A gentle breeze rustled the leaves as we walked along the shoreline.
Heavy rains caused flooding in several low-lying neighborhoods.
It was so hot that even the birds sought shade under the palm trees.
By midnight, the temperature had dropped below freezing.
Thunderstorms lit up the sky with flashes of lightning.
A thick fog settled over the city streets at dawn.
The air smelled of ozone after the sudden hailstorm.
I watched the snowflakes drift silently onto the ground.
A double rainbow appeared after the rain shower.
The humidity soared to uncomfortable levels by midday.
Dust devils formed in the dry desert plains.
The barometer readings indicated an approaching front.
A sudden gust of wind knocked over the garden chairs.
Light drizzle turned into a torrential downpour within minutes.
The new smartphone features a foldable display and 5G connectivity.
In the world of AI, transformers have revolutionized natural language processing.
Quantum computing promises to solve problems beyond classical computers' reach.
Blockchain technology is being explored for secure voting systems.
Virtual reality headsets are becoming more affordable and accessible.
The rise of electric vehicles is reshaping the automotive industry.
Cloud computing allows businesses to scale resources dynamically.
Machine learning algorithms can now predict stock market trends with surprising accuracy.
Augmented reality applications are transforming retail experiences.
The Internet of Things connects everyday devices to the web for smarter living.
Cybersecurity threats are evolving, requiring constant vigilance.
3D printing is enabling rapid prototyping and custom manufacturing.
Edge computing reduces latency by processing data closer to the source.
Biometric authentication methods are enhancing security in devices.
Wearable technology is tracking health metrics in real-time.
Artificial intelligence is being used to create realistic deepfakes.
Preheat the oven to 375°F before you start mixing the batter.
She finely chopped the garlic and sautéed it in two tablespoons of olive oil.
A pinch of saffron adds a beautiful color and aroma to traditional paella.
If the soup is too salty, add a peeled potato to absorb excess sodium.
Let the bread dough rise for at least an hour in a warm, draft-free spot.
Marinate the chicken overnight in a blend of citrus and spices.
Use a cast-iron skillet to sear the steak on high heat.
Whisk the egg whites until they form stiff peaks.
Fold in the chocolate chips gently to keep the batter airy.
Brush the pastry with an egg wash for a golden finish.
Slow-roast the pork shoulder until it falls off the bone.
Garnish the salad with toasted nuts and fresh herbs.
Deglaze the pan with white wine for a rich sauce.
Simmer the curry paste until the aroma intensifies.
Let the risotto rest before serving to thicken slightly.
He dribbled past two defenders and sank a three-pointer at the buzzer.
The marathon runner kept a steady pace despite the sweltering heat.
Their home team clinched the championship with a last-minute goal.
NASCAR fans cheered as the cars roared around the oval track.
She landed a perfect triple axel at the figure skating championship.
The cyclist pedaled up the steep hill in record time.
He pitched a no-hitter during the high school baseball game.
The quarterback threw a touchdown pass under heavy pressure.
They scored a hat-trick in the hockey final.
The boxer delivered a swift uppercut in the final round.
Surfers caught massive waves at dawn on the Pacific coast.
Fans erupted when the underdog scored the winning goal.
The swimmer broke the national record in the 200m freestyle.
The gymnast executed a flawless routine on the balance beam.
The rugby team celebrated their victory with a traditional haka.
The stock market rallied after positive earnings reports.
Investors are closely watching interest rate changes by the Federal Reserve.
Cryptocurrency prices have been extremely volatile this year.
Diversification is key to managing investment risk effectively.
Inflation rates have reached a 40-year high, impacting consumer spending.
Many companies are adopting ESG criteria to attract socially conscious investors.
The bond market is reacting to geopolitical tensions and supply chain disruptions.
Venture capital funding for startups has surged in the tech sector.
Exchange-traded funds (ETFs) offer a way to invest in diversified portfolios.
The global economy is recovering from the pandemic, but challenges remain.
Central banks are exploring digital currencies to modernize payment systems.
Retail investors are increasingly participating in the stock market through apps.
Hedge funds are using complex algorithms to gain an edge in trading.
Real estate prices have skyrocketed in urban areas due to low inventory.
The startup raised $10 million in its Series A funding round.
The symphony orchestra played a hauntingly beautiful melody.
She strummed her guitar softly, filling the room with a warm sound.
The DJ mixed tracks seamlessly, keeping the crowd dancing all night.
His voice soared during the high notes of the ballad.
The band played an acoustic set in the intimate coffee shop.
Jazz musicians often improvise solos based on the chord changes.
The opera singer hit the high C with perfect pitch.
The choir harmonized beautifully, filling the church with sound.
He composed a symphony that was performed at the concert hall.
The singer-songwriter wrote heartfelt lyrics about love and loss.
The rock band headlined the festival, drawing a massive crowd.
Hip-hop artists use rhythm and rhyme to tell powerful stories.
The violinist played a virtuosic solo that left the audience in awe.
Folk music often reflects the culture and traditions of a community.
The gospel choir lifted spirits with their uplifting performance.
The fall of the Berlin Wall in 1989 marked the end of the Cold War.
Ancient Egypt's pyramids are a testament to their architectural prowess.
Europe's Renaissance period sparked a revival in art and science.
The signing of the Declaration of Independence in 1776 established the United States.
The Industrial Revolution transformed economies and societies worldwide.
Rome was the center of a vast empire that influenced law and governance.
The discovery of the New World by Christopher Columbus in 1492 changed global trade.
The French Revolution in 1789 led to significant political and social change.
World War II was a global conflict that reshaped international relations.
The fall of the Roman Empire in 476 AD marked the beginning of the Middle Ages.
The invention of the printing press revolutionized the spread of knowledge.
The Cold War was characterized by political tension between the U.S. and the Soviet Union.
The ancient Silk Road connected East and West through trade routes.
The signing of the Magna Carta in 1215 established principles of due process.
Exploration during the Age of Discovery expanded European empires across the globe.
172  assets/transformers-loader.js  (new file)
@@ -0,0 +1,172 @@
// Simple script to load Transformers.js from CDN and initialize embedding functionality
// This approach uses traditional script loading instead of ES6 modules

console.log('🔧 Transformers.js loader starting...');

// Global state
window.transformersLibraryLoaded = false;
window.transformersLibraryLoading = false;

// Function to dynamically load a script
function loadScript(src) {
  return new Promise((resolve, reject) => {
    const script = document.createElement('script');
    script.src = src;
    script.type = 'module';
    script.onload = () => resolve();
    script.onerror = () => reject(new Error(`Failed to load script: ${src}`));
    document.head.appendChild(script);
  });
}

// Function to initialize Transformers.js
async function initializeTransformers() {
  if (window.transformersLibraryLoaded) {
    console.log('✅ Transformers.js already loaded');
    return true;
  }

  if (window.transformersLibraryLoading) {
    console.log('⏳ Transformers.js already loading, waiting...');
    // Wait for loading to complete
    while (window.transformersLibraryLoading) {
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    return window.transformersLibraryLoaded;
  }

  window.transformersLibraryLoading = true;

  try {
    console.log('📦 Loading Transformers.js from CDN...');

    // Use dynamic import since this is more reliable with ES modules
    const transformers = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0');
    window.transformersLibrary = transformers;
    window.transformersLibraryLoaded = true;

    console.log('✅ Transformers.js loaded successfully');
    return true;
  } catch (error) {
    console.error('❌ Failed to load Transformers.js:', error);
    return false;
  } finally {
    window.transformersLibraryLoading = false;
  }
}

// Simple embeddings class
class SimpleEmbedder {
  constructor() {
    this.pipeline = null;
    this.modelCache = new Map();
  }

  async generateEmbeddings(texts, modelName = 'Xenova/all-MiniLM-L6-v2') {
    console.log('🔄 Generating embeddings for', texts.length, 'texts with model', modelName);

    // Ensure Transformers.js is loaded
    if (!window.transformersLibraryLoaded) {
      const loaded = await initializeTransformers();
      if (!loaded) {
        throw new Error('Failed to load Transformers.js');
      }
    }

    // Create pipeline if not cached
    if (!this.modelCache.has(modelName)) {
      console.log('🏗️ Creating pipeline for', modelName);
      const { pipeline } = window.transformersLibrary;
      this.pipeline = await pipeline('feature-extraction', modelName);
      this.modelCache.set(modelName, this.pipeline);
    } else {
      this.pipeline = this.modelCache.get(modelName);
    }

    // Generate embeddings
    const embeddings = [];
    for (let i = 0; i < texts.length; i++) {
      console.log(`Processing text ${i + 1}/${texts.length}...`);
      const result = await this.pipeline(texts[i], { pooling: 'mean', normalize: true });
      embeddings.push(Array.from(result.data));
    }

    console.log('✅ Generated', embeddings.length, 'embeddings');
    return embeddings;
  }
}

// Create global instance
window.simpleEmbedder = new SimpleEmbedder();

// Set up Dash clientside callbacks
window.dash_clientside = window.dash_clientside || {};
window.dash_clientside.transformers = {
  generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
    console.log('🚀 Client-side generateEmbeddings called');

    if (!nClicks || !textContent || textContent.trim().length === 0) {
      console.log('⚠️ Missing required parameters');
      return window.dash_clientside.no_update;
    }

    try {
      // Tokenize text
      let textChunks;
      const trimmedText = textContent.trim();

      switch (tokenizationMethod) {
        case 'sentence':
          textChunks = trimmedText.split(/[.!?]+/).map(s => s.trim()).filter(s => s.length > 0);
          break;
        case 'paragraph':
          textChunks = trimmedText.split(/\n\s*\n/).map(s => s.trim()).filter(s => s.length > 0);
          break;
        case 'manual':
          textChunks = trimmedText.split('\n').map(s => s.trim()).filter(s => s.length > 0);
          break;
        default:
          textChunks = [trimmedText];
      }

      if (textChunks.length === 0) {
        throw new Error('No valid text chunks after tokenization');
      }

      // Generate embeddings
      const embeddings = await window.simpleEmbedder.generateEmbeddings(textChunks, modelName);

      // Create documents
      const documents = textChunks.map((text, i) => ({
        id: `text_input_${Date.now()}_${i}`,
        text: text,
        embedding: embeddings[i],
        category: category || "Text Input",
        subcategory: subcategory || "Generated",
        tags: []
      }));

      return [
        {
          documents: documents,
          embeddings: embeddings
        },
        `✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
        "success",
        false
      ];

    } catch (error) {
      console.error('❌ Error generating embeddings:', error);
      return [
        { error: error.message },
        `❌ Error: ${error.message}`,
        "danger",
        false
      ];
    }
  }
};

console.log('✅ Simple Transformers.js setup complete');
console.log('Available functions:', Object.keys(window.dash_clientside.transformers));
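Note that transformers-loader.js assigns window.dash_clientside.transformers.generateEmbeddings just as assets/embeddings.js does. If Dash serves assets in its default alphanumeric order, embeddings.js loads first and transformers-loader.js then overwrites the handler, so only one of the two registrations (the SimpleEmbedder-based one) would actually be invoked by the clientside callback.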
157  example/README_elasticsearch.md  (new file)
@@ -0,0 +1,157 @@
# Elasticsearch/OpenSearch Sample Data

This directory contains sample data files in Elasticsearch bulk index format for testing the OpenSearch integration in EmbeddingBuddy.

## Files

### Original NDJSON Files

- `sample_data.ndjson` - Original sample documents in EmbeddingBuddy format
- `sample_prompts.ndjson` - Original sample prompts in EmbeddingBuddy format

### Elasticsearch Bulk Files

- `sample_data_es_bulk.ndjson` - Documents in ES bulk format (index: "embeddings")
- `sample_prompts_es_bulk.ndjson` - Prompts in ES bulk format (index: "prompts")

## Usage

### 1. Index the data using curl

```bash
# Index main documents
curl -X POST "localhost:9200/_bulk" \
  -H "Content-Type: application/x-ndjson" \
  --data-binary @sample_data_es_bulk.ndjson

# Index prompts
curl -X POST "localhost:9200/_bulk" \
  -H "Content-Type: application/x-ndjson" \
  --data-binary @sample_prompts_es_bulk.ndjson
```

### 2. Create proper mappings (recommended)

First create the index with proper dense_vector mapping:

```bash
# Create embeddings index with dense_vector mapping
curl -X PUT "localhost:9200/embeddings" \
  -H "Content-Type: application/json" \
  -d '{
    "settings": {
      "index.knn": true
    },
    "mappings": {
      "properties": {
        "id": {"type": "keyword"},
        "embedding": {
          "type": "knn_vector",
          "dimension": 8,
          "method": {
            "engine": "lucene",
            "space_type": "cosinesimil",
            "name": "hnsw",
            "parameters": {}
          }
        },
        "text": {"type": "text"},
        "category": {"type": "keyword"},
        "subcategory": {"type": "keyword"},
        "tags": {"type": "keyword"}
      }
    }
  }'

# Create prompts index with the same mapping
curl -X PUT "localhost:9200/prompts" \
  -H "Content-Type: application/json" \
  -d '{
    "settings": {
      "index.knn": true
    },
    "mappings": {
      "properties": {
        "id": {"type": "keyword"},
        "embedding": {
          "type": "knn_vector",
          "dimension": 8,
          "method": {
            "engine": "lucene",
            "space_type": "cosinesimil",
            "name": "hnsw",
            "parameters": {}
          }
        },
        "text": {"type": "text"},
        "category": {"type": "keyword"},
        "subcategory": {"type": "keyword"},
        "tags": {"type": "keyword"}
      }
    }
  }'
```

Then index the data using the bulk files above.

### 3. Test in EmbeddingBuddy

#### For "embeddings" index

- **OpenSearch URL**: `http://localhost:9200`
- **Index Name**: `embeddings`
- **Field Mapping**:
  - Embedding Field: `embedding`
  - Text Field: `text`
  - ID Field: `id`
  - Category Field: `category`
  - Subcategory Field: `subcategory`
  - Tags Field: `tags`

#### For "embeddings-dense" index (alternative field names)

- **OpenSearch URL**: `http://localhost:9200`
- **Index Name**: `embeddings-dense`
- **Field Mapping**:
  - Embedding Field: `vector`
  - Text Field: `content`
  - ID Field: `doc_id`
  - Category Field: `type`
  - Subcategory Field: `subtopic`
  - Tags Field: `keywords`

## Data Structure

### Original Format (from NDJSON files)

```json
{
  "id": "doc_001",
  "embedding": [0.2, -0.1, 0.8, 0.3, -0.5, 0.7, 0.1, -0.3],
  "text": "Machine learning algorithms are transforming healthcare...",
  "category": "technology",
  "subcategory": "healthcare",
  "tags": ["ai", "medicine", "prediction"]
}
```

### ES Bulk Format

```json
{"index": {"_index": "embeddings", "_id": "doc_001"}}
{"id": "doc_001", "embedding": [...], "text": "...", "category": "...", ...}
```

### Alternative Field Names (dense vector format)

```json
{"index": {"_index": "embeddings-dense", "_id": "doc_001"}}
{"doc_id": "doc_001", "vector": [...], "content": "...", "type": "...", ...}
```

## Notes

- All embedding vectors are 8-dimensional for these sample files
- The alternative format demonstrates how EmbeddingBuddy's field mapping handles different field names
- For production use, you may want larger embedding dimensions (e.g., 384, 768, 1536)
- The `dense_vector` field type in Elasticsearch/OpenSearch enables vector similarity search
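The bulk files above simply pair an action line with each source document. As a rough illustration (not part of this changeset), a converter along these lines could regenerate them from the original NDJSON files; the file and index names are taken from the README, while the function itself is hypothetical:

```python
import json


def ndjson_to_es_bulk(src_path: str, dest_path: str, index_name: str) -> None:
    """Emit an action line plus a document line per record, as _bulk expects."""
    with open(src_path) as src, open(dest_path, "w") as dest:
        for line in src:
            line = line.strip()
            if not line:
                continue
            doc = json.loads(line)
            # Reuse the document's own id as the Elasticsearch _id.
            dest.write(json.dumps({"index": {"_index": index_name, "_id": doc["id"]}}) + "\n")
            dest.write(json.dumps(doc) + "\n")


ndjson_to_es_bulk("sample_data.ndjson", "sample_data_es_bulk.ndjson", "embeddings")
ndjson_to_es_bulk("sample_prompts.ndjson", "sample_prompts_es_bulk.ndjson", "prompts")
```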
40  example/sample_data_es_bulk.ndjson  (new file)
@@ -0,0 +1,40 @@
{"index": {"_index": "embeddings", "_id": "doc_001"}}
{"id": "doc_001", "embedding": [0.2, -0.1, 0.8, 0.3, -0.5, 0.7, 0.1, -0.3], "text": "Machine learning algorithms are transforming healthcare by enabling predictive analytics and personalized medicine.", "category": "technology", "subcategory": "healthcare", "tags": ["ai", "medicine", "prediction"]}
{"index": {"_index": "embeddings", "_id": "doc_002"}}
{"id": "doc_002", "embedding": [0.1, 0.4, -0.2, 0.6, 0.3, -0.4, 0.8, 0.2], "text": "Climate change poses significant challenges to global food security and agricultural sustainability.", "category": "environment", "subcategory": "agriculture", "tags": ["climate", "food", "sustainability"]}
{"index": {"_index": "embeddings", "_id": "doc_003"}}
{"id": "doc_003", "embedding": [-0.3, 0.7, 0.1, -0.2, 0.9, 0.4, -0.1, 0.5], "text": "The rise of electric vehicles is reshaping the automotive industry and urban transportation systems.", "category": "technology", "subcategory": "automotive", "tags": ["electric", "transport", "urban"]}
{"index": {"_index": "embeddings", "_id": "doc_004"}}
{"id": "doc_004", "embedding": [0.5, -0.6, 0.3, 0.8, -0.2, 0.1, 0.7, -0.4], "text": "Renewable energy sources like solar and wind are becoming increasingly cost-competitive with fossil fuels.", "category": "environment", "subcategory": "energy", "tags": ["renewable", "solar", "wind"]}
{"index": {"_index": "embeddings", "_id": "doc_005"}}
{"id": "doc_005", "embedding": [0.8, 0.2, -0.5, 0.1, 0.6, -0.3, 0.4, 0.9], "text": "Financial markets are experiencing volatility due to geopolitical tensions and inflation concerns.", "category": "finance", "subcategory": "markets", "tags": ["volatility", "inflation", "geopolitics"]}
{"index": {"_index": "embeddings", "_id": "doc_006"}}
{"id": "doc_006", "embedding": [-0.1, 0.5, 0.7, -0.4, 0.2, 0.8, -0.6, 0.3], "text": "Quantum computing research is advancing rapidly with potential applications in cryptography and drug discovery.", "category": "technology", "subcategory": "research", "tags": ["quantum", "cryptography", "research"]}
{"index": {"_index": "embeddings", "_id": "doc_007"}}
{"id": "doc_007", "embedding": [0.4, -0.3, 0.6, 0.7, -0.8, 0.2, 0.5, -0.1], "text": "Ocean pollution from plastic waste is threatening marine ecosystems and biodiversity worldwide.", "category": "environment", "subcategory": "marine", "tags": ["pollution", "plastic", "marine"]}
{"index": {"_index": "embeddings", "_id": "doc_008"}}
{"id": "doc_008", "embedding": [0.3, 0.8, -0.2, 0.5, 0.1, -0.7, 0.6, 0.4], "text": "Artificial intelligence is revolutionizing customer service through chatbots and automated support systems.", "category": "technology", "subcategory": "customer_service", "tags": ["ai", "chatbots", "automation"]}
{"index": {"_index": "embeddings", "_id": "doc_009"}}
{"id": "doc_009", "embedding": [-0.5, 0.3, 0.9, -0.1, 0.7, 0.4, -0.2, 0.8], "text": "Global supply chains are being redesigned for resilience after pandemic-related disruptions.", "category": "business", "subcategory": "logistics", "tags": ["supply_chain", "pandemic", "resilience"]}
{"index": {"_index": "embeddings", "_id": "doc_010"}}
{"id": "doc_010", "embedding": [0.7, -0.4, 0.2, 0.9, -0.3, 0.6, 0.1, -0.8], "text": "Space exploration missions are expanding our understanding of the solar system and potential for life.", "category": "science", "subcategory": "space", "tags": ["space", "exploration", "life"]}
{"index": {"_index": "embeddings", "_id": "doc_011"}}
{"id": "doc_011", "embedding": [-0.2, 0.6, 0.4, -0.7, 0.8, 0.3, -0.5, 0.1], "text": "Cryptocurrency adoption is growing among institutional investors despite regulatory uncertainties.", "category": "finance", "subcategory": "crypto", "tags": ["cryptocurrency", "institutional", "regulation"]}
{"index": {"_index": "embeddings", "_id": "doc_012"}}
{"id": "doc_012", "embedding": [0.6, 0.1, -0.8, 0.4, 0.5, -0.2, 0.9, -0.3], "text": "Remote work technologies are transforming traditional office environments and work-life balance.", "category": "technology", "subcategory": "workplace", "tags": ["remote", "work", "balance"]}
{"index": {"_index": "embeddings", "_id": "doc_013"}}
{"id": "doc_013", "embedding": [0.1, -0.7, 0.5, 0.8, -0.4, 0.3, 0.2, 0.6], "text": "Gene therapy breakthroughs are offering new hope for treating previously incurable genetic diseases.", "category": "science", "subcategory": "medicine", "tags": ["gene_therapy", "genetics", "medicine"]}
{"index": {"_index": "embeddings", "_id": "doc_014"}}
{"id": "doc_014", "embedding": [-0.4, 0.2, 0.7, -0.1, 0.9, -0.6, 0.3, 0.5], "text": "Urban planning is evolving to create more sustainable and livable cities for growing populations.", "category": "environment", "subcategory": "urban", "tags": ["urban_planning", "sustainability", "cities"]}
{"index": {"_index": "embeddings", "_id": "doc_015"}}
{"id": "doc_015", "embedding": [0.9, -0.1, 0.3, 0.6, -0.5, 0.8, -0.2, 0.4], "text": "Social media platforms are implementing new policies to combat misinformation and protect user privacy.", "category": "technology", "subcategory": "social_media", "tags": ["social_media", "misinformation", "privacy"]}
{"index": {"_index": "embeddings", "_id": "doc_016"}}
{"id": "doc_016", "embedding": [-0.3, 0.8, -0.1, 0.4, 0.7, -0.5, 0.6, -0.9], "text": "Educational technology is personalizing learning experiences and improving student outcomes.", "category": "education", "subcategory": "technology", "tags": ["education", "personalization", "technology"]}
{"index": {"_index": "embeddings", "_id": "doc_017"}}
{"id": "doc_017", "embedding": [0.5, 0.3, -0.6, 0.2, 0.8, 0.1, -0.4, 0.7], "text": "Biodiversity conservation efforts are critical for maintaining ecosystem balance and preventing species extinction.", "category": "environment", "subcategory": "conservation", "tags": ["biodiversity", "conservation", "extinction"]}
{"index": {"_index": "embeddings", "_id": "doc_018"}}
{"id": "doc_018", "embedding": [0.2, -0.8, 0.4, 0.7, -0.1, 0.5, 0.9, -0.3], "text": "Healthcare systems are adopting telemedicine to improve access and reduce costs for patients.", "category": "technology", "subcategory": "healthcare", "tags": ["telemedicine", "healthcare", "access"]}
{"index": {"_index": "embeddings", "_id": "doc_019"}}
{"id": "doc_019", "embedding": [-0.7, 0.4, 0.8, -0.2, 0.3, 0.6, -0.1, 0.9], "text": "Autonomous vehicles are being tested extensively with promises of safer and more efficient transportation.", "category": "technology", "subcategory": "automotive", "tags": ["autonomous", "safety", "efficiency"]}
{"index": {"_index": "embeddings", "_id": "doc_020"}}
{"id": "doc_020", "embedding": [0.4, 0.7, -0.3, 0.9, -0.6, 0.2, 0.5, -0.1], "text": "Mental health awareness is increasing with new approaches to therapy and workplace wellness programs.", "category": "health", "subcategory": "mental", "tags": ["mental_health", "therapy", "wellness"]}
20  example/sample_prompts_es_bulk.ndjson  (new file)
@@ -0,0 +1,20 @@
{"index": {"_index": "prompts", "_id": "prompt_001"}}
{"id": "prompt_001", "embedding": [0.15, -0.28, 0.65, 0.42, -0.11, 0.33, 0.78, -0.52], "text": "Find articles about machine learning applications", "category": "search", "subcategory": "technology", "tags": ["AI", "research"]}
{"index": {"_index": "prompts", "_id": "prompt_002"}}
{"id": "prompt_002", "embedding": [0.72, 0.18, -0.35, 0.51, 0.09, -0.44, 0.27, 0.63], "text": "Show me product reviews for smartphones", "category": "search", "subcategory": "product", "tags": ["mobile", "reviews"]}
{"index": {"_index": "prompts", "_id": "prompt_003"}}
{"id": "prompt_003", "embedding": [-0.21, 0.59, 0.34, -0.67, 0.45, 0.12, -0.38, 0.76], "text": "What are the latest political developments?", "category": "search", "subcategory": "news", "tags": ["politics", "current events"]}
{"index": {"_index": "prompts", "_id": "prompt_004"}}
{"id": "prompt_004", "embedding": [0.48, -0.15, 0.72, 0.31, -0.58, 0.24, 0.67, -0.39], "text": "Summarize recent tech industry trends", "category": "analysis", "subcategory": "technology", "tags": ["tech", "trends", "summary"]}
{"index": {"_index": "prompts", "_id": "prompt_005"}}
{"id": "prompt_005", "embedding": [-0.33, 0.47, -0.62, 0.28, 0.71, -0.18, 0.54, 0.35], "text": "Compare different smartphone models", "category": "analysis", "subcategory": "product", "tags": ["comparison", "mobile", "evaluation"]}
{"index": {"_index": "prompts", "_id": "prompt_006"}}
{"id": "prompt_006", "embedding": [0.64, 0.21, 0.39, -0.45, 0.13, 0.58, -0.27, 0.74], "text": "Analyze voter sentiment on recent policies", "category": "analysis", "subcategory": "politics", "tags": ["sentiment", "politics", "analysis"]}
{"index": {"_index": "prompts", "_id": "prompt_007"}}
{"id": "prompt_007", "embedding": [0.29, -0.43, 0.56, 0.68, -0.22, 0.37, 0.14, -0.61], "text": "Generate a summary of machine learning research", "category": "generation", "subcategory": "technology", "tags": ["AI", "research", "summary"]}
{"index": {"_index": "prompts", "_id": "prompt_008"}}
{"id": "prompt_008", "embedding": [-0.17, 0.52, -0.48, 0.36, 0.74, -0.29, 0.61, 0.18], "text": "Create a product recommendation report", "category": "generation", "subcategory": "product", "tags": ["recommendation", "report", "analysis"]}
{"index": {"_index": "prompts", "_id": "prompt_009"}}
{"id": "prompt_009", "embedding": [0.55, 0.08, 0.41, -0.37, 0.26, 0.69, -0.14, 0.58], "text": "Write a news brief on election updates", "category": "generation", "subcategory": "news", "tags": ["election", "news", "brief"]}
{"index": {"_index": "prompts", "_id": "prompt_010"}}
{"id": "prompt_010", "embedding": [0.23, -0.59, 0.47, 0.61, -0.35, 0.18, 0.72, -0.26], "text": "Explain how neural networks work", "category": "explanation", "subcategory": "technology", "tags": ["AI", "education", "neural networks"]}
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "embeddingbuddy"
-version = "0.2.0"
+version = "0.3.0"
 description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -15,6 +15,7 @@ dependencies = [
     "numba>=0.56.4",
     "openTSNE>=1.0.0",
     "mypy>=1.17.1",
+    "opensearch-py>=3.0.0",
 ]
 
 [project.optional-dependencies]
@@ -8,7 +8,18 @@ from .ui.callbacks.interactions import InteractionCallbacks
 
 
 def create_app():
-    app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
+    import os
+
+    # Get the project root directory (two levels up from this file)
+    project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+    assets_path = os.path.join(project_root, "assets")
+
+    app = dash.Dash(
+        __name__, external_stylesheets=[dbc.themes.BOOTSTRAP], assets_folder=assets_path
+    )
+
+    # Allow callbacks to components that are dynamically created in tabs
+    app.config.suppress_callback_exceptions = True
 
     layout_manager = AppLayout()
     app.layout = layout_manager.create_layout()
@@ -17,9 +28,78 @@ def create_app():
     VisualizationCallbacks()
     InteractionCallbacks()
 
+    # Register client-side callback for embedding generation
+    _register_client_side_callbacks(app)
+
     return app
+
+
+def _register_client_side_callbacks(app):
+    """Register client-side callbacks for browser-based processing."""
+    from dash import Input, Output, State
+
+    # Client-side callback for embedding generation
+    app.clientside_callback(
+        """
+        function(nClicks, textContent, modelName, tokenizationMethod, batchSize, category, subcategory) {
+            if (!nClicks || !textContent || !textContent.trim()) {
+                return window.dash_clientside.no_update;
+            }
+
+            console.log('🔍 Checking for Transformers.js...');
+            console.log('window.dash_clientside:', typeof window.dash_clientside);
+            console.log('window.dash_clientside.transformers:', typeof window.dash_clientside?.transformers);
+            console.log('generateEmbeddings function:', typeof window.dash_clientside?.transformers?.generateEmbeddings);
+
+            if (typeof window.dash_clientside !== 'undefined' &&
+                typeof window.dash_clientside.transformers !== 'undefined' &&
+                typeof window.dash_clientside.transformers.generateEmbeddings === 'function') {
+
+                console.log('✅ Calling Transformers.js generateEmbeddings...');
+                return window.dash_clientside.transformers.generateEmbeddings(
+                    nClicks, textContent, modelName, tokenizationMethod, category, subcategory
+                );
+            }
+
+            // More detailed error information
+            let errorMsg = '❌ Transformers.js not available. ';
+            if (typeof window.dash_clientside === 'undefined') {
+                errorMsg += 'dash_clientside not found.';
+            } else if (typeof window.dash_clientside.transformers === 'undefined') {
+                errorMsg += 'transformers module not found.';
+            } else if (typeof window.dash_clientside.transformers.generateEmbeddings !== 'function') {
+                errorMsg += 'generateEmbeddings function not found.';
+            }
+
+            console.error(errorMsg);
+
+            return [
+                { error: 'Transformers.js not loaded. Please refresh the page and try again.' },
+                errorMsg + ' Please refresh the page.',
+                'danger',
+                false
+            ];
+        }
+        """,
+        [
+            Output("embeddings-generated-trigger", "data"),
+            Output("text-input-status-immediate", "children"),
+            Output("text-input-status-immediate", "color"),
+            Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
+        ],
+        [Input("generate-embeddings-btn", "n_clicks")],
+        [
+            State("text-input-area", "value"),
+            State("model-selection", "value"),
+            State("tokenization-method", "value"),
+            State("batch-size", "value"),
+            State("text-category", "value"),
+            State("text-subcategory", "value"),
+        ],
+        prevent_initial_call=True,
+    )
 
 
 def run_app(app=None, debug=None, host=None, port=None):
     if app is None:
         app = create_app()
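The JavaScript string above must return a four-element array in the same order as the four declared Outputs - trigger-store data, status message, status color, and the button's disabled flag - which is exactly the return shape used by both generateEmbeddings implementations in the assets.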
@@ -73,6 +73,77 @@ class AppSettings:
     HOST = os.getenv("EMBEDDINGBUDDY_HOST", "127.0.0.1")
     PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))
 
+    # OpenSearch Configuration
+    OPENSEARCH_DEFAULT_SIZE = 100
+    OPENSEARCH_SAMPLE_SIZE = 5
+    OPENSEARCH_CONNECTION_TIMEOUT = 30
+    OPENSEARCH_VERIFY_CERTS = True
+
+    # Text Input / Transformers.js Configuration
+    DEFAULT_EMBEDDING_MODEL = "Xenova/all-mpnet-base-v2"
+    MAX_TEXT_LENGTH = 50000  # Characters (browser memory limits)
+    DEFAULT_TOKENIZATION_METHOD = "sentence"
+    MAX_BATCH_SIZE = 8  # Process in smaller batches for memory management
+
+    # Available Transformers.js compatible models
+    AVAILABLE_MODELS = [
+        {
+            "name": "Xenova/all-mpnet-base-v2",
+            "label": "All-MPNet-Base-v2 (Quality, 768d)",
+            "description": "Higher quality embeddings with better semantic understanding",
+            "dimensions": 768,
+            "size": "109 MB",
+            "context_length": 512,
+            "multilingual": False,
+            "default": True,
+        },
+        {
+            "name": "Xenova/all-MiniLM-L6-v2",
+            "label": "All-MiniLM-L6-v2 (Fast, 384d)",
+            "description": "Lightweight model, good for quick testing and general purpose",
+            "dimensions": 384,
+            "size": "23 MB",
+            "context_length": 512,
+            "multilingual": False,
+            "default": False,
+        },
+        {
+            "name": "Xenova/paraphrase-multilingual-MiniLM-L12-v2",
+            "label": "Multilingual MiniLM (50+ languages)",
+            "description": "Support for multiple languages with good performance",
+            "dimensions": 384,
+            "size": "127 MB",
+            "context_length": 512,
+            "multilingual": True,
+        },
+        {
+            "name": "Xenova/bge-small-en-v1.5",
+            "label": "BGE Small English (High quality, 384d)",
+            "description": "Beijing Academy of AI model with excellent performance on retrieval tasks",
+            "dimensions": 384,
+            "size": "67 MB",
+            "context_length": 512,
+            "multilingual": False,
+        },
+        {
+            "name": "Xenova/gte-small",
+            "label": "GTE Small (General Text Embeddings, 384d)",
+            "description": "Alibaba's general text embedding model, balanced performance",
+            "dimensions": 384,
+            "size": "67 MB",
+            "context_length": 512,
+            "multilingual": False,
+        },
+    ]
+
+    # Browser compatibility requirements
+    SUPPORTED_BROWSERS = {
+        "chrome": ">=88",
+        "firefox": ">=92",
+        "safari": ">=15.4",
+        "edge": ">=88",
+    }
+
     # Bootstrap Theme
     EXTERNAL_STYLESHEETS = [
         "https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css"
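The clientside callback registered in app.py reads the selected model from a "model-selection" component. As a minimal sketch of how a layout might build that dropdown from AVAILABLE_MODELS (the layout code itself is not in this diff, and the import path for AppSettings is assumed):

```python
from dash import dcc

from embeddingbuddy.config import AppSettings  # assumed import path


def model_dropdown() -> dcc.Dropdown:
    # One option per model, preselecting the entry flagged "default",
    # falling back to DEFAULT_EMBEDDING_MODEL.
    options = [
        {"label": f"{m['label']} ({m['size']})", "value": m["name"]}
        for m in AppSettings.AVAILABLE_MODELS
    ]
    default = next(
        (m["name"] for m in AppSettings.AVAILABLE_MODELS if m.get("default")),
        AppSettings.DEFAULT_EMBEDDING_MODEL,
    )
    return dcc.Dropdown(id="model-selection", options=options, value=default)
```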
@@ -1,6 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
from ..models.schemas import Document, ProcessedData
|
from ..models.schemas import Document, ProcessedData
|
||||||
|
from ..models.field_mapper import FieldMapper
|
||||||
from .parser import NDJSONParser
|
from .parser import NDJSONParser
|
||||||
|
|
||||||
|
|
||||||
@@ -26,6 +27,126 @@ class DataProcessor:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
return ProcessedData(documents=[], embeddings=np.array([]), error=str(e))
|
return ProcessedData(documents=[], embeddings=np.array([]), error=str(e))
|
||||||
|
|
||||||
|
def process_opensearch_data(
|
||||||
|
self, raw_documents: List[dict], field_mapping
|
||||||
|
) -> ProcessedData:
|
||||||
|
"""Process raw OpenSearch documents using field mapping."""
|
||||||
|
try:
|
||||||
|
# Transform documents using field mapping
|
||||||
|
transformed_docs = FieldMapper.transform_documents(
|
||||||
|
raw_documents, field_mapping
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse transformed documents
|
||||||
|
documents = []
|
||||||
|
for doc_dict in transformed_docs:
|
||||||
|
try:
|
||||||
|
# Ensure required fields are present with defaults if needed
|
||||||
|
if "id" not in doc_dict or not doc_dict["id"]:
|
||||||
|
doc_dict["id"] = f"doc_{len(documents)}"
|
||||||
|
|
||||||
|
doc = Document(**doc_dict)
|
||||||
|
documents.append(doc)
|
||||||
|
except Exception:
|
||||||
|
                    continue  # Skip invalid documents

            if not documents:
                return ProcessedData(
                    documents=[],
                    embeddings=np.array([]),
                    error="No valid documents after transformation",
                )

            embeddings = self._extract_embeddings(documents)
            return ProcessedData(documents=documents, embeddings=embeddings)

        except Exception as e:
            return ProcessedData(documents=[], embeddings=np.array([]), error=str(e))

    def process_client_embeddings(self, embeddings_data: dict) -> ProcessedData:
        """Process embeddings data received from client-side JavaScript."""
        try:
            if "error" in embeddings_data:
                return ProcessedData(
                    documents=[],
                    embeddings=np.array([]),
                    error=embeddings_data["error"],
                )

            # Extract documents and embeddings from client data
            documents_data = embeddings_data.get("documents", [])
            embeddings_list = embeddings_data.get("embeddings", [])

            if not documents_data or not embeddings_list:
                return ProcessedData(
                    documents=[],
                    embeddings=np.array([]),
                    error="No documents or embeddings in client data",
                )

            if len(documents_data) != len(embeddings_list):
                return ProcessedData(
                    documents=[],
                    embeddings=np.array([]),
                    error="Mismatch between number of documents and embeddings",
                )

            # Convert embeddings to numpy array first
            try:
                embeddings = np.array(embeddings_list)

                if embeddings.ndim != 2:
                    return ProcessedData(
                        documents=[],
                        embeddings=np.array([]),
                        error="Invalid embedding dimensions",
                    )

            except Exception as e:
                return ProcessedData(
                    documents=[],
                    embeddings=np.array([]),
                    error=f"Error processing embeddings: {str(e)}",
                )

            # Convert to Document objects with embeddings
            documents = []
            for i, doc_data in enumerate(documents_data):
                try:
                    # Skip if we don't have a corresponding embedding
                    if i >= len(embeddings):
                        continue

                    # Ensure required fields are present
                    if "id" not in doc_data or not doc_data["id"]:
                        doc_data["id"] = f"text_input_{i}"
                    if "text" not in doc_data or not doc_data["text"].strip():
                        continue  # Skip documents without text

                    # Add the embedding to doc_data
                    doc_data["embedding"] = embeddings[i].tolist()

                    doc = Document(**doc_data)
                    documents.append(doc)
                except Exception:
                    # Skip invalid documents but continue processing
                    continue

            if not documents:
                return ProcessedData(
                    documents=[],
                    embeddings=np.array([]),
                    error="No valid documents found in client data",
                )

            # Only keep embeddings for valid documents
            valid_embeddings = embeddings[: len(documents)]

            return ProcessedData(documents=documents, embeddings=valid_embeddings)

        except Exception as e:
            return ProcessedData(documents=[], embeddings=np.array([]), error=str(e))

    def _extract_embeddings(self, documents: List[Document]) -> np.ndarray:
        if not documents:
            return np.array([])
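
For reference, a minimal sketch of the payload shape process_client_embeddings() expects from the browser side (field names follow the checks above; the ids, texts, and vector values are illustrative, and ProcessedData.error is assumed to default to None):

# Hypothetical payload, mirroring what the client-side embedding code posts back.
client_payload = {
    "documents": [
        {"id": "text_input_0", "text": "The sun peeked through the clouds."},
        {"id": "text_input_1", "text": "Preheat the oven to 375°F."},
    ],
    "embeddings": [
        [0.12, -0.03, 0.88],  # one vector per document, all the same length
        [0.05, 0.41, -0.27],
    ],
}

result = DataProcessor().process_client_embeddings(client_payload)
assert result.error is None and len(result.documents) == 2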
0
src/embeddingbuddy/data/sources/__init__.py
Normal file
189
src/embeddingbuddy/data/sources/opensearch.py
Normal file
@@ -0,0 +1,189 @@
from typing import Dict, List, Optional, Any, Tuple
import logging
from opensearchpy import OpenSearch
from opensearchpy.exceptions import OpenSearchException


logger = logging.getLogger(__name__)


class OpenSearchClient:
    def __init__(self):
        self.client: Optional[OpenSearch] = None
        self.connection_info: Optional[Dict[str, Any]] = None

    def connect(
        self,
        url: str,
        username: Optional[str] = None,
        password: Optional[str] = None,
        api_key: Optional[str] = None,
        verify_certs: bool = True,
    ) -> Tuple[bool, str]:
        """
        Connect to OpenSearch instance.

        Returns:
            Tuple of (success: bool, message: str)
        """
        try:
            # Parse URL to extract host and port
            if url.startswith("http://") or url.startswith("https://"):
                host = url
            else:
                host = f"https://{url}"

            # Build auth configuration
            auth_config = {}
            if username and password:
                auth_config["http_auth"] = (username, password)
            elif api_key:
                auth_config["api_key"] = api_key

            # Create client
            self.client = OpenSearch([host], verify_certs=verify_certs, **auth_config)

            # Test connection
            info = self.client.info()
            self.connection_info = {
                "url": host,
                "cluster_name": info.get("cluster_name", "Unknown"),
                "version": info.get("version", {}).get("number", "Unknown"),
            }

            return (
                True,
                f"Connected to {info.get('cluster_name', 'OpenSearch cluster')}",
            )

        except OpenSearchException as e:
            logger.error(f"OpenSearch connection error: {e}")
            return False, f"Connection failed: {str(e)}"
        except Exception as e:
            logger.error(f"Unexpected error connecting to OpenSearch: {e}")
            return False, f"Unexpected error: {str(e)}"

    def get_index_mapping(self, index_name: str) -> Tuple[bool, Optional[Dict], str]:
        """
        Get the mapping for a specific index.

        Returns:
            Tuple of (success: bool, mapping: Dict or None, message: str)
        """
        if not self.client:
            return False, None, "Not connected to OpenSearch"

        try:
            mapping = self.client.indices.get_mapping(index=index_name)
            return True, mapping, "Mapping retrieved successfully"
        except OpenSearchException as e:
            logger.error(f"Error getting mapping for index {index_name}: {e}")
            return False, None, f"Failed to get mapping: {str(e)}"

    def analyze_fields(self, index_name: str) -> Tuple[bool, Optional[Dict], str]:
        """
        Analyze index fields to detect potential embedding and text fields.

        Returns:
            Tuple of (success: bool, analysis: Dict or None, message: str)
        """
        success, mapping, message = self.get_index_mapping(index_name)
        if not success:
            return False, None, message

        try:
            # Extract field information from mapping
            index_mapping = mapping[index_name]["mappings"]["properties"]

            analysis = {
                "vector_fields": [],
                "text_fields": [],
                "keyword_fields": [],
                "numeric_fields": [],
                "all_fields": [],
            }

            for field_name, field_info in index_mapping.items():
                field_type = field_info.get("type", "unknown")
                analysis["all_fields"].append(field_name)

                if field_type == "dense_vector":
                    analysis["vector_fields"].append(
                        {
                            "name": field_name,
                            "dimension": field_info.get("dimension", "unknown"),
                        }
                    )
                elif field_type == "text":
                    analysis["text_fields"].append(field_name)
                elif field_type == "keyword":
                    analysis["keyword_fields"].append(field_name)
                elif field_type in ["integer", "long", "float", "double"]:
                    analysis["numeric_fields"].append(field_name)

            return True, analysis, "Field analysis completed"

        except Exception as e:
            logger.error(f"Error analyzing fields: {e}")
            return False, None, f"Field analysis failed: {str(e)}"

    def fetch_sample_data(
        self, index_name: str, size: int = 5
    ) -> Tuple[bool, List[Dict], str]:
        """
        Fetch sample documents from the index.

        Returns:
            Tuple of (success: bool, documents: List[Dict], message: str)
        """
        if not self.client:
            return False, [], "Not connected to OpenSearch"

        try:
            response = self.client.search(
                index=index_name, body={"query": {"match_all": {}}, "size": size}
            )

            documents = [hit["_source"] for hit in response["hits"]["hits"]]
            return True, documents, f"Retrieved {len(documents)} sample documents"

        except OpenSearchException as e:
            logger.error(f"Error fetching sample data: {e}")
            return False, [], f"Failed to fetch sample data: {str(e)}"

    def fetch_data(
        self, index_name: str, size: int = 100
    ) -> Tuple[bool, List[Dict], str]:
        """
        Fetch documents from the index.

        Returns:
            Tuple of (success: bool, documents: List[Dict], message: str)
        """
        if not self.client:
            return False, [], "Not connected to OpenSearch"

        try:
            response = self.client.search(
                index=index_name, body={"query": {"match_all": {}}, "size": size}
            )

            documents = [hit["_source"] for hit in response["hits"]["hits"]]
            total_hits = response["hits"]["total"]["value"]

            message = f"Retrieved {len(documents)} documents from {total_hits} total"
            return True, documents, message

        except OpenSearchException as e:
            logger.error(f"Error fetching data: {e}")
            return False, [], f"Failed to fetch data: {str(e)}"

    def disconnect(self):
        """Disconnect from OpenSearch."""
        if self.client:
            self.client = None
            self.connection_info = None

    def is_connected(self) -> bool:
        """Check if connected to OpenSearch."""
        return self.client is not None
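
For orientation, a minimal usage sketch of the client above (the URL, credentials, and index name are placeholders, not values from this repository):

client = OpenSearchClient()
ok, msg = client.connect(
    "https://localhost:9200",
    username="admin",
    password="admin",
    verify_certs=False,  # e.g. self-signed certs in local development
)
if ok:
    ok, analysis, _ = client.analyze_fields("my-embeddings-index")
    # analysis["vector_fields"] might look like [{"name": "embedding", "dimension": 384}]
    ok, docs, msg = client.fetch_data("my-embeddings-index", size=100)
    client.disconnect()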
254
src/embeddingbuddy/models/field_mapper.py
Normal file
@@ -0,0 +1,254 @@
from dataclasses import dataclass
from typing import Dict, List, Optional, Any
import logging


logger = logging.getLogger(__name__)


@dataclass
class FieldMapping:
    """Configuration for mapping OpenSearch fields to standard format."""

    embedding_field: str
    text_field: str
    id_field: Optional[str] = None
    category_field: Optional[str] = None
    subcategory_field: Optional[str] = None
    tags_field: Optional[str] = None


class FieldMapper:
    """Handles field mapping and data transformation from OpenSearch to standard format."""

    @staticmethod
    def suggest_mappings(field_analysis: Dict) -> Dict[str, List[str]]:
        """
        Suggest field mappings based on field analysis.

        Each dropdown will show ALL available fields, but ordered by relevance
        with the most likely candidates first.

        Args:
            field_analysis: Analysis results from OpenSearchClient.analyze_fields

        Returns:
            Dictionary with suggested fields for each mapping (ordered by relevance)
        """
        all_fields = field_analysis.get("all_fields", [])
        vector_fields = [vf["name"] for vf in field_analysis.get("vector_fields", [])]
        text_fields = field_analysis.get("text_fields", [])
        keyword_fields = field_analysis.get("keyword_fields", [])

        # Helper function to create ordered suggestions
        def create_ordered_suggestions(primary_candidates, all_available_fields):
            # Start with primary candidates, then add all other fields
            ordered = []
            # Add primary candidates first
            for field in primary_candidates:
                if field in all_available_fields and field not in ordered:
                    ordered.append(field)
            # Add remaining fields
            for field in all_available_fields:
                if field not in ordered:
                    ordered.append(field)
            return ordered

        suggestions = {}

        # Embedding field suggestions (vector fields first, then name-based candidates, then all fields)
        embedding_candidates = vector_fields.copy()
        # Add fields that likely contain embeddings based on name
        embedding_name_candidates = [
            f
            for f in all_fields
            if any(
                keyword in f.lower()
                for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"]
            )
        ]
        # Add name-based candidates that aren't already in vector_fields
        for candidate in embedding_name_candidates:
            if candidate not in embedding_candidates:
                embedding_candidates.append(candidate)
        suggestions["embedding"] = create_ordered_suggestions(
            embedding_candidates, all_fields
        )

        # Text field suggestions (text fields first, then all fields)
        text_candidates = text_fields.copy()
        suggestions["text"] = create_ordered_suggestions(text_candidates, all_fields)

        # ID field suggestions (ID-like fields first, then all fields)
        id_candidates = [
            f
            for f in keyword_fields
            if any(keyword in f.lower() for keyword in ["id", "_id", "doc", "document"])
        ]
        id_candidates.append("_id")  # _id is always available
        suggestions["id"] = create_ordered_suggestions(id_candidates, all_fields)

        # Category field suggestions (category-like fields first, then all fields)
        category_candidates = [
            f
            for f in keyword_fields
            if any(
                keyword in f.lower()
                for keyword in ["category", "class", "type", "label"]
            )
        ]
        suggestions["category"] = create_ordered_suggestions(
            category_candidates, all_fields
        )

        # Subcategory field suggestions (subcategory-like fields first, then all fields)
        subcategory_candidates = [
            f
            for f in keyword_fields
            if any(
                keyword in f.lower()
                for keyword in ["subcategory", "subclass", "subtype", "subtopic"]
            )
        ]
        suggestions["subcategory"] = create_ordered_suggestions(
            subcategory_candidates, all_fields
        )

        # Tags field suggestions (tag-like fields first, then all fields)
        tags_candidates = [
            f
            for f in keyword_fields
            if any(
                keyword in f.lower()
                for keyword in ["tag", "tags", "keyword", "keywords"]
            )
        ]
        suggestions["tags"] = create_ordered_suggestions(tags_candidates, all_fields)

        return suggestions

    @staticmethod
    def validate_mapping(
        mapping: FieldMapping, available_fields: List[str]
    ) -> List[str]:
        """
        Validate that the field mapping is correct.

        Returns:
            List of validation errors (empty if valid)
        """
        errors = []

        # Required fields validation
        if not mapping.embedding_field:
            errors.append("Embedding field is required")
        elif mapping.embedding_field not in available_fields:
            errors.append(
                f"Embedding field '{mapping.embedding_field}' not found in index"
            )

        if not mapping.text_field:
            errors.append("Text field is required")
        elif mapping.text_field not in available_fields:
            errors.append(f"Text field '{mapping.text_field}' not found in index")

        # Optional fields validation
        optional_fields = {
            "id_field": mapping.id_field,
            "category_field": mapping.category_field,
            "subcategory_field": mapping.subcategory_field,
            "tags_field": mapping.tags_field,
        }

        for field_name, field_value in optional_fields.items():
            if field_value and field_value not in available_fields:
                errors.append(
                    f"Field '{field_value}' for {field_name} not found in index"
                )

        return errors

    @staticmethod
    def transform_documents(
        documents: List[Dict[str, Any]], mapping: FieldMapping
    ) -> List[Dict[str, Any]]:
        """
        Transform OpenSearch documents to standard format using field mapping.

        Args:
            documents: Raw documents from OpenSearch
            mapping: Field mapping configuration

        Returns:
            List of transformed documents in standard format
        """
        transformed = []

        for doc in documents:
            try:
                # Build standard format document
                standard_doc = {}

                # Required fields
                if mapping.embedding_field in doc:
                    standard_doc["embedding"] = doc[mapping.embedding_field]
                else:
                    logger.warning(
                        f"Missing embedding field '{mapping.embedding_field}' in document"
                    )
                    continue

                if mapping.text_field in doc:
                    standard_doc["text"] = str(doc[mapping.text_field])
                else:
                    logger.warning(
                        f"Missing text field '{mapping.text_field}' in document"
                    )
                    continue

                # Optional fields
                if mapping.id_field and mapping.id_field in doc:
                    standard_doc["id"] = str(doc[mapping.id_field])

                if mapping.category_field and mapping.category_field in doc:
                    standard_doc["category"] = str(doc[mapping.category_field])

                if mapping.subcategory_field and mapping.subcategory_field in doc:
                    standard_doc["subcategory"] = str(doc[mapping.subcategory_field])

                if mapping.tags_field and mapping.tags_field in doc:
                    tags = doc[mapping.tags_field]
                    # Handle both string and list tags
                    if isinstance(tags, list):
                        standard_doc["tags"] = [str(tag) for tag in tags]
                    else:
                        standard_doc["tags"] = [str(tags)]

                transformed.append(standard_doc)

            except Exception as e:
                logger.error(f"Error transforming document: {e}")
                continue

        logger.info(f"Transformed {len(transformed)} documents out of {len(documents)}")
        return transformed

    @staticmethod
    def create_mapping_from_dict(mapping_dict: Dict[str, str]) -> FieldMapping:
        """
        Create a FieldMapping from a dictionary.

        Args:
            mapping_dict: Dictionary with field mappings

        Returns:
            FieldMapping instance
        """
        return FieldMapping(
            embedding_field=mapping_dict.get("embedding", ""),
            text_field=mapping_dict.get("text", ""),
            id_field=mapping_dict.get("id") or None,
            category_field=mapping_dict.get("category") or None,
            subcategory_field=mapping_dict.get("subcategory") or None,
            tags_field=mapping_dict.get("tags") or None,
        )
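
A short sketch of how these pieces compose (the analysis dict is hypothetical but matches the shape analyze_fields() produces; raw_hits stands in for documents returned by fetch_data()):

analysis = {
    "all_fields": ["doc_id", "body", "embedding", "topic"],
    "vector_fields": [{"name": "embedding", "dimension": 384}],
    "text_fields": ["body"],
    "keyword_fields": ["doc_id", "topic"],
    "numeric_fields": [],
}
suggestions = FieldMapper.suggest_mappings(analysis)
# suggestions["embedding"][0] == "embedding", suggestions["text"][0] == "body"

mapping = FieldMapper.create_mapping_from_dict(
    {"embedding": "embedding", "text": "body", "id": "doc_id", "category": "topic"}
)
errors = FieldMapper.validate_mapping(mapping, analysis["all_fields"])  # [] when valid
standard_docs = FieldMapper.transform_documents(raw_hits, mapping)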
@@ -1,10 +1,15 @@
-from dash import callback, Input, Output, State
+from dash import callback, Input, Output, State, no_update, html
 from ...data.processor import DataProcessor
+from ...data.sources.opensearch import OpenSearchClient
+from ...models.field_mapper import FieldMapper
+from ...config.settings import AppSettings


 class DataProcessingCallbacks:
     def __init__(self):
         self.processor = DataProcessor()
+        self.opensearch_client_data = OpenSearchClient()  # For data/documents
+        self.opensearch_client_prompts = OpenSearchClient()  # For prompts
         self._register_callbacks()

     def _register_callbacks(self):
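
The AppSettings import added here is used below for OPENSEARCH_VERIFY_CERTS, OPENSEARCH_DEFAULT_SIZE, MAX_TEXT_LENGTH, and AVAILABLE_MODELS. The settings module itself is not part of this diff; a sketch of the shape the callbacks assume, with purely illustrative values:

class AppSettings:
    # Attribute names and the AVAILABLE_MODELS entry keys are implied by the
    # callback code in this diff; the concrete values here are made up.
    MAX_TEXT_LENGTH = 50_000
    OPENSEARCH_VERIFY_CERTS = True
    OPENSEARCH_DEFAULT_SIZE = 100
    AVAILABLE_MODELS = [
        {
            "name": "Xenova/all-MiniLM-L6-v2",
            "dimensions": 384,
            "context_length": 256,
            "description": "Fast general-purpose sentence embeddings",
            "multilingual": False,
            "size": "~23 MB",
        },
    ]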
@@ -67,6 +72,620 @@ class DataProcessingCallbacks:
                "embeddings": processed_data.embeddings.tolist(),
            }

        # OpenSearch callbacks
        @callback(
            [
                Output("tab-content", "children"),
            ],
            [Input("data-source-tabs", "active_tab")],
            prevent_initial_call=False,
        )
        def render_tab_content(active_tab):
            from ...ui.components.datasource import DataSourceComponent

            datasource = DataSourceComponent()

            if active_tab == "opensearch-tab":
                return [datasource.create_opensearch_tab()]
            elif active_tab == "text-input-tab":
                return [datasource.create_text_input_tab()]
            else:
                return [datasource.create_file_upload_tab()]

        # Register callbacks for both data and prompts sections
        self._register_opensearch_callbacks("data", self.opensearch_client_data)
        self._register_opensearch_callbacks("prompts", self.opensearch_client_prompts)

        # Register collapsible section callbacks
        self._register_collapse_callbacks()

        # Register text input callbacks
        self._register_text_input_callbacks()

    def _register_opensearch_callbacks(self, section_type, opensearch_client):
        """Register callbacks for a specific section (data or prompts)."""

        @callback(
            Output(f"{section_type}-auth-collapse", "is_open"),
            [Input(f"{section_type}-auth-toggle", "n_clicks")],
            [State(f"{section_type}-auth-collapse", "is_open")],
            prevent_initial_call=True,
        )
        def toggle_auth(n_clicks, is_open):
            if n_clicks:
                return not is_open
            return is_open

        @callback(
            Output(f"{section_type}-auth-toggle", "children"),
            [Input(f"{section_type}-auth-collapse", "is_open")],
            prevent_initial_call=False,
        )
        def update_auth_button_text(is_open):
            return "Hide Authentication" if is_open else "Show Authentication"

        @callback(
            [
                Output(f"{section_type}-connection-status", "children"),
                Output(f"{section_type}-field-mapping-section", "children"),
                Output(f"{section_type}-field-mapping-section", "style"),
                Output(f"{section_type}-load-data-section", "style"),
                Output(f"{section_type}-load-opensearch-data-btn", "disabled"),
                Output(f"{section_type}-embedding-field-dropdown", "options"),
                Output(f"{section_type}-text-field-dropdown", "options"),
                Output(f"{section_type}-id-field-dropdown", "options"),
                Output(f"{section_type}-category-field-dropdown", "options"),
                Output(f"{section_type}-subcategory-field-dropdown", "options"),
                Output(f"{section_type}-tags-field-dropdown", "options"),
            ],
            [Input(f"{section_type}-test-connection-btn", "n_clicks")],
            [
                State(f"{section_type}-opensearch-url", "value"),
                State(f"{section_type}-opensearch-index", "value"),
                State(f"{section_type}-opensearch-username", "value"),
                State(f"{section_type}-opensearch-password", "value"),
                State(f"{section_type}-opensearch-api-key", "value"),
            ],
            prevent_initial_call=True,
        )
        def test_opensearch_connection(
            n_clicks, url, index_name, username, password, api_key
        ):
            if not n_clicks or not url or not index_name:
                return (
                    no_update,
                    no_update,
                    no_update,
                    no_update,
                    no_update,
                    no_update,
                    no_update,
                    no_update,
                    no_update,
                    no_update,
                    no_update,
                )

            # Test connection
            success, message = opensearch_client.connect(
                url=url,
                username=username,
                password=password,
                api_key=api_key,
                verify_certs=AppSettings.OPENSEARCH_VERIFY_CERTS,
            )

            if not success:
                return (
                    self._create_status_alert(f"❌ {message}", "danger"),
                    [],
                    {"display": "none"},
                    {"display": "none"},
                    True,
                    [],  # empty options for hidden dropdowns
                    [],
                    [],
                    [],
                    [],
                    [],
                )

            # Analyze fields
            success, field_analysis, analysis_message = (
                opensearch_client.analyze_fields(index_name)
            )

            if not success:
                return (
                    self._create_status_alert(f"❌ {analysis_message}", "danger"),
                    [],
                    {"display": "none"},
                    {"display": "none"},
                    True,
                    [],  # empty options for hidden dropdowns
                    [],
                    [],
                    [],
                    [],
                    [],
                )

            # Generate field suggestions
            field_suggestions = FieldMapper.suggest_mappings(field_analysis)

            from ...ui.components.datasource import DataSourceComponent

            datasource = DataSourceComponent()
            field_mapping_ui = datasource.create_field_mapping_interface(
                field_suggestions, section_type
            )

            return (
                self._create_status_alert(f"✅ {message}", "success"),
                field_mapping_ui,
                {"display": "block"},
                {"display": "block"},
                False,
                [
                    {"label": field, "value": field}
                    for field in field_suggestions.get("embedding", [])
                ],
                [
                    {"label": field, "value": field}
                    for field in field_suggestions.get("text", [])
                ],
                [
                    {"label": field, "value": field}
                    for field in field_suggestions.get("id", [])
                ],
                [
                    {"label": field, "value": field}
                    for field in field_suggestions.get("category", [])
                ],
                [
                    {"label": field, "value": field}
                    for field in field_suggestions.get("subcategory", [])
                ],
                [
                    {"label": field, "value": field}
                    for field in field_suggestions.get("tags", [])
                ],
            )

        # Determine output target based on section type
        output_target = (
            "processed-data" if section_type == "data" else "processed-prompts"
        )

        @callback(
            [
                Output(output_target, "data", allow_duplicate=True),
                Output("opensearch-success-alert", "children", allow_duplicate=True),
                Output("opensearch-success-alert", "is_open", allow_duplicate=True),
                Output("opensearch-error-alert", "children", allow_duplicate=True),
                Output("opensearch-error-alert", "is_open", allow_duplicate=True),
            ],
            [Input(f"{section_type}-load-opensearch-data-btn", "n_clicks")],
            [
                State(f"{section_type}-opensearch-index", "value"),
                State(f"{section_type}-opensearch-query-size", "value"),
                State(f"{section_type}-embedding-field-dropdown-ui", "value"),
                State(f"{section_type}-text-field-dropdown-ui", "value"),
                State(f"{section_type}-id-field-dropdown-ui", "value"),
                State(f"{section_type}-category-field-dropdown-ui", "value"),
                State(f"{section_type}-subcategory-field-dropdown-ui", "value"),
                State(f"{section_type}-tags-field-dropdown-ui", "value"),
            ],
            prevent_initial_call=True,
        )
        def load_opensearch_data(
            n_clicks,
            index_name,
            query_size,
            embedding_field,
            text_field,
            id_field,
            category_field,
            subcategory_field,
            tags_field,
        ):
            if not n_clicks or not index_name or not embedding_field or not text_field:
                return no_update, no_update, no_update, no_update, no_update

            try:
                # Validate and set query size
                if not query_size or query_size < 1:
                    query_size = AppSettings.OPENSEARCH_DEFAULT_SIZE
                elif query_size > 1000:
                    query_size = 1000  # Cap at reasonable maximum

                # Create field mapping
                field_mapping = FieldMapper.create_mapping_from_dict(
                    {
                        "embedding": embedding_field,
                        "text": text_field,
                        "id": id_field,
                        "category": category_field,
                        "subcategory": subcategory_field,
                        "tags": tags_field,
                    }
                )

                # Fetch data from OpenSearch
                success, raw_documents, message = opensearch_client.fetch_data(
                    index_name, size=query_size
                )

                if not success:
                    return (
                        no_update,
                        "",
                        False,
                        f"❌ Failed to fetch {section_type}: {message}",
                        True,
                    )

                # Process the data
                processed_data = self.processor.process_opensearch_data(
                    raw_documents, field_mapping
                )

                if processed_data.error:
                    return (
                        {"error": processed_data.error},
                        "",
                        False,
                        f"❌ {section_type.title()} processing error: {processed_data.error}",
                        True,
                    )

                success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch"

                # Format for appropriate target (data vs prompts)
                if section_type == "data":
                    return (
                        {
                            "documents": [
                                self._document_to_dict(doc)
                                for doc in processed_data.documents
                            ],
                            "embeddings": processed_data.embeddings.tolist(),
                        },
                        success_message,
                        True,
                        "",
                        False,
                    )
                else:  # prompts
                    return (
                        {
                            "prompts": [
                                self._document_to_dict(doc)
                                for doc in processed_data.documents
                            ],
                            "embeddings": processed_data.embeddings.tolist(),
                        },
                        success_message,
                        True,
                        "",
                        False,
                    )

            except Exception as e:
                return (no_update, "", False, f"❌ Unexpected error: {str(e)}", True)

        # Sync callbacks to update hidden dropdowns from UI dropdowns
        @callback(
            Output(f"{section_type}-embedding-field-dropdown", "value"),
            Input(f"{section_type}-embedding-field-dropdown-ui", "value"),
            prevent_initial_call=True,
        )
        def sync_embedding_dropdown(value):
            return value

        @callback(
            Output(f"{section_type}-text-field-dropdown", "value"),
            Input(f"{section_type}-text-field-dropdown-ui", "value"),
            prevent_initial_call=True,
        )
        def sync_text_dropdown(value):
            return value

        @callback(
            Output(f"{section_type}-id-field-dropdown", "value"),
            Input(f"{section_type}-id-field-dropdown-ui", "value"),
            prevent_initial_call=True,
        )
        def sync_id_dropdown(value):
            return value

        @callback(
            Output(f"{section_type}-category-field-dropdown", "value"),
            Input(f"{section_type}-category-field-dropdown-ui", "value"),
            prevent_initial_call=True,
        )
        def sync_category_dropdown(value):
            return value

        @callback(
            Output(f"{section_type}-subcategory-field-dropdown", "value"),
            Input(f"{section_type}-subcategory-field-dropdown-ui", "value"),
            prevent_initial_call=True,
        )
        def sync_subcategory_dropdown(value):
            return value

        @callback(
            Output(f"{section_type}-tags-field-dropdown", "value"),
            Input(f"{section_type}-tags-field-dropdown-ui", "value"),
            prevent_initial_call=True,
        )
        def sync_tags_dropdown(value):
            return value

    def _register_collapse_callbacks(self):
        """Register callbacks for collapsible sections."""

        # Data section collapse callback
        @callback(
            [
                Output("data-collapse", "is_open"),
                Output("data-collapse-icon", "className"),
            ],
            [Input("data-collapse-toggle", "n_clicks")],
            [State("data-collapse", "is_open")],
            prevent_initial_call=True,
        )
        def toggle_data_collapse(n_clicks, is_open):
            if n_clicks:
                new_state = not is_open
                icon_class = (
                    "fas fa-chevron-down me-2"
                    if new_state
                    else "fas fa-chevron-right me-2"
                )
                return new_state, icon_class
            return is_open, "fas fa-chevron-down me-2"

        # Prompts section collapse callback
        @callback(
            [
                Output("prompts-collapse", "is_open"),
                Output("prompts-collapse-icon", "className"),
            ],
            [Input("prompts-collapse-toggle", "n_clicks")],
            [State("prompts-collapse", "is_open")],
            prevent_initial_call=True,
        )
        def toggle_prompts_collapse(n_clicks, is_open):
            if n_clicks:
                new_state = not is_open
                icon_class = (
                    "fas fa-chevron-down me-2"
                    if new_state
                    else "fas fa-chevron-right me-2"
                )
                return new_state, icon_class
            return is_open, "fas fa-chevron-down me-2"

    def _register_text_input_callbacks(self):
        """Register callbacks for text input functionality."""

        # Text length counter callback
        @callback(
            Output("text-length-counter", "children"),
            Input("text-input-area", "value"),
            prevent_initial_call=False,
        )
        def update_text_length_counter(text_value):
            if not text_value:
                return "0"
            return f"{len(text_value):,}"

        # Generate button enable/disable callback
        @callback(
            [
                Output("generate-embeddings-btn", "disabled"),
                Output("generation-help", "children"),
                Output("generation-help", "color"),
            ],
            [
                Input("text-input-area", "value"),
                Input("model-selection", "value"),
            ],
            prevent_initial_call=False,
        )
        def toggle_generate_button(text_value, model_name):
            import dash_bootstrap_components as dbc

            if not text_value or not text_value.strip():
                return (
                    True,
                    dbc.Alert(
                        [
                            html.I(className="fas fa-info-circle me-2"),
                            "Enter some text above to enable embedding generation.",
                        ],
                        color="light",
                    ),
                    "light",
                )

            if not model_name:
                return (
                    True,
                    dbc.Alert(
                        [
                            html.I(className="fas fa-exclamation-triangle me-2"),
                            "Select an embedding model to continue.",
                        ],
                        color="warning",
                    ),
                    "warning",
                )

            text_length = len(text_value.strip())
            if text_length > AppSettings.MAX_TEXT_LENGTH:
                return (
                    True,
                    dbc.Alert(
                        [
                            html.I(className="fas fa-exclamation-triangle me-2"),
                            f"Text too long ({text_length:,} characters). Maximum allowed: {AppSettings.MAX_TEXT_LENGTH:,} characters.",
                        ],
                        color="danger",
                    ),
                    "danger",
                )

            return (
                False,
                dbc.Alert(
                    [
                        html.I(className="fas fa-check-circle me-2"),
                        f"Ready to generate embeddings for {text_length:,} characters using {model_name}.",
                    ],
                    color="success",
                ),
                "success",
            )

        # Clear text callback
        @callback(
            Output("text-input-area", "value"),
            [Input("clear-text-btn", "n_clicks"), Input("load-sample-btn", "n_clicks")],
            prevent_initial_call=True,
        )
        def handle_text_input_actions(clear_clicks, load_clicks):
            from dash import ctx

            if not ctx.triggered:
                return no_update

            button_id = ctx.triggered[0]["prop_id"].split(".")[0]

            if button_id == "clear-text-btn" and clear_clicks:
                return ""
            elif button_id == "load-sample-btn" and load_clicks:
                return self._load_sample_text()

            return no_update

        # Model info callback
        @callback(
            Output("model-info", "children"),
            Input("model-selection", "value"),
            prevent_initial_call=False,
        )
        def update_model_info(model_name):
            if not model_name:
                return html.Span("Please select a model", className="text-muted")

            from ...config.settings import AppSettings

            settings = AppSettings()

            for model in settings.AVAILABLE_MODELS:
                if model["name"] == model_name:
                    return html.Div(
                        [
                            html.Strong(
                                f"Dimensions: {model['dimensions']} | Context Length: {model['context_length']}"
                            ),
                            html.Br(),
                            html.Span(model["description"]),
                            html.Br(),
                            html.Small(
                                f"Multilingual: {'Yes' if model.get('multilingual', False) else 'No'} | Size: {model['size']}",
                                className="text-muted",
                            ),
                        ]
                    )

            return html.Span("Model information not available", className="text-muted")

        # Process client-side embeddings result callback
        @callback(
            [
                Output("processed-data", "data", allow_duplicate=True),
                Output("text-input-status", "children"),
                Output("text-input-status", "color"),
                Output("text-input-status", "style"),
                Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
            ],
            [Input("embeddings-generated-trigger", "data")],
            prevent_initial_call=True,
        )
        def process_embeddings_result(embeddings_data):
            """Process embeddings generated client-side."""
            if not embeddings_data:
                return no_update, no_update, no_update, no_update, no_update

            processed_data = self.processor.process_client_embeddings(embeddings_data)

            if processed_data.error:
                return (
                    {"error": processed_data.error},
                    f"❌ Error: {processed_data.error}",
                    "danger",
                    {"display": "block"},
                    False,
                )

            return (
                {
                    "documents": [
                        self._document_to_dict(doc) for doc in processed_data.documents
                    ],
                    "embeddings": processed_data.embeddings.tolist(),
                },
                f"✅ Generated embeddings for {len(processed_data.documents)} text chunks",
                "success",
                {"display": "block"},
                False,
            )

    def _load_sample_text(self):
        """Load sample text from assets/sample-txt.md file."""
        import os

        try:
            # Get the project root directory (four levels up from this file)
            current_file = os.path.abspath(__file__)
            project_root = os.path.dirname(
                os.path.dirname(
                    os.path.dirname(os.path.dirname(os.path.dirname(current_file)))
                )
            )
            sample_file_path = os.path.join(project_root, "assets", "sample-txt.md")

            if os.path.exists(sample_file_path):
                with open(sample_file_path, "r", encoding="utf-8") as file:
                    return file.read()
            else:
                # Fallback sample text if file doesn't exist
                return """The sun peeked through the clouds after a drizzly morning.
A gentle breeze rustled the leaves as we walked along the shoreline.
Heavy rains caused flooding in several low-lying neighborhoods.
It was so hot that even the birds sought shade under the palm trees.
By midnight, the temperature had dropped below freezing.

The new smartphone features a foldable display and 5G connectivity.
In the world of AI, transformers have revolutionized natural language processing.
Quantum computing promises to solve problems beyond classical computers' reach.
Blockchain technology is being explored for secure voting systems.
Virtual reality headsets are becoming more affordable and accessible.

Preheat the oven to 375°F before you start mixing the batter.
She finely chopped the garlic and sautéed it in two tablespoons of olive oil.
A pinch of saffron adds a beautiful color and aroma to traditional paella.
If the soup is too salty, add a peeled potato to absorb excess sodium.
Let the bread dough rise for at least an hour in a warm, draft-free spot."""

        except Exception:
            # Return a simple fallback if there's any error
            return "This is sample text for testing embedding generation. You can replace this with your own text."

    @staticmethod
    def _document_to_dict(doc):
        return {
@@ -118,3 +737,10 @@ class DataProcessingCallbacks:
            f"❌ Error processing file{file_part}: {error}. "
            "Please check that your file is valid NDJSON with required 'text' and 'embedding' fields."
        )

    @staticmethod
    def _create_status_alert(message: str, color: str):
        """Create a status alert component."""
        import dash_bootstrap_components as dbc

        return dbc.Alert(message, color=color, className="mb-2")
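
Because these handlers use the module-level @callback decorator, simply instantiating the class registers everything with Dash. A minimal wiring sketch (the module path and app setup are assumptions, not shown in this diff):

import dash
import dash_bootstrap_components as dbc

from embeddingbuddy.ui.callbacks.data_processing import DataProcessingCallbacks  # path assumed

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
DataProcessingCallbacks()  # __init__ calls _register_callbacks(), wiring all handlers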
526
src/embeddingbuddy/ui/components/datasource.py
Normal file
@@ -0,0 +1,526 @@
|
|||||||
|
from dash import dcc, html
|
||||||
|
import dash_bootstrap_components as dbc
|
||||||
|
from .upload import UploadComponent
|
||||||
|
from .textinput import TextInputComponent
|
||||||
|
|
||||||
|
|
||||||
|
class DataSourceComponent:
|
||||||
|
def __init__(self):
|
||||||
|
self.upload_component = UploadComponent()
|
||||||
|
self.text_input_component = TextInputComponent()
|
||||||
|
|
||||||
|
def create_tabbed_interface(self):
|
||||||
|
"""Create tabbed interface for different data sources."""
|
||||||
|
return dbc.Card(
|
||||||
|
[
|
||||||
|
dbc.CardHeader(
|
||||||
|
[
|
||||||
|
dbc.Tabs(
|
||||||
|
[
|
||||||
|
dbc.Tab(label="File Upload", tab_id="file-tab"),
|
||||||
|
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
|
||||||
|
dbc.Tab(label="Text Input", tab_id="text-input-tab"),
|
||||||
|
],
|
||||||
|
id="data-source-tabs",
|
||||||
|
active_tab="file-tab",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
),
|
||||||
|
dbc.CardBody([html.Div(id="tab-content")]),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_file_upload_tab(self):
|
||||||
|
"""Create file upload tab content."""
|
||||||
|
return html.Div(
|
||||||
|
[
|
||||||
|
self.upload_component.create_error_alert(),
|
||||||
|
self.upload_component.create_data_upload(),
|
||||||
|
self.upload_component.create_prompts_upload(),
|
||||||
|
self.upload_component.create_reset_button(),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_opensearch_tab(self):
|
||||||
|
"""Create OpenSearch tab content with separate Data and Prompts sections."""
|
||||||
|
return html.Div(
|
||||||
|
[
|
||||||
|
# Data Section
|
||||||
|
dbc.Card(
|
||||||
|
[
|
||||||
|
dbc.CardHeader(
|
||||||
|
[
|
||||||
|
dbc.Button(
|
||||||
|
[
|
||||||
|
html.I(
|
||||||
|
className="fas fa-chevron-down me-2",
|
||||||
|
id="data-collapse-icon",
|
||||||
|
),
|
||||||
|
"📄 Documents/Data",
|
||||||
|
],
|
||||||
|
id="data-collapse-toggle",
|
||||||
|
color="link",
|
||||||
|
className="text-start p-0 w-100 text-decoration-none",
|
||||||
|
style={
|
||||||
|
"border": "none",
|
||||||
|
"font-size": "1.25rem",
|
||||||
|
"font-weight": "500",
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
dbc.Collapse(
|
||||||
|
[dbc.CardBody([self._create_opensearch_section("data")])],
|
||||||
|
id="data-collapse",
|
||||||
|
is_open=True,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
className="mb-4",
|
||||||
|
),
|
||||||
|
# Prompts Section
|
||||||
|
dbc.Card(
|
||||||
|
[
|
||||||
|
dbc.CardHeader(
|
||||||
|
[
|
||||||
|
dbc.Button(
|
||||||
|
[
|
||||||
|
html.I(
|
||||||
|
className="fas fa-chevron-down me-2",
|
||||||
|
id="prompts-collapse-icon",
|
||||||
|
),
|
||||||
|
"💬 Prompts",
|
||||||
|
],
|
||||||
|
id="prompts-collapse-toggle",
|
||||||
|
color="link",
|
||||||
|
className="text-start p-0 w-100 text-decoration-none",
|
||||||
|
style={
|
||||||
|
"border": "none",
|
||||||
|
"font-size": "1.25rem",
|
||||||
|
"font-weight": "500",
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
dbc.Collapse(
|
||||||
|
[
|
||||||
|
dbc.CardBody(
|
||||||
|
[self._create_opensearch_section("prompts")]
|
||||||
|
)
|
||||||
|
],
|
||||||
|
id="prompts-collapse",
|
||||||
|
is_open=True,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
className="mb-4",
|
||||||
|
),
|
||||||
|
# Hidden dropdowns to prevent callback errors (for both sections)
|
||||||
|
html.Div(
|
||||||
|
[
|
||||||
|
# Data dropdowns (hidden sync targets)
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-embedding-field-dropdown",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-text-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-id-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-category-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-subcategory-field-dropdown",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-tags-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
# Data UI dropdowns (hidden placeholders)
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-embedding-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-text-field-dropdown-ui", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-id-field-dropdown-ui", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-category-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-subcategory-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-tags-field-dropdown-ui", style={"display": "none"}
|
||||||
|
),
|
||||||
|
# Prompts dropdowns (hidden sync targets)
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-embedding-field-dropdown",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-text-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-id-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-category-field-dropdown",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-subcategory-field-dropdown",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-tags-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
# Prompts UI dropdowns (hidden placeholders)
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-embedding-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-text-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-id-field-dropdown-ui", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-category-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-subcategory-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-tags-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_text_input_tab(self):
|
||||||
|
"""Create text input tab content for browser-based embedding generation."""
|
||||||
|
return html.Div([self.text_input_component.create_text_input_interface()])
|
||||||
|
|
||||||
|
def _create_opensearch_section(self, section_type):
|
||||||
|
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""
|
||||||
|
section_id = section_type # 'data' or 'prompts'
|
||||||
|
|
||||||
|
return html.Div(
|
||||||
|
[
|
||||||
|
# Connection section
|
||||||
|
html.H6("Connection", className="mb-2"),
|
||||||
|
dbc.Row(
|
||||||
|
[
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
|
dbc.Label("OpenSearch URL:"),
|
||||||
|
dbc.Input(
|
||||||
|
id=f"{section_id}-opensearch-url",
|
||||||
|
type="text",
|
||||||
|
placeholder="https://opensearch.example.com:9200",
|
||||||
|
className="mb-2",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
width=12,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
dbc.Row(
|
||||||
|
[
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
|
dbc.Label("Index Name:"),
|
||||||
|
dbc.Input(
|
||||||
|
id=f"{section_id}-opensearch-index",
|
||||||
|
type="text",
|
||||||
|
placeholder="my-embeddings-index",
|
||||||
|
className="mb-2",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
width=6,
|
||||||
|
),
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
|
dbc.Label("Query Size:"),
|
||||||
|
dbc.Input(
|
||||||
|
id=f"{section_id}-opensearch-query-size",
|
||||||
|
type="number",
|
||||||
|
value=100,
|
||||||
|
min=1,
|
||||||
|
max=1000,
|
||||||
|
placeholder="100",
|
||||||
|
className="mb-2",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
width=6,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
dbc.Row(
|
||||||
|
[
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
|
dbc.Button(
|
||||||
|
"Test Connection",
|
||||||
|
id=f"{section_id}-test-connection-btn",
|
||||||
|
color="primary",
|
||||||
|
className="mb-3",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
width=12,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
# Authentication section (collapsible)
|
||||||
|
dbc.Collapse(
|
||||||
|
[
|
||||||
|
html.Hr(),
|
||||||
|
html.H6("Authentication (Optional)", className="mb-2"),
|
||||||
|
dbc.Row(
|
||||||
|
[
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
|
dbc.Label("Username:"),
|
||||||
|
dbc.Input(
|
||||||
|
id=f"{section_id}-opensearch-username",
|
||||||
|
type="text",
|
||||||
|
className="mb-2",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
width=6,
|
||||||
|
),
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
|
dbc.Label("Password:"),
|
||||||
|
dbc.Input(
|
||||||
|
id=f"{section_id}-opensearch-password",
|
||||||
|
type="password",
|
||||||
|
className="mb-2",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
width=6,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
dbc.Label("OR"),
|
||||||
|
dbc.Input(
|
||||||
|
id=f"{section_id}-opensearch-api-key",
|
||||||
|
type="text",
|
||||||
|
placeholder="API Key",
|
||||||
|
className="mb-2",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
id=f"{section_id}-auth-collapse",
|
||||||
|
is_open=False,
|
||||||
|
),
|
||||||
|
dbc.Button(
|
||||||
|
"Show Authentication",
|
||||||
|
id=f"{section_id}-auth-toggle",
|
||||||
|
color="link",
|
||||||
|
size="sm",
|
||||||
|
className="p-0 mb-3",
|
||||||
|
),
|
||||||
|
# Connection status
|
||||||
|
html.Div(id=f"{section_id}-connection-status", className="mb-3"),
|
||||||
|
# Field mapping section (hidden initially)
|
||||||
|
html.Div(
|
||||||
|
id=f"{section_id}-field-mapping-section", style={"display": "none"}
|
||||||
|
),
|
||||||
|
# Load data button (hidden initially)
|
||||||
|
html.Div(
|
||||||
|
[
|
||||||
|
dbc.Button(
|
||||||
|
f"Load {section_type.title()}",
|
||||||
|
id=f"{section_id}-load-opensearch-data-btn",
|
||||||
|
color="success",
|
||||||
|
className="mb-2",
|
||||||
|
disabled=True,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
id=f"{section_id}-load-data-section",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
# OpenSearch status/results
|
||||||
|
html.Div(id=f"{section_id}-opensearch-status", className="mb-3"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_field_mapping_interface(self, field_suggestions, section_type="data"):
|
||||||
|
"""Create field mapping interface based on detected fields."""
|
||||||
|
return html.Div(
|
||||||
|
[
|
||||||
|
html.Hr(),
|
||||||
|
html.H6("Field Mapping", className="mb-2"),
|
||||||
|
html.P(
|
||||||
|
"Map your OpenSearch fields to the required format:",
|
||||||
|
className="text-muted small",
|
||||||
|
),
|
||||||
|
# Required fields
|
||||||
|
dbc.Row(
|
||||||
|
[
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
|
dbc.Label(
|
||||||
|
"Embedding Field (required):", className="fw-bold"
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id=f"{section_type}-embedding-field-dropdown-ui",
|
||||||
|
options=[
|
||||||
|
{"label": field, "value": field}
|
||||||
|
for field in field_suggestions.get(
|
||||||
|
"embedding", []
|
||||||
|
)
|
||||||
|
],
|
||||||
|
value=field_suggestions.get("embedding", [None])[
|
||||||
|
0
|
||||||
|
                                    ],  # Default to first suggestion
                                    placeholder="Select embedding field...",
                                    className="mb-2",
                                ),
                            ],
                            width=6,
                        ),
                        dbc.Col(
                            [
                                dbc.Label(
                                    "Text Field (required):", className="fw-bold"
                                ),
                                dcc.Dropdown(
                                    id=f"{section_type}-text-field-dropdown-ui",
                                    options=[
                                        {"label": field, "value": field}
                                        for field in field_suggestions.get("text", [])
                                    ],
                                    value=field_suggestions.get("text", [None])[
                                        0
                                    ],  # Default to first suggestion
                                    placeholder="Select text field...",
                                    className="mb-2",
                                ),
                            ],
                            width=6,
                        ),
                    ]
                ),
                # Optional fields
                html.H6("Optional Fields", className="mb-2 mt-3"),
                dbc.Row(
                    [
                        dbc.Col(
                            [
                                dbc.Label("ID Field:"),
                                dcc.Dropdown(
                                    id=f"{section_type}-id-field-dropdown-ui",
                                    options=[
                                        {"label": field, "value": field}
                                        for field in field_suggestions.get("id", [])
                                    ],
                                    value=field_suggestions.get("id", [None])[
                                        0
                                    ],  # Default to first suggestion
                                    placeholder="Select ID field...",
                                    className="mb-2",
                                ),
                            ],
                            width=6,
                        ),
                        dbc.Col(
                            [
                                dbc.Label("Category Field:"),
                                dcc.Dropdown(
                                    id=f"{section_type}-category-field-dropdown-ui",
                                    options=[
                                        {"label": field, "value": field}
                                        for field in field_suggestions.get(
                                            "category", []
                                        )
                                    ],
                                    value=field_suggestions.get("category", [None])[
                                        0
                                    ],  # Default to first suggestion
                                    placeholder="Select category field...",
                                    className="mb-2",
                                ),
                            ],
                            width=6,
                        ),
                    ]
                ),
                dbc.Row(
                    [
                        dbc.Col(
                            [
                                dbc.Label("Subcategory Field:"),
                                dcc.Dropdown(
                                    id=f"{section_type}-subcategory-field-dropdown-ui",
                                    options=[
                                        {"label": field, "value": field}
                                        for field in field_suggestions.get(
                                            "subcategory", []
                                        )
                                    ],
                                    value=field_suggestions.get("subcategory", [None])[
                                        0
                                    ],  # Default to first suggestion
                                    placeholder="Select subcategory field...",
                                    className="mb-2",
                                ),
                            ],
                            width=6,
                        ),
                        dbc.Col(
                            [
                                dbc.Label("Tags Field:"),
                                dcc.Dropdown(
                                    id=f"{section_type}-tags-field-dropdown-ui",
                                    options=[
                                        {"label": field, "value": field}
                                        for field in field_suggestions.get("tags", [])
                                    ],
                                    value=field_suggestions.get("tags", [None])[
                                        0
                                    ],  # Default to first suggestion
                                    placeholder="Select tags field...",
                                    className="mb-2",
                                ),
                            ],
                            width=6,
                        ),
                    ]
                ),
            ]
        )

    def create_error_alert(self):
        """Create error alert component for OpenSearch issues."""
        return dbc.Alert(
            id="opensearch-error-alert",
            dismissable=True,
            is_open=False,
            color="danger",
            className="mb-3",
        )

    def create_success_alert(self):
        """Create success alert component for OpenSearch operations."""
        return dbc.Alert(
            id="opensearch-success-alert",
            dismissable=True,
            is_open=False,
            color="success",
            className="mb-3",
        )
@@ -1,21 +1,22 @@
 from dash import dcc, html
 import dash_bootstrap_components as dbc
 from .upload import UploadComponent
+from .datasource import DataSourceComponent


 class SidebarComponent:
     def __init__(self):
         self.upload_component = UploadComponent()
+        self.datasource_component = DataSourceComponent()

     def create_layout(self):
         return dbc.Col(
             [
-                html.H5("Upload Data", className="mb-3"),
-                self.upload_component.create_error_alert(),
-                self.upload_component.create_data_upload(),
-                self.upload_component.create_prompts_upload(),
-                self.upload_component.create_reset_button(),
-                html.H5("Visualization Controls", className="mb-3"),
+                html.H5("Data Sources", className="mb-3"),
+                self.datasource_component.create_error_alert(),
+                self.datasource_component.create_success_alert(),
+                self.datasource_component.create_tabbed_interface(),
+                html.H5("Visualization Controls", className="mb-3 mt-4"),
             ]
             + self._create_method_dropdown()
             + self._create_color_dropdown()
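Note the `+` concatenation after the closing bracket: `dbc.Col` receives one children list built by appending the dropdown controls, so `_create_method_dropdown()` and `_create_color_dropdown()` must each return a plain list of components. A minimal sketch of that contract (the labels and options shown are illustrative assumptions, not the app's actual controls):

from dash import dcc, html


def _create_method_dropdown(self):
    # Must return a list so it can be concatenated onto the children
    # list above with `+`; the options here are illustrative only.
    return [
        html.Label("Method:"),
        dcc.Dropdown(id="method-dropdown", options=["PCA", "t-SNE", "UMAP"]),
    ]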
402 src/embeddingbuddy/ui/components/textinput.py Normal file
@@ -0,0 +1,402 @@
"""Text input component for generating embeddings from user text."""

import dash_bootstrap_components as dbc
from dash import dcc, html

from embeddingbuddy.config.settings import AppSettings


class TextInputComponent:
    """Component for text input and embedding generation."""

    def __init__(self):
        self.settings = AppSettings()

    def create_text_input_interface(self):
        """Create the complete text input interface with model selection and processing options."""
        return html.Div(
            [
                # Model selection section
                self._create_model_selection(),
                html.Hr(),
                # Text input section
                self._create_text_input_area(),
                # Text action buttons
                self._create_text_action_buttons(),
                html.Hr(),
                # Processing options
                self._create_processing_options(),
                html.Hr(),
                # Generation controls
                self._create_generation_controls(),
                html.Hr(),
                # Progress indicators
                self._create_progress_indicators(),
                html.Hr(),
                # Status and results
                self._create_status_section(),
                # Hidden components for data flow
                self._create_hidden_components(),
            ],
            className="p-3",
        )

    def _create_model_selection(self):
        """Create model selection dropdown with descriptions."""
        model_options = []
        for model in self.settings.AVAILABLE_MODELS:
            label = f"{model['label']} - {model['size']}"
            if model.get("default", False):
                label += " (Recommended)"

            model_options.append({"label": label, "value": model["name"]})

        return html.Div(
            [
                html.H5("Embedding Model", className="mb-3"),
                html.Div(
                    [
                        dcc.Dropdown(
                            id="model-selection",
                            options=model_options,
                            value=self.settings.DEFAULT_EMBEDDING_MODEL,
                            placeholder="Select an embedding model...",
                            className="mb-2",
                        ),
                        dbc.Alert(
                            [
                                html.Div(
                                    id="model-info",
                                    children=self._get_model_description(
                                        self.settings.DEFAULT_EMBEDDING_MODEL
                                    ),
                                )
                            ],
                            color="info",
                            className="small",
                        ),
                    ]
                ),
            ]
        )

    def _create_text_input_area(self):
        """Create text input textarea with character limits."""
        return html.Div(
            [
                html.H5("Text Input", className="mb-3"),
                dcc.Textarea(
                    id="text-input-area",
                    placeholder="Paste your text here... Each sentence, paragraph, or line will become a separate data point depending on your tokenization method below.",
                    value="",
                    style={
                        "width": "100%",
                        "height": "300px",
                        "resize": "vertical",
                        "fontFamily": "monospace",
                        "fontSize": "14px",
                    },
                    maxLength=self.settings.MAX_TEXT_LENGTH,
                    className="form-control",
                ),
                html.Small(
                    f"Maximum {self.settings.MAX_TEXT_LENGTH:,} characters. Current: ",
                    className="text-muted",
                ),
                html.Small(
                    id="text-length-counter",
                    children="0",
                    className="text-muted fw-bold",
                ),
                html.Small(" characters", className="text-muted"),
            ]
        )

    def _create_text_action_buttons(self):
        """Create action buttons for text input (Load Sample, Clear)."""
        return html.Div(
            [
                dbc.Row(
                    [
                        dbc.Col(
                            [
                                dbc.Button(
                                    [
                                        html.I(className="fas fa-file-text me-2"),
                                        "Load Sample Text",
                                    ],
                                    id="load-sample-btn",
                                    color="info",
                                    size="sm",
                                    className="w-100",
                                )
                            ],
                            md=6,
                        ),
                        dbc.Col(
                            [
                                dbc.Button(
                                    [
                                        html.I(className="fas fa-trash me-2"),
                                        "Clear Text",
                                    ],
                                    id="clear-text-btn",
                                    color="outline-secondary",
                                    size="sm",
                                    className="w-100",
                                )
                            ],
                            md=6,
                        ),
                    ],
                    className="mt-2 mb-3",
                )
            ]
        )

    def _create_processing_options(self):
        """Create tokenization and metadata options."""
        return html.Div(
            [
                html.H5("Processing Options", className="mb-3"),
                dbc.Row(
                    [
                        dbc.Col(
                            [
                                html.Label(
                                    "Text Splitting Method:", className="form-label"
                                ),
                                dcc.Dropdown(
                                    id="tokenization-method",
                                    options=[
                                        {
                                            "label": "Sentences (split on . ! ?)",
                                            "value": "sentence",
                                        },
                                        {
                                            "label": "Paragraphs (split on double newline)",
                                            "value": "paragraph",
                                        },
                                        {
                                            "label": "Lines (split on single newline)",
                                            "value": "manual",
                                        },
                                        {
                                            "label": "Entire text as one document",
                                            "value": "whole",
                                        },
                                    ],
                                    value=self.settings.DEFAULT_TOKENIZATION_METHOD,
                                    className="mb-3",
                                ),
                            ],
                            md=6,
                        ),
                        dbc.Col(
                            [
                                html.Label("Batch Size:", className="form-label"),
                                dcc.Dropdown(
                                    id="batch-size",
                                    options=[
                                        {
                                            "label": "Small batches (4) - Lower memory",
                                            "value": 4,
                                        },
                                        {
                                            "label": "Medium batches (8) - Balanced",
                                            "value": 8,
                                        },
                                        {
                                            "label": "Large batches (16) - Faster",
                                            "value": 16,
                                        },
                                    ],
                                    value=self.settings.MAX_BATCH_SIZE,
                                    className="mb-3",
                                ),
                            ],
                            md=6,
                        ),
                    ]
                ),
                dbc.Row(
                    [
                        dbc.Col(
                            [
                                html.Label(
                                    "Category (Optional):", className="form-label"
                                ),
                                dcc.Input(
                                    id="text-category",
                                    type="text",
                                    placeholder="e.g., Notes, Articles, Ideas...",
                                    value="Text Input",
                                    className="form-control mb-3",
                                ),
                            ],
                            md=6,
                        ),
                        dbc.Col(
                            [
                                html.Label(
                                    "Subcategory (Optional):", className="form-label"
                                ),
                                dcc.Input(
                                    id="text-subcategory",
                                    type="text",
                                    placeholder="e.g., Meeting Notes, Research...",
                                    value="Generated",
                                    className="form-control mb-3",
                                ),
                            ],
                            md=6,
                        ),
                    ]
                ),
            ]
        )

    def _create_generation_controls(self):
        """Create embedding generation button and controls."""
        return html.Div(
            [
                html.H5("Generate Embeddings", className="mb-3"),
                dbc.Row(
                    [
                        dbc.Col(
                            [
                                dbc.Button(
                                    [
                                        html.I(className="fas fa-magic me-2"),
                                        "Generate Embeddings",
                                    ],
                                    id="generate-embeddings-btn",
                                    color="primary",
                                    size="lg",
                                    disabled=True,
                                    className="w-100",
                                )
                            ],
                            md=12,
                        ),
                    ]
                ),
                html.Div(
                    [
                        dbc.Alert(
                            [
                                html.I(className="fas fa-info-circle me-2"),
                                "Enter some text above and select a model to enable embedding generation.",
                            ],
                            color="light",
                            className="mt-3",
                            id="generation-help",
                        )
                    ]
                ),
            ]
        )

    def _create_progress_indicators(self):
        """Create progress bars for model loading and embedding generation."""
        return html.Div(
            [
                # Model loading progress
                html.Div(
                    [
                        html.H6("Model Loading Progress", className="mb-2"),
                        dbc.Progress(
                            id="model-loading-progress",
                            value=0,
                            striped=True,
                            animated=True,
                            className="mb-2",
                        ),
                        html.Small(
                            id="model-loading-status",
                            children="No model loading in progress",
                            className="text-muted",
                        ),
                    ],
                    id="model-loading-section",
                    style={"display": "none"},
                ),
                html.Br(),
                # Embedding generation progress
                html.Div(
                    [
                        html.H6("Embedding Generation Progress", className="mb-2"),
                        dbc.Progress(
                            id="embedding-progress",
                            value=0,
                            striped=True,
                            animated=True,
                            className="mb-2",
                        ),
                        html.Small(
                            id="embedding-status",
                            children="No embedding generation in progress",
                            className="text-muted",
                        ),
                    ],
                    id="embedding-progress-section",
                    style={"display": "none"},
                ),
            ]
        )

    def _create_status_section(self):
        """Create status alerts and results preview."""
        return html.Div(
            [
                # Immediate status (from client-side)
                dbc.Alert(
                    id="text-input-status-immediate",
                    children="Ready to generate embeddings",
                    color="light",
                    className="mb-3",
                ),
                # Server-side status
                dbc.Alert(
                    id="text-input-status",
                    children="",
                    color="light",
                    className="mb-3",
                    style={"display": "none"},
                ),
                # Results preview
                html.Div(id="embedding-results-preview"),
            ]
        )

    def _create_hidden_components(self):
        """Create hidden components for data flow."""
        return html.Div(
            [
                # Store for embeddings data from client-side
                dcc.Store(id="embeddings-generated-trigger"),
                # Store for tokenization preview
                dcc.Store(id="tokenization-preview-data"),
            ]
        )

    def _get_model_description(self, model_name):
        """Get description for a specific model."""
        for model in self.settings.AVAILABLE_MODELS:
            if model["name"] == model_name:
                return html.Div(
                    [
                        html.Strong(
                            f"Dimensions: {model['dimensions']} | Context Length: {model['context_length']}"
                        ),
                        html.Br(),
                        html.Span(model["description"]),
                        html.Br(),
                        html.Small(
                            f"Multilingual: {'Yes' if model.get('multilingual', False) else 'No'} | Size: {model['size']}",
                            className="text-muted",
                        ),
                    ]
                )

        return html.Span("Model information not available", className="text-muted")
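A quick sketch of how this component would be mounted in a layout. The tab wiring and ids are assumptions for illustration; only `TextInputComponent` and its public method come from the file above:

import dash_bootstrap_components as dbc

from embeddingbuddy.ui.components.textinput import TextInputComponent

# Hypothetical placement inside a tabbed sidebar; label and tab_id are illustrative.
text_tab = dbc.Tab(
    TextInputComponent().create_text_input_interface(),
    label="Text Input",
    tab_id="tab-text-input",
)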
@@ -20,6 +20,15 @@ class AppLayout:
                 dbc.Col(
                     [
                         html.H1("EmbeddingBuddy", className="text-center mb-4"),
+                        # Load Transformers.js from CDN
+                        html.Script(
+                            """
+                            import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2';
+                            window.transformersPipeline = pipeline;
+                            console.log('✅ Transformers.js pipeline loaded globally');
+                            """,
+                            type="module",
+                        ),
                     ],
                     width=12,
                 )
158 tests/test_client_embeddings.py Normal file
@@ -0,0 +1,158 @@
"""Tests for client-side embedding processing functionality."""

import numpy as np

from src.embeddingbuddy.data.processor import DataProcessor
from src.embeddingbuddy.models.schemas import ProcessedData


class TestClientEmbeddingsProcessing:
    """Test client-side embeddings processing functionality."""

    def setup_method(self):
        """Set up test instances."""
        self.processor = DataProcessor()

    def test_process_client_embeddings_success(self):
        """Test successful processing of client-side embeddings data."""
        client_data = {
            "documents": [
                {
                    "id": "text_input_0",
                    "text": "First test document",
                    "category": "Text Input",
                    "subcategory": "Generated",
                    "tags": [],
                },
                {
                    "id": "text_input_1",
                    "text": "Second test document",
                    "category": "Text Input",
                    "subcategory": "Generated",
                    "tags": [],
                },
            ],
            "embeddings": [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
        }

        result = self.processor.process_client_embeddings(client_data)

        assert isinstance(result, ProcessedData)
        assert result.error is None
        assert len(result.documents) == 2
        assert result.embeddings.shape == (2, 4)

        # Check document content
        assert result.documents[0].text == "First test document"
        assert result.documents[1].text == "Second test document"

        # Check embeddings match
        np.testing.assert_array_equal(result.embeddings[0], [0.1, 0.2, 0.3, 0.4])
        np.testing.assert_array_equal(result.embeddings[1], [0.5, 0.6, 0.7, 0.8])

    def test_process_client_embeddings_with_error(self):
        """Test processing client data with error."""
        client_data = {"error": "Transformers.js not loaded"}

        result = self.processor.process_client_embeddings(client_data)

        assert isinstance(result, ProcessedData)
        assert result.error == "Transformers.js not loaded"
        assert len(result.documents) == 0
        assert result.embeddings.size == 0

    def test_process_client_embeddings_missing_data(self):
        """Test processing with missing documents or embeddings."""
        client_data = {"documents": []}

        result = self.processor.process_client_embeddings(client_data)

        assert isinstance(result, ProcessedData)
        assert "No documents or embeddings in client data" in result.error
        assert len(result.documents) == 0

    def test_process_client_embeddings_mismatch_count(self):
        """Test processing with mismatched document and embedding counts."""
        client_data = {
            "documents": [
                {
                    "id": "test",
                    "text": "Test document",
                    "category": "Test",
                    "subcategory": "Test",
                    "tags": [],
                }
            ],
            "embeddings": [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
        }

        result = self.processor.process_client_embeddings(client_data)

        assert isinstance(result, ProcessedData)
        assert "Mismatch between number of documents and embeddings" in result.error
        assert len(result.documents) == 0

    def test_process_client_embeddings_invalid_document(self):
        """Test processing with invalid document data."""
        client_data = {
            "documents": [
                {"text": ""},  # Empty text should be skipped
                {
                    "id": "test2",
                    "text": "Valid document",
                    "category": "Test",
                    "subcategory": "Test",
                    "tags": [],
                },
            ],
            "embeddings": [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
        }

        result = self.processor.process_client_embeddings(client_data)

        assert isinstance(result, ProcessedData)
        assert result.error is None
        assert len(result.documents) == 1  # Only valid document should be processed
        assert result.documents[0].text == "Valid document"

    def test_process_client_embeddings_auto_id_generation(self):
        """Test automatic ID generation for documents without IDs."""
        client_data = {
            "documents": [
                {
                    "text": "Document without ID",
                    "category": "Test",
                    "subcategory": "Test",
                    "tags": [],
                }
            ],
            "embeddings": [[0.1, 0.2, 0.3, 0.4]],
        }

        result = self.processor.process_client_embeddings(client_data)

        assert isinstance(result, ProcessedData)
        assert result.error is None
        assert len(result.documents) == 1
        assert result.documents[0].id.startswith("text_input_")

    def test_process_client_embeddings_invalid_embedding_format(self):
        """Test processing with invalid embedding format."""
        client_data = {
            "documents": [
                {
                    "id": "test",
                    "text": "Test document",
                    "category": "Test",
                    "subcategory": "Test",
                    "tags": [],
                }
            ],
            "embeddings": 0.5,  # Scalar instead of array
        }

        result = self.processor.process_client_embeddings(client_data)

        assert isinstance(result, ProcessedData)
        assert result.error is not None  # Should have some error
        assert len(result.documents) == 0
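These tests pin down the processor contract: a dict with parallel `documents` and `embeddings` lists, an optional `error` key that short-circuits, length-mismatch detection, skipping of empty-text documents, and auto-generated `text_input_N` ids. A standalone sketch of those validation rules, consistent with the tests but not the actual implementation (the function name is mine):

import numpy as np


def validate_client_payload(client_data):
    """Sketch of the validation rules the tests above encode; returns
    ((kept_docs, embeddings_array), error) where kept_docs are
    (id, document, embedding) triples."""
    if "error" in client_data:
        return None, client_data["error"]
    docs = client_data.get("documents")
    embs = client_data.get("embeddings")
    if not docs or not embs:
        return None, "No documents or embeddings in client data"
    if not isinstance(embs, (list, tuple)) or len(docs) != len(embs):
        return None, "Mismatch between number of documents and embeddings"
    kept = [
        (doc.get("id") or f"text_input_{i}", doc, emb)  # auto-generate missing ids
        for i, (doc, emb) in enumerate(zip(docs, embs))
        if doc.get("text")  # empty-text documents are skipped
    ]
    return (kept, np.asarray([emb for _, _, emb in kept])), None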
155 tests/test_data_processor_opensearch.py Normal file
@@ -0,0 +1,155 @@
from unittest.mock import patch

from src.embeddingbuddy.data.processor import DataProcessor
from src.embeddingbuddy.models.field_mapper import FieldMapping


class TestDataProcessorOpenSearch:
    def test_process_opensearch_data_success(self):
        processor = DataProcessor()

        # Mock raw OpenSearch documents
        raw_documents = [
            {
                "vector": [0.1, 0.2, 0.3],
                "content": "Test document 1",
                "doc_id": "doc1",
                "type": "news",
            },
            {
                "vector": [0.4, 0.5, 0.6],
                "content": "Test document 2",
                "doc_id": "doc2",
                "type": "blog",
            },
        ]

        # Create field mapping
        field_mapping = FieldMapping(
            embedding_field="vector",
            text_field="content",
            id_field="doc_id",
            category_field="type",
        )

        # Process the data
        processed_data = processor.process_opensearch_data(raw_documents, field_mapping)

        # Assertions
        assert processed_data.error is None
        assert len(processed_data.documents) == 2
        assert processed_data.embeddings.shape == (2, 3)

        # Check first document
        doc1 = processed_data.documents[0]
        assert doc1.text == "Test document 1"
        assert doc1.embedding == [0.1, 0.2, 0.3]
        assert doc1.id == "doc1"
        assert doc1.category == "news"

        # Check second document
        doc2 = processed_data.documents[1]
        assert doc2.text == "Test document 2"
        assert doc2.embedding == [0.4, 0.5, 0.6]
        assert doc2.id == "doc2"
        assert doc2.category == "blog"

    def test_process_opensearch_data_with_tags(self):
        processor = DataProcessor()

        # Mock raw OpenSearch documents with tags
        raw_documents = [
            {
                "vector": [0.1, 0.2, 0.3],
                "content": "Test document with tags",
                "keywords": ["tag1", "tag2"],
            }
        ]

        # Create field mapping
        field_mapping = FieldMapping(
            embedding_field="vector", text_field="content", tags_field="keywords"
        )

        processed_data = processor.process_opensearch_data(raw_documents, field_mapping)

        assert processed_data.error is None
        assert len(processed_data.documents) == 1
        doc = processed_data.documents[0]
        assert doc.tags == ["tag1", "tag2"]

    def test_process_opensearch_data_invalid_documents(self):
        processor = DataProcessor()

        # Mock raw documents with missing required fields
        raw_documents = [
            {
                "vector": [0.1, 0.2, 0.3],
                # Missing text field
            }
        ]

        field_mapping = FieldMapping(embedding_field="vector", text_field="content")

        processed_data = processor.process_opensearch_data(raw_documents, field_mapping)

        # Should return error since no valid documents
        assert processed_data.error is not None
        assert "No valid documents" in processed_data.error
        assert len(processed_data.documents) == 0

    def test_process_opensearch_data_partial_success(self):
        processor = DataProcessor()

        # Mix of valid and invalid documents
        raw_documents = [
            {
                "vector": [0.1, 0.2, 0.3],
                "content": "Valid document",
            },
            {
                "vector": [0.4, 0.5, 0.6],
                # Missing content field - should be skipped
            },
            {
                "vector": [0.7, 0.8, 0.9],
                "content": "Another valid document",
            },
        ]

        field_mapping = FieldMapping(embedding_field="vector", text_field="content")

        processed_data = processor.process_opensearch_data(raw_documents, field_mapping)

        # Should process valid documents only
        assert processed_data.error is None
        assert len(processed_data.documents) == 2
        assert processed_data.documents[0].text == "Valid document"
        assert processed_data.documents[1].text == "Another valid document"

    @patch("src.embeddingbuddy.models.field_mapper.FieldMapper.transform_documents")
    def test_process_opensearch_data_transformation_error(self, mock_transform):
        processor = DataProcessor()

        # Mock transformation error
        mock_transform.side_effect = Exception("Transformation failed")

        raw_documents = [{"vector": [0.1], "content": "test"}]
        field_mapping = FieldMapping(embedding_field="vector", text_field="content")

        processed_data = processor.process_opensearch_data(raw_documents, field_mapping)

        assert processed_data.error is not None
        assert "Transformation failed" in processed_data.error
        assert len(processed_data.documents) == 0

    def test_process_opensearch_data_empty_input(self):
        processor = DataProcessor()

        raw_documents = []
        field_mapping = FieldMapping(embedding_field="vector", text_field="content")

        processed_data = processor.process_opensearch_data(raw_documents, field_mapping)

        assert processed_data.error is not None
        assert "No valid documents" in processed_data.error
        assert len(processed_data.documents) == 0
310 tests/test_opensearch.py Normal file
@@ -0,0 +1,310 @@
from unittest.mock import Mock, patch

from src.embeddingbuddy.data.sources.opensearch import OpenSearchClient
from src.embeddingbuddy.models.field_mapper import FieldMapper, FieldMapping


class TestOpenSearchClient:
    def test_init(self):
        client = OpenSearchClient()
        assert client.client is None
        assert client.connection_info is None

    @patch("src.embeddingbuddy.data.sources.opensearch.OpenSearch")
    def test_connect_success(self, mock_opensearch):
        # Mock the OpenSearch client
        mock_client_instance = Mock()
        mock_client_instance.info.return_value = {
            "cluster_name": "test-cluster",
            "version": {"number": "2.0.0"},
        }
        mock_opensearch.return_value = mock_client_instance

        client = OpenSearchClient()
        success, message = client.connect("https://localhost:9200")

        assert success is True
        assert "test-cluster" in message
        assert client.client is not None
        assert client.connection_info["cluster_name"] == "test-cluster"

    @patch("src.embeddingbuddy.data.sources.opensearch.OpenSearch")
    def test_connect_failure(self, mock_opensearch):
        # Mock connection failure
        mock_opensearch.side_effect = Exception("Connection failed")

        client = OpenSearchClient()
        success, message = client.connect("https://localhost:9200")

        assert success is False
        assert "Connection failed" in message
        assert client.client is None

    def test_analyze_fields(self):
        client = OpenSearchClient()
        client.client = Mock()

        # Mock mapping response
        mock_mapping = {
            "test-index": {
                "mappings": {
                    "properties": {
                        "embedding": {"type": "dense_vector", "dimension": 768},
                        "text": {"type": "text"},
                        "category": {"type": "keyword"},
                        "id": {"type": "keyword"},
                        "count": {"type": "integer"},
                    }
                }
            }
        }
        client.client.indices.get_mapping.return_value = mock_mapping

        success, analysis, message = client.analyze_fields("test-index")

        assert success is True
        assert len(analysis["vector_fields"]) == 1
        assert analysis["vector_fields"][0]["name"] == "embedding"
        assert analysis["vector_fields"][0]["dimension"] == 768
        assert "text" in analysis["text_fields"]
        assert "category" in analysis["keyword_fields"]
        assert "count" in analysis["numeric_fields"]

    def test_fetch_sample_data(self):
        client = OpenSearchClient()
        client.client = Mock()

        # Mock search response
        mock_response = {
            "hits": {
                "hits": [
                    {"_source": {"text": "doc1", "embedding": [0.1, 0.2]}},
                    {"_source": {"text": "doc2", "embedding": [0.3, 0.4]}},
                ]
            }
        }
        client.client.search.return_value = mock_response

        success, documents, message = client.fetch_sample_data("test-index", size=2)

        assert success is True
        assert len(documents) == 2
        assert documents[0]["text"] == "doc1"
        assert documents[1]["text"] == "doc2"


class TestFieldMapper:
    def test_suggest_mappings(self):
        field_analysis = {
            "vector_fields": [{"name": "embedding", "dimension": 768}],
            "text_fields": ["content", "description"],
            "keyword_fields": ["doc_id", "category", "type", "tags"],
            "numeric_fields": ["count"],
            "all_fields": [
                "embedding",
                "content",
                "description",
                "doc_id",
                "category",
                "type",
                "tags",
                "count",
            ],
        }

        suggestions = FieldMapper.suggest_mappings(field_analysis)

        # Check that all dropdowns contain all fields
        all_fields = [
            "embedding",
            "content",
            "description",
            "doc_id",
            "category",
            "type",
            "tags",
            "count",
        ]
        for field_type in [
            "embedding",
            "text",
            "id",
            "category",
            "subcategory",
            "tags",
        ]:
            for field in all_fields:
                assert field in suggestions[field_type], (
                    f"Field '{field}' missing from {field_type} suggestions"
                )

        # Check that best candidates are first
        assert (
            suggestions["embedding"][0] == "embedding"
        )  # vector field should be first
        assert suggestions["text"][0] in [
            "content",
            "description",
        ]  # text fields should be first
        assert suggestions["id"][0] == "doc_id"  # ID-like field should be first
        assert suggestions["category"][0] in [
            "category",
            "type",
        ]  # category-like field should be first
        assert suggestions["tags"][0] == "tags"  # tags field should be first

    def test_suggest_mappings_name_based_embedding(self):
        """Test that fields named 'embedding' are prioritized even without vector type."""
        field_analysis = {
            "vector_fields": [],  # No explicit vector fields detected
            "text_fields": ["content", "description"],
            "keyword_fields": ["doc_id", "category", "type", "tags"],
            "numeric_fields": ["count"],
            "all_fields": [
                "content",
                "description",
                "doc_id",
                "category",
                "embedding",
                "type",
                "tags",
                "count",
            ],
        }

        suggestions = FieldMapper.suggest_mappings(field_analysis)

        # Check that 'embedding' field is prioritized despite not being detected as vector type
        assert suggestions["embedding"][0] == "embedding", (
            "Field named 'embedding' should be first priority"
        )

        # Check that all fields are still available
        all_fields = [
            "content",
            "description",
            "doc_id",
            "category",
            "embedding",
            "type",
            "tags",
            "count",
        ]
        for field_type in [
            "embedding",
            "text",
            "id",
            "category",
            "subcategory",
            "tags",
        ]:
            for field in all_fields:
                assert field in suggestions[field_type], (
                    f"Field '{field}' missing from {field_type} suggestions"
                )

    def test_validate_mapping_success(self):
        mapping = FieldMapping(
            embedding_field="embedding", text_field="text", id_field="doc_id"
        )
        available_fields = ["embedding", "text", "doc_id", "category"]

        errors = FieldMapper.validate_mapping(mapping, available_fields)

        assert len(errors) == 0

    def test_validate_mapping_missing_required(self):
        mapping = FieldMapping(embedding_field="missing_field", text_field="text")
        available_fields = ["text", "category"]

        errors = FieldMapper.validate_mapping(mapping, available_fields)

        assert len(errors) == 1
        assert "missing_field" in errors[0]
        assert "not found" in errors[0]

    def test_validate_mapping_missing_optional(self):
        mapping = FieldMapping(
            embedding_field="embedding",
            text_field="text",
            category_field="missing_category",
        )
        available_fields = ["embedding", "text"]

        errors = FieldMapper.validate_mapping(mapping, available_fields)

        assert len(errors) == 1
        assert "missing_category" in errors[0]

    def test_transform_documents(self):
        mapping = FieldMapping(
            embedding_field="vector",
            text_field="content",
            id_field="doc_id",
            category_field="type",
        )

        raw_documents = [
            {
                "vector": [0.1, 0.2, 0.3],
                "content": "Test document 1",
                "doc_id": "doc1",
                "type": "news",
            },
            {
                "vector": [0.4, 0.5, 0.6],
                "content": "Test document 2",
                "doc_id": "doc2",
                "type": "blog",
            },
        ]

        transformed = FieldMapper.transform_documents(raw_documents, mapping)

        assert len(transformed) == 2
        assert transformed[0]["embedding"] == [0.1, 0.2, 0.3]
        assert transformed[0]["text"] == "Test document 1"
        assert transformed[0]["id"] == "doc1"
        assert transformed[0]["category"] == "news"

    def test_transform_documents_missing_required(self):
        mapping = FieldMapping(embedding_field="vector", text_field="content")

        raw_documents = [
            {
                "vector": [0.1, 0.2, 0.3],
                # Missing content field
            }
        ]

        transformed = FieldMapper.transform_documents(raw_documents, mapping)

        assert len(transformed) == 0  # Document should be skipped

    def test_create_mapping_from_dict(self):
        mapping_dict = {
            "embedding": "vector_field",
            "text": "text_field",
            "id": "doc_id",
            "category": "cat_field",
            "subcategory": "subcat_field",
            "tags": "tags_field",
        }

        mapping = FieldMapper.create_mapping_from_dict(mapping_dict)

        assert mapping.embedding_field == "vector_field"
        assert mapping.text_field == "text_field"
        assert mapping.id_field == "doc_id"
        assert mapping.category_field == "cat_field"
        assert mapping.subcategory_field == "subcat_field"
        assert mapping.tags_field == "tags_field"

    def test_create_mapping_from_dict_minimal(self):
        mapping_dict = {"embedding": "vector_field", "text": "text_field"}

        mapping = FieldMapper.create_mapping_from_dict(mapping_dict)

        assert mapping.embedding_field == "vector_field"
        assert mapping.text_field == "text_field"
        assert mapping.id_field is None
        assert mapping.category_field is None
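The `FieldMapping` constructor calls above imply a simple container with two required fields and four optional ones defaulting to `None`. A plausible sketch inferred from the tests (the real class may differ in details):

from dataclasses import dataclass
from typing import Optional


@dataclass
class FieldMapping:
    """Sketch inferred from the tests above; the actual definition may differ."""

    embedding_field: str
    text_field: str
    id_field: Optional[str] = None
    category_field: Optional[str] = None
    subcategory_field: Optional[str] = None
    tags_field: Optional[str] = None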
28 uv.lock generated
@@ -412,7 +412,7 @@ wheels = [
 
 [[package]]
 name = "embeddingbuddy"
-version = "0.2.0"
+version = "0.3.0"
 source = { editable = "." }
 dependencies = [
     { name = "dash" },
@@ -420,6 +420,7 @@ dependencies = [
     { name = "mypy" },
     { name = "numba" },
     { name = "numpy" },
+    { name = "opensearch-py" },
     { name = "opentsne" },
     { name = "pandas" },
     { name = "plotly" },
@@ -471,6 +472,7 @@ requires-dist = [
     { name = "mypy", marker = "extra == 'lint'", specifier = ">=1.5.0" },
     { name = "numba", specifier = ">=0.56.4" },
     { name = "numpy", specifier = ">=1.24.4" },
+    { name = "opensearch-py", specifier = ">=3.0.0" },
     { name = "opentsne", specifier = ">=1.0.0" },
     { name = "pandas", specifier = ">=2.1.4" },
     { name = "pip-audit", marker = "extra == 'security'", specifier = ">=2.6.0" },
@@ -484,6 +486,14 @@ requires-dist = [
 ]
 provides-extras = ["test", "lint", "security", "dev", "all"]
 
+[[package]]
+name = "events"
+version = "0.5"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/25/ed/e47dec0626edd468c84c04d97769e7ab4ea6457b7f54dcb3f72b17fcd876/Events-0.5-py3-none-any.whl", hash = "sha256:a7286af378ba3e46640ac9825156c93bdba7502174dd696090fdfcd4d80a1abd", size = 6758, upload-time = "2023-07-31T08:23:13.645Z" },
+]
+
 [[package]]
 name = "filelock"
 version = "3.16.1"
@@ -913,6 +923,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" },
 ]
 
+[[package]]
+name = "opensearch-py"
+version = "3.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "events" },
+    { name = "python-dateutil" },
+    { name = "requests" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b8/58/ecec7f855aae7bcfb08f570088c6cb993f68c361a0727abab35dbf021acb/opensearch_py-3.0.0.tar.gz", hash = "sha256:ebb38f303f8a3f794db816196315bcddad880be0dc75094e3334bc271db2ed39", size = 248890, upload-time = "2025-06-17T05:39:48.453Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/71/e0/69fd114c607b0323d3f864ab4a5ecb87d76ec5a172d2e36a739c8baebea1/opensearch_py-3.0.0-py3-none-any.whl", hash = "sha256:842bf5d56a4a0d8290eda9bb921c50f3080e5dc4e5fefb9c9648289da3f6a8bb", size = 371491, upload-time = "2025-06-17T05:39:46.539Z" },
+]
+
 [[package]]
 name = "opentsne"
 version = "1.0.2"
|
Reference in New Issue
Block a user