add in browser embedding generation

2025-09-06 07:16:30 -07:00
parent 14abc446b7
commit cdaaffd735
13 changed files with 1582 additions and 3 deletions
--- a/src/embeddingbuddy/app.py
+++ b/src/embeddingbuddy/app.py
@@ -8,7 +8,16 @@ from .ui.callbacks.interactions import InteractionCallbacks


 def create_app():
-    app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
+    import os
+    # Get the project root directory (two levels up from this file)
+    project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+    assets_path = os.path.join(project_root, 'assets')
+    
+    app = dash.Dash(
+        __name__, 
+        external_stylesheets=[dbc.themes.BOOTSTRAP],
+        assets_folder=assets_path
+    )

    # Allow callbacks to components that are dynamically created in tabs
    app.config.suppress_callback_exceptions = True
@@ -20,9 +29,78 @@ def create_app():
    VisualizationCallbacks()
    InteractionCallbacks()

+    # Register client-side callback for embedding generation
+    _register_client_side_callbacks(app)
+
    return app


+def _register_client_side_callbacks(app):
+    """Register the clientside (in-browser JavaScript) callback that generates embeddings with Transformers.js."""
+    from dash import Input, Output, State
+
+    # On a click with non-blank text, delegate to window.dash_clientside.transformers.generateEmbeddings; if it is missing, return an error payload for the four outputs
+    app.clientside_callback(
+        """
+        function(nClicks, textContent, modelName, tokenizationMethod, batchSize, category, subcategory) {
+            if (!nClicks || !textContent || !textContent.trim()) {
+                return window.dash_clientside.no_update;
+            }
+            
+            console.log('🔍 Checking for Transformers.js...');
+            console.log('window.dash_clientside:', typeof window.dash_clientside);
+            console.log('window.dash_clientside.transformers:', typeof window.dash_clientside?.transformers);
+            console.log('generateEmbeddings function:', typeof window.dash_clientside?.transformers?.generateEmbeddings);
+            
+            if (typeof window.dash_clientside !== 'undefined' && 
+                typeof window.dash_clientside.transformers !== 'undefined' &&
+                typeof window.dash_clientside.transformers.generateEmbeddings === 'function') {
+                
+                console.log('✅ Calling Transformers.js generateEmbeddings...');
+                return window.dash_clientside.transformers.generateEmbeddings(
+                    nClicks, textContent, modelName, tokenizationMethod, category, subcategory
+                );
+            }
+            
+            // More detailed error information
+            let errorMsg = '❌ Transformers.js not available. ';
+            if (typeof window.dash_clientside === 'undefined') {
+                errorMsg += 'dash_clientside not found.';
+            } else if (typeof window.dash_clientside.transformers === 'undefined') {
+                errorMsg += 'transformers module not found.';
+            } else if (typeof window.dash_clientside.transformers.generateEmbeddings !== 'function') {
+                errorMsg += 'generateEmbeddings function not found.';
+            }
+            
+            console.error(errorMsg);
+            
+            return [
+                { error: 'Transformers.js not loaded. Please refresh the page and try again.' },
+                errorMsg + ' Please refresh the page.',
+                'danger',
+                false
+            ];
+        }
+        """,
+        [
+            Output("embeddings-generated-trigger", "data"),  # input of the server-side process_embeddings_result callback
+            Output("text-input-status-immediate", "children"),
+            Output("text-input-status-immediate", "color"),
+            Output("generate-embeddings-btn", "disabled", allow_duplicate=True),  # also written by toggle_generate_button and process_embeddings_result
+        ],
+        [Input("generate-embeddings-btn", "n_clicks")],
+        [
+            State("text-input-area", "value"),
+            State("model-selection", "value"),
+            State("tokenization-method", "value"),
+            State("batch-size", "value"),  # NOTE(review): arrives as batchSize but is not forwarded to generateEmbeddings — confirm intended
+            State("text-category", "value"),
+            State("text-subcategory", "value"),
+        ],
+        prevent_initial_call=True,
+    )
+
+
 def run_app(app=None, debug=None, host=None, port=None):
    if app is None:
        app = create_app()
--- a/src/embeddingbuddy/config/settings.py
+++ b/src/embeddingbuddy/config/settings.py
@@ -79,6 +79,71 @@ class AppSettings:
    OPENSEARCH_CONNECTION_TIMEOUT = 30
    OPENSEARCH_VERIFY_CERTS = True

+    # Text Input / Transformers.js Configuration (read by the text-input UI component and its callbacks)
+    DEFAULT_EMBEDDING_MODEL = "Xenova/all-mpnet-base-v2"
+    MAX_TEXT_LENGTH = 50000  # Characters (browser memory limits)
+    DEFAULT_TOKENIZATION_METHOD = "sentence"
+    MAX_BATCH_SIZE = 8  # Pre-selected batch-size dropdown value. NOTE(review): the dropdown also offers 16, so this acts as a default rather than a hard maximum — confirm naming
+
+    # Available Transformers.js compatible models. "default" is optional (read via .get) and adds a "(Recommended)" suffix in the model dropdown
+    AVAILABLE_MODELS = [
+        {
+            "name": "Xenova/all-mpnet-base-v2",
+            "label": "All-MPNet-Base-v2 (Quality, 768d)",
+            "description": "Higher quality embeddings with better semantic understanding",
+            "dimensions": 768,
+            "size": "109 MB",
+            "context_length": 512,
+            "multilingual": False,
+            "default": True,
+        },
+        {
+            "name": "Xenova/all-MiniLM-L6-v2",
+            "label": "All-MiniLM-L6-v2 (Fast, 384d)",
+            "description": "Lightweight model, good for quick testing and general purpose",
+            "dimensions": 384,
+            "size": "23 MB",
+            "context_length": 512,
+            "multilingual": False,
+            "default": False,
+        },
+        {
+            "name": "Xenova/paraphrase-multilingual-MiniLM-L12-v2",
+            "label": "Multilingual MiniLM (50+ languages)",
+            "description": "Support for multiple languages with good performance",
+            "dimensions": 384,
+            "size": "127 MB",
+            "context_length": 512,
+            "multilingual": True,
+        },
+        {
+            "name": "Xenova/bge-small-en-v1.5",
+            "label": "BGE Small English (High quality, 384d)",
+            "description": "Beijing Academy of AI model with excellent performance on retrieval tasks",
+            "dimensions": 384,
+            "size": "67 MB",
+            "context_length": 512,
+            "multilingual": False,
+        },
+        {
+            "name": "Xenova/gte-small",
+            "label": "GTE Small (General Text Embeddings, 384d)",
+            "description": "Alibaba's general text embedding model, balanced performance",
+            "dimensions": 384,
+            "size": "67 MB",
+            "context_length": 512,
+            "multilingual": False,
+        },
+    ]
+
+    # Browser compatibility requirements. NOTE(review): not referenced by any code visible in this change — confirm where it is enforced
+    SUPPORTED_BROWSERS = {
+        "chrome": ">=88",
+        "firefox": ">=92",
+        "safari": ">=15.4",
+        "edge": ">=88",
+    }
+
    # Bootstrap Theme
    EXTERNAL_STYLESHEETS = [
        "https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css"
--- a/src/embeddingbuddy/data/processor.py
+++ b/src/embeddingbuddy/data/processor.py
@@ -63,6 +63,90 @@ class DataProcessor:
        except Exception as e:
            return ProcessedData(documents=[], embeddings=np.array([]), error=str(e))

+    def process_client_embeddings(self, embeddings_data: dict) -> ProcessedData:
+        """Build ProcessedData from embeddings generated client-side.
+
+        Expects {"documents": [...], "embeddings": [[...], ...]} or {"error": msg};
+        problems are reported through ProcessedData.error instead of being raised.
+        """
+        try:
+            if "error" in embeddings_data:
+                return ProcessedData(
+                    documents=[],
+                    embeddings=np.array([]),
+                    error=embeddings_data["error"],
+                )
+
+            # Extract documents and embeddings from client data
+            documents_data = embeddings_data.get("documents", [])
+            embeddings_list = embeddings_data.get("embeddings", [])
+
+            if not documents_data or not embeddings_list:
+                return ProcessedData(
+                    documents=[],
+                    embeddings=np.array([]),
+                    error="No documents or embeddings in client data",
+                )
+
+            if len(documents_data) != len(embeddings_list):
+                return ProcessedData(
+                    documents=[],
+                    embeddings=np.array([]),
+                    error="Mismatch between number of documents and embeddings",
+                )
+
+            # Convert embeddings to numpy array first
+            try:
+                embeddings = np.array(embeddings_list)
+                if embeddings.ndim != 2:
+                    return ProcessedData(
+                        documents=[],
+                        embeddings=np.array([]),
+                        error="Invalid embedding dimensions",
+                    )
+            except Exception as e:
+                return ProcessedData(
+                    documents=[],
+                    embeddings=np.array([]),
+                    error=f"Error processing embeddings: {str(e)}",
+                )
+
+            # Convert to Document objects, remembering which embedding rows are kept
+            documents = []
+            kept_rows = []
+            for i, doc_data in enumerate(documents_data):
+                try:
+                    # Work on a copy so the caller's payload is not mutated
+                    doc_fields = dict(doc_data)
+
+                    # Ensure required fields are present
+                    if not doc_fields.get("id"):
+                        doc_fields["id"] = f"text_input_{i}"
+                    if "text" not in doc_fields or not doc_fields["text"].strip():
+                        continue  # Skip documents without text
+
+                    # Add the embedding to the document fields
+                    doc_fields["embedding"] = embeddings[i].tolist()
+                    documents.append(Document(**doc_fields))
+                    kept_rows.append(i)
+                except Exception:
+                    # Skip invalid documents but continue processing
+                    continue
+
+            if not documents:
+                return ProcessedData(
+                    documents=[],
+                    embeddings=np.array([]),
+                    error="No valid documents found in client data",
+                )
+
+            # Keep only the rows of kept documents (a prefix slice misaligns them after any skip)
+            valid_embeddings = embeddings[kept_rows]
+            return ProcessedData(documents=documents, embeddings=valid_embeddings)
+
+        except Exception as e:
+            return ProcessedData(documents=[], embeddings=np.array([]), error=str(e))
+
    def _extract_embeddings(self, documents: List[Document]) -> np.ndarray:
        if not documents:
            return np.array([])
--- a/src/embeddingbuddy/ui/callbacks/data_processing.py
+++ b/src/embeddingbuddy/ui/callbacks/data_processing.py
@@ -1,4 +1,4 @@
-from dash import callback, Input, Output, State, no_update
+from dash import callback, Input, Output, State, no_update, html
 from ...data.processor import DataProcessor
 from ...data.sources.opensearch import OpenSearchClient
 from ...models.field_mapper import FieldMapper
@@ -87,6 +87,8 @@ class DataProcessingCallbacks:

            if active_tab == "opensearch-tab":
                return [datasource.create_opensearch_tab()]
+            elif active_tab == "text-input-tab":
+                return [datasource.create_text_input_tab()]
            else:
                return [datasource.create_file_upload_tab()]

@@ -97,6 +99,9 @@ class DataProcessingCallbacks:
        # Register collapsible section callbacks
        self._register_collapse_callbacks()

+        # Register text input callbacks
+        self._register_text_input_callbacks()
+
    def _register_opensearch_callbacks(self, section_type, opensearch_client):
        """Register callbacks for a specific section (data or prompts)."""

@@ -463,6 +468,220 @@ class DataProcessingCallbacks:
                return new_state, icon_class
            return is_open, "fas fa-chevron-down me-2"

+    def _register_text_input_callbacks(self):
+        """Register callbacks for text input functionality."""
+
+        # Text length counter callback
+        @callback(
+            Output("text-length-counter", "children"),
+            Input("text-input-area", "value"),
+            prevent_initial_call=False,
+        )
+        def update_text_length_counter(text_value):
+            """Return the textarea's character count, thousands-separated ("0" when empty)."""
+            char_count = len(text_value) if text_value else 0
+            return f"{char_count:,}"
+
+        # Generate button enable/disable callback
+        @callback(
+            [
+                Output("generate-embeddings-btn", "disabled"),
+                Output("generation-help", "children"),
+                Output("generation-help", "color"),
+            ],
+            [
+                Input("text-input-area", "value"),
+                Input("model-selection", "value"),
+            ],
+            prevent_initial_call=False,
+        )
+        def toggle_generate_button(text_value, model_name):
+            """Decide whether embedding generation can start and explain why or why not.
+
+            Args:
+                text_value: Current textarea content (may be None or empty).
+                model_name: Selected embedding model name (may be None).
+
+            Returns:
+                Tuple of (button disabled flag, help alert, alert color).
+            """
+            import dash_bootstrap_components as dbc
+            # Local import so MAX_TEXT_LENGTH resolves even if this module does not
+            # import AppSettings at top level (no such import is visible here).
+            from ...config.settings import AppSettings
+
+            def help_state(disabled, icon, message, color):
+                """Bundle the three output values around one icon-plus-message alert."""
+                alert = dbc.Alert(
+                    [html.I(className=f"fas {icon} me-2"), message],
+                    color=color,
+                )
+                return (disabled, alert, color)
+
+            if not text_value or not text_value.strip():
+                return help_state(
+                    True,
+                    "fa-info-circle",
+                    "Enter some text above to enable embedding generation.",
+                    "light",
+                )
+
+            if not model_name:
+                return help_state(
+                    True,
+                    "fa-exclamation-triangle",
+                    "Select an embedding model to continue.",
+                    "warning",
+                )
+
+            text_length = len(text_value.strip())
+            if text_length > AppSettings.MAX_TEXT_LENGTH:
+                return help_state(
+                    True,
+                    "fa-exclamation-triangle",
+                    f"Text too long ({text_length:,} characters). Maximum allowed: {AppSettings.MAX_TEXT_LENGTH:,} characters.",
+                    "danger",
+                )
+
+            return help_state(
+                False,
+                "fa-check-circle",
+                f"Ready to generate embeddings for {text_length:,} characters using {model_name}.",
+                "success",
+            )
+
+        # Clear text callback
+        @callback(
+            Output("text-input-area", "value"),
+            [Input("clear-text-btn", "n_clicks"), Input("load-sample-btn", "n_clicks")],
+            prevent_initial_call=True,
+        )
+        def handle_text_input_actions(clear_clicks, load_clicks):
+            """Empty the textarea or fill it with the sample text, per the clicked button."""
+            from dash import ctx
+
+            fired = ctx.triggered
+            if not fired:
+                return no_update
+
+            source_id = fired[0]["prop_id"].split(".")[0]
+            if source_id == "clear-text-btn":
+                return "" if clear_clicks else no_update
+            if source_id == "load-sample-btn" and load_clicks:
+                return self._load_sample_text()
+            return no_update
+
+        # Model info callback
+        @callback(
+            Output("model-info", "children"),
+            Input("model-selection", "value"),
+            prevent_initial_call=False,
+        )
+        def update_model_info(model_name):
+            """Describe the selected model: dimensions, context length, language support, size."""
+            if not model_name:
+                return html.Span("Please select a model", className="text-muted")
+
+            from ...config.settings import AppSettings
+
+            catalog = AppSettings().AVAILABLE_MODELS
+            model = next((entry for entry in catalog if entry["name"] == model_name), None)
+            if model is None:
+                return html.Span("Model information not available", className="text-muted")
+
+            return html.Div(
+                [
+                    html.Strong(
+                        f"Dimensions: {model['dimensions']} | Context Length: {model['context_length']}"
+                    ),
+                    html.Br(),
+                    html.Span(model["description"]),
+                    html.Br(),
+                    html.Small(
+                        f"Multilingual: {'Yes' if model.get('multilingual', False) else 'No'} | Size: {model['size']}",
+                        className="text-muted",
+                    ),
+                ]
+            )
+
+        # Process client-side embeddings result callback
+        @callback(
+            [
+                Output("processed-data", "data", allow_duplicate=True),
+                Output("text-input-status", "children"),
+                Output("text-input-status", "color"),
+                Output("text-input-status", "style"),
+                Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
+            ],
+            [Input("embeddings-generated-trigger", "data")],
+            prevent_initial_call=True,
+        )
+        def process_embeddings_result(embeddings_data):
+            """Turn the client-side embedding payload into stored data plus a status message.
+
+            Returns values for the five outputs declared above, in order.
+            """
+            if not embeddings_data:
+                return (no_update,) * 5
+
+            processed_data = self.processor.process_client_embeddings(embeddings_data)
+            visible = {"display": "block"}
+
+            # Failure: surface the processor's error and re-enable the button
+            if processed_data.error:
+                store = {"error": processed_data.error}
+                message = f"❌ Error: {processed_data.error}"
+                return store, message, "danger", visible, False
+
+            # Success: serialize the documents and the embedding matrix for the store
+            store = {
+                "documents": [
+                    self._document_to_dict(doc) for doc in processed_data.documents
+                ],
+                "embeddings": processed_data.embeddings.tolist(),
+            }
+            message = (
+                f"✅ Generated embeddings for {len(processed_data.documents)} text chunks"
+            )
+            return store, message, "success", visible, False
+
+    def _load_sample_text(self):
+        """Return the sample text from assets/sample-txt.md, or a built-in fallback.
+
+        A missing file yields the built-in multi-topic sample; any other read
+        failure yields a one-line generic sample. I/O errors are never raised.
+        """
+        import os
+
+        # Five dirname() steps: strip the file name, then climb four directories
+        project_root = os.path.abspath(__file__)
+        for _ in range(5):
+            project_root = os.path.dirname(project_root)
+        sample_file_path = os.path.join(project_root, "assets", "sample-txt.md")
+        try:
+            with open(sample_file_path, "r", encoding="utf-8") as file:
+                return file.read()
+        except FileNotFoundError:
+            return """The sun peeked through the clouds after a drizzly morning.
+A gentle breeze rustled the leaves as we walked along the shoreline.
+Heavy rains caused flooding in several low-lying neighborhoods.
+It was so hot that even the birds sought shade under the palm trees.
+By midnight, the temperature had dropped below freezing.
+
+The new smartphone features a foldable display and 5G connectivity.
+In the world of AI, transformers have revolutionized natural language processing.
+Quantum computing promises to solve problems beyond classical computers' reach.
+Blockchain technology is being explored for secure voting systems.
+Virtual reality headsets are becoming more affordable and accessible.
+
+Preheat the oven to 375°F before you start mixing the batter.
+She finely chopped the garlic and sautéed it in two tablespoons of olive oil.
+A pinch of saffron adds a beautiful color and aroma to traditional paella.
+If the soup is too salty, add a peeled potato to absorb excess sodium.
+Let the bread dough rise for at least an hour in a warm, draft-free spot."""
+        except (OSError, UnicodeDecodeError):
+            return "This is sample text for testing embedding generation. You can replace this with your own text."
+
    @staticmethod
    def _document_to_dict(doc):
        return {
--- a/src/embeddingbuddy/ui/components/datasource.py
+++ b/src/embeddingbuddy/ui/components/datasource.py
@@ -1,11 +1,13 @@
 from dash import dcc, html
 import dash_bootstrap_components as dbc
 from .upload import UploadComponent
+from .textinput import TextInputComponent


 class DataSourceComponent:
    def __init__(self):
        self.upload_component = UploadComponent()
+        self.text_input_component = TextInputComponent()

    def create_tabbed_interface(self):
        """Create tabbed interface for different data sources."""
@@ -17,6 +19,7 @@ class DataSourceComponent:
                            [
                                dbc.Tab(label="File Upload", tab_id="file-tab"),
                                dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
+                                dbc.Tab(label="Text Input", tab_id="text-input-tab"),
                            ],
                            id="data-source-tabs",
                            active_tab="file-tab",
@@ -208,6 +211,10 @@ class DataSourceComponent:
            ]
        )

+    def create_text_input_tab(self):
+        """Return the "Text Input" tab body: the text-input interface wrapped in a Div."""
+        return html.Div(children=[self.text_input_component.create_text_input_interface()])
+
    def _create_opensearch_section(self, section_type):
        """Create a complete OpenSearch section for either 'data' or 'prompts'."""
        section_id = section_type  # 'data' or 'prompts'
--- a/src/embeddingbuddy/ui/components/textinput.py
+++ b/src/embeddingbuddy/ui/components/textinput.py
@@ -0,0 +1,402 @@
+"""Text input component for generating embeddings from user text."""
+
+import dash_bootstrap_components as dbc
+from dash import dcc, html
+
+from embeddingbuddy.config.settings import AppSettings
+
+
+class TextInputComponent:
+    """Component for text input and embedding generation."""
+
+    def __init__(self):
+        self.settings = AppSettings()  # model list, text limit and defaults read by the builder methods below
+
+    def create_text_input_interface(self):
+        """Assemble every section of the text-input tab into a single container.
+
+        Most sections are separated by horizontal rules; the result is a Div with class "p-3".
+        """
+        sections = [
+            # Model selection section
+            self._create_model_selection(),
+            html.Hr(),
+            # Text input section, immediately followed by its action buttons
+            self._create_text_input_area(),
+            self._create_text_action_buttons(),
+            html.Hr(),
+            # Processing options
+            self._create_processing_options(),
+            html.Hr(),
+            # Generation controls
+            self._create_generation_controls(),
+            html.Hr(),
+            # Progress indicators
+            self._create_progress_indicators(),
+            html.Hr(),
+            # Status and results
+            self._create_status_section(),
+            # Hidden components for data flow
+            self._create_hidden_components(),
+        ]
+        return html.Div(sections, className="p-3")
+
+    def _create_model_selection(self):
+        """Build the embedding-model dropdown and the info alert shown beneath it.
+
+        The default model from settings is pre-selected and pre-described.
+        """
+        default_model = self.settings.DEFAULT_EMBEDDING_MODEL
+
+        # One option per configured model; entries flagged "default" get a "(Recommended)" suffix
+        model_options = [
+            {
+                "label": f"{model['label']} - {model['size']}"
+                + (" (Recommended)" if model.get("default", False) else ""),
+                "value": model["name"],
+            }
+            for model in self.settings.AVAILABLE_MODELS
+        ]
+
+        dropdown = dcc.Dropdown(
+            id="model-selection",
+            options=model_options,
+            value=default_model,
+            placeholder="Select an embedding model...",
+            className="mb-2",
+        )
+
+        # "model-info" starts with the default model's description
+        info_slot = html.Div(
+            id="model-info",
+            children=self._get_model_description(default_model),
+        )
+        info_alert = dbc.Alert([info_slot], color="info", className="small")
+
+        return html.Div(
+            [
+                html.H5("Embedding Model", className="mb-3"),
+                html.Div([dropdown, info_alert]),
+            ]
+        )
+
+    def _create_text_input_area(self):
+        """Create the text-input textarea (capped at MAX_TEXT_LENGTH) and its live character counter."""
+        return html.Div(
+            [
+                html.H5("Text Input", className="mb-3"),
+                dcc.Textarea(
+                    id="text-input-area",
+                    placeholder="Paste your text here... Each sentence, paragraph, or line will become a separate data point depending on your tokenization method below.",
+                    value="",
+                    style={  # Dash style dicts take camelCased CSS property names
+                        "width": "100%",
+                        "height": "300px",
+                        "resize": "vertical",
+                        "fontFamily": "monospace",
+                        "fontSize": "14px",
+                    },
+                    maxLength=self.settings.MAX_TEXT_LENGTH,
+                    className="form-control",
+                ),
+                html.Small(
+                    f"Maximum {self.settings.MAX_TEXT_LENGTH:,} characters. Current: ",
+                    className="text-muted",
+                ),
+                html.Small(
+                    id="text-length-counter",
+                    children="0",
+                    className="text-muted fw-bold",
+                ),
+                html.Small(" characters", className="text-muted"),
+            ]
+        )
+
+    def _create_text_action_buttons(self):
+        """Create the "Load Sample Text" and "Clear Text" buttons (ids load-sample-btn / clear-text-btn, the inputs of handle_text_input_actions)."""
+        return html.Div(
+            [
+                dbc.Row(
+                    [
+                        dbc.Col(
+                            [
+                                dbc.Button(
+                                    [
+                                        html.I(className="fas fa-file-text me-2"),
+                                        "Load Sample Text",
+                                    ],
+                                    id="load-sample-btn",
+                                    color="info",
+                                    size="sm",
+                                    className="w-100",
+                                )
+                            ],
+                            md=6,
+                        ),
+                        dbc.Col(
+                            [
+                                dbc.Button(
+                                    [
+                                        html.I(className="fas fa-trash me-2"),
+                                        "Clear Text",
+                                    ],
+                                    id="clear-text-btn",
+                                    color="outline-secondary",
+                                    size="sm",
+                                    className="w-100",
+                                )
+                            ],
+                            md=6,
+                        ),
+                    ],
+                    className="mt-2 mb-3",
+                )
+            ]
+        )
+
+    def _create_processing_options(self):
+        """Create the splitting-method and batch-size dropdowns plus the optional category/subcategory inputs."""
+        return html.Div(
+            [
+                html.H5("Processing Options", className="mb-3"),
+                dbc.Row(
+                    [
+                        dbc.Col(
+                            [
+                                html.Label(
+                                    "Text Splitting Method:", className="form-label"
+                                ),
+                                dcc.Dropdown(
+                                    id="tokenization-method",
+                                    options=[
+                                        {
+                                            "label": "Sentences (split on . ! ?)",
+                                            "value": "sentence",
+                                        },
+                                        {
+                                            "label": "Paragraphs (split on double newline)",
+                                            "value": "paragraph",
+                                        },
+                                        {
+                                            "label": "Lines (split on single newline)",
+                                            "value": "manual",  # the "Lines" option is submitted as "manual"
+                                        },
+                                        {
+                                            "label": "Entire text as one document",
+                                            "value": "whole",
+                                        },
+                                    ],
+                                    value=self.settings.DEFAULT_TOKENIZATION_METHOD,
+                                    className="mb-3",
+                                ),
+                            ],
+                            md=6,
+                        ),
+                        dbc.Col(
+                            [
+                                html.Label("Batch Size:", className="form-label"),
+                                dcc.Dropdown(
+                                    id="batch-size",
+                                    options=[
+                                        {
+                                            "label": "Small batches (4) - Lower memory",
+                                            "value": 4,
+                                        },
+                                        {
+                                            "label": "Medium batches (8) - Balanced",
+                                            "value": 8,
+                                        },
+                                        {
+                                            "label": "Large batches (16) - Faster",
+                                            "value": 16,
+                                        },
+                                    ],
+                                    value=self.settings.MAX_BATCH_SIZE,  # initial selection only; 16 above is also selectable
+                                    className="mb-3",
+                                ),
+                            ],
+                            md=6,
+                        ),
+                    ]
+                ),
+                dbc.Row(
+                    [
+                        dbc.Col(
+                            [
+                                html.Label(
+                                    "Category (Optional):", className="form-label"
+                                ),
+                                dcc.Input(
+                                    id="text-category",
+                                    type="text",
+                                    placeholder="e.g., Notes, Articles, Ideas...",
+                                    value="Text Input",
+                                    className="form-control mb-3",
+                                ),
+                            ],
+                            md=6,
+                        ),
+                        dbc.Col(
+                            [
+                                html.Label(
+                                    "Subcategory (Optional):", className="form-label"
+                                ),
+                                dcc.Input(
+                                    id="text-subcategory",
+                                    type="text",
+                                    placeholder="e.g., Meeting Notes, Research...",
+                                    value="Generated",
+                                    className="form-control mb-3",
+                                ),
+                            ],
+                            md=6,
+                        ),
+                    ]
+                ),
+            ]
+        )
+
+    def _create_generation_controls(self):
+        """Build the "Generate Embeddings" heading, button and help alert.
+
+        Returns an ``html.Div`` whose children are, in order: an H5 heading,
+        a single full-width row holding the ``generate-embeddings-btn``
+        button, and a wrapper ``html.Div`` around the ``generation-help``
+        alert.
+        """
+        heading = html.H5("Generate Embeddings", className="mb-3")
+
+        # The button is created disabled; nothing in this method enables it.
+        generate_button = dbc.Button(
+            [html.I(className="fas fa-magic me-2"), "Generate Embeddings"],
+            id="generate-embeddings-btn",
+            color="primary",
+            size="lg",
+            disabled=True,
+            className="w-100",
+        )
+        button_row = dbc.Row([dbc.Col([generate_button], md=12)])
+
+        # Hint shown under the button explaining what is needed to proceed.
+        help_alert = dbc.Alert(
+            [
+                html.I(className="fas fa-info-circle me-2"),
+                "Enter some text above and select a model to enable embedding generation.",
+            ],
+            id="generation-help",
+            color="light",
+            className="mt-3",
+        )
+
+        return html.Div([heading, button_row, html.Div([help_alert])])
+
+    def _create_progress_indicators(self):
+        """Build the model-loading and embedding-generation progress sections.
+
+        Each section contains an H6 title, a striped and animated
+        ``dbc.Progress`` bar starting at 0, and a muted status line. Both
+        sections are created with ``display: none`` and are separated by an
+        ``html.Br``.
+        """
+
+        def hidden_section(section_id, title, bar_id, status_id, idle_text):
+            # One title + progress bar + status line, wrapped in a hidden Div.
+            return html.Div(
+                [
+                    html.H6(title, className="mb-2"),
+                    dbc.Progress(
+                        id=bar_id,
+                        value=0,
+                        striped=True,
+                        animated=True,
+                        className="mb-2",
+                    ),
+                    html.Small(
+                        id=status_id,
+                        children=idle_text,
+                        className="text-muted",
+                    ),
+                ],
+                id=section_id,
+                style={"display": "none"},
+            )
+
+        # Model loading progress
+        model_section = hidden_section(
+            "model-loading-section",
+            "Model Loading Progress",
+            "model-loading-progress",
+            "model-loading-status",
+            "No model loading in progress",
+        )
+        # Embedding generation progress
+        embedding_section = hidden_section(
+            "embedding-progress-section",
+            "Embedding Generation Progress",
+            "embedding-progress",
+            "embedding-status",
+            "No embedding generation in progress",
+        )
+
+        return html.Div([model_section, html.Br(), embedding_section])
+
+    def _create_status_section(self):
+        """Build the status area: two alerts followed by a results container.
+
+        The first alert (``text-input-status-immediate``) starts with a
+        "ready" message; the second (``text-input-status``) starts empty and
+        hidden. The trailing ``embedding-results-preview`` Div starts with no
+        children.
+        """
+        # Immediate status (from client-side)
+        immediate_alert = dbc.Alert(
+            id="text-input-status-immediate",
+            children="Ready to generate embeddings",
+            color="light",
+            className="mb-3",
+        )
+
+        # Server-side status; not displayed initially.
+        server_alert = dbc.Alert(
+            id="text-input-status",
+            children="",
+            color="light",
+            className="mb-3",
+            style={"display": "none"},
+        )
+
+        # Results preview
+        preview_container = html.Div(id="embedding-results-preview")
+
+        return html.Div([immediate_alert, server_alert, preview_container])
+
+    def _create_hidden_components(self):
+        """Build a Div holding the ``dcc.Store`` components used for data flow."""
+        store_ids = (
+            # Store for embeddings data from client-side
+            "embeddings-generated-trigger",
+            # Store for tokenization preview
+            "tokenization-preview-data",
+        )
+        return html.Div([dcc.Store(id=store_id) for store_id in store_ids])
+
+    def _get_model_description(self, model_name):
+        """Return a component describing the model called ``model_name``.
+
+        Looks up the first entry of ``self.settings.AVAILABLE_MODELS`` whose
+        ``"name"`` equals ``model_name`` and renders its dimensions, context
+        length, description, multilingual flag and size. When no entry
+        matches, a muted "not available" ``html.Span`` is returned instead.
+        """
+        entry = next(
+            (m for m in self.settings.AVAILABLE_MODELS if m["name"] == model_name),
+            None,
+        )
+        if entry is None:
+            return html.Span("Model information not available", className="text-muted")
+
+        specs_line = html.Strong(
+            f"Dimensions: {entry['dimensions']} | Context Length: {entry['context_length']}"
+        )
+        summary = html.Span(entry["description"])
+        # "multilingual" is optional on a model entry; a missing key reads as "No".
+        multilingual_label = "Yes" if entry.get("multilingual", False) else "No"
+        footer = html.Small(
+            f"Multilingual: {multilingual_label} | Size: {entry['size']}",
+            className="text-muted",
+        )
+
+        return html.Div([specs_line, html.Br(), summary, html.Br(), footer])
--- a/src/embeddingbuddy/ui/layout.py
+++ b/src/embeddingbuddy/ui/layout.py
@@ -20,6 +20,15 @@ class AppLayout:
                dbc.Col(
                    [
                        html.H1("EmbeddingBuddy", className="text-center mb-4"),
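+                        # Load Transformers.js from CDN.
+                        # NOTE(review): Dash's html.Script documentation cautions that
+                        # JavaScript supplied to a <script> element this way is not
+                        # executed. Confirm that window.transformersPipeline is actually
+                        # set; otherwise load the module from the assets folder or a
+                        # clientside callback instead.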
+                        html.Script(
+                            """
+                            import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2';
+                            window.transformersPipeline = pipeline;
+                            console.log('✅ Transformers.js pipeline loaded globally');
+                            """,
+                            type="module"
+                        ),
                    ],
                    width=12,
                )