8 Commits

Author SHA1 Message Date
c29160c9e9 format
All checks were successful
Security Scan / security (pull_request) Successful in 37s
Security Scan / dependency-check (pull_request) Successful in 39s
Test Suite / lint (pull_request) Successful in 31s
Test Suite / test (3.11) (pull_request) Successful in 1m34s
Test Suite / build (pull_request) Successful in 39s
2025-09-13 14:43:09 -07:00
bd3ee6e35a update screenshot
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 44s
Security Scan / security (pull_request) Successful in 44s
Test Suite / lint (pull_request) Failing after 31s
Test Suite / test (3.11) (pull_request) Successful in 1m28s
Test Suite / build (pull_request) Has been skipped
2025-09-13 14:38:52 -07:00
6936bc5d97 v0.5.0 - rework the sidebar
Some checks failed
Security Scan / security (pull_request) Successful in 32s
Security Scan / dependency-check (pull_request) Successful in 33s
Test Suite / test (3.11) (pull_request) Successful in 1m17s
Test Suite / lint (pull_request) Failing after 25s
Test Suite / build (pull_request) Has been skipped
This PR reworks the sidebar to be an accordion.
I also removed some of the progress feedback because it wasn't working correctly.
2025-09-13 14:34:02 -07:00
9a2e257b0d Merge pull request 'add-docker' (#5) from add-docker into main
Some checks failed
Security Scan / dependency-check (push) Successful in 39s
Security Scan / security (push) Successful in 44s
Test Suite / lint (push) Successful in 32s
Test Suite / test (3.11) (push) Successful in 1m36s
Test Suite / build (push) Successful in 45s
Release / test (push) Successful in 1m9s
Release / build-and-release (push) Failing after 24s
Reviewed-on: #5
2025-09-07 18:13:51 -07:00
9c3ff6e799 fix formatting
All checks were successful
Security Scan / security (pull_request) Successful in 38s
Security Scan / dependency-check (pull_request) Successful in 38s
Test Suite / test (3.11) (pull_request) Successful in 1m18s
Test Suite / lint (pull_request) Successful in 31s
Test Suite / build (pull_request) Successful in 37s
2025-09-07 17:44:33 -07:00
781d055e60 fix prod
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 40s
Security Scan / security (pull_request) Successful in 40s
Test Suite / lint (pull_request) Failing after 28s
Test Suite / test (3.11) (pull_request) Successful in 1m26s
Test Suite / build (pull_request) Has been skipped
2025-09-07 17:14:07 -07:00
0f5cea2850 updating image
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 37s
Security Scan / security (pull_request) Successful in 43s
Test Suite / lint (pull_request) Failing after 31s
Test Suite / test (3.11) (pull_request) Successful in 1m30s
Test Suite / build (pull_request) Has been skipped
2025-09-07 16:58:07 -07:00
1bd70705e7 bump version to 0.4.0
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 37s
Security Scan / security (pull_request) Successful in 40s
Test Suite / lint (pull_request) Failing after 31s
Test Suite / test (3.11) (pull_request) Successful in 1m33s
Test Suite / build (pull_request) Has been skipped
2025-09-07 16:53:49 -07:00
23 changed files with 306 additions and 406 deletions

View File

@@ -4,7 +4,9 @@
"Bash(mkdir:*)", "Bash(mkdir:*)",
"Bash(uv run:*)", "Bash(uv run:*)",
"Bash(uv add:*)", "Bash(uv add:*)",
"Bash(uv sync:*)" "Bash(uv sync:*)",
"Bash(tree:*)",
"WebFetch(domain:www.dash-bootstrap-components.com)"
], ],
"deny": [], "deny": [],
"ask": [], "ask": [],

View File

@@ -22,11 +22,13 @@ uv sync
**Run the application:** **Run the application:**
Development mode (with auto-reload): Development mode (with auto-reload):
```bash ```bash
uv run run_dev.py uv run run_dev.py
``` ```
Production mode (with Gunicorn WSGI server): Production mode (with Gunicorn WSGI server):
```bash ```bash
# First install production dependencies # First install production dependencies
uv sync --extra prod uv sync --extra prod
@@ -36,11 +38,12 @@ uv run run_prod.py
``` ```
Legacy mode (basic Dash server): Legacy mode (basic Dash server):
```bash ```bash
uv run main.py uv run main.py
``` ```
The app will be available at http://127.0.0.1:8050 The app will be available at <http://127.0.0.1:8050>
**Run tests:** **Run tests:**

View File

@@ -65,6 +65,11 @@ ENV EMBEDDINGBUDDY_ENV=production
# Expose port # Expose port
EXPOSE 8050 EXPOSE 8050
# Create non-root user
RUN groupadd -r appuser && useradd -r -g appuser appuser
RUN chown -R appuser:appuser /app
USER appuser
# Health check # Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \ HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5)" || exit 1 CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5)" || exit 1

View File

@@ -152,22 +152,38 @@ The application follows a modular architecture for improved maintainability and
```text ```text
src/embeddingbuddy/ src/embeddingbuddy/
├── config/ # Configuration management ├── app.py # Main application entry point and factory
│ └── settings.py # Centralized app settings ├── config/ # Configuration management
├── data/ # Data parsing and processing │ └── settings.py # Centralized app settings
│ ├── parser.py # NDJSON parsing logic ├── data/ # Data parsing and processing
── processor.py # Data transformation utilities ── parser.py # NDJSON parsing logic
├── models/ # Data schemas and algorithms │ ├── processor.py # Data transformation utilities
── schemas.py # Pydantic data models ── sources/ # Data source integrations
└── reducers.py # Dimensionality reduction algorithms └── opensearch.py # OpenSearch data source
├── visualization/ # Plot creation and styling ├── models/ # Data schemas and algorithms
│ ├── plots.py # Plot factory and creation logic │ ├── schemas.py # Pydantic data models
── colors.py # Color mapping utilities ── reducers.py # Dimensionality reduction algorithms
├── ui/ # User interface components │ └── field_mapper.py # Field mapping utilities
│ ├── layout.py # Main application layout ├── visualization/ # Plot creation and styling
│ ├── components/ # Reusable UI components │ ├── plots.py # Plot factory and creation logic
│ └── callbacks/ # Organized callback functions │ └── colors.py # Color mapping utilities
── utils/ # Utility functions ── ui/ # User interface components
│ ├── layout.py # Main application layout
│ ├── components/ # Reusable UI components
│ │ ├── sidebar.py # Sidebar component
│ │ ├── upload.py # Upload components
│ │ ├── textinput.py # Text input components
│ │ └── datasource.py # Data source components
│ └── callbacks/ # Organized callback functions
│ ├── data_processing.py # Data upload/processing callbacks
│ ├── visualization.py # Plot update callbacks
│ └── interactions.py # User interaction callbacks
└── utils/ # Utility functions
main.py # Application runner (at project root)
main.py # Application runner (at project root)
run_dev.py # Development server runner
run_prod.py # Production server runner
``` ```
### Testing ### Testing

17
assets/custom.css Normal file
View File

@@ -0,0 +1,17 @@
/* CSS override for transparent hover boxes in Plotly plots */
/* Make hover boxes transparent while preserving text readability */
.hovertext {
fill-opacity: 0.8 !important;
stroke-opacity: 1 !important;
}
/* Alternative selector for different Plotly versions */
g.hovertext > path {
opacity: 0.8 !important;
}
/* Ensure text remains fully visible */
.hovertext text {
opacity: 1 !important;
}

View File

@@ -45,28 +45,12 @@ class TransformersEmbedder {
console.log('✅ Using globally loaded Transformers.js pipeline'); console.log('✅ Using globally loaded Transformers.js pipeline');
} }
// Show loading progress to user this.extractor = await window.transformers.pipeline('feature-extraction', modelName);
if (window.updateModelLoadingProgress) {
window.updateModelLoadingProgress(0, `Loading ${modelName}...`);
}
this.extractor = await window.transformers.pipeline('feature-extraction', modelName, {
progress_callback: (data) => {
if (window.updateModelLoadingProgress && data.progress !== undefined) {
const progress = Math.round(data.progress);
window.updateModelLoadingProgress(progress, data.status || 'Loading...');
}
}
});
this.modelCache.set(modelName, this.extractor); this.modelCache.set(modelName, this.extractor);
this.currentModel = modelName; this.currentModel = modelName;
this.isLoading = false; this.isLoading = false;
if (window.updateModelLoadingProgress) {
window.updateModelLoadingProgress(100, 'Model loaded successfully');
}
return { success: true, model: modelName }; return { success: true, model: modelName };
} catch (error) { } catch (error) {
this.isLoading = false; this.isLoading = false;
@@ -116,17 +100,8 @@ class TransformersEmbedder {
} }
}); });
// Update progress
const progress = Math.min(100, ((i + batch.length) / texts.length) * 100);
if (window.updateEmbeddingProgress) {
window.updateEmbeddingProgress(progress, `Processing ${i + batch.length}/${texts.length} texts`);
}
} }
if (window.updateEmbeddingProgress) {
window.updateEmbeddingProgress(100, `Generated ${embeddings.length} embeddings successfully`);
}
return embeddings; return embeddings;
} catch (error) { } catch (error) {
console.error('Embedding generation error:', error); console.error('Embedding generation error:', error);
@@ -139,30 +114,6 @@ class TransformersEmbedder {
window.transformersEmbedder = new TransformersEmbedder(); window.transformersEmbedder = new TransformersEmbedder();
console.log('📦 TransformersEmbedder instance created'); console.log('📦 TransformersEmbedder instance created');
// Global progress update functions
window.updateModelLoadingProgress = function(progress, status) {
const progressBar = document.getElementById('model-loading-progress');
const statusText = document.getElementById('model-loading-status');
if (progressBar) {
progressBar.style.width = progress + '%';
progressBar.setAttribute('aria-valuenow', progress);
}
if (statusText) {
statusText.textContent = status;
}
};
window.updateEmbeddingProgress = function(progress, status) {
const progressBar = document.getElementById('embedding-progress');
const statusText = document.getElementById('embedding-status');
if (progressBar) {
progressBar.style.width = progress + '%';
progressBar.setAttribute('aria-valuenow', progress);
}
if (statusText) {
statusText.textContent = status;
}
};
// Dash clientside callback functions // Dash clientside callback functions
window.dash_clientside = window.dash_clientside || {}; window.dash_clientside = window.dash_clientside || {};
@@ -170,31 +121,28 @@ console.log('🔧 Setting up window.dash_clientside.transformers');
window.dash_clientside.transformers = { window.dash_clientside.transformers = {
generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) { generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
console.log('🚀 generateEmbeddings called with:', { nClicks, modelName, tokenizationMethod, textLength: textContent?.length }); console.log('🚀 generateEmbeddings called with:', { nClicks, modelName, tokenizationMethod, textLength: textContent?.length });
if (!nClicks || !textContent || textContent.trim().length === 0) { if (!nClicks || !textContent || textContent.trim().length === 0) {
console.log('⚠️ Early return - missing required parameters'); console.log('⚠️ Early return - missing required parameters');
return window.dash_clientside.no_update; return window.dash_clientside.no_update;
} }
try { try {
// Initialize model if needed // Initialize model if needed
const initResult = await window.transformersEmbedder.initializeModel(modelName); const initResult = await window.transformersEmbedder.initializeModel(modelName);
if (!initResult.success) { if (!initResult.success) {
return [ return [
{ error: initResult.error }, { error: `Model loading error: ${initResult.error}` },
`❌ Model loading error: ${initResult.error}`,
"danger",
false false
]; ];
} }
// Tokenize text based on method // Tokenize text based on method
let textChunks; let textChunks;
const trimmedText = textContent.trim(); const trimmedText = textContent.trim();
switch (tokenizationMethod) { switch (tokenizationMethod) {
case 'sentence': case 'sentence':
// Simple sentence splitting - can be enhanced with proper NLP
textChunks = trimmedText textChunks = trimmedText
.split(/[.!?]+/) .split(/[.!?]+/)
.map(s => s.trim()) .map(s => s.trim())
@@ -215,28 +163,24 @@ window.dash_clientside.transformers = {
default: default:
textChunks = [trimmedText]; textChunks = [trimmedText];
} }
if (textChunks.length === 0) { if (textChunks.length === 0) {
return [ return [
{ error: 'No valid text chunks found after tokenization' }, { error: 'No valid text chunks found after tokenization' },
'❌ Error: No valid text chunks found after tokenization',
"danger",
false false
]; ];
} }
// Generate embeddings // Generate embeddings
const embeddings = await window.transformersEmbedder.generateEmbeddings(textChunks); const embeddings = await window.transformersEmbedder.generateEmbeddings(textChunks);
if (!embeddings || embeddings.length !== textChunks.length) { if (!embeddings || embeddings.length !== textChunks.length) {
return [ return [
{ error: 'Embedding generation failed - mismatch in text chunks and embeddings' }, { error: 'Embedding generation failed' },
'❌ Error: Embedding generation failed',
"danger",
false false
]; ];
} }
// Create documents structure // Create documents structure
const documents = textChunks.map((text, i) => ({ const documents = textChunks.map((text, i) => ({
id: `text_input_${Date.now()}_${i}`, id: `text_input_${Date.now()}_${i}`,
@@ -246,33 +190,36 @@ window.dash_clientside.transformers = {
subcategory: subcategory || "Generated", subcategory: subcategory || "Generated",
tags: [] tags: []
})); }));
// Return the successful embeddings data
const embeddingsData = {
documents: documents,
embeddings: embeddings
};
console.log('✅ Embeddings generated successfully:', embeddingsData);
return [ return [
{ embeddingsData,
documents: documents,
embeddings: embeddings
},
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
"success",
false false
]; ];
} catch (error) { } catch (error) {
console.error('Client-side embedding error:', error); console.error('Client-side embedding error:', error);
return [ return [
{ error: error.message }, { error: error.message },
`❌ Error: ${error.message}`,
"danger",
false false
]; ];
} }
} }
}; };
console.log('✅ Transformers.js client-side setup complete'); console.log('✅ Transformers.js client-side setup complete');
console.log('Available:', { console.log('Available:', {
transformersEmbedder: !!window.transformersEmbedder, transformersEmbedder: !!window.transformersEmbedder,
dashClientside: !!window.dash_clientside, dashClientside: !!window.dash_clientside,
transformersModule: !!window.dash_clientside?.transformers, transformersModule: !!window.dash_clientside?.transformers,
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings,
processAsync: typeof window.processEmbeddingsAsync
}); });

View File

@@ -104,17 +104,28 @@ window.dash_clientside = window.dash_clientside || {};
window.dash_clientside.transformers = { window.dash_clientside.transformers = {
generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) { generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
console.log('🚀 Client-side generateEmbeddings called'); console.log('🚀 Client-side generateEmbeddings called');
if (!nClicks || !textContent || textContent.trim().length === 0) { if (!nClicks || !textContent || textContent.trim().length === 0) {
console.log('⚠️ Missing required parameters'); console.log('⚠️ Missing required parameters');
return window.dash_clientside.no_update; return window.dash_clientside.no_update;
} }
try { try {
// Ensure Transformers.js is loaded
if (!window.transformersLibraryLoaded) {
const loaded = await initializeTransformers();
if (!loaded) {
return [
{ error: 'Failed to load Transformers.js' },
false
];
}
}
// Tokenize text // Tokenize text
let textChunks; let textChunks;
const trimmedText = textContent.trim(); const trimmedText = textContent.trim();
switch (tokenizationMethod) { switch (tokenizationMethod) {
case 'sentence': case 'sentence':
textChunks = trimmedText.split(/[.!?]+/).map(s => s.trim()).filter(s => s.length > 0); textChunks = trimmedText.split(/[.!?]+/).map(s => s.trim()).filter(s => s.length > 0);
@@ -128,45 +139,50 @@ window.dash_clientside.transformers = {
default: default:
textChunks = [trimmedText]; textChunks = [trimmedText];
} }
if (textChunks.length === 0) { if (textChunks.length === 0) {
throw new Error('No valid text chunks after tokenization'); return [
{ error: 'No valid text chunks after tokenization' },
false
];
} }
// Generate embeddings // Generate embeddings
const embeddings = await window.simpleEmbedder.generateEmbeddings(textChunks, modelName); const embeddings = await window.simpleEmbedder.generateEmbeddings(textChunks, modelName);
// Create documents // Create documents
const documents = textChunks.map((text, i) => ({ const documents = textChunks.map((text, i) => ({
id: `text_input_${Date.now()}_${i}`, id: `text_input_${Date.now()}_${i}`,
text: text, text: text,
embedding: embeddings[i], embedding: embeddings[i],
category: category || "Text Input", category: category || "Text Input",
subcategory: subcategory || "Generated", subcategory: subcategory || "Generated",
tags: [] tags: []
})); }));
// Return the successful embeddings data
const embeddingsData = {
documents: documents,
embeddings: embeddings
};
console.log('✅ Embeddings generated successfully:', embeddingsData);
return [ return [
{ embeddingsData,
documents: documents,
embeddings: embeddings
},
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
"success",
false false
]; ];
} catch (error) { } catch (error) {
console.error('❌ Error generating embeddings:', error); console.error('❌ Error generating embeddings:', error);
return [ return [
{ error: error.message }, { error: error.message },
`❌ Error: ${error.message}`,
"danger",
false false
]; ];
} }
} }
}; };
console.log('✅ Simple Transformers.js setup complete'); console.log('✅ Simple Transformers.js setup complete');
console.log('Available functions:', Object.keys(window.dash_clientside.transformers)); console.log('Available functions:', Object.keys(window.dash_clientside.transformers));

Binary file not shown.

Before

Width:  |  Height:  |  Size: 339 KiB

After

Width:  |  Height:  |  Size: 844 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "embeddingbuddy" name = "embeddingbuddy"
version = "0.4.0" version = "0.5.0"
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques." description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"

View File

@@ -25,7 +25,7 @@ def main():
"--workers", str(AppSettings.GUNICORN_WORKERS), "--workers", str(AppSettings.GUNICORN_WORKERS),
"--bind", AppSettings.GUNICORN_BIND, "--bind", AppSettings.GUNICORN_BIND,
"--timeout", str(AppSettings.GUNICORN_TIMEOUT), "--timeout", str(AppSettings.GUNICORN_TIMEOUT),
"--keepalive", str(AppSettings.GUNICORN_KEEPALIVE), "--keep-alive", str(AppSettings.GUNICORN_KEEPALIVE),
"--access-logfile", "-", "--access-logfile", "-",
"--error-logfile", "-", "--error-logfile", "-",
"--log-level", "info", "--log-level", "info",

View File

@@ -15,7 +15,12 @@ def create_app():
assets_path = os.path.join(project_root, "assets") assets_path = os.path.join(project_root, "assets")
app = dash.Dash( app = dash.Dash(
__name__, external_stylesheets=[dbc.themes.BOOTSTRAP], assets_folder=assets_path __name__,
external_stylesheets=[
dbc.themes.BOOTSTRAP,
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css",
],
assets_folder=assets_path,
) )
# Allow callbacks to components that are dynamically created in tabs # Allow callbacks to components that are dynamically created in tabs
@@ -45,22 +50,22 @@ def _register_client_side_callbacks(app):
if (!nClicks || !textContent || !textContent.trim()) { if (!nClicks || !textContent || !textContent.trim()) {
return window.dash_clientside.no_update; return window.dash_clientside.no_update;
} }
console.log('🔍 Checking for Transformers.js...'); console.log('🔍 Checking for Transformers.js...');
console.log('window.dash_clientside:', typeof window.dash_clientside); console.log('window.dash_clientside:', typeof window.dash_clientside);
console.log('window.dash_clientside.transformers:', typeof window.dash_clientside?.transformers); console.log('window.dash_clientside.transformers:', typeof window.dash_clientside?.transformers);
console.log('generateEmbeddings function:', typeof window.dash_clientside?.transformers?.generateEmbeddings); console.log('generateEmbeddings function:', typeof window.dash_clientside?.transformers?.generateEmbeddings);
if (typeof window.dash_clientside !== 'undefined' && if (typeof window.dash_clientside !== 'undefined' &&
typeof window.dash_clientside.transformers !== 'undefined' && typeof window.dash_clientside.transformers !== 'undefined' &&
typeof window.dash_clientside.transformers.generateEmbeddings === 'function') { typeof window.dash_clientside.transformers.generateEmbeddings === 'function') {
console.log('✅ Calling Transformers.js generateEmbeddings...'); console.log('✅ Calling Transformers.js generateEmbeddings...');
return window.dash_clientside.transformers.generateEmbeddings( return window.dash_clientside.transformers.generateEmbeddings(
nClicks, textContent, modelName, tokenizationMethod, category, subcategory nClicks, textContent, modelName, tokenizationMethod, category, subcategory
); );
} }
// More detailed error information // More detailed error information
let errorMsg = '❌ Transformers.js not available. '; let errorMsg = '❌ Transformers.js not available. ';
if (typeof window.dash_clientside === 'undefined') { if (typeof window.dash_clientside === 'undefined') {
@@ -70,21 +75,17 @@ def _register_client_side_callbacks(app):
} else if (typeof window.dash_clientside.transformers.generateEmbeddings !== 'function') { } else if (typeof window.dash_clientside.transformers.generateEmbeddings !== 'function') {
errorMsg += 'generateEmbeddings function not found.'; errorMsg += 'generateEmbeddings function not found.';
} }
console.error(errorMsg); console.error(errorMsg);
return [ return [
{ error: 'Transformers.js not loaded. Please refresh the page and try again.' }, { error: 'Transformers.js not loaded. Please refresh the page and try again.' },
errorMsg + ' Please refresh the page.',
'danger',
false false
]; ];
} }
""", """,
[ [
Output("embeddings-generated-trigger", "data"), Output("embeddings-generated-trigger", "data"),
Output("text-input-status-immediate", "children"),
Output("text-input-status-immediate", "color"),
Output("generate-embeddings-btn", "disabled", allow_duplicate=True), Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
], ],
[Input("generate-embeddings-btn", "n_clicks")], [Input("generate-embeddings-btn", "n_clicks")],

View File

@@ -72,10 +72,12 @@ class AppSettings:
DEBUG = os.getenv("EMBEDDINGBUDDY_DEBUG", "True").lower() == "true" DEBUG = os.getenv("EMBEDDINGBUDDY_DEBUG", "True").lower() == "true"
HOST = os.getenv("EMBEDDINGBUDDY_HOST", "127.0.0.1") HOST = os.getenv("EMBEDDINGBUDDY_HOST", "127.0.0.1")
PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050")) PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))
# Environment Configuration # Environment Configuration
ENVIRONMENT = os.getenv("EMBEDDINGBUDDY_ENV", "development") # development, production ENVIRONMENT = os.getenv(
"EMBEDDINGBUDDY_ENV", "development"
) # development, production
# WSGI Server Configuration (for production) # WSGI Server Configuration (for production)
GUNICORN_WORKERS = int(os.getenv("GUNICORN_WORKERS", "4")) GUNICORN_WORKERS = int(os.getenv("GUNICORN_WORKERS", "4"))
GUNICORN_BIND = os.getenv("GUNICORN_BIND", f"{HOST}:{PORT}") GUNICORN_BIND = os.getenv("GUNICORN_BIND", f"{HOST}:{PORT}")

View File

@@ -621,6 +621,12 @@ class DataProcessingCallbacks:
if not embeddings_data: if not embeddings_data:
return no_update, no_update, no_update, no_update, no_update return no_update, no_update, no_update, no_update, no_update
# Check if this is a request trigger (contains textContent) vs actual embeddings data
if isinstance(embeddings_data, dict) and "textContent" in embeddings_data:
# This is a processing request trigger, not the actual results
# The JavaScript will handle the async processing and update the UI directly
return no_update, no_update, no_update, no_update, no_update
processed_data = self.processor.process_client_embeddings(embeddings_data) processed_data = self.processor.process_client_embeddings(embeddings_data)
if processed_data.error: if processed_data.error:

View File

@@ -1,6 +1,5 @@
import dash import dash
from dash import callback, Input, Output, State, html from dash import callback, Input, Output
import dash_bootstrap_components as dbc
class InteractionCallbacks: class InteractionCallbacks:
@@ -8,75 +7,16 @@ class InteractionCallbacks:
self._register_callbacks() self._register_callbacks()
def _register_callbacks(self): def _register_callbacks(self):
@callback(
Output("point-details", "children"),
Input("embedding-plot", "clickData"),
[State("processed-data", "data"), State("processed-prompts", "data")],
)
def display_click_data(clickData, data, prompts_data):
if not clickData or not data:
return "Click on a point to see details"
point_data = clickData["points"][0]
trace_name = point_data.get("fullData", {}).get("name", "Documents")
if "pointIndex" in point_data:
point_index = point_data["pointIndex"]
elif "pointNumber" in point_data:
point_index = point_data["pointNumber"]
else:
return "Could not identify clicked point"
if (
trace_name.startswith("Prompts")
and prompts_data
and "prompts" in prompts_data
):
item = prompts_data["prompts"][point_index]
item_type = "Prompt"
else:
item = data["documents"][point_index]
item_type = "Document"
return self._create_detail_card(item, item_type)
@callback( @callback(
[ [
Output("processed-data", "data", allow_duplicate=True), Output("processed-data", "data", allow_duplicate=True),
Output("processed-prompts", "data", allow_duplicate=True), Output("processed-prompts", "data", allow_duplicate=True),
Output("point-details", "children", allow_duplicate=True),
], ],
Input("reset-button", "n_clicks"), Input("reset-button", "n_clicks"),
prevent_initial_call=True, prevent_initial_call=True,
) )
def reset_data(n_clicks): def reset_data(n_clicks):
if n_clicks is None or n_clicks == 0: if n_clicks is None or n_clicks == 0:
return dash.no_update, dash.no_update, dash.no_update return dash.no_update, dash.no_update
return None, None, "Click on a point to see details" return None, None
@staticmethod
def _create_detail_card(item, item_type):
return dbc.Card(
[
dbc.CardBody(
[
html.H5(f"{item_type}: {item['id']}", className="card-title"),
html.P(f"Text: {item['text']}", className="card-text"),
html.P(
f"Category: {item.get('category', 'Unknown')}",
className="card-text",
),
html.P(
f"Subcategory: {item.get('subcategory', 'Unknown')}",
className="card-text",
),
html.P(
f"Tags: {', '.join(item.get('tags', [])) if item.get('tags') else 'None'}",
className="card-text",
),
html.P(f"Type: {item_type}", className="card-text text-muted"),
]
)
]
)

View File

@@ -1,13 +1,11 @@
from dash import dcc, html from dash import dcc, html
import dash_bootstrap_components as dbc import dash_bootstrap_components as dbc
from .upload import UploadComponent from .upload import UploadComponent
from .textinput import TextInputComponent
class DataSourceComponent: class DataSourceComponent:
def __init__(self): def __init__(self):
self.upload_component = UploadComponent() self.upload_component = UploadComponent()
self.text_input_component = TextInputComponent()
def create_tabbed_interface(self): def create_tabbed_interface(self):
"""Create tabbed interface for different data sources.""" """Create tabbed interface for different data sources."""
@@ -19,7 +17,6 @@ class DataSourceComponent:
[ [
dbc.Tab(label="File Upload", tab_id="file-tab"), dbc.Tab(label="File Upload", tab_id="file-tab"),
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"), dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
dbc.Tab(label="Text Input", tab_id="text-input-tab"),
], ],
id="data-source-tabs", id="data-source-tabs",
active_tab="file-tab", active_tab="file-tab",
@@ -211,10 +208,6 @@ class DataSourceComponent:
] ]
) )
def create_text_input_tab(self):
"""Create text input tab content for browser-based embedding generation."""
return html.Div([self.text_input_component.create_text_input_interface()])
def _create_opensearch_section(self, section_type): def _create_opensearch_section(self, section_type):
"""Create a complete OpenSearch section for either 'data' or 'prompts'.""" """Create a complete OpenSearch section for either 'data' or 'prompts'."""
section_id = section_type # 'data' or 'prompts' section_id = section_type # 'data' or 'prompts'

View File

@@ -2,31 +2,26 @@ from dash import dcc, html
import dash_bootstrap_components as dbc import dash_bootstrap_components as dbc
from .upload import UploadComponent from .upload import UploadComponent
from .datasource import DataSourceComponent from .datasource import DataSourceComponent
from .textinput import TextInputComponent
class SidebarComponent: class SidebarComponent:
def __init__(self): def __init__(self):
self.upload_component = UploadComponent() self.upload_component = UploadComponent()
self.datasource_component = DataSourceComponent() self.datasource_component = DataSourceComponent()
self.textinput_component = TextInputComponent()
def create_layout(self): def create_layout(self):
return dbc.Col( return dbc.Col(
[ [
html.H5("Data Sources", className="mb-3"), dbc.Accordion(
self.datasource_component.create_error_alert(), [
self.datasource_component.create_success_alert(), self._create_data_sources_item(),
self.datasource_component.create_tabbed_interface(), self._create_generate_embeddings_item(),
html.H5("Visualization Controls", className="mb-3 mt-4"), self._create_visualization_controls_item(),
] ],
+ self._create_method_dropdown() always_open=True,
+ self._create_color_dropdown() )
+ self._create_dimension_toggle()
+ self._create_prompts_toggle()
+ [
html.H5("Point Details", className="mb-3"),
html.Div(
id="point-details", children="Click on a point to see details"
),
], ],
width=3, width=3,
style={"padding-right": "20px"}, style={"padding-right": "20px"},
@@ -86,3 +81,63 @@ class SidebarComponent:
style={"margin-bottom": "20px"}, style={"margin-bottom": "20px"},
), ),
] ]
def _create_generate_embeddings_item(self):
return dbc.AccordionItem(
[
self.textinput_component.create_text_input_interface(),
],
title=html.Span(
[
"Generate Embeddings ",
html.I(
className="fas fa-info-circle text-muted",
style={"cursor": "pointer"},
id="generate-embeddings-info-icon",
title="Create new embeddings from text input using various in-browser models",
),
]
),
item_id="generate-embeddings-accordion",
)
def _create_data_sources_item(self):
    """Build the "Load Embeddings" accordion item.

    The item body stacks three pieces supplied by
    ``self.datasource_component``: the error alert, the success alert and
    the tabbed interface, in that order. The header is the section label
    followed by an info icon whose ``title`` attribute holds a short
    description of the section.
    """
    source = self.datasource_component
    body = [
        source.create_error_alert(),
        source.create_success_alert(),
        source.create_tabbed_interface(),
    ]

    info_icon = html.I(
        id="load-embeddings-info-icon",
        className="fas fa-info-circle text-muted",
        style={"cursor": "pointer"},
        title="Load existing embeddings: upload files or read from OpenSearch",
    )
    header = html.Span(["Load Embeddings ", info_icon])

    return dbc.AccordionItem(
        body,
        title=header,
        item_id="data-sources-accordion",
    )
def _create_visualization_controls_item(self):
    """Build the "Visualization Controls" accordion item.

    The item body is the concatenation of the component lists returned by
    the method, color, dimension and prompts builders, in that order. The
    header is the section label followed by an info icon whose ``title``
    attribute holds a short description of the section.
    """
    builders = (
        self._create_method_dropdown,
        self._create_color_dropdown,
        self._create_dimension_toggle,
        self._create_prompts_toggle,
    )
    # Seed with the first builder's result, then append with the same
    # ``+`` operator the original chained expression used.
    controls = builders[0]()
    for build in builders[1:]:
        controls = controls + build()

    info_icon = html.I(
        id="visualization-controls-info-icon",
        className="fas fa-info-circle text-muted",
        style={"cursor": "pointer"},
        title="Configure plot settings: select dimensionality reduction method, colors, and display options",
    )
    header = html.Span(["Visualization Controls ", info_icon])

    return dbc.AccordionItem(
        controls,
        title=header,
        item_id="visualization-controls-accordion",
    )

View File

@@ -30,9 +30,6 @@ class TextInputComponent:
# Generation controls # Generation controls
self._create_generation_controls(), self._create_generation_controls(),
html.Hr(), html.Hr(),
# Progress indicators
self._create_progress_indicators(),
html.Hr(),
# Status and results # Status and results
self._create_status_section(), self._create_status_section(),
# Hidden components for data flow # Hidden components for data flow
@@ -297,65 +294,10 @@ class TextInputComponent:
] ]
) )
def _create_progress_indicators(self):
    """Build the progress area for model loading and embedding generation.

    Returns an ``html.Div`` containing two sections separated by a line
    break. Each section has a heading, a striped and animated
    ``dbc.Progress`` bar starting at 0, and a muted status line. Both
    sections are created with ``display: none``.
    """
    # Section tracking model loading.
    model_loading_section = html.Div(
        [
            html.H6("Model Loading Progress", className="mb-2"),
            dbc.Progress(
                id="model-loading-progress",
                value=0,
                striped=True,
                animated=True,
                className="mb-2",
            ),
            html.Small(
                id="model-loading-status",
                children="No model loading in progress",
                className="text-muted",
            ),
        ],
        id="model-loading-section",
        style={"display": "none"},
    )

    spacer = html.Br()

    # Section tracking embedding generation.
    embedding_section = html.Div(
        [
            html.H6("Embedding Generation Progress", className="mb-2"),
            dbc.Progress(
                id="embedding-progress",
                value=0,
                striped=True,
                animated=True,
                className="mb-2",
            ),
            html.Small(
                id="embedding-status",
                children="No embedding generation in progress",
                className="text-muted",
            ),
        ],
        id="embedding-progress-section",
        style={"display": "none"},
    )

    return html.Div([model_loading_section, spacer, embedding_section])
def _create_status_section(self): def _create_status_section(self):
"""Create status alerts and results preview.""" """Create status alerts and results preview."""
return html.Div( return html.Div(
[ [
# Immediate status (from client-side)
dbc.Alert(
id="text-input-status-immediate",
children="Ready to generate embeddings",
color="light",
className="mb-3",
),
# Server-side status # Server-side status
dbc.Alert( dbc.Alert(
id="text-input-status", id="text-input-status",

View File

@@ -5,39 +5,75 @@ import dash_bootstrap_components as dbc
class UploadComponent: class UploadComponent:
@staticmethod @staticmethod
def create_data_upload(): def create_data_upload():
return dcc.Upload( return html.Div(
id="upload-data", [
children=html.Div(["Drag and Drop or ", html.A("Select Files")]), dcc.Upload(
style={ id="upload-data",
"width": "100%", children=html.Div(
"height": "60px", [
"lineHeight": "60px", "Upload Data ",
"borderWidth": "1px", html.I(
"borderStyle": "dashed", className="fas fa-info-circle",
"borderRadius": "5px", style={"color": "#6c757d", "fontSize": "14px"},
"textAlign": "center", id="data-upload-info",
"margin-bottom": "20px", ),
}, ]
multiple=False, ),
style={
"width": "100%",
"height": "60px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "5px",
"textAlign": "center",
"margin-bottom": "20px",
},
multiple=False,
),
dbc.Tooltip(
"Click here or drag and drop NDJSON files containing document embeddings",
target="data-upload-info",
placement="top",
),
]
) )
@staticmethod @staticmethod
def create_prompts_upload(): def create_prompts_upload():
return dcc.Upload( return html.Div(
id="upload-prompts", [
children=html.Div(["Drag and Drop Prompts or ", html.A("Select Files")]), dcc.Upload(
style={ id="upload-prompts",
"width": "100%", children=html.Div(
"height": "60px", [
"lineHeight": "60px", "Upload Prompts ",
"borderWidth": "1px", html.I(
"borderStyle": "dashed", className="fas fa-info-circle",
"borderRadius": "5px", style={"color": "#6c757d", "fontSize": "14px"},
"textAlign": "center", id="prompts-upload-info",
"margin-bottom": "20px", ),
"borderColor": "#28a745", ]
}, ),
multiple=False, style={
"width": "100%",
"height": "60px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "5px",
"textAlign": "center",
"margin-bottom": "20px",
"borderColor": "#28a745",
},
multiple=False,
),
dbc.Tooltip(
"Click here or drag and drop NDJSON files containing prompt embeddings",
target="prompts-upload-info",
placement="top",
),
]
) )
@staticmethod @staticmethod

View File

@@ -38,9 +38,9 @@ class PlotFactory:
if dimensions == "3d": if dimensions == "3d":
fig = px.scatter_3d( fig = px.scatter_3d(
df, df,
x="dim_1", x="x",
y="dim_2", y="y",
z="dim_3", z="z",
color=color_values, color=color_values,
hover_data=hover_fields, hover_data=hover_fields,
title=f"3D Embedding Visualization - {method} (colored by {color_by})", title=f"3D Embedding Visualization - {method} (colored by {color_by})",
@@ -49,8 +49,8 @@ class PlotFactory:
else: else:
fig = px.scatter( fig = px.scatter(
df, df,
x="dim_1", x="x",
y="dim_2", y="y",
color=color_values, color=color_values,
hover_data=hover_fields, hover_data=hover_fields,
title=f"2D Embedding Visualization - {method} (colored by {color_by})", title=f"2D Embedding Visualization - {method} (colored by {color_by})",
@@ -77,17 +77,17 @@ class PlotFactory:
if dimensions == "3d": if dimensions == "3d":
doc_fig = px.scatter_3d( doc_fig = px.scatter_3d(
doc_df, doc_df,
x="dim_1", x="x",
y="dim_2", y="y",
z="dim_3", z="z",
color=doc_color_values, color=doc_color_values,
hover_data=hover_fields, hover_data=hover_fields,
) )
else: else:
doc_fig = px.scatter( doc_fig = px.scatter(
doc_df, doc_df,
x="dim_1", x="x",
y="dim_2", y="y",
color=doc_color_values, color=doc_color_values,
hover_data=hover_fields, hover_data=hover_fields,
) )
@@ -114,17 +114,17 @@ class PlotFactory:
if dimensions == "3d": if dimensions == "3d":
prompt_fig = px.scatter_3d( prompt_fig = px.scatter_3d(
prompt_df, prompt_df,
x="dim_1", x="x",
y="dim_2", y="y",
z="dim_3", z="z",
color=prompt_color_values, color=prompt_color_values,
hover_data=hover_fields, hover_data=hover_fields,
) )
else: else:
prompt_fig = px.scatter( prompt_fig = px.scatter(
prompt_df, prompt_df,
x="dim_1", x="x",
y="dim_2", y="y",
color=prompt_color_values, color=prompt_color_values,
hover_data=hover_fields, hover_data=hover_fields,
) )
@@ -168,11 +168,11 @@ class PlotFactory:
"category": doc.category, "category": doc.category,
"subcategory": doc.subcategory, "subcategory": doc.subcategory,
"tags_str": ", ".join(doc.tags) if doc.tags else "None", "tags_str": ", ".join(doc.tags) if doc.tags else "None",
"dim_1": coordinates[i, 0], "x": coordinates[i, 0],
"dim_2": coordinates[i, 1], "y": coordinates[i, 1],
} }
if dimensions == "3d": if dimensions == "3d":
row["dim_3"] = coordinates[i, 2] row["z"] = coordinates[i, 2]
df_data.append(row) df_data.append(row)
return pd.DataFrame(df_data) return pd.DataFrame(df_data)

2
uv.lock generated
View File

@@ -412,7 +412,7 @@ wheels = [
[[package]] [[package]]
name = "embeddingbuddy" name = "embeddingbuddy"
version = "0.3.0" version = "0.5.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "dash" }, { name = "dash" },