Compare commits
9 Commits
d35ef995a3
...
v0.5.0
| Author | SHA1 | Date | |
|---|---|---|---|
| | e022b26399 | | |
| | c29160c9e9 | | |
| | bd3ee6e35a | | |
| | 6936bc5d97 | | |
| | 9a2e257b0d | | |
| | 9c3ff6e799 | | |
| | 781d055e60 | | |
| | 0f5cea2850 | | |
| | 1bd70705e7 | | |
@@ -4,7 +4,9 @@
|
|||||||
"Bash(mkdir:*)",
|
"Bash(mkdir:*)",
|
||||||
"Bash(uv run:*)",
|
"Bash(uv run:*)",
|
||||||
"Bash(uv add:*)",
|
"Bash(uv add:*)",
|
||||||
"Bash(uv sync:*)"
|
"Bash(uv sync:*)",
|
||||||
|
"Bash(tree:*)",
|
||||||
|
"WebFetch(domain:www.dash-bootstrap-components.com)"
|
||||||
],
|
],
|
||||||
"deny": [],
|
"deny": [],
|
||||||
"ask": [],
|
"ask": [],
|
||||||
|
@@ -22,11 +22,13 @@ uv sync
|
|||||||
**Run the application:**
|
**Run the application:**
|
||||||
|
|
||||||
Development mode (with auto-reload):
|
Development mode (with auto-reload):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv run run_dev.py
|
uv run run_dev.py
|
||||||
```
|
```
|
||||||
|
|
||||||
Production mode (with Gunicorn WSGI server):
|
Production mode (with Gunicorn WSGI server):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# First install production dependencies
|
# First install production dependencies
|
||||||
uv sync --extra prod
|
uv sync --extra prod
|
||||||
@@ -36,11 +38,12 @@ uv run run_prod.py
|
|||||||
```
|
```
|
||||||
|
|
||||||
Legacy mode (basic Dash server):
|
Legacy mode (basic Dash server):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv run main.py
|
uv run main.py
|
||||||
```
|
```
|
||||||
|
|
||||||
The app will be available at http://127.0.0.1:8050
|
The app will be available at <http://127.0.0.1:8050>
|
||||||
|
|
||||||
**Run tests:**
|
**Run tests:**
|
||||||
|
|
||||||
|
@@ -65,6 +65,11 @@ ENV EMBEDDINGBUDDY_ENV=production
|
|||||||
# Expose port
|
# Expose port
|
||||||
EXPOSE 8050
|
EXPOSE 8050
|
||||||
|
|
||||||
|
# Create non-root user
|
||||||
|
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||||
|
RUN chown -R appuser:appuser /app
|
||||||
|
USER appuser
|
||||||
|
|
||||||
# Health check
|
# Health check
|
||||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
||||||
CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5)" || exit 1
|
CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5)" || exit 1
|
||||||
|
48
README.md
48
README.md
@@ -152,22 +152,38 @@ The application follows a modular architecture for improved maintainability and
|
|||||||
|
|
||||||
```text
|
```text
|
||||||
src/embeddingbuddy/
|
src/embeddingbuddy/
|
||||||
├── config/ # Configuration management
|
├── app.py # Main application entry point and factory
|
||||||
│ └── settings.py # Centralized app settings
|
├── config/ # Configuration management
|
||||||
├── data/ # Data parsing and processing
|
│ └── settings.py # Centralized app settings
|
||||||
│ ├── parser.py # NDJSON parsing logic
|
├── data/ # Data parsing and processing
|
||||||
│ └── processor.py # Data transformation utilities
|
│ ├── parser.py # NDJSON parsing logic
|
||||||
├── models/ # Data schemas and algorithms
|
│ ├── processor.py # Data transformation utilities
|
||||||
│ ├── schemas.py # Pydantic data models
|
│ └── sources/ # Data source integrations
|
||||||
│ └── reducers.py # Dimensionality reduction algorithms
|
│ └── opensearch.py # OpenSearch data source
|
||||||
├── visualization/ # Plot creation and styling
|
├── models/ # Data schemas and algorithms
|
||||||
│ ├── plots.py # Plot factory and creation logic
|
│ ├── schemas.py # Pydantic data models
|
||||||
│ └── colors.py # Color mapping utilities
|
│ ├── reducers.py # Dimensionality reduction algorithms
|
||||||
├── ui/ # User interface components
|
│ └── field_mapper.py # Field mapping utilities
|
||||||
│ ├── layout.py # Main application layout
|
├── visualization/ # Plot creation and styling
|
||||||
│ ├── components/ # Reusable UI components
|
│ ├── plots.py # Plot factory and creation logic
|
||||||
│ └── callbacks/ # Organized callback functions
|
│ └── colors.py # Color mapping utilities
|
||||||
└── utils/ # Utility functions
|
├── ui/ # User interface components
|
||||||
|
│ ├── layout.py # Main application layout
|
||||||
|
│ ├── components/ # Reusable UI components
|
||||||
|
│ │ ├── sidebar.py # Sidebar component
|
||||||
|
│ │ ├── upload.py # Upload components
|
||||||
|
│ │ ├── textinput.py # Text input components
|
||||||
|
│ │ └── datasource.py # Data source components
|
||||||
|
│ └── callbacks/ # Organized callback functions
|
||||||
|
│ ├── data_processing.py # Data upload/processing callbacks
|
||||||
|
│ ├── visualization.py # Plot update callbacks
|
||||||
|
│ └── interactions.py # User interaction callbacks
|
||||||
|
└── utils/ # Utility functions
|
||||||
|
|
||||||
|
main.py # Application runner (at project root)
|
||||||
|
main.py # Application runner (at project root)
|
||||||
|
run_dev.py # Development server runner
|
||||||
|
run_prod.py # Production server runner
|
||||||
```
|
```
|
||||||
|
|
||||||
### Testing
|
### Testing
|
||||||
|
17
assets/custom.css
Normal file
17
assets/custom.css
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
/* CSS override for transparent hover boxes in Plotly plots */
|
||||||
|
|
||||||
|
/* Make hover boxes transparent while preserving text readability */
|
||||||
|
.hovertext {
|
||||||
|
fill-opacity: 0.8 !important;
|
||||||
|
stroke-opacity: 1 !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Alternative selector for different Plotly versions */
|
||||||
|
g.hovertext > path {
|
||||||
|
opacity: 0.8 !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ensure text remains fully visible */
|
||||||
|
.hovertext text {
|
||||||
|
opacity: 1 !important;
|
||||||
|
}
|
@@ -45,28 +45,12 @@ class TransformersEmbedder {
|
|||||||
console.log('✅ Using globally loaded Transformers.js pipeline');
|
console.log('✅ Using globally loaded Transformers.js pipeline');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Show loading progress to user
|
this.extractor = await window.transformers.pipeline('feature-extraction', modelName);
|
||||||
if (window.updateModelLoadingProgress) {
|
|
||||||
window.updateModelLoadingProgress(0, `Loading ${modelName}...`);
|
|
||||||
}
|
|
||||||
|
|
||||||
this.extractor = await window.transformers.pipeline('feature-extraction', modelName, {
|
|
||||||
progress_callback: (data) => {
|
|
||||||
if (window.updateModelLoadingProgress && data.progress !== undefined) {
|
|
||||||
const progress = Math.round(data.progress);
|
|
||||||
window.updateModelLoadingProgress(progress, data.status || 'Loading...');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
this.modelCache.set(modelName, this.extractor);
|
this.modelCache.set(modelName, this.extractor);
|
||||||
this.currentModel = modelName;
|
this.currentModel = modelName;
|
||||||
this.isLoading = false;
|
this.isLoading = false;
|
||||||
|
|
||||||
if (window.updateModelLoadingProgress) {
|
|
||||||
window.updateModelLoadingProgress(100, 'Model loaded successfully');
|
|
||||||
}
|
|
||||||
|
|
||||||
return { success: true, model: modelName };
|
return { success: true, model: modelName };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.isLoading = false;
|
this.isLoading = false;
|
||||||
@@ -116,15 +100,6 @@ class TransformersEmbedder {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Update progress
|
|
||||||
const progress = Math.min(100, ((i + batch.length) / texts.length) * 100);
|
|
||||||
if (window.updateEmbeddingProgress) {
|
|
||||||
window.updateEmbeddingProgress(progress, `Processing ${i + batch.length}/${texts.length} texts`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (window.updateEmbeddingProgress) {
|
|
||||||
window.updateEmbeddingProgress(100, `Generated ${embeddings.length} embeddings successfully`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return embeddings;
|
return embeddings;
|
||||||
@@ -139,30 +114,6 @@ class TransformersEmbedder {
|
|||||||
window.transformersEmbedder = new TransformersEmbedder();
|
window.transformersEmbedder = new TransformersEmbedder();
|
||||||
console.log('📦 TransformersEmbedder instance created');
|
console.log('📦 TransformersEmbedder instance created');
|
||||||
|
|
||||||
// Global progress update functions
|
|
||||||
window.updateModelLoadingProgress = function(progress, status) {
|
|
||||||
const progressBar = document.getElementById('model-loading-progress');
|
|
||||||
const statusText = document.getElementById('model-loading-status');
|
|
||||||
if (progressBar) {
|
|
||||||
progressBar.style.width = progress + '%';
|
|
||||||
progressBar.setAttribute('aria-valuenow', progress);
|
|
||||||
}
|
|
||||||
if (statusText) {
|
|
||||||
statusText.textContent = status;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
window.updateEmbeddingProgress = function(progress, status) {
|
|
||||||
const progressBar = document.getElementById('embedding-progress');
|
|
||||||
const statusText = document.getElementById('embedding-status');
|
|
||||||
if (progressBar) {
|
|
||||||
progressBar.style.width = progress + '%';
|
|
||||||
progressBar.setAttribute('aria-valuenow', progress);
|
|
||||||
}
|
|
||||||
if (statusText) {
|
|
||||||
statusText.textContent = status;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Dash clientside callback functions
|
// Dash clientside callback functions
|
||||||
window.dash_clientside = window.dash_clientside || {};
|
window.dash_clientside = window.dash_clientside || {};
|
||||||
@@ -181,9 +132,7 @@ window.dash_clientside.transformers = {
|
|||||||
const initResult = await window.transformersEmbedder.initializeModel(modelName);
|
const initResult = await window.transformersEmbedder.initializeModel(modelName);
|
||||||
if (!initResult.success) {
|
if (!initResult.success) {
|
||||||
return [
|
return [
|
||||||
{ error: initResult.error },
|
{ error: `Model loading error: ${initResult.error}` },
|
||||||
`❌ Model loading error: ${initResult.error}`,
|
|
||||||
"danger",
|
|
||||||
false
|
false
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
@@ -194,7 +143,6 @@ window.dash_clientside.transformers = {
|
|||||||
|
|
||||||
switch (tokenizationMethod) {
|
switch (tokenizationMethod) {
|
||||||
case 'sentence':
|
case 'sentence':
|
||||||
// Simple sentence splitting - can be enhanced with proper NLP
|
|
||||||
textChunks = trimmedText
|
textChunks = trimmedText
|
||||||
.split(/[.!?]+/)
|
.split(/[.!?]+/)
|
||||||
.map(s => s.trim())
|
.map(s => s.trim())
|
||||||
@@ -219,8 +167,6 @@ window.dash_clientside.transformers = {
|
|||||||
if (textChunks.length === 0) {
|
if (textChunks.length === 0) {
|
||||||
return [
|
return [
|
||||||
{ error: 'No valid text chunks found after tokenization' },
|
{ error: 'No valid text chunks found after tokenization' },
|
||||||
'❌ Error: No valid text chunks found after tokenization',
|
|
||||||
"danger",
|
|
||||||
false
|
false
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
@@ -230,9 +176,7 @@ window.dash_clientside.transformers = {
|
|||||||
|
|
||||||
if (!embeddings || embeddings.length !== textChunks.length) {
|
if (!embeddings || embeddings.length !== textChunks.length) {
|
||||||
return [
|
return [
|
||||||
{ error: 'Embedding generation failed - mismatch in text chunks and embeddings' },
|
{ error: 'Embedding generation failed' },
|
||||||
'❌ Error: Embedding generation failed',
|
|
||||||
"danger",
|
|
||||||
false
|
false
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
@@ -247,13 +191,16 @@ window.dash_clientside.transformers = {
|
|||||||
tags: []
|
tags: []
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
// Return the successful embeddings data
|
||||||
|
const embeddingsData = {
|
||||||
|
documents: documents,
|
||||||
|
embeddings: embeddings
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log('✅ Embeddings generated successfully:', embeddingsData);
|
||||||
|
|
||||||
return [
|
return [
|
||||||
{
|
embeddingsData,
|
||||||
documents: documents,
|
|
||||||
embeddings: embeddings
|
|
||||||
},
|
|
||||||
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
|
|
||||||
"success",
|
|
||||||
false
|
false
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -261,18 +208,18 @@ window.dash_clientside.transformers = {
|
|||||||
console.error('Client-side embedding error:', error);
|
console.error('Client-side embedding error:', error);
|
||||||
return [
|
return [
|
||||||
{ error: error.message },
|
{ error: error.message },
|
||||||
`❌ Error: ${error.message}`,
|
|
||||||
"danger",
|
|
||||||
false
|
false
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
console.log('✅ Transformers.js client-side setup complete');
|
console.log('✅ Transformers.js client-side setup complete');
|
||||||
console.log('Available:', {
|
console.log('Available:', {
|
||||||
transformersEmbedder: !!window.transformersEmbedder,
|
transformersEmbedder: !!window.transformersEmbedder,
|
||||||
dashClientside: !!window.dash_clientside,
|
dashClientside: !!window.dash_clientside,
|
||||||
transformersModule: !!window.dash_clientside?.transformers,
|
transformersModule: !!window.dash_clientside?.transformers,
|
||||||
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings
|
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings,
|
||||||
|
processAsync: typeof window.processEmbeddingsAsync
|
||||||
});
|
});
|
@@ -111,6 +111,17 @@ window.dash_clientside.transformers = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// Ensure Transformers.js is loaded
|
||||||
|
if (!window.transformersLibraryLoaded) {
|
||||||
|
const loaded = await initializeTransformers();
|
||||||
|
if (!loaded) {
|
||||||
|
return [
|
||||||
|
{ error: 'Failed to load Transformers.js' },
|
||||||
|
false
|
||||||
|
];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Tokenize text
|
// Tokenize text
|
||||||
let textChunks;
|
let textChunks;
|
||||||
const trimmedText = textContent.trim();
|
const trimmedText = textContent.trim();
|
||||||
@@ -130,7 +141,10 @@ window.dash_clientside.transformers = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (textChunks.length === 0) {
|
if (textChunks.length === 0) {
|
||||||
throw new Error('No valid text chunks after tokenization');
|
return [
|
||||||
|
{ error: 'No valid text chunks after tokenization' },
|
||||||
|
false
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate embeddings
|
// Generate embeddings
|
||||||
@@ -146,13 +160,16 @@ window.dash_clientside.transformers = {
|
|||||||
tags: []
|
tags: []
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
// Return the successful embeddings data
|
||||||
|
const embeddingsData = {
|
||||||
|
documents: documents,
|
||||||
|
embeddings: embeddings
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log('✅ Embeddings generated successfully:', embeddingsData);
|
||||||
|
|
||||||
return [
|
return [
|
||||||
{
|
embeddingsData,
|
||||||
documents: documents,
|
|
||||||
embeddings: embeddings
|
|
||||||
},
|
|
||||||
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
|
|
||||||
"success",
|
|
||||||
false
|
false
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -160,13 +177,12 @@ window.dash_clientside.transformers = {
|
|||||||
console.error('❌ Error generating embeddings:', error);
|
console.error('❌ Error generating embeddings:', error);
|
||||||
return [
|
return [
|
||||||
{ error: error.message },
|
{ error: error.message },
|
||||||
`❌ Error: ${error.message}`,
|
|
||||||
"danger",
|
|
||||||
false
|
false
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
console.log('✅ Simple Transformers.js setup complete');
|
console.log('✅ Simple Transformers.js setup complete');
|
||||||
console.log('Available functions:', Object.keys(window.dash_clientside.transformers));
|
console.log('Available functions:', Object.keys(window.dash_clientside.transformers));
|
Binary file not shown.
Before: Size 339 KiB | After: Size 844 KiB
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "embeddingbuddy"
|
name = "embeddingbuddy"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
|
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
@@ -25,7 +25,7 @@ def main():
|
|||||||
"--workers", str(AppSettings.GUNICORN_WORKERS),
|
"--workers", str(AppSettings.GUNICORN_WORKERS),
|
||||||
"--bind", AppSettings.GUNICORN_BIND,
|
"--bind", AppSettings.GUNICORN_BIND,
|
||||||
"--timeout", str(AppSettings.GUNICORN_TIMEOUT),
|
"--timeout", str(AppSettings.GUNICORN_TIMEOUT),
|
||||||
"--keepalive", str(AppSettings.GUNICORN_KEEPALIVE),
|
"--keep-alive", str(AppSettings.GUNICORN_KEEPALIVE),
|
||||||
"--access-logfile", "-",
|
"--access-logfile", "-",
|
||||||
"--error-logfile", "-",
|
"--error-logfile", "-",
|
||||||
"--log-level", "info",
|
"--log-level", "info",
|
||||||
|
@@ -15,7 +15,12 @@ def create_app():
|
|||||||
assets_path = os.path.join(project_root, "assets")
|
assets_path = os.path.join(project_root, "assets")
|
||||||
|
|
||||||
app = dash.Dash(
|
app = dash.Dash(
|
||||||
__name__, external_stylesheets=[dbc.themes.BOOTSTRAP], assets_folder=assets_path
|
__name__,
|
||||||
|
external_stylesheets=[
|
||||||
|
dbc.themes.BOOTSTRAP,
|
||||||
|
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css",
|
||||||
|
],
|
||||||
|
assets_folder=assets_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Allow callbacks to components that are dynamically created in tabs
|
# Allow callbacks to components that are dynamically created in tabs
|
||||||
@@ -75,16 +80,12 @@ def _register_client_side_callbacks(app):
|
|||||||
|
|
||||||
return [
|
return [
|
||||||
{ error: 'Transformers.js not loaded. Please refresh the page and try again.' },
|
{ error: 'Transformers.js not loaded. Please refresh the page and try again.' },
|
||||||
errorMsg + ' Please refresh the page.',
|
|
||||||
'danger',
|
|
||||||
false
|
false
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
""",
|
""",
|
||||||
[
|
[
|
||||||
Output("embeddings-generated-trigger", "data"),
|
Output("embeddings-generated-trigger", "data"),
|
||||||
Output("text-input-status-immediate", "children"),
|
|
||||||
Output("text-input-status-immediate", "color"),
|
|
||||||
Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
|
Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
|
||||||
],
|
],
|
||||||
[Input("generate-embeddings-btn", "n_clicks")],
|
[Input("generate-embeddings-btn", "n_clicks")],
|
||||||
|
@@ -74,7 +74,9 @@ class AppSettings:
|
|||||||
PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))
|
PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))
|
||||||
|
|
||||||
# Environment Configuration
|
# Environment Configuration
|
||||||
ENVIRONMENT = os.getenv("EMBEDDINGBUDDY_ENV", "development") # development, production
|
ENVIRONMENT = os.getenv(
|
||||||
|
"EMBEDDINGBUDDY_ENV", "development"
|
||||||
|
) # development, production
|
||||||
|
|
||||||
# WSGI Server Configuration (for production)
|
# WSGI Server Configuration (for production)
|
||||||
GUNICORN_WORKERS = int(os.getenv("GUNICORN_WORKERS", "4"))
|
GUNICORN_WORKERS = int(os.getenv("GUNICORN_WORKERS", "4"))
|
||||||
|
@@ -621,6 +621,12 @@ class DataProcessingCallbacks:
|
|||||||
if not embeddings_data:
|
if not embeddings_data:
|
||||||
return no_update, no_update, no_update, no_update, no_update
|
return no_update, no_update, no_update, no_update, no_update
|
||||||
|
|
||||||
|
# Check if this is a request trigger (contains textContent) vs actual embeddings data
|
||||||
|
if isinstance(embeddings_data, dict) and "textContent" in embeddings_data:
|
||||||
|
# This is a processing request trigger, not the actual results
|
||||||
|
# The JavaScript will handle the async processing and update the UI directly
|
||||||
|
return no_update, no_update, no_update, no_update, no_update
|
||||||
|
|
||||||
processed_data = self.processor.process_client_embeddings(embeddings_data)
|
processed_data = self.processor.process_client_embeddings(embeddings_data)
|
||||||
|
|
||||||
if processed_data.error:
|
if processed_data.error:
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
import dash
|
import dash
|
||||||
from dash import callback, Input, Output, State, html
|
from dash import callback, Input, Output
|
||||||
import dash_bootstrap_components as dbc
|
|
||||||
|
|
||||||
|
|
||||||
class InteractionCallbacks:
|
class InteractionCallbacks:
|
||||||
@@ -8,75 +7,16 @@ class InteractionCallbacks:
|
|||||||
self._register_callbacks()
|
self._register_callbacks()
|
||||||
|
|
||||||
def _register_callbacks(self):
|
def _register_callbacks(self):
|
||||||
@callback(
|
|
||||||
Output("point-details", "children"),
|
|
||||||
Input("embedding-plot", "clickData"),
|
|
||||||
[State("processed-data", "data"), State("processed-prompts", "data")],
|
|
||||||
)
|
|
||||||
def display_click_data(clickData, data, prompts_data):
|
|
||||||
if not clickData or not data:
|
|
||||||
return "Click on a point to see details"
|
|
||||||
|
|
||||||
point_data = clickData["points"][0]
|
|
||||||
trace_name = point_data.get("fullData", {}).get("name", "Documents")
|
|
||||||
|
|
||||||
if "pointIndex" in point_data:
|
|
||||||
point_index = point_data["pointIndex"]
|
|
||||||
elif "pointNumber" in point_data:
|
|
||||||
point_index = point_data["pointNumber"]
|
|
||||||
else:
|
|
||||||
return "Could not identify clicked point"
|
|
||||||
|
|
||||||
if (
|
|
||||||
trace_name.startswith("Prompts")
|
|
||||||
and prompts_data
|
|
||||||
and "prompts" in prompts_data
|
|
||||||
):
|
|
||||||
item = prompts_data["prompts"][point_index]
|
|
||||||
item_type = "Prompt"
|
|
||||||
else:
|
|
||||||
item = data["documents"][point_index]
|
|
||||||
item_type = "Document"
|
|
||||||
|
|
||||||
return self._create_detail_card(item, item_type)
|
|
||||||
|
|
||||||
@callback(
|
@callback(
|
||||||
[
|
[
|
||||||
Output("processed-data", "data", allow_duplicate=True),
|
Output("processed-data", "data", allow_duplicate=True),
|
||||||
Output("processed-prompts", "data", allow_duplicate=True),
|
Output("processed-prompts", "data", allow_duplicate=True),
|
||||||
Output("point-details", "children", allow_duplicate=True),
|
|
||||||
],
|
],
|
||||||
Input("reset-button", "n_clicks"),
|
Input("reset-button", "n_clicks"),
|
||||||
prevent_initial_call=True,
|
prevent_initial_call=True,
|
||||||
)
|
)
|
||||||
def reset_data(n_clicks):
|
def reset_data(n_clicks):
|
||||||
if n_clicks is None or n_clicks == 0:
|
if n_clicks is None or n_clicks == 0:
|
||||||
return dash.no_update, dash.no_update, dash.no_update
|
return dash.no_update, dash.no_update
|
||||||
|
|
||||||
return None, None, "Click on a point to see details"
|
return None, None
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _create_detail_card(item, item_type):
|
|
||||||
return dbc.Card(
|
|
||||||
[
|
|
||||||
dbc.CardBody(
|
|
||||||
[
|
|
||||||
html.H5(f"{item_type}: {item['id']}", className="card-title"),
|
|
||||||
html.P(f"Text: {item['text']}", className="card-text"),
|
|
||||||
html.P(
|
|
||||||
f"Category: {item.get('category', 'Unknown')}",
|
|
||||||
className="card-text",
|
|
||||||
),
|
|
||||||
html.P(
|
|
||||||
f"Subcategory: {item.get('subcategory', 'Unknown')}",
|
|
||||||
className="card-text",
|
|
||||||
),
|
|
||||||
html.P(
|
|
||||||
f"Tags: {', '.join(item.get('tags', [])) if item.get('tags') else 'None'}",
|
|
||||||
className="card-text",
|
|
||||||
),
|
|
||||||
html.P(f"Type: {item_type}", className="card-text text-muted"),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
@@ -1,13 +1,11 @@
|
|||||||
from dash import dcc, html
|
from dash import dcc, html
|
||||||
import dash_bootstrap_components as dbc
|
import dash_bootstrap_components as dbc
|
||||||
from .upload import UploadComponent
|
from .upload import UploadComponent
|
||||||
from .textinput import TextInputComponent
|
|
||||||
|
|
||||||
|
|
||||||
class DataSourceComponent:
|
class DataSourceComponent:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.upload_component = UploadComponent()
|
self.upload_component = UploadComponent()
|
||||||
self.text_input_component = TextInputComponent()
|
|
||||||
|
|
||||||
def create_tabbed_interface(self):
|
def create_tabbed_interface(self):
|
||||||
"""Create tabbed interface for different data sources."""
|
"""Create tabbed interface for different data sources."""
|
||||||
@@ -19,7 +17,6 @@ class DataSourceComponent:
|
|||||||
[
|
[
|
||||||
dbc.Tab(label="File Upload", tab_id="file-tab"),
|
dbc.Tab(label="File Upload", tab_id="file-tab"),
|
||||||
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
|
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
|
||||||
dbc.Tab(label="Text Input", tab_id="text-input-tab"),
|
|
||||||
],
|
],
|
||||||
id="data-source-tabs",
|
id="data-source-tabs",
|
||||||
active_tab="file-tab",
|
active_tab="file-tab",
|
||||||
@@ -211,10 +208,6 @@ class DataSourceComponent:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
def create_text_input_tab(self):
|
|
||||||
"""Create text input tab content for browser-based embedding generation."""
|
|
||||||
return html.Div([self.text_input_component.create_text_input_interface()])
|
|
||||||
|
|
||||||
def _create_opensearch_section(self, section_type):
|
def _create_opensearch_section(self, section_type):
|
||||||
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""
|
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""
|
||||||
section_id = section_type # 'data' or 'prompts'
|
section_id = section_type # 'data' or 'prompts'
|
||||||
|
@@ -2,31 +2,26 @@ from dash import dcc, html
|
|||||||
import dash_bootstrap_components as dbc
|
import dash_bootstrap_components as dbc
|
||||||
from .upload import UploadComponent
|
from .upload import UploadComponent
|
||||||
from .datasource import DataSourceComponent
|
from .datasource import DataSourceComponent
|
||||||
|
from .textinput import TextInputComponent
|
||||||
|
|
||||||
|
|
||||||
class SidebarComponent:
|
class SidebarComponent:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.upload_component = UploadComponent()
|
self.upload_component = UploadComponent()
|
||||||
self.datasource_component = DataSourceComponent()
|
self.datasource_component = DataSourceComponent()
|
||||||
|
self.textinput_component = TextInputComponent()
|
||||||
|
|
||||||
def create_layout(self):
|
def create_layout(self):
|
||||||
return dbc.Col(
|
return dbc.Col(
|
||||||
[
|
[
|
||||||
html.H5("Data Sources", className="mb-3"),
|
dbc.Accordion(
|
||||||
self.datasource_component.create_error_alert(),
|
[
|
||||||
self.datasource_component.create_success_alert(),
|
self._create_data_sources_item(),
|
||||||
self.datasource_component.create_tabbed_interface(),
|
self._create_generate_embeddings_item(),
|
||||||
html.H5("Visualization Controls", className="mb-3 mt-4"),
|
self._create_visualization_controls_item(),
|
||||||
]
|
],
|
||||||
+ self._create_method_dropdown()
|
always_open=True,
|
||||||
+ self._create_color_dropdown()
|
)
|
||||||
+ self._create_dimension_toggle()
|
|
||||||
+ self._create_prompts_toggle()
|
|
||||||
+ [
|
|
||||||
html.H5("Point Details", className="mb-3"),
|
|
||||||
html.Div(
|
|
||||||
id="point-details", children="Click on a point to see details"
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
width=3,
|
width=3,
|
||||||
style={"padding-right": "20px"},
|
style={"padding-right": "20px"},
|
||||||
@@ -86,3 +81,63 @@ class SidebarComponent:
|
|||||||
style={"margin-bottom": "20px"},
|
style={"margin-bottom": "20px"},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def _create_generate_embeddings_item(self):
|
||||||
|
return dbc.AccordionItem(
|
||||||
|
[
|
||||||
|
self.textinput_component.create_text_input_interface(),
|
||||||
|
],
|
||||||
|
title=html.Span(
|
||||||
|
[
|
||||||
|
"Generate Embeddings ",
|
||||||
|
html.I(
|
||||||
|
className="fas fa-info-circle text-muted",
|
||||||
|
style={"cursor": "pointer"},
|
||||||
|
id="generate-embeddings-info-icon",
|
||||||
|
title="Create new embeddings from text input using various in-browser models",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
item_id="generate-embeddings-accordion",
|
||||||
|
)
|
||||||
|
|
||||||
|
def _create_data_sources_item(self):
|
||||||
|
return dbc.AccordionItem(
|
||||||
|
[
|
||||||
|
self.datasource_component.create_error_alert(),
|
||||||
|
self.datasource_component.create_success_alert(),
|
||||||
|
self.datasource_component.create_tabbed_interface(),
|
||||||
|
],
|
||||||
|
title=html.Span(
|
||||||
|
[
|
||||||
|
"Load Embeddings ",
|
||||||
|
html.I(
|
||||||
|
className="fas fa-info-circle text-muted",
|
||||||
|
style={"cursor": "pointer"},
|
||||||
|
id="load-embeddings-info-icon",
|
||||||
|
title="Load existing embeddings: upload files or read from OpenSearch",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
item_id="data-sources-accordion",
|
||||||
|
)
|
||||||
|
|
||||||
|
def _create_visualization_controls_item(self):
|
||||||
|
return dbc.AccordionItem(
|
||||||
|
self._create_method_dropdown()
|
||||||
|
+ self._create_color_dropdown()
|
||||||
|
+ self._create_dimension_toggle()
|
||||||
|
+ self._create_prompts_toggle(),
|
||||||
|
title=html.Span(
|
||||||
|
[
|
||||||
|
"Visualization Controls ",
|
||||||
|
html.I(
|
||||||
|
className="fas fa-info-circle text-muted",
|
||||||
|
style={"cursor": "pointer"},
|
||||||
|
id="visualization-controls-info-icon",
|
||||||
|
title="Configure plot settings: select dimensionality reduction method, colors, and display options",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
item_id="visualization-controls-accordion",
|
||||||
|
)
|
||||||
|
@@ -30,9 +30,6 @@ class TextInputComponent:
|
|||||||
# Generation controls
|
# Generation controls
|
||||||
self._create_generation_controls(),
|
self._create_generation_controls(),
|
||||||
html.Hr(),
|
html.Hr(),
|
||||||
# Progress indicators
|
|
||||||
self._create_progress_indicators(),
|
|
||||||
html.Hr(),
|
|
||||||
# Status and results
|
# Status and results
|
||||||
self._create_status_section(),
|
self._create_status_section(),
|
||||||
# Hidden components for data flow
|
# Hidden components for data flow
|
||||||
@@ -297,65 +294,10 @@ class TextInputComponent:
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
def _create_progress_indicators(self):
|
|
||||||
"""Create progress bars for model loading and embedding generation."""
|
|
||||||
return html.Div(
|
|
||||||
[
|
|
||||||
# Model loading progress
|
|
||||||
html.Div(
|
|
||||||
[
|
|
||||||
html.H6("Model Loading Progress", className="mb-2"),
|
|
||||||
dbc.Progress(
|
|
||||||
id="model-loading-progress",
|
|
||||||
value=0,
|
|
||||||
striped=True,
|
|
||||||
animated=True,
|
|
||||||
className="mb-2",
|
|
||||||
),
|
|
||||||
html.Small(
|
|
||||||
id="model-loading-status",
|
|
||||||
children="No model loading in progress",
|
|
||||||
className="text-muted",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
id="model-loading-section",
|
|
||||||
style={"display": "none"},
|
|
||||||
),
|
|
||||||
html.Br(),
|
|
||||||
# Embedding generation progress
|
|
||||||
html.Div(
|
|
||||||
[
|
|
||||||
html.H6("Embedding Generation Progress", className="mb-2"),
|
|
||||||
dbc.Progress(
|
|
||||||
id="embedding-progress",
|
|
||||||
value=0,
|
|
||||||
striped=True,
|
|
||||||
animated=True,
|
|
||||||
className="mb-2",
|
|
||||||
),
|
|
||||||
html.Small(
|
|
||||||
id="embedding-status",
|
|
||||||
children="No embedding generation in progress",
|
|
||||||
className="text-muted",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
id="embedding-progress-section",
|
|
||||||
style={"display": "none"},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
def _create_status_section(self):
|
def _create_status_section(self):
|
||||||
"""Create status alerts and results preview."""
|
"""Create status alerts and results preview."""
|
||||||
return html.Div(
|
return html.Div(
|
||||||
[
|
[
|
||||||
# Immediate status (from client-side)
|
|
||||||
dbc.Alert(
|
|
||||||
id="text-input-status-immediate",
|
|
||||||
children="Ready to generate embeddings",
|
|
||||||
color="light",
|
|
||||||
className="mb-3",
|
|
||||||
),
|
|
||||||
# Server-side status
|
# Server-side status
|
||||||
dbc.Alert(
|
dbc.Alert(
|
||||||
id="text-input-status",
|
id="text-input-status",
|
||||||
|
@@ -5,39 +5,75 @@ import dash_bootstrap_components as dbc
|
|||||||
class UploadComponent:
|
class UploadComponent:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_data_upload():
|
def create_data_upload():
|
||||||
return dcc.Upload(
|
return html.Div(
|
||||||
id="upload-data",
|
[
|
||||||
children=html.Div(["Drag and Drop or ", html.A("Select Files")]),
|
dcc.Upload(
|
||||||
style={
|
id="upload-data",
|
||||||
"width": "100%",
|
children=html.Div(
|
||||||
"height": "60px",
|
[
|
||||||
"lineHeight": "60px",
|
"Upload Data ",
|
||||||
"borderWidth": "1px",
|
html.I(
|
||||||
"borderStyle": "dashed",
|
className="fas fa-info-circle",
|
||||||
"borderRadius": "5px",
|
style={"color": "#6c757d", "fontSize": "14px"},
|
||||||
"textAlign": "center",
|
id="data-upload-info",
|
||||||
"margin-bottom": "20px",
|
),
|
||||||
},
|
]
|
||||||
multiple=False,
|
),
|
||||||
|
style={
|
||||||
|
"width": "100%",
|
||||||
|
"height": "60px",
|
||||||
|
"lineHeight": "60px",
|
||||||
|
"borderWidth": "1px",
|
||||||
|
"borderStyle": "dashed",
|
||||||
|
"borderRadius": "5px",
|
||||||
|
"textAlign": "center",
|
||||||
|
"margin-bottom": "20px",
|
||||||
|
},
|
||||||
|
multiple=False,
|
||||||
|
),
|
||||||
|
dbc.Tooltip(
|
||||||
|
"Click here or drag and drop NDJSON files containing document embeddings",
|
||||||
|
target="data-upload-info",
|
||||||
|
placement="top",
|
||||||
|
),
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_prompts_upload():
|
def create_prompts_upload():
|
||||||
return dcc.Upload(
|
return html.Div(
|
||||||
id="upload-prompts",
|
[
|
||||||
children=html.Div(["Drag and Drop Prompts or ", html.A("Select Files")]),
|
dcc.Upload(
|
||||||
style={
|
id="upload-prompts",
|
||||||
"width": "100%",
|
children=html.Div(
|
||||||
"height": "60px",
|
[
|
||||||
"lineHeight": "60px",
|
"Upload Prompts ",
|
||||||
"borderWidth": "1px",
|
html.I(
|
||||||
"borderStyle": "dashed",
|
className="fas fa-info-circle",
|
||||||
"borderRadius": "5px",
|
style={"color": "#6c757d", "fontSize": "14px"},
|
||||||
"textAlign": "center",
|
id="prompts-upload-info",
|
||||||
"margin-bottom": "20px",
|
),
|
||||||
"borderColor": "#28a745",
|
]
|
||||||
},
|
),
|
||||||
multiple=False,
|
style={
|
||||||
|
"width": "100%",
|
||||||
|
"height": "60px",
|
||||||
|
"lineHeight": "60px",
|
||||||
|
"borderWidth": "1px",
|
||||||
|
"borderStyle": "dashed",
|
||||||
|
"borderRadius": "5px",
|
||||||
|
"textAlign": "center",
|
||||||
|
"margin-bottom": "20px",
|
||||||
|
"borderColor": "#28a745",
|
||||||
|
},
|
||||||
|
multiple=False,
|
||||||
|
),
|
||||||
|
dbc.Tooltip(
|
||||||
|
"Click here or drag and drop NDJSON files containing prompt embeddings",
|
||||||
|
target="prompts-upload-info",
|
||||||
|
placement="top",
|
||||||
|
),
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@@ -38,9 +38,9 @@ class PlotFactory:
|
|||||||
if dimensions == "3d":
|
if dimensions == "3d":
|
||||||
fig = px.scatter_3d(
|
fig = px.scatter_3d(
|
||||||
df,
|
df,
|
||||||
x="dim_1",
|
x="x",
|
||||||
y="dim_2",
|
y="y",
|
||||||
z="dim_3",
|
z="z",
|
||||||
color=color_values,
|
color=color_values,
|
||||||
hover_data=hover_fields,
|
hover_data=hover_fields,
|
||||||
title=f"3D Embedding Visualization - {method} (colored by {color_by})",
|
title=f"3D Embedding Visualization - {method} (colored by {color_by})",
|
||||||
@@ -49,8 +49,8 @@ class PlotFactory:
|
|||||||
else:
|
else:
|
||||||
fig = px.scatter(
|
fig = px.scatter(
|
||||||
df,
|
df,
|
||||||
x="dim_1",
|
x="x",
|
||||||
y="dim_2",
|
y="y",
|
||||||
color=color_values,
|
color=color_values,
|
||||||
hover_data=hover_fields,
|
hover_data=hover_fields,
|
||||||
title=f"2D Embedding Visualization - {method} (colored by {color_by})",
|
title=f"2D Embedding Visualization - {method} (colored by {color_by})",
|
||||||
@@ -77,17 +77,17 @@ class PlotFactory:
|
|||||||
if dimensions == "3d":
|
if dimensions == "3d":
|
||||||
doc_fig = px.scatter_3d(
|
doc_fig = px.scatter_3d(
|
||||||
doc_df,
|
doc_df,
|
||||||
x="dim_1",
|
x="x",
|
||||||
y="dim_2",
|
y="y",
|
||||||
z="dim_3",
|
z="z",
|
||||||
color=doc_color_values,
|
color=doc_color_values,
|
||||||
hover_data=hover_fields,
|
hover_data=hover_fields,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
doc_fig = px.scatter(
|
doc_fig = px.scatter(
|
||||||
doc_df,
|
doc_df,
|
||||||
x="dim_1",
|
x="x",
|
||||||
y="dim_2",
|
y="y",
|
||||||
color=doc_color_values,
|
color=doc_color_values,
|
||||||
hover_data=hover_fields,
|
hover_data=hover_fields,
|
||||||
)
|
)
|
||||||
@@ -114,17 +114,17 @@ class PlotFactory:
|
|||||||
if dimensions == "3d":
|
if dimensions == "3d":
|
||||||
prompt_fig = px.scatter_3d(
|
prompt_fig = px.scatter_3d(
|
||||||
prompt_df,
|
prompt_df,
|
||||||
x="dim_1",
|
x="x",
|
||||||
y="dim_2",
|
y="y",
|
||||||
z="dim_3",
|
z="z",
|
||||||
color=prompt_color_values,
|
color=prompt_color_values,
|
||||||
hover_data=hover_fields,
|
hover_data=hover_fields,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
prompt_fig = px.scatter(
|
prompt_fig = px.scatter(
|
||||||
prompt_df,
|
prompt_df,
|
||||||
x="dim_1",
|
x="x",
|
||||||
y="dim_2",
|
y="y",
|
||||||
color=prompt_color_values,
|
color=prompt_color_values,
|
||||||
hover_data=hover_fields,
|
hover_data=hover_fields,
|
||||||
)
|
)
|
||||||
@@ -168,11 +168,11 @@ class PlotFactory:
|
|||||||
"category": doc.category,
|
"category": doc.category,
|
||||||
"subcategory": doc.subcategory,
|
"subcategory": doc.subcategory,
|
||||||
"tags_str": ", ".join(doc.tags) if doc.tags else "None",
|
"tags_str": ", ".join(doc.tags) if doc.tags else "None",
|
||||||
"dim_1": coordinates[i, 0],
|
"x": coordinates[i, 0],
|
||||||
"dim_2": coordinates[i, 1],
|
"y": coordinates[i, 1],
|
||||||
}
|
}
|
||||||
if dimensions == "3d":
|
if dimensions == "3d":
|
||||||
row["dim_3"] = coordinates[i, 2]
|
row["z"] = coordinates[i, 2]
|
||||||
df_data.append(row)
|
df_data.append(row)
|
||||||
|
|
||||||
return pd.DataFrame(df_data)
|
return pd.DataFrame(df_data)
|
||||||
|
Reference in New Issue
Block a user