v0.5.0 - rework the sidebar
Some checks failed
Security Scan / security (pull_request) Successful in 32s
Security Scan / dependency-check (pull_request) Successful in 33s
Test Suite / test (3.11) (pull_request) Successful in 1m17s
Test Suite / lint (pull_request) Failing after 25s
Test Suite / build (pull_request) Has been skipped

This PR reworks the sidebar to be an accordion.
I also removed some of the progress feedback since it wasn't working correctly.
This commit is contained in:
2025-09-13 14:34:02 -07:00
parent 9a2e257b0d
commit 6936bc5d97
16 changed files with 287 additions and 319 deletions

View File

@@ -4,7 +4,9 @@
"Bash(mkdir:*)",
"Bash(uv run:*)",
"Bash(uv add:*)",
"Bash(uv sync:*)"
"Bash(uv sync:*)",
"Bash(tree:*)",
"WebFetch(domain:www.dash-bootstrap-components.com)"
],
"deny": [],
"ask": [],

View File

@@ -22,11 +22,13 @@ uv sync
**Run the application:**
Development mode (with auto-reload):
```bash
uv run run_dev.py
```
Production mode (with Gunicorn WSGI server):
```bash
# First install production dependencies
uv sync --extra prod
@@ -36,11 +38,12 @@ uv run run_prod.py
```
Legacy mode (basic Dash server):
```bash
uv run main.py
```
The app will be available at http://127.0.0.1:8050
The app will be available at <http://127.0.0.1:8050>
**Run tests:**

View File

@@ -65,6 +65,11 @@ ENV EMBEDDINGBUDDY_ENV=production
# Expose port
EXPOSE 8050
# Create non-root user
RUN groupadd -r appuser && useradd -r -g appuser appuser
RUN chown -R appuser:appuser /app
USER appuser
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5)" || exit 1

View File

@@ -152,22 +152,38 @@ The application follows a modular architecture for improved maintainability and
```text
src/embeddingbuddy/
├── config/ # Configuration management
│ └── settings.py # Centralized app settings
├── data/ # Data parsing and processing
│ ├── parser.py # NDJSON parsing logic
│ └── processor.py # Data transformation utilities
├── models/ # Data schemas and algorithms
│ ├── schemas.py # Pydantic data models
│ └── reducers.py # Dimensionality reduction algorithms
├── visualization/ # Plot creation and styling
│ ├── plots.py # Plot factory and creation logic
│ └── colors.py # Color mapping utilities
├── ui/ # User interface components
│ ├── layout.py # Main application layout
│ ├── components/ # Reusable UI components
│ └── callbacks/ # Organized callback functions
└── utils/ # Utility functions
├── app.py # Main application entry point and factory
├── config/ # Configuration management
│ └── settings.py # Centralized app settings
├── data/ # Data parsing and processing
│ ├── parser.py # NDJSON parsing logic
│ ├── processor.py # Data transformation utilities
│ └── sources/ # Data source integrations
│ └── opensearch.py # OpenSearch data source
├── models/ # Data schemas and algorithms
│ ├── schemas.py # Pydantic data models
│ ├── reducers.py # Dimensionality reduction algorithms
│ └── field_mapper.py # Field mapping utilities
├── visualization/ # Plot creation and styling
│ ├── plots.py # Plot factory and creation logic
│ └── colors.py # Color mapping utilities
├── ui/ # User interface components
│ ├── layout.py # Main application layout
│ ├── components/ # Reusable UI components
│ │ ├── sidebar.py # Sidebar component
│ │ ├── upload.py # Upload components
│ │ ├── textinput.py # Text input components
│ │ └── datasource.py # Data source components
│ └── callbacks/ # Organized callback functions
│ ├── data_processing.py # Data upload/processing callbacks
│ ├── visualization.py # Plot update callbacks
│ └── interactions.py # User interaction callbacks
└── utils/ # Utility functions
main.py # Application runner (at project root)
main.py # Application runner (at project root)
run_dev.py # Development server runner
run_prod.py # Production server runner
```
### Testing

17
assets/custom.css Normal file
View File

@@ -0,0 +1,17 @@
/* CSS override for transparent hover boxes in Plotly plots */
/* Make hover boxes transparent while preserving text readability */
.hovertext {
fill-opacity: 0.8 !important;
stroke-opacity: 1 !important;
}
/* Alternative selector for different Plotly versions */
g.hovertext > path {
opacity: 0.8 !important;
}
/* Ensure text remains fully visible */
.hovertext text {
opacity: 1 !important;
}

View File

@@ -45,28 +45,12 @@ class TransformersEmbedder {
console.log('✅ Using globally loaded Transformers.js pipeline');
}
// Show loading progress to user
if (window.updateModelLoadingProgress) {
window.updateModelLoadingProgress(0, `Loading ${modelName}...`);
}
this.extractor = await window.transformers.pipeline('feature-extraction', modelName, {
progress_callback: (data) => {
if (window.updateModelLoadingProgress && data.progress !== undefined) {
const progress = Math.round(data.progress);
window.updateModelLoadingProgress(progress, data.status || 'Loading...');
}
}
});
this.extractor = await window.transformers.pipeline('feature-extraction', modelName);
this.modelCache.set(modelName, this.extractor);
this.currentModel = modelName;
this.isLoading = false;
if (window.updateModelLoadingProgress) {
window.updateModelLoadingProgress(100, 'Model loaded successfully');
}
return { success: true, model: modelName };
} catch (error) {
this.isLoading = false;
@@ -116,15 +100,6 @@ class TransformersEmbedder {
}
});
// Update progress
const progress = Math.min(100, ((i + batch.length) / texts.length) * 100);
if (window.updateEmbeddingProgress) {
window.updateEmbeddingProgress(progress, `Processing ${i + batch.length}/${texts.length} texts`);
}
}
if (window.updateEmbeddingProgress) {
window.updateEmbeddingProgress(100, `Generated ${embeddings.length} embeddings successfully`);
}
return embeddings;
@@ -139,30 +114,6 @@ class TransformersEmbedder {
window.transformersEmbedder = new TransformersEmbedder();
console.log('📦 TransformersEmbedder instance created');
// Global progress update functions
window.updateModelLoadingProgress = function(progress, status) {
const progressBar = document.getElementById('model-loading-progress');
const statusText = document.getElementById('model-loading-status');
if (progressBar) {
progressBar.style.width = progress + '%';
progressBar.setAttribute('aria-valuenow', progress);
}
if (statusText) {
statusText.textContent = status;
}
};
window.updateEmbeddingProgress = function(progress, status) {
const progressBar = document.getElementById('embedding-progress');
const statusText = document.getElementById('embedding-status');
if (progressBar) {
progressBar.style.width = progress + '%';
progressBar.setAttribute('aria-valuenow', progress);
}
if (statusText) {
statusText.textContent = status;
}
};
// Dash clientside callback functions
window.dash_clientside = window.dash_clientside || {};
@@ -181,9 +132,7 @@ window.dash_clientside.transformers = {
const initResult = await window.transformersEmbedder.initializeModel(modelName);
if (!initResult.success) {
return [
{ error: initResult.error },
`❌ Model loading error: ${initResult.error}`,
"danger",
{ error: `Model loading error: ${initResult.error}` },
false
];
}
@@ -194,7 +143,6 @@ window.dash_clientside.transformers = {
switch (tokenizationMethod) {
case 'sentence':
// Simple sentence splitting - can be enhanced with proper NLP
textChunks = trimmedText
.split(/[.!?]+/)
.map(s => s.trim())
@@ -219,8 +167,6 @@ window.dash_clientside.transformers = {
if (textChunks.length === 0) {
return [
{ error: 'No valid text chunks found after tokenization' },
'❌ Error: No valid text chunks found after tokenization',
"danger",
false
];
}
@@ -230,9 +176,7 @@ window.dash_clientside.transformers = {
if (!embeddings || embeddings.length !== textChunks.length) {
return [
{ error: 'Embedding generation failed - mismatch in text chunks and embeddings' },
'❌ Error: Embedding generation failed',
"danger",
{ error: 'Embedding generation failed' },
false
];
}
@@ -247,13 +191,16 @@ window.dash_clientside.transformers = {
tags: []
}));
// Return the successful embeddings data
const embeddingsData = {
documents: documents,
embeddings: embeddings
};
console.log('✅ Embeddings generated successfully:', embeddingsData);
return [
{
documents: documents,
embeddings: embeddings
},
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
"success",
embeddingsData,
false
];
@@ -261,18 +208,18 @@ window.dash_clientside.transformers = {
console.error('Client-side embedding error:', error);
return [
{ error: error.message },
`❌ Error: ${error.message}`,
"danger",
false
];
}
}
};
console.log('✅ Transformers.js client-side setup complete');
console.log('Available:', {
transformersEmbedder: !!window.transformersEmbedder,
dashClientside: !!window.dash_clientside,
transformersModule: !!window.dash_clientside?.transformers,
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings,
processAsync: typeof window.processEmbeddingsAsync
});

View File

@@ -111,6 +111,17 @@ window.dash_clientside.transformers = {
}
try {
// Ensure Transformers.js is loaded
if (!window.transformersLibraryLoaded) {
const loaded = await initializeTransformers();
if (!loaded) {
return [
{ error: 'Failed to load Transformers.js' },
false
];
}
}
// Tokenize text
let textChunks;
const trimmedText = textContent.trim();
@@ -130,7 +141,10 @@ window.dash_clientside.transformers = {
}
if (textChunks.length === 0) {
throw new Error('No valid text chunks after tokenization');
return [
{ error: 'No valid text chunks after tokenization' },
false
];
}
// Generate embeddings
@@ -146,13 +160,16 @@ window.dash_clientside.transformers = {
tags: []
}));
// Return the successful embeddings data
const embeddingsData = {
documents: documents,
embeddings: embeddings
};
console.log('✅ Embeddings generated successfully:', embeddingsData);
return [
{
documents: documents,
embeddings: embeddings
},
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
"success",
embeddingsData,
false
];
@@ -160,13 +177,12 @@ window.dash_clientside.transformers = {
console.error('❌ Error generating embeddings:', error);
return [
{ error: error.message },
`❌ Error: ${error.message}`,
"danger",
false
];
}
}
};
console.log('✅ Simple Transformers.js setup complete');
console.log('Available functions:', Object.keys(window.dash_clientside.transformers));

View File

@@ -1,6 +1,6 @@
[project]
name = "embeddingbuddy"
version = "0.4.0"
version = "0.5.0"
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
readme = "README.md"
requires-python = ">=3.11"

View File

@@ -15,7 +15,12 @@ def create_app():
assets_path = os.path.join(project_root, "assets")
app = dash.Dash(
__name__, external_stylesheets=[dbc.themes.BOOTSTRAP], assets_folder=assets_path
__name__,
external_stylesheets=[
dbc.themes.BOOTSTRAP,
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"
],
assets_folder=assets_path
)
# Allow callbacks to components that are dynamically created in tabs
@@ -75,16 +80,12 @@ def _register_client_side_callbacks(app):
return [
{ error: 'Transformers.js not loaded. Please refresh the page and try again.' },
errorMsg + ' Please refresh the page.',
'danger',
false
];
}
""",
[
Output("embeddings-generated-trigger", "data"),
Output("text-input-status-immediate", "children"),
Output("text-input-status-immediate", "color"),
Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
],
[Input("generate-embeddings-btn", "n_clicks")],

View File

@@ -621,6 +621,12 @@ class DataProcessingCallbacks:
if not embeddings_data:
return no_update, no_update, no_update, no_update, no_update
# Check if this is a request trigger (contains textContent) vs actual embeddings data
if isinstance(embeddings_data, dict) and "textContent" in embeddings_data:
# This is a processing request trigger, not the actual results
# The JavaScript will handle the async processing and update the UI directly
return no_update, no_update, no_update, no_update, no_update
processed_data = self.processor.process_client_embeddings(embeddings_data)
if processed_data.error:

View File

@@ -1,6 +1,5 @@
import dash
from dash import callback, Input, Output, State, html
import dash_bootstrap_components as dbc
from dash import callback, Input, Output
class InteractionCallbacks:
@@ -8,75 +7,17 @@ class InteractionCallbacks:
self._register_callbacks()
def _register_callbacks(self):
@callback(
Output("point-details", "children"),
Input("embedding-plot", "clickData"),
[State("processed-data", "data"), State("processed-prompts", "data")],
)
def display_click_data(clickData, data, prompts_data):
if not clickData or not data:
return "Click on a point to see details"
point_data = clickData["points"][0]
trace_name = point_data.get("fullData", {}).get("name", "Documents")
if "pointIndex" in point_data:
point_index = point_data["pointIndex"]
elif "pointNumber" in point_data:
point_index = point_data["pointNumber"]
else:
return "Could not identify clicked point"
if (
trace_name.startswith("Prompts")
and prompts_data
and "prompts" in prompts_data
):
item = prompts_data["prompts"][point_index]
item_type = "Prompt"
else:
item = data["documents"][point_index]
item_type = "Document"
return self._create_detail_card(item, item_type)
@callback(
[
Output("processed-data", "data", allow_duplicate=True),
Output("processed-prompts", "data", allow_duplicate=True),
Output("point-details", "children", allow_duplicate=True),
],
Input("reset-button", "n_clicks"),
prevent_initial_call=True,
)
def reset_data(n_clicks):
if n_clicks is None or n_clicks == 0:
return dash.no_update, dash.no_update, dash.no_update
return dash.no_update, dash.no_update
return None, None, "Click on a point to see details"
return None, None
@staticmethod
def _create_detail_card(item, item_type):
return dbc.Card(
[
dbc.CardBody(
[
html.H5(f"{item_type}: {item['id']}", className="card-title"),
html.P(f"Text: {item['text']}", className="card-text"),
html.P(
f"Category: {item.get('category', 'Unknown')}",
className="card-text",
),
html.P(
f"Subcategory: {item.get('subcategory', 'Unknown')}",
className="card-text",
),
html.P(
f"Tags: {', '.join(item.get('tags', [])) if item.get('tags') else 'None'}",
className="card-text",
),
html.P(f"Type: {item_type}", className="card-text text-muted"),
]
)
]
)

View File

@@ -1,13 +1,11 @@
from dash import dcc, html
import dash_bootstrap_components as dbc
from .upload import UploadComponent
from .textinput import TextInputComponent
class DataSourceComponent:
def __init__(self):
self.upload_component = UploadComponent()
self.text_input_component = TextInputComponent()
def create_tabbed_interface(self):
"""Create tabbed interface for different data sources."""
@@ -19,7 +17,6 @@ class DataSourceComponent:
[
dbc.Tab(label="File Upload", tab_id="file-tab"),
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
dbc.Tab(label="Text Input", tab_id="text-input-tab"),
],
id="data-source-tabs",
active_tab="file-tab",
@@ -211,9 +208,6 @@ class DataSourceComponent:
]
)
def create_text_input_tab(self):
"""Create text input tab content for browser-based embedding generation."""
return html.Div([self.text_input_component.create_text_input_interface()])
def _create_opensearch_section(self, section_type):
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""

View File

@@ -2,31 +2,26 @@ from dash import dcc, html
import dash_bootstrap_components as dbc
from .upload import UploadComponent
from .datasource import DataSourceComponent
from .textinput import TextInputComponent
class SidebarComponent:
def __init__(self):
self.upload_component = UploadComponent()
self.datasource_component = DataSourceComponent()
self.textinput_component = TextInputComponent()
def create_layout(self):
return dbc.Col(
[
html.H5("Data Sources", className="mb-3"),
self.datasource_component.create_error_alert(),
self.datasource_component.create_success_alert(),
self.datasource_component.create_tabbed_interface(),
html.H5("Visualization Controls", className="mb-3 mt-4"),
]
+ self._create_method_dropdown()
+ self._create_color_dropdown()
+ self._create_dimension_toggle()
+ self._create_prompts_toggle()
+ [
html.H5("Point Details", className="mb-3"),
html.Div(
id="point-details", children="Click on a point to see details"
),
dbc.Accordion(
[
self._create_data_sources_item(),
self._create_generate_embeddings_item(),
self._create_visualization_controls_item(),
],
always_open=True,
)
],
width=3,
style={"padding-right": "20px"},
@@ -86,3 +81,57 @@ class SidebarComponent:
style={"margin-bottom": "20px"},
),
]
def _create_generate_embeddings_item(self):
return dbc.AccordionItem(
[
self.textinput_component.create_text_input_interface(),
],
title=html.Span([
"Generate Embeddings ",
html.I(
className="fas fa-info-circle text-muted",
style={"cursor": "pointer"},
id="generate-embeddings-info-icon",
title="Create new embeddings from text input using various in-browser models"
)
]),
item_id="generate-embeddings-accordion",
)
def _create_data_sources_item(self):
return dbc.AccordionItem(
[
self.datasource_component.create_error_alert(),
self.datasource_component.create_success_alert(),
self.datasource_component.create_tabbed_interface(),
],
title=html.Span([
"Load Embeddings ",
html.I(
className="fas fa-info-circle text-muted",
style={"cursor": "pointer"},
id="load-embeddings-info-icon",
title="Load existing embeddings: upload files or read from OpenSearch"
)
]),
item_id="data-sources-accordion",
)
def _create_visualization_controls_item(self):
return dbc.AccordionItem(
self._create_method_dropdown()
+ self._create_color_dropdown()
+ self._create_dimension_toggle()
+ self._create_prompts_toggle(),
title=html.Span([
"Visualization Controls ",
html.I(
className="fas fa-info-circle text-muted",
style={"cursor": "pointer"},
id="visualization-controls-info-icon",
title="Configure plot settings: select dimensionality reduction method, colors, and display options"
)
]),
item_id="visualization-controls-accordion",
)

View File

@@ -30,9 +30,6 @@ class TextInputComponent:
# Generation controls
self._create_generation_controls(),
html.Hr(),
# Progress indicators
self._create_progress_indicators(),
html.Hr(),
# Status and results
self._create_status_section(),
# Hidden components for data flow
@@ -297,65 +294,11 @@ class TextInputComponent:
]
)
def _create_progress_indicators(self):
"""Create progress bars for model loading and embedding generation."""
return html.Div(
[
# Model loading progress
html.Div(
[
html.H6("Model Loading Progress", className="mb-2"),
dbc.Progress(
id="model-loading-progress",
value=0,
striped=True,
animated=True,
className="mb-2",
),
html.Small(
id="model-loading-status",
children="No model loading in progress",
className="text-muted",
),
],
id="model-loading-section",
style={"display": "none"},
),
html.Br(),
# Embedding generation progress
html.Div(
[
html.H6("Embedding Generation Progress", className="mb-2"),
dbc.Progress(
id="embedding-progress",
value=0,
striped=True,
animated=True,
className="mb-2",
),
html.Small(
id="embedding-status",
children="No embedding generation in progress",
className="text-muted",
),
],
id="embedding-progress-section",
style={"display": "none"},
),
]
)
def _create_status_section(self):
"""Create status alerts and results preview."""
return html.Div(
[
# Immediate status (from client-side)
dbc.Alert(
id="text-input-status-immediate",
children="Ready to generate embeddings",
color="light",
className="mb-3",
),
# Server-side status
dbc.Alert(
id="text-input-status",

View File

@@ -5,40 +5,68 @@ import dash_bootstrap_components as dbc
class UploadComponent:
@staticmethod
def create_data_upload():
return dcc.Upload(
id="upload-data",
children=html.Div(["Drag and Drop or ", html.A("Select Files")]),
style={
"width": "100%",
"height": "60px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "5px",
"textAlign": "center",
"margin-bottom": "20px",
},
multiple=False,
)
return html.Div([
dcc.Upload(
id="upload-data",
children=html.Div([
"Upload Data ",
html.I(
className="fas fa-info-circle",
style={"color": "#6c757d", "fontSize": "14px"},
id="data-upload-info"
)
]),
style={
"width": "100%",
"height": "60px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "5px",
"textAlign": "center",
"margin-bottom": "20px",
},
multiple=False,
),
dbc.Tooltip(
"Click here or drag and drop NDJSON files containing document embeddings",
target="data-upload-info",
placement="top"
)
])
@staticmethod
def create_prompts_upload():
return dcc.Upload(
id="upload-prompts",
children=html.Div(["Drag and Drop Prompts or ", html.A("Select Files")]),
style={
"width": "100%",
"height": "60px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "5px",
"textAlign": "center",
"margin-bottom": "20px",
"borderColor": "#28a745",
},
multiple=False,
)
return html.Div([
dcc.Upload(
id="upload-prompts",
children=html.Div([
"Upload Prompts ",
html.I(
className="fas fa-info-circle",
style={"color": "#6c757d", "fontSize": "14px"},
id="prompts-upload-info"
)
]),
style={
"width": "100%",
"height": "60px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "5px",
"textAlign": "center",
"margin-bottom": "20px",
"borderColor": "#28a745",
},
multiple=False,
),
dbc.Tooltip(
"Click here or drag and drop NDJSON files containing prompt embeddings",
target="prompts-upload-info",
placement="top"
)
])
@staticmethod
def create_reset_button():

View File

@@ -38,9 +38,9 @@ class PlotFactory:
if dimensions == "3d":
fig = px.scatter_3d(
df,
x="dim_1",
y="dim_2",
z="dim_3",
x="x",
y="y",
z="z",
color=color_values,
hover_data=hover_fields,
title=f"3D Embedding Visualization - {method} (colored by {color_by})",
@@ -49,8 +49,8 @@ class PlotFactory:
else:
fig = px.scatter(
df,
x="dim_1",
y="dim_2",
x="x",
y="y",
color=color_values,
hover_data=hover_fields,
title=f"2D Embedding Visualization - {method} (colored by {color_by})",
@@ -77,17 +77,17 @@ class PlotFactory:
if dimensions == "3d":
doc_fig = px.scatter_3d(
doc_df,
x="dim_1",
y="dim_2",
z="dim_3",
x="x",
y="y",
z="z",
color=doc_color_values,
hover_data=hover_fields,
)
else:
doc_fig = px.scatter(
doc_df,
x="dim_1",
y="dim_2",
x="x",
y="y",
color=doc_color_values,
hover_data=hover_fields,
)
@@ -114,17 +114,17 @@ class PlotFactory:
if dimensions == "3d":
prompt_fig = px.scatter_3d(
prompt_df,
x="dim_1",
y="dim_2",
z="dim_3",
x="x",
y="y",
z="z",
color=prompt_color_values,
hover_data=hover_fields,
)
else:
prompt_fig = px.scatter(
prompt_df,
x="dim_1",
y="dim_2",
x="x",
y="y",
color=prompt_color_values,
hover_data=hover_fields,
)
@@ -168,11 +168,11 @@ class PlotFactory:
"category": doc.category,
"subcategory": doc.subcategory,
"tags_str": ", ".join(doc.tags) if doc.tags else "None",
"dim_1": coordinates[i, 0],
"dim_2": coordinates[i, 1],
"x": coordinates[i, 0],
"y": coordinates[i, 1],
}
if dimensions == "3d":
row["dim_3"] = coordinates[i, 2]
row["z"] = coordinates[i, 2]
df_data.append(row)
return pd.DataFrame(df_data)