1 Commits

Author SHA1 Message Date
d35ef995a3 bump version to 0.4.0
Some checks failed
Security Scan / security (pull_request) Successful in 42s
Security Scan / dependency-check (pull_request) Successful in 46s
Test Suite / lint (pull_request) Failing after 31s
Test Suite / test (3.11) (pull_request) Successful in 1m28s
Test Suite / build (pull_request) Has been skipped
2025-09-07 16:47:13 -07:00
29 changed files with 437 additions and 578 deletions

View File

@@ -4,9 +4,7 @@
"Bash(mkdir:*)", "Bash(mkdir:*)",
"Bash(uv run:*)", "Bash(uv run:*)",
"Bash(uv add:*)", "Bash(uv add:*)",
"Bash(uv sync:*)", "Bash(uv sync:*)"
"Bash(tree:*)",
"WebFetch(domain:www.dash-bootstrap-components.com)"
], ],
"deny": [], "deny": [],
"ask": [], "ask": [],

View File

@@ -71,15 +71,22 @@ jobs:
echo '```' >> release-notes.md echo '```' >> release-notes.md
- name: Create Release - name: Create Release
uses: akkuman/gitea-release-action@v1 uses: actions/create-release@v1
env: env:
NODE_OPTIONS: '--experimental-fetch' GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
with: with:
token: ${{ secrets.GITEA_TOKEN }}
tag_name: ${{ github.ref_name || github.event.inputs.version }} tag_name: ${{ github.ref_name || github.event.inputs.version }}
release_name: Release ${{ github.ref_name || github.event.inputs.version }} release_name: Release ${{ github.ref_name || github.event.inputs.version }}
body_path: release-notes.md body_path: release-notes.md
draft: false draft: false
prerelease: false prerelease: false
files: |-
dist/* - name: Upload Release Assets
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
with:
upload_url: ${{ steps.create_release.outputs.upload_url }}
asset_path: dist/
asset_name: embeddingbuddy-dist
asset_content_type: application/zip

View File

@@ -1,53 +0,0 @@
name: Docker Release
on:
push:
tags:
- 'v[0-9]+.[0-9]+.[0-9]+'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
jobs:
build-and-push:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to Container Registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=ref,event=tag
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=semver,pattern={{major}}
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: .
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -22,13 +22,11 @@ uv sync
**Run the application:** **Run the application:**
Development mode (with auto-reload): Development mode (with auto-reload):
```bash ```bash
uv run run_dev.py uv run run_dev.py
``` ```
Production mode (with Gunicorn WSGI server): Production mode (with Gunicorn WSGI server):
```bash ```bash
# First install production dependencies # First install production dependencies
uv sync --extra prod uv sync --extra prod
@@ -38,12 +36,11 @@ uv run run_prod.py
``` ```
Legacy mode (basic Dash server): Legacy mode (basic Dash server):
```bash ```bash
uv run main.py uv run main.py
``` ```
The app will be available at <http://127.0.0.1:8050> The app will be available at http://127.0.0.1:8050
**Run tests:** **Run tests:**

View File

@@ -2,9 +2,6 @@
# Stage 1: Builder # Stage 1: Builder
FROM python:3.11-slim as builder FROM python:3.11-slim as builder
# Create non-root user early in builder stage
RUN groupadd -r appuser && useradd -r -g appuser appuser
# Install system dependencies for building Python packages # Install system dependencies for building Python packages
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
build-essential \ build-essential \
@@ -28,15 +25,6 @@ COPY wsgi.py .
COPY run_prod.py . COPY run_prod.py .
COPY assets/ assets/ COPY assets/ assets/
# Change ownership of source files before building (lighter I/O)
RUN chown -R appuser:appuser /app
# Create and set permissions for appuser home directory (needed for uv cache)
RUN mkdir -p /home/appuser && chown -R appuser:appuser /home/appuser
# Switch to non-root user before building
USER appuser
# Create virtual environment and install dependencies (including production extras) # Create virtual environment and install dependencies (including production extras)
RUN uv venv .venv RUN uv venv .venv
RUN uv sync --frozen --extra prod RUN uv sync --frozen --extra prod
@@ -44,28 +32,23 @@ RUN uv sync --frozen --extra prod
# Stage 2: Runtime # Stage 2: Runtime
FROM python:3.11-slim as runtime FROM python:3.11-slim as runtime
# Create non-root user in runtime stage
RUN groupadd -r appuser && useradd -r -g appuser appuser
# Install runtime dependencies for compiled packages # Install runtime dependencies for compiled packages
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
libgomp1 \ libgomp1 \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Set working directory and change ownership (small directory) # Set working directory
WORKDIR /app WORKDIR /app
RUN chown appuser:appuser /app
# Copy files from builder with correct ownership # Copy virtual environment from builder stage
COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv COPY --from=builder /app/.venv /app/.venv
COPY --from=builder --chown=appuser:appuser /app/src /app/src
COPY --from=builder --chown=appuser:appuser /app/main.py /app/main.py
COPY --from=builder --chown=appuser:appuser /app/assets /app/assets
COPY --from=builder --chown=appuser:appuser /app/wsgi.py /app/wsgi.py
COPY --from=builder --chown=appuser:appuser /app/run_prod.py /app/run_prod.py
# Switch to non-root user # Copy application files from builder stage
USER appuser COPY --from=builder /app/src /app/src
COPY --from=builder /app/main.py /app/main.py
COPY --from=builder /app/assets /app/assets
COPY --from=builder /app/wsgi.py /app/wsgi.py
COPY --from=builder /app/run_prod.py /app/run_prod.py
# Make sure the virtual environment is in PATH # Make sure the virtual environment is in PATH
ENV PATH="/app/.venv/bin:$PATH" ENV PATH="/app/.venv/bin:$PATH"

21
LICENSE
View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2025 Austin Godber - EmbeddingBuddy
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -152,38 +152,22 @@ The application follows a modular architecture for improved maintainability and
```text ```text
src/embeddingbuddy/ src/embeddingbuddy/
├── app.py # Main application entry point and factory ├── config/ # Configuration management
├── config/ # Configuration management │ └── settings.py # Centralized app settings
│ └── settings.py # Centralized app settings ├── data/ # Data parsing and processing
├── data/ # Data parsing and processing │ ├── parser.py # NDJSON parsing logic
│ ├── parser.py # NDJSON parsing logic │ └── processor.py # Data transformation utilities
│ ├── processor.py # Data transformation utilities ├── models/ # Data schemas and algorithms
│ └── sources/ # Data source integrations │ ├── schemas.py # Pydantic data models
│ └── opensearch.py # OpenSearch data source │ └── reducers.py # Dimensionality reduction algorithms
├── models/ # Data schemas and algorithms ├── visualization/ # Plot creation and styling
│ ├── schemas.py # Pydantic data models │ ├── plots.py # Plot factory and creation logic
│ ├── reducers.py # Dimensionality reduction algorithms │ └── colors.py # Color mapping utilities
│ └── field_mapper.py # Field mapping utilities ├── ui/ # User interface components
├── visualization/ # Plot creation and styling │ ├── layout.py # Main application layout
│ ├── plots.py # Plot factory and creation logic │ ├── components/ # Reusable UI components
│ └── colors.py # Color mapping utilities │ └── callbacks/ # Organized callback functions
├── ui/ # User interface components └── utils/ # Utility functions
│ ├── layout.py # Main application layout
│ ├── components/ # Reusable UI components
│ │ ├── sidebar.py # Sidebar component
│ │ ├── upload.py # Upload components
│ │ ├── textinput.py # Text input components
│ │ └── datasource.py # Data source components
│ └── callbacks/ # Organized callback functions
│ ├── data_processing.py # Data upload/processing callbacks
│ ├── visualization.py # Plot update callbacks
│ └── interactions.py # User interaction callbacks
└── utils/ # Utility functions
main.py # Application runner (at project root)
main.py # Application runner (at project root)
run_dev.py # Development server runner
run_prod.py # Production server runner
``` ```
### Testing ### Testing

View File

@@ -1,17 +0,0 @@
/* CSS override for transparent hover boxes in Plotly plots */
/* Make hover boxes transparent while preserving text readability */
.hovertext {
fill-opacity: 0.8 !important;
stroke-opacity: 1 !important;
}
/* Alternative selector for different Plotly versions */
g.hovertext > path {
opacity: 0.8 !important;
}
/* Ensure text remains fully visible */
.hovertext text {
opacity: 1 !important;
}

View File

@@ -45,12 +45,28 @@ class TransformersEmbedder {
console.log('✅ Using globally loaded Transformers.js pipeline'); console.log('✅ Using globally loaded Transformers.js pipeline');
} }
this.extractor = await window.transformers.pipeline('feature-extraction', modelName); // Show loading progress to user
if (window.updateModelLoadingProgress) {
window.updateModelLoadingProgress(0, `Loading ${modelName}...`);
}
this.extractor = await window.transformers.pipeline('feature-extraction', modelName, {
progress_callback: (data) => {
if (window.updateModelLoadingProgress && data.progress !== undefined) {
const progress = Math.round(data.progress);
window.updateModelLoadingProgress(progress, data.status || 'Loading...');
}
}
});
this.modelCache.set(modelName, this.extractor); this.modelCache.set(modelName, this.extractor);
this.currentModel = modelName; this.currentModel = modelName;
this.isLoading = false; this.isLoading = false;
if (window.updateModelLoadingProgress) {
window.updateModelLoadingProgress(100, 'Model loaded successfully');
}
return { success: true, model: modelName }; return { success: true, model: modelName };
} catch (error) { } catch (error) {
this.isLoading = false; this.isLoading = false;
@@ -100,6 +116,15 @@ class TransformersEmbedder {
} }
}); });
// Update progress
const progress = Math.min(100, ((i + batch.length) / texts.length) * 100);
if (window.updateEmbeddingProgress) {
window.updateEmbeddingProgress(progress, `Processing ${i + batch.length}/${texts.length} texts`);
}
}
if (window.updateEmbeddingProgress) {
window.updateEmbeddingProgress(100, `Generated ${embeddings.length} embeddings successfully`);
} }
return embeddings; return embeddings;
@@ -114,6 +139,30 @@ class TransformersEmbedder {
window.transformersEmbedder = new TransformersEmbedder(); window.transformersEmbedder = new TransformersEmbedder();
console.log('📦 TransformersEmbedder instance created'); console.log('📦 TransformersEmbedder instance created');
// Global progress update functions
window.updateModelLoadingProgress = function(progress, status) {
const progressBar = document.getElementById('model-loading-progress');
const statusText = document.getElementById('model-loading-status');
if (progressBar) {
progressBar.style.width = progress + '%';
progressBar.setAttribute('aria-valuenow', progress);
}
if (statusText) {
statusText.textContent = status;
}
};
window.updateEmbeddingProgress = function(progress, status) {
const progressBar = document.getElementById('embedding-progress');
const statusText = document.getElementById('embedding-status');
if (progressBar) {
progressBar.style.width = progress + '%';
progressBar.setAttribute('aria-valuenow', progress);
}
if (statusText) {
statusText.textContent = status;
}
};
// Dash clientside callback functions // Dash clientside callback functions
window.dash_clientside = window.dash_clientside || {}; window.dash_clientside = window.dash_clientside || {};
@@ -132,7 +181,9 @@ window.dash_clientside.transformers = {
const initResult = await window.transformersEmbedder.initializeModel(modelName); const initResult = await window.transformersEmbedder.initializeModel(modelName);
if (!initResult.success) { if (!initResult.success) {
return [ return [
{ error: `Model loading error: ${initResult.error}` }, { error: initResult.error },
`❌ Model loading error: ${initResult.error}`,
"danger",
false false
]; ];
} }
@@ -143,6 +194,7 @@ window.dash_clientside.transformers = {
switch (tokenizationMethod) { switch (tokenizationMethod) {
case 'sentence': case 'sentence':
// Simple sentence splitting - can be enhanced with proper NLP
textChunks = trimmedText textChunks = trimmedText
.split(/[.!?]+/) .split(/[.!?]+/)
.map(s => s.trim()) .map(s => s.trim())
@@ -167,6 +219,8 @@ window.dash_clientside.transformers = {
if (textChunks.length === 0) { if (textChunks.length === 0) {
return [ return [
{ error: 'No valid text chunks found after tokenization' }, { error: 'No valid text chunks found after tokenization' },
'❌ Error: No valid text chunks found after tokenization',
"danger",
false false
]; ];
} }
@@ -176,7 +230,9 @@ window.dash_clientside.transformers = {
if (!embeddings || embeddings.length !== textChunks.length) { if (!embeddings || embeddings.length !== textChunks.length) {
return [ return [
{ error: 'Embedding generation failed' }, { error: 'Embedding generation failed - mismatch in text chunks and embeddings' },
'❌ Error: Embedding generation failed',
"danger",
false false
]; ];
} }
@@ -191,16 +247,13 @@ window.dash_clientside.transformers = {
tags: [] tags: []
})); }));
// Return the successful embeddings data
const embeddingsData = {
documents: documents,
embeddings: embeddings
};
console.log('✅ Embeddings generated successfully:', embeddingsData);
return [ return [
embeddingsData, {
documents: documents,
embeddings: embeddings
},
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
"success",
false false
]; ];
@@ -208,18 +261,18 @@ window.dash_clientside.transformers = {
console.error('Client-side embedding error:', error); console.error('Client-side embedding error:', error);
return [ return [
{ error: error.message }, { error: error.message },
`❌ Error: ${error.message}`,
"danger",
false false
]; ];
} }
} }
}; };
console.log('✅ Transformers.js client-side setup complete'); console.log('✅ Transformers.js client-side setup complete');
console.log('Available:', { console.log('Available:', {
transformersEmbedder: !!window.transformersEmbedder, transformersEmbedder: !!window.transformersEmbedder,
dashClientside: !!window.dash_clientside, dashClientside: !!window.dash_clientside,
transformersModule: !!window.dash_clientside?.transformers, transformersModule: !!window.dash_clientside?.transformers,
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings, generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings
processAsync: typeof window.processEmbeddingsAsync
}); });

View File

@@ -111,17 +111,6 @@ window.dash_clientside.transformers = {
} }
try { try {
// Ensure Transformers.js is loaded
if (!window.transformersLibraryLoaded) {
const loaded = await initializeTransformers();
if (!loaded) {
return [
{ error: 'Failed to load Transformers.js' },
false
];
}
}
// Tokenize text // Tokenize text
let textChunks; let textChunks;
const trimmedText = textContent.trim(); const trimmedText = textContent.trim();
@@ -141,10 +130,7 @@ window.dash_clientside.transformers = {
} }
if (textChunks.length === 0) { if (textChunks.length === 0) {
return [ throw new Error('No valid text chunks after tokenization');
{ error: 'No valid text chunks after tokenization' },
false
];
} }
// Generate embeddings // Generate embeddings
@@ -160,16 +146,13 @@ window.dash_clientside.transformers = {
tags: [] tags: []
})); }));
// Return the successful embeddings data
const embeddingsData = {
documents: documents,
embeddings: embeddings
};
console.log('✅ Embeddings generated successfully:', embeddingsData);
return [ return [
embeddingsData, {
documents: documents,
embeddings: embeddings
},
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
"success",
false false
]; ];
@@ -177,12 +160,13 @@ window.dash_clientside.transformers = {
console.error('❌ Error generating embeddings:', error); console.error('❌ Error generating embeddings:', error);
return [ return [
{ error: error.message }, { error: error.message },
`❌ Error: ${error.message}`,
"danger",
false false
]; ];
} }
} }
}; };
console.log('✅ Simple Transformers.js setup complete'); console.log('✅ Simple Transformers.js setup complete');
console.log('Available functions:', Object.keys(window.dash_clientside.transformers)); console.log('Available functions:', Object.keys(window.dash_clientside.transformers));

Binary file not shown.

Before

Width:  |  Height:  |  Size: 844 KiB

After

Width:  |  Height:  |  Size: 339 KiB

File diff suppressed because one or more lines are too long

1
prompts-raw.ndjson Normal file

File diff suppressed because one or more lines are too long

64
prompts.ndjson Normal file

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "embeddingbuddy" name = "embeddingbuddy"
version = "0.6.0" version = "0.4.0"
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques." description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"

View File

@@ -12,15 +12,9 @@ def main():
os.environ["EMBEDDINGBUDDY_ENV"] = "development" os.environ["EMBEDDINGBUDDY_ENV"] = "development"
os.environ["EMBEDDINGBUDDY_DEBUG"] = "true" os.environ["EMBEDDINGBUDDY_DEBUG"] = "true"
# Check for OpenSearch disable flag (optional for testing)
# Set EMBEDDINGBUDDY_OPENSEARCH_ENABLED=false to test without OpenSearch
opensearch_status = os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "true")
opensearch_enabled = opensearch_status.lower() == "true"
print("🚀 Starting EmbeddingBuddy in development mode...") print("🚀 Starting EmbeddingBuddy in development mode...")
print("📁 Auto-reload enabled - changes will trigger restart") print("📁 Auto-reload enabled - changes will trigger restart")
print("🌐 Server will be available at http://127.0.0.1:8050") print("🌐 Server will be available at http://127.0.0.1:8050")
print(f"🔍 OpenSearch: {'Enabled' if opensearch_enabled else 'Disabled'}")
print("⏹️ Press Ctrl+C to stop") print("⏹️ Press Ctrl+C to stop")
app = create_app() app = create_app()

View File

@@ -13,9 +13,6 @@ def main():
# Force production settings # Force production settings
os.environ["EMBEDDINGBUDDY_ENV"] = "production" os.environ["EMBEDDINGBUDDY_ENV"] = "production"
os.environ["EMBEDDINGBUDDY_DEBUG"] = "false" os.environ["EMBEDDINGBUDDY_DEBUG"] = "false"
# Disable OpenSearch by default in production (can be overridden by setting env var)
if "EMBEDDINGBUDDY_OPENSEARCH_ENABLED" not in os.environ:
os.environ["EMBEDDINGBUDDY_OPENSEARCH_ENABLED"] = "false"
print("🚀 Starting EmbeddingBuddy in production mode...") print("🚀 Starting EmbeddingBuddy in production mode...")
print(f"⚙️ Workers: {AppSettings.GUNICORN_WORKERS}") print(f"⚙️ Workers: {AppSettings.GUNICORN_WORKERS}")
@@ -28,7 +25,7 @@ def main():
"--workers", str(AppSettings.GUNICORN_WORKERS), "--workers", str(AppSettings.GUNICORN_WORKERS),
"--bind", AppSettings.GUNICORN_BIND, "--bind", AppSettings.GUNICORN_BIND,
"--timeout", str(AppSettings.GUNICORN_TIMEOUT), "--timeout", str(AppSettings.GUNICORN_TIMEOUT),
"--keep-alive", str(AppSettings.GUNICORN_KEEPALIVE), "--keepalive", str(AppSettings.GUNICORN_KEEPALIVE),
"--access-logfile", "-", "--access-logfile", "-",
"--error-logfile", "-", "--error-logfile", "-",
"--log-level", "info", "--log-level", "info",

View File

@@ -15,34 +15,7 @@ def create_app():
assets_path = os.path.join(project_root, "assets") assets_path = os.path.join(project_root, "assets")
app = dash.Dash( app = dash.Dash(
__name__, __name__, external_stylesheets=[dbc.themes.BOOTSTRAP], assets_folder=assets_path
title="EmbeddingBuddy",
external_stylesheets=[
dbc.themes.BOOTSTRAP,
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css",
],
assets_folder=assets_path,
meta_tags=[
{
"name": "description",
"content": "Interactive embedding visualization tool for exploring high-dimensional vectors through dimensionality reduction techniques like PCA, t-SNE, and UMAP.",
},
{"name": "author", "content": "EmbeddingBuddy"},
{
"name": "keywords",
"content": "embeddings, visualization, dimensionality reduction, PCA, t-SNE, UMAP, machine learning, data science",
},
{"name": "viewport", "content": "width=device-width, initial-scale=1.0"},
{
"property": "og:title",
"content": "EmbeddingBuddy - Interactive Embedding Visualization",
},
{
"property": "og:description",
"content": "Explore and visualize embedding vectors through interactive 2D/3D plots with multiple dimensionality reduction techniques.",
},
{"property": "og:type", "content": "website"},
],
) )
# Allow callbacks to components that are dynamically created in tabs # Allow callbacks to components that are dynamically created in tabs
@@ -102,12 +75,16 @@ def _register_client_side_callbacks(app):
return [ return [
{ error: 'Transformers.js not loaded. Please refresh the page and try again.' }, { error: 'Transformers.js not loaded. Please refresh the page and try again.' },
errorMsg + ' Please refresh the page.',
'danger',
false false
]; ];
} }
""", """,
[ [
Output("embeddings-generated-trigger", "data"), Output("embeddings-generated-trigger", "data"),
Output("text-input-status-immediate", "children"),
Output("text-input-status-immediate", "color"),
Output("generate-embeddings-btn", "disabled", allow_duplicate=True), Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
], ],
[Input("generate-embeddings-btn", "n_clicks")], [Input("generate-embeddings-btn", "n_clicks")],

View File

@@ -74,9 +74,7 @@ class AppSettings:
PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050")) PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))
# Environment Configuration # Environment Configuration
ENVIRONMENT = os.getenv( ENVIRONMENT = os.getenv("EMBEDDINGBUDDY_ENV", "development") # development, production
"EMBEDDINGBUDDY_ENV", "development"
) # development, production
# WSGI Server Configuration (for production) # WSGI Server Configuration (for production)
GUNICORN_WORKERS = int(os.getenv("GUNICORN_WORKERS", "4")) GUNICORN_WORKERS = int(os.getenv("GUNICORN_WORKERS", "4"))
@@ -85,9 +83,6 @@ class AppSettings:
GUNICORN_KEEPALIVE = int(os.getenv("GUNICORN_KEEPALIVE", "5")) GUNICORN_KEEPALIVE = int(os.getenv("GUNICORN_KEEPALIVE", "5"))
# OpenSearch Configuration # OpenSearch Configuration
OPENSEARCH_ENABLED = (
os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "True").lower() == "true"
)
OPENSEARCH_DEFAULT_SIZE = 100 OPENSEARCH_DEFAULT_SIZE = 100
OPENSEARCH_SAMPLE_SIZE = 5 OPENSEARCH_SAMPLE_SIZE = 5
OPENSEARCH_CONNECTION_TIMEOUT = 30 OPENSEARCH_CONNECTION_TIMEOUT = 30

View File

@@ -82,23 +82,19 @@ class DataProcessingCallbacks:
) )
def render_tab_content(active_tab): def render_tab_content(active_tab):
from ...ui.components.datasource import DataSourceComponent from ...ui.components.datasource import DataSourceComponent
from ...config.settings import AppSettings
datasource = DataSourceComponent() datasource = DataSourceComponent()
if active_tab == "opensearch-tab" and AppSettings.OPENSEARCH_ENABLED: if active_tab == "opensearch-tab":
return [datasource.create_opensearch_tab()] return [datasource.create_opensearch_tab()]
elif active_tab == "text-input-tab": elif active_tab == "text-input-tab":
return [datasource.create_text_input_tab()] return [datasource.create_text_input_tab()]
else: else:
return [datasource.create_file_upload_tab()] return [datasource.create_file_upload_tab()]
# Register callbacks for both data and prompts sections (only if OpenSearch is enabled) # Register callbacks for both data and prompts sections
if AppSettings.OPENSEARCH_ENABLED: self._register_opensearch_callbacks("data", self.opensearch_client_data)
self._register_opensearch_callbacks("data", self.opensearch_client_data) self._register_opensearch_callbacks("prompts", self.opensearch_client_prompts)
self._register_opensearch_callbacks(
"prompts", self.opensearch_client_prompts
)
# Register collapsible section callbacks # Register collapsible section callbacks
self._register_collapse_callbacks() self._register_collapse_callbacks()
@@ -625,12 +621,6 @@ class DataProcessingCallbacks:
if not embeddings_data: if not embeddings_data:
return no_update, no_update, no_update, no_update, no_update return no_update, no_update, no_update, no_update, no_update
# Check if this is a request trigger (contains textContent) vs actual embeddings data
if isinstance(embeddings_data, dict) and "textContent" in embeddings_data:
# This is a processing request trigger, not the actual results
# The JavaScript will handle the async processing and update the UI directly
return no_update, no_update, no_update, no_update, no_update
processed_data = self.processor.process_client_embeddings(embeddings_data) processed_data = self.processor.process_client_embeddings(embeddings_data)
if processed_data.error: if processed_data.error:

View File

@@ -1,5 +1,6 @@
import dash import dash
from dash import callback, Input, Output from dash import callback, Input, Output, State, html
import dash_bootstrap_components as dbc
class InteractionCallbacks: class InteractionCallbacks:
@@ -8,25 +9,74 @@ class InteractionCallbacks:
def _register_callbacks(self): def _register_callbacks(self):
@callback( @callback(
Output("about-modal", "is_open"), Output("point-details", "children"),
[Input("about-button", "n_clicks"), Input("about-modal-close", "n_clicks")], Input("embedding-plot", "clickData"),
prevent_initial_call=True, [State("processed-data", "data"), State("processed-prompts", "data")],
) )
def toggle_about_modal(about_clicks, close_clicks): def display_click_data(clickData, data, prompts_data):
if about_clicks or close_clicks: if not clickData or not data:
return True if about_clicks else False return "Click on a point to see details"
return False
point_data = clickData["points"][0]
trace_name = point_data.get("fullData", {}).get("name", "Documents")
if "pointIndex" in point_data:
point_index = point_data["pointIndex"]
elif "pointNumber" in point_data:
point_index = point_data["pointNumber"]
else:
return "Could not identify clicked point"
if (
trace_name.startswith("Prompts")
and prompts_data
and "prompts" in prompts_data
):
item = prompts_data["prompts"][point_index]
item_type = "Prompt"
else:
item = data["documents"][point_index]
item_type = "Document"
return self._create_detail_card(item, item_type)
@callback( @callback(
[ [
Output("processed-data", "data", allow_duplicate=True), Output("processed-data", "data", allow_duplicate=True),
Output("processed-prompts", "data", allow_duplicate=True), Output("processed-prompts", "data", allow_duplicate=True),
Output("point-details", "children", allow_duplicate=True),
], ],
Input("reset-button", "n_clicks"), Input("reset-button", "n_clicks"),
prevent_initial_call=True, prevent_initial_call=True,
) )
def reset_data(n_clicks): def reset_data(n_clicks):
if n_clicks is None or n_clicks == 0: if n_clicks is None or n_clicks == 0:
return dash.no_update, dash.no_update return dash.no_update, dash.no_update, dash.no_update
return None, None return None, None, "Click on a point to see details"
@staticmethod
def _create_detail_card(item, item_type):
return dbc.Card(
[
dbc.CardBody(
[
html.H5(f"{item_type}: {item['id']}", className="card-title"),
html.P(f"Text: {item['text']}", className="card-text"),
html.P(
f"Category: {item.get('category', 'Unknown')}",
className="card-text",
),
html.P(
f"Subcategory: {item.get('subcategory', 'Unknown')}",
className="card-text",
),
html.P(
f"Tags: {', '.join(item.get('tags', [])) if item.get('tags') else 'None'}",
className="card-text",
),
html.P(f"Type: {item_type}", className="card-text text-muted"),
]
)
]
)

View File

@@ -1,90 +0,0 @@
from dash import html, dcc
import dash_bootstrap_components as dbc
class AboutComponent:
def _get_about_content(self):
return """
# 🔍 Interactive Embedding Vector Visualization
EmbeddingBuddy is a web application for interactive exploration and
visualization of embedding vectors through dimensionality reduction techniques
(PCA, t-SNE, UMAP).
You have two ways to get started:
1. Generate embeddings directly in the browser if it supports WebGPU.
2. Upload your NDJSON file containing embedding vectors and metadata.
## Generating Embeddings in Browser
1. Expand the "Generate Embeddings" section.
2. Input your text data (one entry per line).
1. Optionally you can use the built in sample data by clicking "Load Sample Data" button.
3. Click "Generate Embeddings" to create vectors using a pre-trained model.
## NDJSON File Format
```json
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, ...], "text": "Sample text content", "category": "news", "subcategory": "politics", "tags": ["election", "politics"]}
{"id": "doc_002", "embedding": [0.2, -0.1, 0.9, ...], "text": "Another example", "category": "review", "subcategory": "product", "tags": ["tech", "gadget"]}
```
## ✨ Features
- Drag-and-drop NDJSON file upload
- Multiple dimensionality reduction algorithms
- 2D/3D interactive plots with Plotly
- Color coding by categories, subcategories, or tags
- In-browser embedding generation
- OpenSearch integration for data loading
## 🔧 Supported Algorithms
- **PCA** (Principal Component Analysis)
- **t-SNE** (t-Distributed Stochastic Neighbor Embedding)
- **UMAP** (Uniform Manifold Approximation and Projection)
---
📂 [View on GitHub](https://github.com/godber/EmbeddingBuddy)
*Built with: Python, Dash, Plotly, scikit-learn, OpenTSNE, UMAP*
""".strip()
def create_about_modal(self):
return dbc.Modal(
[
dbc.ModalHeader(
dbc.ModalTitle("Welcome to EmbeddingBuddy"),
close_button=True,
),
dbc.ModalBody(
[dcc.Markdown(self._get_about_content(), className="mb-0")]
),
dbc.ModalFooter(
[
dbc.Button(
"Close",
id="about-modal-close",
color="secondary",
n_clicks=0,
)
]
),
],
id="about-modal",
is_open=True,
size="lg",
)
def create_about_button(self):
return dbc.Button(
[html.I(className="fas fa-info-circle me-2"), "About"],
id="about-button",
color="outline-info",
size="sm",
n_clicks=0,
className="ms-2",
)

View File

@@ -1,27 +1,26 @@
from dash import dcc, html from dash import dcc, html
import dash_bootstrap_components as dbc import dash_bootstrap_components as dbc
from .upload import UploadComponent from .upload import UploadComponent
from embeddingbuddy.config.settings import AppSettings from .textinput import TextInputComponent
class DataSourceComponent: class DataSourceComponent:
def __init__(self): def __init__(self):
self.upload_component = UploadComponent() self.upload_component = UploadComponent()
self.text_input_component = TextInputComponent()
def create_tabbed_interface(self): def create_tabbed_interface(self):
"""Create tabbed interface for different data sources.""" """Create tabbed interface for different data sources."""
tabs = [dbc.Tab(label="File Upload", tab_id="file-tab")]
# Only add OpenSearch tab if enabled
if AppSettings.OPENSEARCH_ENABLED:
tabs.append(dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"))
return dbc.Card( return dbc.Card(
[ [
dbc.CardHeader( dbc.CardHeader(
[ [
dbc.Tabs( dbc.Tabs(
tabs, [
dbc.Tab(label="File Upload", tab_id="file-tab"),
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
dbc.Tab(label="Text Input", tab_id="text-input-tab"),
],
id="data-source-tabs", id="data-source-tabs",
active_tab="file-tab", active_tab="file-tab",
) )
@@ -212,6 +211,10 @@ class DataSourceComponent:
] ]
) )
def create_text_input_tab(self):
    """Return the text input tab content used for browser-based embedding generation.

    The content is a single ``html.Div`` wrapping the interface produced by
    ``self.text_input_component.create_text_input_interface()``.
    """
    text_input_ui = self.text_input_component.create_text_input_interface()
    return html.Div([text_input_ui])
def _create_opensearch_section(self, section_type): def _create_opensearch_section(self, section_type):
"""Create a complete OpenSearch section for either 'data' or 'prompts'.""" """Create a complete OpenSearch section for either 'data' or 'prompts'."""
section_id = section_type # 'data' or 'prompts' section_id = section_type # 'data' or 'prompts'

View File

@@ -2,27 +2,31 @@ from dash import dcc, html
import dash_bootstrap_components as dbc import dash_bootstrap_components as dbc
from .upload import UploadComponent from .upload import UploadComponent
from .datasource import DataSourceComponent from .datasource import DataSourceComponent
from .textinput import TextInputComponent
from embeddingbuddy.config.settings import AppSettings
class SidebarComponent: class SidebarComponent:
def __init__(self): def __init__(self):
self.upload_component = UploadComponent() self.upload_component = UploadComponent()
self.datasource_component = DataSourceComponent() self.datasource_component = DataSourceComponent()
self.textinput_component = TextInputComponent()
def create_layout(self): def create_layout(self):
return dbc.Col( return dbc.Col(
[ [
dbc.Accordion( html.H5("Data Sources", className="mb-3"),
[ self.datasource_component.create_error_alert(),
self._create_data_sources_item(), self.datasource_component.create_success_alert(),
self._create_generate_embeddings_item(), self.datasource_component.create_tabbed_interface(),
self._create_visualization_controls_item(), html.H5("Visualization Controls", className="mb-3 mt-4"),
], ]
always_open=True, + self._create_method_dropdown()
) + self._create_color_dropdown()
+ self._create_dimension_toggle()
+ self._create_prompts_toggle()
+ [
html.H5("Point Details", className="mb-3"),
html.Div(
id="point-details", children="Click on a point to see details"
),
], ],
width=3, width=3,
style={"padding-right": "20px"}, style={"padding-right": "20px"},
@@ -82,67 +86,3 @@ class SidebarComponent:
style={"margin-bottom": "20px"}, style={"margin-bottom": "20px"},
), ),
] ]
def _create_generate_embeddings_item(self):
    """Build the "Generate Embeddings" accordion item.

    The item body is the text-input interface from
    ``self.textinput_component``; the title is the section label followed
    by an info icon that carries a hover ``title`` describing the section.

    Returns:
        dbc.AccordionItem: item with id ``generate-embeddings-accordion``.
    """
    content = [self.textinput_component.create_text_input_interface()]

    info_icon = html.I(
        className="fas fa-info-circle text-muted",
        style={"cursor": "pointer"},
        id="generate-embeddings-info-icon",
        title="Create new embeddings from text input using various in-browser models",
    )
    heading = html.Span(["Generate Embeddings ", info_icon])

    return dbc.AccordionItem(
        content,
        title=heading,
        item_id="generate-embeddings-accordion",
    )
def _create_data_sources_item(self):
    """Build the "Load Embeddings" accordion item.

    The item body holds the data-source error alert, success alert and
    tabbed interface, in that order. The info icon's hover text mentions
    OpenSearch only when ``AppSettings.OPENSEARCH_ENABLED`` is truthy.

    Returns:
        dbc.AccordionItem: item with id ``data-sources-accordion``.
    """
    # Only advertise OpenSearch in the hover text when it is enabled.
    opensearch_hint = (
        " or read from OpenSearch" if AppSettings.OPENSEARCH_ENABLED else ""
    )
    hover_text = "Load existing embeddings: upload files" + opensearch_hint

    source = self.datasource_component
    content = [
        source.create_error_alert(),
        source.create_success_alert(),
        source.create_tabbed_interface(),
    ]

    info_icon = html.I(
        className="fas fa-info-circle text-muted",
        style={"cursor": "pointer"},
        id="load-embeddings-info-icon",
        title=hover_text,
    )
    heading = html.Span(["Load Embeddings ", info_icon])

    return dbc.AccordionItem(
        content,
        title=heading,
        item_id="data-sources-accordion",
    )
def _create_visualization_controls_item(self):
    """Build the "Visualization Controls" accordion item.

    The item body is the concatenation of the components returned by the
    method dropdown, color dropdown, dimension toggle and prompts toggle
    helpers, in that order.

    Returns:
        dbc.AccordionItem: item with id ``visualization-controls-accordion``.
    """
    controls = self._create_method_dropdown()
    controls = controls + self._create_color_dropdown()
    controls = controls + self._create_dimension_toggle()
    controls = controls + self._create_prompts_toggle()

    info_icon = html.I(
        className="fas fa-info-circle text-muted",
        style={"cursor": "pointer"},
        id="visualization-controls-info-icon",
        title="Configure plot settings: select dimensionality reduction method, colors, and display options",
    )
    heading = html.Span(["Visualization Controls ", info_icon])

    return dbc.AccordionItem(
        controls,
        title=heading,
        item_id="visualization-controls-accordion",
    )

View File

@@ -16,20 +16,23 @@ class TextInputComponent:
"""Create the complete text input interface with model selection and processing options.""" """Create the complete text input interface with model selection and processing options."""
return html.Div( return html.Div(
[ [
# Model selection section
self._create_model_selection(),
html.Hr(),
# Text input section # Text input section
self._create_text_input_area(), self._create_text_input_area(),
# Text action buttons # Text action buttons
self._create_text_action_buttons(), self._create_text_action_buttons(),
html.Hr(), html.Hr(),
# Model selection section
self._create_model_selection(),
html.Hr(),
# Processing options # Processing options
self._create_processing_options(), self._create_processing_options(),
html.Hr(), html.Hr(),
# Generation controls # Generation controls
self._create_generation_controls(), self._create_generation_controls(),
html.Hr(), html.Hr(),
# Progress indicators
self._create_progress_indicators(),
html.Hr(),
# Status and results # Status and results
self._create_status_section(), self._create_status_section(),
# Hidden components for data flow # Hidden components for data flow
@@ -294,10 +297,65 @@ class TextInputComponent:
] ]
) )
def _create_progress_indicators(self):
    """Create progress bars for model loading and embedding generation.

    Returns:
        html.Div: container with the model-loading section, a line break,
        and the embedding-generation section. Both sections are created
        with ``display: none``.
    """

    def hidden_progress_section(heading, bar_id, status_id, idle_text, section_id):
        # One section = heading + striped/animated bar at 0 + muted status line.
        return html.Div(
            [
                html.H6(heading, className="mb-2"),
                dbc.Progress(
                    id=bar_id,
                    value=0,
                    striped=True,
                    animated=True,
                    className="mb-2",
                ),
                html.Small(
                    id=status_id,
                    children=idle_text,
                    className="text-muted",
                ),
            ],
            id=section_id,
            style={"display": "none"},
        )

    model_section = hidden_progress_section(
        "Model Loading Progress",
        "model-loading-progress",
        "model-loading-status",
        "No model loading in progress",
        "model-loading-section",
    )
    spacer = html.Br()
    embedding_section = hidden_progress_section(
        "Embedding Generation Progress",
        "embedding-progress",
        "embedding-status",
        "No embedding generation in progress",
        "embedding-progress-section",
    )

    return html.Div([model_section, spacer, embedding_section])
def _create_status_section(self): def _create_status_section(self):
"""Create status alerts and results preview.""" """Create status alerts and results preview."""
return html.Div( return html.Div(
[ [
# Immediate status (from client-side)
dbc.Alert(
id="text-input-status-immediate",
children="Ready to generate embeddings",
color="light",
className="mb-3",
),
# Server-side status # Server-side status
dbc.Alert( dbc.Alert(
id="text-input-status", id="text-input-status",

View File

@@ -5,75 +5,39 @@ import dash_bootstrap_components as dbc
class UploadComponent: class UploadComponent:
@staticmethod @staticmethod
def create_data_upload(): def create_data_upload():
return html.Div( return dcc.Upload(
[ id="upload-data",
dcc.Upload( children=html.Div(["Drag and Drop or ", html.A("Select Files")]),
id="upload-data", style={
children=html.Div( "width": "100%",
[ "height": "60px",
"Upload Data ", "lineHeight": "60px",
html.I( "borderWidth": "1px",
className="fas fa-info-circle", "borderStyle": "dashed",
style={"color": "#6c757d", "fontSize": "14px"}, "borderRadius": "5px",
id="data-upload-info", "textAlign": "center",
), "margin-bottom": "20px",
] },
), multiple=False,
style={
"width": "100%",
"height": "60px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "5px",
"textAlign": "center",
"margin-bottom": "20px",
},
multiple=False,
),
dbc.Tooltip(
"Click here or drag and drop NDJSON files containing document embeddings",
target="data-upload-info",
placement="top",
),
]
) )
@staticmethod @staticmethod
def create_prompts_upload(): def create_prompts_upload():
return html.Div( return dcc.Upload(
[ id="upload-prompts",
dcc.Upload( children=html.Div(["Drag and Drop Prompts or ", html.A("Select Files")]),
id="upload-prompts", style={
children=html.Div( "width": "100%",
[ "height": "60px",
"Upload Prompts ", "lineHeight": "60px",
html.I( "borderWidth": "1px",
className="fas fa-info-circle", "borderStyle": "dashed",
style={"color": "#6c757d", "fontSize": "14px"}, "borderRadius": "5px",
id="prompts-upload-info", "textAlign": "center",
), "margin-bottom": "20px",
] "borderColor": "#28a745",
), },
style={ multiple=False,
"width": "100%",
"height": "60px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "5px",
"textAlign": "center",
"margin-bottom": "20px",
"borderColor": "#28a745",
},
multiple=False,
),
dbc.Tooltip(
"Click here or drag and drop NDJSON files containing prompt embeddings",
target="prompts-upload-info",
placement="top",
),
]
) )
@staticmethod @staticmethod

View File

@@ -1,19 +1,16 @@
from dash import dcc, html from dash import dcc, html
import dash_bootstrap_components as dbc import dash_bootstrap_components as dbc
from .components.sidebar import SidebarComponent from .components.sidebar import SidebarComponent
from .components.about import AboutComponent
class AppLayout: class AppLayout:
def __init__(self): def __init__(self):
self.sidebar = SidebarComponent() self.sidebar = SidebarComponent()
self.about = AboutComponent()
def create_layout(self): def create_layout(self):
return dbc.Container( return dbc.Container(
[self._create_header(), self._create_main_content()] [self._create_header(), self._create_main_content()]
+ self._create_stores() + self._create_stores(),
+ [self.about.create_about_modal()],
fluid=True, fluid=True,
) )
@@ -22,19 +19,7 @@ class AppLayout:
[ [
dbc.Col( dbc.Col(
[ [
html.Div( html.H1("EmbeddingBuddy", className="text-center mb-4"),
[
html.H1(
"EmbeddingBuddy",
className="text-center mb-4 d-inline",
),
html.Div(
[self.about.create_about_button()],
className="float-end",
),
],
className="d-flex justify-content-between align-items-center",
),
# Load Transformers.js from CDN # Load Transformers.js from CDN
html.Script( html.Script(
""" """

View File

@@ -38,9 +38,9 @@ class PlotFactory:
if dimensions == "3d": if dimensions == "3d":
fig = px.scatter_3d( fig = px.scatter_3d(
df, df,
x="x", x="dim_1",
y="y", y="dim_2",
z="z", z="dim_3",
color=color_values, color=color_values,
hover_data=hover_fields, hover_data=hover_fields,
title=f"3D Embedding Visualization - {method} (colored by {color_by})", title=f"3D Embedding Visualization - {method} (colored by {color_by})",
@@ -49,8 +49,8 @@ class PlotFactory:
else: else:
fig = px.scatter( fig = px.scatter(
df, df,
x="x", x="dim_1",
y="y", y="dim_2",
color=color_values, color=color_values,
hover_data=hover_fields, hover_data=hover_fields,
title=f"2D Embedding Visualization - {method} (colored by {color_by})", title=f"2D Embedding Visualization - {method} (colored by {color_by})",
@@ -77,17 +77,17 @@ class PlotFactory:
if dimensions == "3d": if dimensions == "3d":
doc_fig = px.scatter_3d( doc_fig = px.scatter_3d(
doc_df, doc_df,
x="x", x="dim_1",
y="y", y="dim_2",
z="z", z="dim_3",
color=doc_color_values, color=doc_color_values,
hover_data=hover_fields, hover_data=hover_fields,
) )
else: else:
doc_fig = px.scatter( doc_fig = px.scatter(
doc_df, doc_df,
x="x", x="dim_1",
y="y", y="dim_2",
color=doc_color_values, color=doc_color_values,
hover_data=hover_fields, hover_data=hover_fields,
) )
@@ -114,17 +114,17 @@ class PlotFactory:
if dimensions == "3d": if dimensions == "3d":
prompt_fig = px.scatter_3d( prompt_fig = px.scatter_3d(
prompt_df, prompt_df,
x="x", x="dim_1",
y="y", y="dim_2",
z="z", z="dim_3",
color=prompt_color_values, color=prompt_color_values,
hover_data=hover_fields, hover_data=hover_fields,
) )
else: else:
prompt_fig = px.scatter( prompt_fig = px.scatter(
prompt_df, prompt_df,
x="x", x="dim_1",
y="y", y="dim_2",
color=prompt_color_values, color=prompt_color_values,
hover_data=hover_fields, hover_data=hover_fields,
) )
@@ -168,11 +168,11 @@ class PlotFactory:
"category": doc.category, "category": doc.category,
"subcategory": doc.subcategory, "subcategory": doc.subcategory,
"tags_str": ", ".join(doc.tags) if doc.tags else "None", "tags_str": ", ".join(doc.tags) if doc.tags else "None",
"x": coordinates[i, 0], "dim_1": coordinates[i, 0],
"y": coordinates[i, 1], "dim_2": coordinates[i, 1],
} }
if dimensions == "3d": if dimensions == "3d":
row["z"] = coordinates[i, 2] row["dim_3"] = coordinates[i, 2]
df_data.append(row) df_data.append(row)
return pd.DataFrame(df_data) return pd.DataFrame(df_data)

2
uv.lock generated
View File

@@ -412,7 +412,7 @@ wheels = [
[[package]] [[package]]
name = "embeddingbuddy" name = "embeddingbuddy"
version = "0.5.1" version = "0.3.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "dash" }, { name = "dash" },