6 Commits

Author SHA1 Message Date
2f458884a2 Add configurable OpenSearch feature and UI improvements
All checks were successful
Security Scan / security (pull_request) Successful in 49s
Security Scan / dependency-check (pull_request) Successful in 51s
Test Suite / lint (pull_request) Successful in 41s
Test Suite / test (3.11) (pull_request) Successful in 1m43s
Test Suite / build (pull_request) Successful in 37s
- Add MIT license with Austin Godber copyright
  - Implement optional OpenSearch feature toggle via EMBEDDINGBUDDY_OPENSEARCH_ENABLED
  - Disable OpenSearch by default in production for security
  - Add development environment flag to test OpenSearch disable state
  - Update about modal to open by default with improved content
  - Reorganize text input component: move model selection below text input
  - Conditionally show/hide OpenSearch tab and callbacks based on configuration
  - Update tooltips to reflect OpenSearch availability status
2025-09-17 19:01:51 -07:00
89dcafd311 ruff
All checks were successful
Security Scan / dependency-check (pull_request) Successful in 40s
Security Scan / security (pull_request) Successful in 46s
Test Suite / lint (pull_request) Successful in 40s
Test Suite / test (3.11) (pull_request) Successful in 1m40s
Test Suite / build (pull_request) Successful in 44s
2025-09-16 08:12:36 -07:00
ea01ce596d update version to 0.5.1
Some checks failed
Security Scan / security (pull_request) Successful in 41s
Security Scan / dependency-check (pull_request) Successful in 43s
Test Suite / lint (pull_request) Failing after 39s
Test Suite / test (3.11) (pull_request) Successful in 1m33s
Test Suite / build (pull_request) Has been skipped
2025-09-15 08:05:06 -07:00
8861b32ae5 add about modal 2025-09-15 08:03:39 -07:00
302453d313 improve dockerfile
All checks were successful
Security Scan / security (push) Successful in 46s
Security Scan / dependency-check (push) Successful in 52s
Test Suite / lint (push) Successful in 37s
Test Suite / test (3.11) (push) Successful in 1m34s
Test Suite / build (push) Successful in 39s
2025-09-14 18:32:56 -07:00
e022b26399 Merge pull request 'v0.5.0 - rework the sidebar' (#6) from restructure-sidebar into main
Some checks failed
Test Suite / lint (push) Successful in 30s
Test Suite / test (3.11) (push) Successful in 1m35s
Release / test (push) Successful in 1m6s
Test Suite / build (push) Successful in 48s
Release / build-and-release (push) Failing after 23s
Security Scan / security (push) Successful in 37s
Security Scan / dependency-check (push) Successful in 39s
Reviewed-on: #6
2025-09-13 14:59:48 -07:00
15 changed files with 225 additions and 30 deletions

View File

@@ -2,6 +2,9 @@
# Stage 1: Builder
FROM python:3.11-slim as builder
# Create non-root user early in builder stage
RUN groupadd -r appuser && useradd -r -g appuser appuser
# Install system dependencies for building Python packages
RUN apt-get update && apt-get install -y \
build-essential \
@@ -25,6 +28,15 @@ COPY wsgi.py .
COPY run_prod.py .
COPY assets/ assets/
# Change ownership of source files before building (lighter I/O)
RUN chown -R appuser:appuser /app
# Create and set permissions for appuser home directory (needed for uv cache)
RUN mkdir -p /home/appuser && chown -R appuser:appuser /home/appuser
# Switch to non-root user before building
USER appuser
# Create virtual environment and install dependencies (including production extras)
RUN uv venv .venv
RUN uv sync --frozen --extra prod
@@ -32,23 +44,28 @@ RUN uv sync --frozen --extra prod
# Stage 2: Runtime
FROM python:3.11-slim as runtime
# Create non-root user in runtime stage
RUN groupadd -r appuser && useradd -r -g appuser appuser
# Install runtime dependencies for compiled packages
RUN apt-get update && apt-get install -y \
libgomp1 \
&& rm -rf /var/lib/apt/lists/*
# Set working directory
# Set working directory and change ownership (small directory)
WORKDIR /app
RUN chown appuser:appuser /app
# Copy virtual environment from builder stage
COPY --from=builder /app/.venv /app/.venv
# Copy files from builder with correct ownership
COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv
COPY --from=builder --chown=appuser:appuser /app/src /app/src
COPY --from=builder --chown=appuser:appuser /app/main.py /app/main.py
COPY --from=builder --chown=appuser:appuser /app/assets /app/assets
COPY --from=builder --chown=appuser:appuser /app/wsgi.py /app/wsgi.py
COPY --from=builder --chown=appuser:appuser /app/run_prod.py /app/run_prod.py
# Copy application files from builder stage
COPY --from=builder /app/src /app/src
COPY --from=builder /app/main.py /app/main.py
COPY --from=builder /app/assets /app/assets
COPY --from=builder /app/wsgi.py /app/wsgi.py
COPY --from=builder /app/run_prod.py /app/run_prod.py
# Switch to non-root user
USER appuser
# Make sure the virtual environment is in PATH
ENV PATH="/app/.venv/bin:$PATH"
@@ -65,11 +82,6 @@ ENV EMBEDDINGBUDDY_ENV=production
# Expose port
EXPOSE 8050
# Create non-root user
RUN groupadd -r appuser && useradd -r -g appuser appuser
RUN chown -R appuser:appuser /app
USER appuser
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5)" || exit 1

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Austin Godber - EmbeddingBuddy
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,6 +1,6 @@
[project]
name = "embeddingbuddy"
version = "0.5.0"
version = "0.6.0"
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
readme = "README.md"
requires-python = ">=3.11"

View File

@@ -11,10 +11,16 @@ def main():
# Force development settings
os.environ["EMBEDDINGBUDDY_ENV"] = "development"
os.environ["EMBEDDINGBUDDY_DEBUG"] = "true"
# Check for OpenSearch disable flag (optional for testing)
# Set EMBEDDINGBUDDY_OPENSEARCH_ENABLED=false to test without OpenSearch
opensearch_status = os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "true")
opensearch_enabled = opensearch_status.lower() == "true"
print("🚀 Starting EmbeddingBuddy in development mode...")
print("📁 Auto-reload enabled - changes will trigger restart")
print("🌐 Server will be available at http://127.0.0.1:8050")
print(f"🔍 OpenSearch: {'Enabled' if opensearch_enabled else 'Disabled'}")
print("⏹️ Press Ctrl+C to stop")
app = create_app()

View File

@@ -13,6 +13,9 @@ def main():
# Force production settings
os.environ["EMBEDDINGBUDDY_ENV"] = "production"
os.environ["EMBEDDINGBUDDY_DEBUG"] = "false"
# Disable OpenSearch by default in production (can be overridden by setting env var)
if "EMBEDDINGBUDDY_OPENSEARCH_ENABLED" not in os.environ:
os.environ["EMBEDDINGBUDDY_OPENSEARCH_ENABLED"] = "false"
print("🚀 Starting EmbeddingBuddy in production mode...")
print(f"⚙️ Workers: {AppSettings.GUNICORN_WORKERS}")

View File

@@ -16,11 +16,33 @@ def create_app():
app = dash.Dash(
__name__,
title="EmbeddingBuddy",
external_stylesheets=[
dbc.themes.BOOTSTRAP,
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css",
],
assets_folder=assets_path,
meta_tags=[
{
"name": "description",
"content": "Interactive embedding visualization tool for exploring high-dimensional vectors through dimensionality reduction techniques like PCA, t-SNE, and UMAP.",
},
{"name": "author", "content": "EmbeddingBuddy"},
{
"name": "keywords",
"content": "embeddings, visualization, dimensionality reduction, PCA, t-SNE, UMAP, machine learning, data science",
},
{"name": "viewport", "content": "width=device-width, initial-scale=1.0"},
{
"property": "og:title",
"content": "EmbeddingBuddy - Interactive Embedding Visualization",
},
{
"property": "og:description",
"content": "Explore and visualize embedding vectors through interactive 2D/3D plots with multiple dimensionality reduction techniques.",
},
{"property": "og:type", "content": "website"},
],
)
# Allow callbacks to components that are dynamically created in tabs

View File

@@ -85,6 +85,9 @@ class AppSettings:
GUNICORN_KEEPALIVE = int(os.getenv("GUNICORN_KEEPALIVE", "5"))
# OpenSearch Configuration
OPENSEARCH_ENABLED = (
os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "True").lower() == "true"
)
OPENSEARCH_DEFAULT_SIZE = 100
OPENSEARCH_SAMPLE_SIZE = 5
OPENSEARCH_CONNECTION_TIMEOUT = 30

View File

@@ -82,19 +82,23 @@ class DataProcessingCallbacks:
)
def render_tab_content(active_tab):
from ...ui.components.datasource import DataSourceComponent
from ...config.settings import AppSettings
datasource = DataSourceComponent()
if active_tab == "opensearch-tab":
if active_tab == "opensearch-tab" and AppSettings.OPENSEARCH_ENABLED:
return [datasource.create_opensearch_tab()]
elif active_tab == "text-input-tab":
return [datasource.create_text_input_tab()]
else:
return [datasource.create_file_upload_tab()]
# Register callbacks for both data and prompts sections
self._register_opensearch_callbacks("data", self.opensearch_client_data)
self._register_opensearch_callbacks("prompts", self.opensearch_client_prompts)
# Register callbacks for both data and prompts sections (only if OpenSearch is enabled)
if AppSettings.OPENSEARCH_ENABLED:
self._register_opensearch_callbacks("data", self.opensearch_client_data)
self._register_opensearch_callbacks(
"prompts", self.opensearch_client_prompts
)
# Register collapsible section callbacks
self._register_collapse_callbacks()

View File

@@ -7,6 +7,16 @@ class InteractionCallbacks:
self._register_callbacks()
def _register_callbacks(self):
@callback(
    Output("about-modal", "is_open"),
    [Input("about-button", "n_clicks"), Input("about-modal-close", "n_clicks")],
    prevent_initial_call=True,
)
def toggle_about_modal(about_clicks, close_clicks):
    """Open the about modal from the About button and close it from Close.

    Args:
        about_clicks: Cumulative click count of the "about-button" component.
        close_clicks: Cumulative click count of the "about-modal-close"
            component. Unused for the decision; kept because it is a
            registered callback input.

    Returns:
        True when the About button triggered the callback (modal opens),
        False otherwise (modal closes).
    """
    # Imported locally so this callback does not rely on the module's
    # top-level import list.
    from dash import ctx

    # n_clicks values are cumulative and never reset, so once the About
    # button has been clicked, testing `about_clicks` alone is always truthy
    # and the Close button could never close the modal again. Decide by
    # which component actually fired instead.
    if ctx.triggered_id == "about-button":
        return bool(about_clicks)
    return False
@callback(
[
Output("processed-data", "data", allow_duplicate=True),

View File

@@ -0,0 +1,90 @@
from dash import html, dcc
import dash_bootstrap_components as dbc
class AboutComponent:
    """Factory for the about modal dialog and the "About" button."""

    def _get_about_content(self):
        """Return the Markdown text shown in the about modal body.

        Leading and trailing whitespace of the literal is stripped.
        """
        markdown_text = """
# 🔍 Interactive Embedding Vector Visualization
EmbeddingBuddy is a web application for interactive exploration and
visualization of embedding vectors through dimensionality reduction techniques
(PCA, t-SNE, UMAP).
You have two ways to get started:
1. Generate embeddings directly in the browser if it supports WebGPU.
2. Upload your NDJSON file containing embedding vectors and metadata.
## Generating Embeddings in Browser
1. Expand the "Generate Embeddings" section.
2. Input your text data (one entry per line).
1. Optionally you can use the built in sample data by clicking "Load Sample Data" button.
3. Click "Generate Embeddings" to create vectors using a pre-trained model.
## NDJSON File Format
```json
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, ...], "text": "Sample text content", "category": "news", "subcategory": "politics", "tags": ["election", "politics"]}
{"id": "doc_002", "embedding": [0.2, -0.1, 0.9, ...], "text": "Another example", "category": "review", "subcategory": "product", "tags": ["tech", "gadget"]}
```
## ✨ Features
- Drag-and-drop NDJSON file upload
- Multiple dimensionality reduction algorithms
- 2D/3D interactive plots with Plotly
- Color coding by categories, subcategories, or tags
- In-browser embedding generation
- OpenSearch integration for data loading
## 🔧 Supported Algorithms
- **PCA** (Principal Component Analysis)
- **t-SNE** (t-Distributed Stochastic Neighbor Embedding)
- **UMAP** (Uniform Manifold Approximation and Projection)
---
📂 [View on GitHub](https://github.com/godber/EmbeddingBuddy)
*Built with: Python, Dash, Plotly, scikit-learn, OpenTSNE, UMAP*
"""
        return markdown_text.strip()

    def create_about_modal(self):
        """Build the about modal (id "about-modal"), initially open.

        The modal has a titled header with a close button, a Markdown body
        and a footer holding the "Close" button (id "about-modal-close").
        """
        header = dbc.ModalHeader(
            dbc.ModalTitle("Welcome to EmbeddingBuddy"),
            close_button=True,
        )
        body = dbc.ModalBody(
            [dcc.Markdown(self._get_about_content(), className="mb-0")]
        )
        footer_close = dbc.Button(
            "Close",
            id="about-modal-close",
            color="secondary",
            n_clicks=0,
        )
        footer = dbc.ModalFooter([footer_close])

        return dbc.Modal(
            [header, body, footer],
            id="about-modal",
            is_open=True,
            size="lg",
        )

    def create_about_button(self):
        """Build the small outlined "About" button (id "about-button")."""
        info_icon = html.I(className="fas fa-info-circle me-2")
        return dbc.Button(
            [info_icon, "About"],
            id="about-button",
            color="outline-info",
            size="sm",
            n_clicks=0,
            className="ms-2",
        )

View File

@@ -1,6 +1,7 @@
from dash import dcc, html
import dash_bootstrap_components as dbc
from .upload import UploadComponent
from embeddingbuddy.config.settings import AppSettings
class DataSourceComponent:
@@ -9,15 +10,18 @@ class DataSourceComponent:
def create_tabbed_interface(self):
"""Create tabbed interface for different data sources."""
tabs = [dbc.Tab(label="File Upload", tab_id="file-tab")]
# Only add OpenSearch tab if enabled
if AppSettings.OPENSEARCH_ENABLED:
tabs.append(dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"))
return dbc.Card(
[
dbc.CardHeader(
[
dbc.Tabs(
[
dbc.Tab(label="File Upload", tab_id="file-tab"),
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
],
tabs,
id="data-source-tabs",
active_tab="file-tab",
)

View File

@@ -3,6 +3,7 @@ import dash_bootstrap_components as dbc
from .upload import UploadComponent
from .datasource import DataSourceComponent
from .textinput import TextInputComponent
from embeddingbuddy.config.settings import AppSettings
class SidebarComponent:
@@ -102,6 +103,10 @@ class SidebarComponent:
)
def _create_data_sources_item(self):
tooltip_text = "Load existing embeddings: upload files"
if AppSettings.OPENSEARCH_ENABLED:
tooltip_text += " or read from OpenSearch"
return dbc.AccordionItem(
[
self.datasource_component.create_error_alert(),
@@ -115,7 +120,7 @@ class SidebarComponent:
className="fas fa-info-circle text-muted",
style={"cursor": "pointer"},
id="load-embeddings-info-icon",
title="Load existing embeddings: upload files or read from OpenSearch",
title=tooltip_text,
),
]
),

View File

@@ -16,14 +16,14 @@ class TextInputComponent:
"""Create the complete text input interface with model selection and processing options."""
return html.Div(
[
# Model selection section
self._create_model_selection(),
html.Hr(),
# Text input section
self._create_text_input_area(),
# Text action buttons
self._create_text_action_buttons(),
html.Hr(),
# Model selection section
self._create_model_selection(),
html.Hr(),
# Processing options
self._create_processing_options(),
html.Hr(),

View File

@@ -1,16 +1,19 @@
from dash import dcc, html
import dash_bootstrap_components as dbc
from .components.sidebar import SidebarComponent
from .components.about import AboutComponent
class AppLayout:
def __init__(self):
    """Instantiate the sidebar and about components used by the layout."""
    self.sidebar = SidebarComponent()
    # Supplies the about modal and the header's About button.
    self.about = AboutComponent()
def create_layout(self):
return dbc.Container(
[self._create_header(), self._create_main_content()]
+ self._create_stores(),
+ self._create_stores()
+ [self.about.create_about_modal()],
fluid=True,
)
@@ -19,7 +22,19 @@ class AppLayout:
[
dbc.Col(
[
html.H1("EmbeddingBuddy", className="text-center mb-4"),
html.Div(
[
html.H1(
"EmbeddingBuddy",
className="text-center mb-4 d-inline",
),
html.Div(
[self.about.create_about_button()],
className="float-end",
),
],
className="d-flex justify-content-between align-items-center",
),
# Load Transformers.js from CDN
html.Script(
"""

2
uv.lock generated
View File

@@ -412,7 +412,7 @@ wheels = [
[[package]]
name = "embeddingbuddy"
version = "0.5.0"
version = "0.5.1"
source = { editable = "." }
dependencies = [
{ name = "dash" },