Compare commits
6 Commits
restructur
...
add-about
Author | SHA1 | Date | |
---|---|---|---|
2f458884a2 | |||
89dcafd311 | |||
ea01ce596d | |||
8861b32ae5 | |||
302453d313 | |||
e022b26399 |
40
Dockerfile
40
Dockerfile
@@ -2,6 +2,9 @@
|
||||
# Stage 1: Builder
|
||||
FROM python:3.11-slim as builder
|
||||
|
||||
# Create non-root user early in builder stage
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
|
||||
# Install system dependencies for building Python packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
@@ -25,6 +28,15 @@ COPY wsgi.py .
|
||||
COPY run_prod.py .
|
||||
COPY assets/ assets/
|
||||
|
||||
# Change ownership of source files before building (lighter I/O)
|
||||
RUN chown -R appuser:appuser /app
|
||||
|
||||
# Create and set permissions for appuser home directory (needed for uv cache)
|
||||
RUN mkdir -p /home/appuser && chown -R appuser:appuser /home/appuser
|
||||
|
||||
# Switch to non-root user before building
|
||||
USER appuser
|
||||
|
||||
# Create virtual environment and install dependencies (including production extras)
|
||||
RUN uv venv .venv
|
||||
RUN uv sync --frozen --extra prod
|
||||
@@ -32,23 +44,28 @@ RUN uv sync --frozen --extra prod
|
||||
# Stage 2: Runtime
|
||||
FROM python:3.11-slim as runtime
|
||||
|
||||
# Create non-root user in runtime stage
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
|
||||
# Install runtime dependencies for compiled packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set working directory
|
||||
# Set working directory and change ownership (small directory)
|
||||
WORKDIR /app
|
||||
RUN chown appuser:appuser /app
|
||||
|
||||
# Copy virtual environment from builder stage
|
||||
COPY --from=builder /app/.venv /app/.venv
|
||||
# Copy files from builder with correct ownership
|
||||
COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv
|
||||
COPY --from=builder --chown=appuser:appuser /app/src /app/src
|
||||
COPY --from=builder --chown=appuser:appuser /app/main.py /app/main.py
|
||||
COPY --from=builder --chown=appuser:appuser /app/assets /app/assets
|
||||
COPY --from=builder --chown=appuser:appuser /app/wsgi.py /app/wsgi.py
|
||||
COPY --from=builder --chown=appuser:appuser /app/run_prod.py /app/run_prod.py
|
||||
|
||||
# Copy application files from builder stage
|
||||
COPY --from=builder /app/src /app/src
|
||||
COPY --from=builder /app/main.py /app/main.py
|
||||
COPY --from=builder /app/assets /app/assets
|
||||
COPY --from=builder /app/wsgi.py /app/wsgi.py
|
||||
COPY --from=builder /app/run_prod.py /app/run_prod.py
|
||||
# Switch to non-root user
|
||||
USER appuser
|
||||
|
||||
# Make sure the virtual environment is in PATH
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
@@ -65,11 +82,6 @@ ENV EMBEDDINGBUDDY_ENV=production
|
||||
# Expose port
|
||||
EXPOSE 8050
|
||||
|
||||
# Create non-root user
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
RUN chown -R appuser:appuser /app
|
||||
USER appuser
|
||||
|
||||
# Health check
|
||||
# raise_for_status() makes HTTP 4xx/5xx responses count as unhealthy; without
# it only connection errors and timeouts would fail the probe.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5).raise_for_status()" || exit 1
|
||||
|
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Austin Godber - EmbeddingBuddy
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "embeddingbuddy"
|
||||
version = "0.5.0"
|
||||
version = "0.6.0"
|
||||
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
@@ -12,9 +12,15 @@ def main():
|
||||
os.environ["EMBEDDINGBUDDY_ENV"] = "development"
|
||||
os.environ["EMBEDDINGBUDDY_DEBUG"] = "true"
|
||||
|
||||
# Check for OpenSearch disable flag (optional for testing)
|
||||
# Set EMBEDDINGBUDDY_OPENSEARCH_ENABLED=false to test without OpenSearch
|
||||
opensearch_status = os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "true")
|
||||
opensearch_enabled = opensearch_status.lower() == "true"
|
||||
|
||||
print("🚀 Starting EmbeddingBuddy in development mode...")
|
||||
print("📁 Auto-reload enabled - changes will trigger restart")
|
||||
print("🌐 Server will be available at http://127.0.0.1:8050")
|
||||
print(f"🔍 OpenSearch: {'Enabled' if opensearch_enabled else 'Disabled'}")
|
||||
print("⏹️ Press Ctrl+C to stop")
|
||||
|
||||
app = create_app()
|
||||
|
@@ -13,6 +13,9 @@ def main():
|
||||
# Force production settings
|
||||
os.environ["EMBEDDINGBUDDY_ENV"] = "production"
|
||||
os.environ["EMBEDDINGBUDDY_DEBUG"] = "false"
|
||||
# Disable OpenSearch by default in production (can be overridden by setting env var)
|
||||
if "EMBEDDINGBUDDY_OPENSEARCH_ENABLED" not in os.environ:
|
||||
os.environ["EMBEDDINGBUDDY_OPENSEARCH_ENABLED"] = "false"
|
||||
|
||||
print("🚀 Starting EmbeddingBuddy in production mode...")
|
||||
print(f"⚙️ Workers: {AppSettings.GUNICORN_WORKERS}")
|
||||
|
@@ -16,11 +16,33 @@ def create_app():
|
||||
|
||||
app = dash.Dash(
|
||||
__name__,
|
||||
title="EmbeddingBuddy",
|
||||
external_stylesheets=[
|
||||
dbc.themes.BOOTSTRAP,
|
||||
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css",
|
||||
],
|
||||
assets_folder=assets_path,
|
||||
meta_tags=[
|
||||
{
|
||||
"name": "description",
|
||||
"content": "Interactive embedding visualization tool for exploring high-dimensional vectors through dimensionality reduction techniques like PCA, t-SNE, and UMAP.",
|
||||
},
|
||||
{"name": "author", "content": "EmbeddingBuddy"},
|
||||
{
|
||||
"name": "keywords",
|
||||
"content": "embeddings, visualization, dimensionality reduction, PCA, t-SNE, UMAP, machine learning, data science",
|
||||
},
|
||||
{"name": "viewport", "content": "width=device-width, initial-scale=1.0"},
|
||||
{
|
||||
"property": "og:title",
|
||||
"content": "EmbeddingBuddy - Interactive Embedding Visualization",
|
||||
},
|
||||
{
|
||||
"property": "og:description",
|
||||
"content": "Explore and visualize embedding vectors through interactive 2D/3D plots with multiple dimensionality reduction techniques.",
|
||||
},
|
||||
{"property": "og:type", "content": "website"},
|
||||
],
|
||||
)
|
||||
|
||||
# Allow callbacks to components that are dynamically created in tabs
|
||||
|
@@ -85,6 +85,9 @@ class AppSettings:
|
||||
GUNICORN_KEEPALIVE = int(os.getenv("GUNICORN_KEEPALIVE", "5"))
|
||||
|
||||
# OpenSearch Configuration
|
||||
OPENSEARCH_ENABLED = (
|
||||
os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "True").lower() == "true"
|
||||
)
|
||||
OPENSEARCH_DEFAULT_SIZE = 100
|
||||
OPENSEARCH_SAMPLE_SIZE = 5
|
||||
OPENSEARCH_CONNECTION_TIMEOUT = 30
|
||||
|
@@ -82,19 +82,23 @@ class DataProcessingCallbacks:
|
||||
)
|
||||
def render_tab_content(active_tab):
|
||||
from ...ui.components.datasource import DataSourceComponent
|
||||
from ...config.settings import AppSettings
|
||||
|
||||
datasource = DataSourceComponent()
|
||||
|
||||
if active_tab == "opensearch-tab":
|
||||
if active_tab == "opensearch-tab" and AppSettings.OPENSEARCH_ENABLED:
|
||||
return [datasource.create_opensearch_tab()]
|
||||
elif active_tab == "text-input-tab":
|
||||
return [datasource.create_text_input_tab()]
|
||||
else:
|
||||
return [datasource.create_file_upload_tab()]
|
||||
|
||||
# Register callbacks for both data and prompts sections
|
||||
self._register_opensearch_callbacks("data", self.opensearch_client_data)
|
||||
self._register_opensearch_callbacks("prompts", self.opensearch_client_prompts)
|
||||
# Register callbacks for both data and prompts sections (only if OpenSearch is enabled)
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
self._register_opensearch_callbacks("data", self.opensearch_client_data)
|
||||
self._register_opensearch_callbacks(
|
||||
"prompts", self.opensearch_client_prompts
|
||||
)
|
||||
|
||||
# Register collapsible section callbacks
|
||||
self._register_collapse_callbacks()
|
||||
|
@@ -7,6 +7,16 @@ class InteractionCallbacks:
|
||||
self._register_callbacks()
|
||||
|
||||
def _register_callbacks(self):
|
||||
@callback(
    Output("about-modal", "is_open"),
    [Input("about-button", "n_clicks"), Input("about-modal-close", "n_clicks")],
    prevent_initial_call=True,
)
def toggle_about_modal(about_clicks, close_clicks):
    """Open the about modal from the About button, close it from the Close button.

    Args:
        about_clicks: Click counter of the "about-button" component.
        close_clicks: Click counter of the "about-modal-close" component.

    Returns:
        True when the About button fired this callback, False otherwise.
    """
    # Local import keeps this block self-contained; dash is already a
    # dependency of this module (it provides `callback`, `Input`, `Output`).
    from dash import callback_context

    # The click counters are cumulative, so their truthiness cannot tell us
    # which button was just pressed: once About has been clicked a single
    # time, `about_clicks` stays non-zero and Close would re-open the modal.
    # Decide from the input that actually triggered this invocation instead.
    fired_ids = {
        entry["prop_id"].rsplit(".", 1)[0] for entry in callback_context.triggered
    }
    return "about-button" in fired_ids
|
||||
|
||||
@callback(
|
||||
[
|
||||
Output("processed-data", "data", allow_duplicate=True),
|
||||
|
90
src/embeddingbuddy/ui/components/about.py
Normal file
90
src/embeddingbuddy/ui/components/about.py
Normal file
@@ -0,0 +1,90 @@
|
||||
from dash import html, dcc
|
||||
import dash_bootstrap_components as dbc
|
||||
|
||||
|
||||
class AboutComponent:
    """Builds the "About" welcome modal and the header button associated with it."""

    def _get_about_content(self):
        """Return the Markdown source shown in the modal body, outer whitespace removed."""
        # NOTE(review): the line "1. Optionally you can use ..." looks like a
        # sub-item of step 2; confirm whether it should be indented so that
        # Markdown renders it as a nested list.
        markdown_source = """
# 🔍 Interactive Embedding Vector Visualization

EmbeddingBuddy is a web application for interactive exploration and
visualization of embedding vectors through dimensionality reduction techniques
(PCA, t-SNE, UMAP).

You have two ways to get started:

1. Generate embeddings directly in the browser if it supports WebGPU.
2. Upload your NDJSON file containing embedding vectors and metadata.

## Generating Embeddings in Browser

1. Expand the "Generate Embeddings" section.
2. Input your text data (one entry per line).
1. Optionally you can use the built in sample data by clicking "Load Sample Data" button.
3. Click "Generate Embeddings" to create vectors using a pre-trained model.

## NDJSON File Format

```json
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, ...], "text": "Sample text content", "category": "news", "subcategory": "politics", "tags": ["election", "politics"]}
{"id": "doc_002", "embedding": [0.2, -0.1, 0.9, ...], "text": "Another example", "category": "review", "subcategory": "product", "tags": ["tech", "gadget"]}
```


## ✨ Features

- Drag-and-drop NDJSON file upload
- Multiple dimensionality reduction algorithms
- 2D/3D interactive plots with Plotly
- Color coding by categories, subcategories, or tags
- In-browser embedding generation
- OpenSearch integration for data loading

## 🔧 Supported Algorithms

- **PCA** (Principal Component Analysis)
- **t-SNE** (t-Distributed Stochastic Neighbor Embedding)
- **UMAP** (Uniform Manifold Approximation and Projection)

---

📂 [View on GitHub](https://github.com/godber/EmbeddingBuddy)

*Built with: Python, Dash, Plotly, scikit-learn, OpenTSNE, UMAP*
"""
        return markdown_source.strip()

    def create_about_modal(self):
        """Assemble the large modal (id "about-modal"), created in the open state."""
        # Title bar with the built-in close button.
        header = dbc.ModalHeader(
            dbc.ModalTitle("Welcome to EmbeddingBuddy"),
            close_button=True,
        )

        # Body: the about text rendered as Markdown.
        about_markdown = dcc.Markdown(self._get_about_content(), className="mb-0")
        body = dbc.ModalBody([about_markdown])

        # Footer: an explicit Close button with its own id.
        close_button = dbc.Button(
            "Close",
            id="about-modal-close",
            color="secondary",
            n_clicks=0,
        )
        footer = dbc.ModalFooter([close_button])

        return dbc.Modal(
            [header, body, footer],
            id="about-modal",
            is_open=True,
            size="lg",
        )

    def create_about_button(self):
        """Assemble the small "About" button (id "about-button") with an info icon."""
        info_icon = html.I(className="fas fa-info-circle me-2")
        button_children = [info_icon, "About"]
        return dbc.Button(
            button_children,
            id="about-button",
            color="outline-info",
            size="sm",
            n_clicks=0,
            className="ms-2",
        )
|
@@ -1,6 +1,7 @@
|
||||
from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from embeddingbuddy.config.settings import AppSettings
|
||||
|
||||
|
||||
class DataSourceComponent:
|
||||
@@ -9,15 +10,18 @@ class DataSourceComponent:
|
||||
|
||||
def create_tabbed_interface(self):
|
||||
"""Create tabbed interface for different data sources."""
|
||||
tabs = [dbc.Tab(label="File Upload", tab_id="file-tab")]
|
||||
|
||||
# Only add OpenSearch tab if enabled
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
tabs.append(dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"))
|
||||
|
||||
return dbc.Card(
|
||||
[
|
||||
dbc.CardHeader(
|
||||
[
|
||||
dbc.Tabs(
|
||||
[
|
||||
dbc.Tab(label="File Upload", tab_id="file-tab"),
|
||||
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
|
||||
],
|
||||
tabs,
|
||||
id="data-source-tabs",
|
||||
active_tab="file-tab",
|
||||
)
|
||||
|
@@ -3,6 +3,7 @@ import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from .datasource import DataSourceComponent
|
||||
from .textinput import TextInputComponent
|
||||
from embeddingbuddy.config.settings import AppSettings
|
||||
|
||||
|
||||
class SidebarComponent:
|
||||
@@ -102,6 +103,10 @@ class SidebarComponent:
|
||||
)
|
||||
|
||||
def _create_data_sources_item(self):
|
||||
tooltip_text = "Load existing embeddings: upload files"
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
tooltip_text += " or read from OpenSearch"
|
||||
|
||||
return dbc.AccordionItem(
|
||||
[
|
||||
self.datasource_component.create_error_alert(),
|
||||
@@ -115,7 +120,7 @@ class SidebarComponent:
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="load-embeddings-info-icon",
|
||||
title="Load existing embeddings: upload files or read from OpenSearch",
|
||||
title=tooltip_text,
|
||||
),
|
||||
]
|
||||
),
|
||||
|
@@ -16,14 +16,14 @@ class TextInputComponent:
|
||||
"""Create the complete text input interface with model selection and processing options."""
|
||||
return html.Div(
|
||||
[
|
||||
# Model selection section
|
||||
self._create_model_selection(),
|
||||
html.Hr(),
|
||||
# Text input section
|
||||
self._create_text_input_area(),
|
||||
# Text action buttons
|
||||
self._create_text_action_buttons(),
|
||||
html.Hr(),
|
||||
# Model selection section
|
||||
self._create_model_selection(),
|
||||
html.Hr(),
|
||||
# Processing options
|
||||
self._create_processing_options(),
|
||||
html.Hr(),
|
||||
|
@@ -1,16 +1,19 @@
|
||||
from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .components.sidebar import SidebarComponent
|
||||
from .components.about import AboutComponent
|
||||
|
||||
|
||||
class AppLayout:
|
||||
def __init__(self):
|
||||
self.sidebar = SidebarComponent()
|
||||
self.about = AboutComponent()
|
||||
|
||||
def create_layout(self):
|
||||
return dbc.Container(
|
||||
[self._create_header(), self._create_main_content()]
|
||||
+ self._create_stores(),
|
||||
+ self._create_stores()
|
||||
+ [self.about.create_about_modal()],
|
||||
fluid=True,
|
||||
)
|
||||
|
||||
@@ -19,7 +22,19 @@ class AppLayout:
|
||||
[
|
||||
dbc.Col(
|
||||
[
|
||||
html.H1("EmbeddingBuddy", className="text-center mb-4"),
|
||||
html.Div(
|
||||
[
|
||||
html.H1(
|
||||
"EmbeddingBuddy",
|
||||
className="text-center mb-4 d-inline",
|
||||
),
|
||||
html.Div(
|
||||
[self.about.create_about_button()],
|
||||
className="float-end",
|
||||
),
|
||||
],
|
||||
className="d-flex justify-content-between align-items-center",
|
||||
),
|
||||
# Load Transformers.js from CDN
|
||||
html.Script(
|
||||
"""
|
||||
|
Reference in New Issue
Block a user