Compare commits
27 Commits
restructur
...
main
Author | SHA1 | Date | |
---|---|---|---|
9c39b848f0 | |||
|
6b6735e07f | ||
4b99f2e816 | |||
|
c38f8f2ef8 | ||
478dc3a8a1 | |||
|
2d4bc5fa2f | ||
291bf9473a | |||
|
d30387e201 | ||
63df27b480 | |||
68f5cf8617 | |||
|
41ed6e747b | ||
0f837495fc | |||
d66a20ddda | |||
|
0d4145df06 | ||
dfcfe4fd7c | |||
314151e525 | |||
a93556132b | |||
26aece0161 | |||
963a21d0ab | |||
b12c248ea1 | |||
5695348f37 | |||
2f458884a2 | |||
89dcafd311 | |||
ea01ce596d | |||
8861b32ae5 | |||
302453d313 | |||
e022b26399 |
52
.gitea/workflows/bump-and-release.yml
Normal file
52
.gitea/workflows/bump-and-release.yml
Normal file
@@ -0,0 +1,52 @@
|
||||
name: Bump Version and Release
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
bump_type:
|
||||
description: 'Version bump type'
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- patch
|
||||
- minor
|
||||
- major
|
||||
|
||||
jobs:
|
||||
bump-and-release:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
token: ${{ secrets.GITEA_TOKEN }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Bump version
|
||||
id: bump
|
||||
run: |
|
||||
python bump_version.py ${{ github.event.inputs.bump_type }}
|
||||
NEW_VERSION=$(grep -oP 'version = "\K[^"]+' pyproject.toml)
|
||||
echo "version=$NEW_VERSION" >> $GITHUB_OUTPUT
|
||||
echo "tag=v$NEW_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Commit and tag
|
||||
run: |
|
||||
git config user.name "gitea-actions[bot]"
|
||||
git config user.email "gitea-actions[bot]@users.noreply.gitea.io"
|
||||
git add pyproject.toml
|
||||
git commit -m "bump version to v${{ steps.bump.outputs.version }}"
|
||||
git tag v${{ steps.bump.outputs.version }}
|
||||
|
||||
- name: Push changes
|
||||
run: |
|
||||
git push origin main
|
||||
git push origin v${{ steps.bump.outputs.version }}
|
@@ -66,27 +66,20 @@ jobs:
|
||||
echo "## Installation" >> release-notes.md
|
||||
echo "" >> release-notes.md
|
||||
echo '```bash' >> release-notes.md
|
||||
echo 'uv sync' >> release-notes.md
|
||||
echo 'uv run python main.py' >> release-notes.md
|
||||
echo 'pip install embeddingbuddy' >> release-notes.md
|
||||
echo 'embeddingbuddy serve' >> release-notes.md
|
||||
echo '```' >> release-notes.md
|
||||
|
||||
- name: Create Release
|
||||
uses: actions/create-release@v1
|
||||
uses: akkuman/gitea-release-action@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
NODE_OPTIONS: '--experimental-fetch'
|
||||
with:
|
||||
token: ${{ secrets.GITEA_TOKEN }}
|
||||
tag_name: ${{ github.ref_name || github.event.inputs.version }}
|
||||
release_name: Release ${{ github.ref_name || github.event.inputs.version }}
|
||||
body_path: release-notes.md
|
||||
draft: false
|
||||
prerelease: false
|
||||
|
||||
- name: Upload Release Assets
|
||||
uses: actions/upload-release-asset@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
with:
|
||||
upload_url: ${{ steps.create_release.outputs.upload_url }}
|
||||
asset_path: dist/
|
||||
asset_name: embeddingbuddy-dist
|
||||
asset_content_type: application/zip
|
||||
files: |-
|
||||
dist/*
|
54
.github/workflows/docker-release.yml
vendored
Normal file
54
.github/workflows/docker-release.yml
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
name: Docker Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v[0-9]+.[0-9]+.[0-9]+'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=ref,event=tag
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=semver,pattern={{major}}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/amd64,linux/arm64
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
33
.github/workflows/pypi-release.yml
vendored
Normal file
33
.github/workflows/pypi-release.yml
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
name: PyPI Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v[0-9]+.[0-9]+.[0-9]+'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
pypi-publish:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write # For trusted publishing
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Build package
|
||||
run: |
|
||||
uv build
|
||||
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
80
CLAUDE.md
80
CLAUDE.md
@@ -21,29 +21,23 @@ uv sync
|
||||
|
||||
**Run the application:**
|
||||
|
||||
Development mode (with auto-reload):
|
||||
Using the CLI (recommended):
|
||||
|
||||
```bash
|
||||
uv run run_dev.py
|
||||
# Production mode (no debug, no auto-reload)
|
||||
embeddingbuddy serve
|
||||
|
||||
# Development mode (debug + auto-reload on code changes)
|
||||
embeddingbuddy serve --dev
|
||||
|
||||
# Debug logging only (no auto-reload)
|
||||
embeddingbuddy serve --debug
|
||||
|
||||
# With custom host/port
|
||||
embeddingbuddy serve --host 0.0.0.0 --port 8080
|
||||
```
|
||||
|
||||
Production mode (with Gunicorn WSGI server):
|
||||
|
||||
```bash
|
||||
# First install production dependencies
|
||||
uv sync --extra prod
|
||||
|
||||
# Then run in production mode
|
||||
uv run run_prod.py
|
||||
```
|
||||
|
||||
Legacy mode (basic Dash server):
|
||||
|
||||
```bash
|
||||
uv run main.py
|
||||
```
|
||||
|
||||
The app will be available at <http://127.0.0.1:8050>
|
||||
The app will be available at <http://127.0.0.1:8050> by default
|
||||
|
||||
**Run tests:**
|
||||
|
||||
@@ -204,6 +198,52 @@ Uses modern Python stack with uv for dependency management:
|
||||
- **Testing:** pytest for test framework
|
||||
- **Dev Tools:** uv for package management
|
||||
|
||||
## CI/CD and Release Management
|
||||
|
||||
### Repository Setup
|
||||
|
||||
This project uses a **dual-repository workflow**:
|
||||
|
||||
- **Primary repository:** Gitea instance at `git.hawt.cloud` (read-write)
|
||||
- **Mirror repository:** GitHub (read-only mirror)
|
||||
|
||||
### Workflow Organization
|
||||
|
||||
**Gitea Workflows (`.gitea/workflows/`):**
|
||||
- **`bump-and-release.yml`** - Manual version bumping workflow
|
||||
- Runs `bump_version.py` to update version in `pyproject.toml`
|
||||
- Commits changes and creates git tag
|
||||
- Pushes to Gitea (main branch + tag)
|
||||
- Triggered manually via workflow_dispatch with choice of patch/minor/major bump
|
||||
- **`release.yml`** - Automated release creation
|
||||
- Triggered when version tags are pushed
|
||||
- Runs tests, builds packages
|
||||
- Creates Gitea release with artifacts
|
||||
- **`test.yml`** - Test suite execution
|
||||
- **`security.yml`** - Security scanning
|
||||
|
||||
**GitHub Workflows (`.github/workflows/`):**
|
||||
- **`docker-release.yml`** - Builds and publishes Docker images
|
||||
- **`pypi-release.yml`** - Publishes packages to PyPI
|
||||
- These workflows are read-only (no git commits/pushes) and create artifacts only
|
||||
|
||||
### Release Process
|
||||
|
||||
1. Run manual bump workflow on Gitea: **Actions → Bump Version and Release**
|
||||
2. Select version bump type (patch/minor/major)
|
||||
3. Workflow commits version change and pushes tag to Gitea
|
||||
4. Tag push triggers `release.yml` on Gitea (creates release)
|
||||
5. GitHub mirror receives tag and triggers artifact builds (Docker, PyPI)
|
||||
|
||||
### Version Management
|
||||
|
||||
Use `bump_version.py` for version updates:
|
||||
```bash
|
||||
python bump_version.py patch # 0.3.0 -> 0.3.1
|
||||
python bump_version.py minor # 0.3.0 -> 0.4.0
|
||||
python bump_version.py major # 0.3.0 -> 1.0.0
|
||||
```
|
||||
|
||||
## Development Guidelines
|
||||
|
||||
**When adding new features:**
|
||||
@@ -217,7 +257,7 @@ Uses modern Python stack with uv for dependency management:
|
||||
**Code Organization Principles:**
|
||||
|
||||
- Single responsibility principle
|
||||
- Clear module boundaries
|
||||
- Clear module boundaries
|
||||
- Testable, isolated components
|
||||
- Configuration over hardcoding
|
||||
- Error handling at appropriate layers
|
||||
|
44
Dockerfile
44
Dockerfile
@@ -2,6 +2,9 @@
|
||||
# Stage 1: Builder
|
||||
FROM python:3.11-slim as builder
|
||||
|
||||
# Create non-root user early in builder stage
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
|
||||
# Install system dependencies for building Python packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
@@ -20,10 +23,15 @@ COPY pyproject.toml uv.lock ./
|
||||
|
||||
# Copy source code (needed for editable install)
|
||||
COPY src/ src/
|
||||
COPY main.py .
|
||||
COPY wsgi.py .
|
||||
COPY run_prod.py .
|
||||
COPY assets/ assets/
|
||||
|
||||
# Change ownership of source files before building (lighter I/O)
|
||||
RUN chown -R appuser:appuser /app
|
||||
|
||||
# Create and set permissions for appuser home directory (needed for uv cache)
|
||||
RUN mkdir -p /home/appuser && chown -R appuser:appuser /home/appuser
|
||||
|
||||
# Switch to non-root user before building
|
||||
USER appuser
|
||||
|
||||
# Create virtual environment and install dependencies (including production extras)
|
||||
RUN uv venv .venv
|
||||
@@ -32,23 +40,24 @@ RUN uv sync --frozen --extra prod
|
||||
# Stage 2: Runtime
|
||||
FROM python:3.11-slim as runtime
|
||||
|
||||
# Create non-root user in runtime stage
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
|
||||
# Install runtime dependencies for compiled packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set working directory
|
||||
# Set working directory and change ownership (small directory)
|
||||
WORKDIR /app
|
||||
RUN chown appuser:appuser /app
|
||||
|
||||
# Copy virtual environment from builder stage
|
||||
COPY --from=builder /app/.venv /app/.venv
|
||||
# Copy files from builder with correct ownership
|
||||
COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv
|
||||
COPY --from=builder --chown=appuser:appuser /app/src /app/src
|
||||
|
||||
# Copy application files from builder stage
|
||||
COPY --from=builder /app/src /app/src
|
||||
COPY --from=builder /app/main.py /app/main.py
|
||||
COPY --from=builder /app/assets /app/assets
|
||||
COPY --from=builder /app/wsgi.py /app/wsgi.py
|
||||
COPY --from=builder /app/run_prod.py /app/run_prod.py
|
||||
# Switch to non-root user
|
||||
USER appuser
|
||||
|
||||
# Make sure the virtual environment is in PATH
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
@@ -65,14 +74,9 @@ ENV EMBEDDINGBUDDY_ENV=production
|
||||
# Expose port
|
||||
EXPOSE 8050
|
||||
|
||||
# Create non-root user
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
RUN chown -R appuser:appuser /app
|
||||
USER appuser
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
||||
CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5)" || exit 1
|
||||
|
||||
# Run application with Gunicorn in production
|
||||
CMD ["python", "run_prod.py"]
|
||||
# Run application in production mode (no debug, no auto-reload)
|
||||
CMD ["embeddingbuddy", "serve"]
|
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Austin Godber - EmbeddingBuddy
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
97
README.md
97
README.md
@@ -28,6 +28,73 @@ documents and prompts to understand how queries relate to your content.
|
||||
- **Sidebar layout** with controls on left, large visualization area on right
|
||||
- **Real-time visualization** optimized for small to medium datasets
|
||||
|
||||
## Network Dependency
|
||||
|
||||
**Note:** The application loads the Transformers.js library (v3.0.0) from `cdn.jsdelivr.net` for client-side embedding generation. This requires an active internet connection and sends requests to a third-party CDN. The application will function without internet if you only use the file upload features for pre-computed embeddings.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Installation
|
||||
|
||||
**Option 1: Install with uv (recommended)**
|
||||
|
||||
```bash
|
||||
# Install as a CLI tool (no need to clone the repo)
|
||||
uv tool install embeddingbuddy
|
||||
|
||||
# Run the application
|
||||
embeddingbuddy serve
|
||||
```
|
||||
|
||||
**Option 2: Install with pip/pipx**
|
||||
|
||||
```bash
|
||||
# Install with pipx (isolated environment)
|
||||
pipx install embeddingbuddy
|
||||
|
||||
# Or install with pip
|
||||
pip install embeddingbuddy
|
||||
|
||||
# Run the application
|
||||
embeddingbuddy
|
||||
```
|
||||
|
||||
**Option 3: Run with Docker**
|
||||
|
||||
```bash
|
||||
# Pull and run the Docker image
|
||||
docker run -p 8050:8050 ghcr.io/godber/embedding-buddy:latest
|
||||
```
|
||||
|
||||
The application will be available at <http://127.0.0.1:8050>
|
||||
|
||||
### macOS Installation Notes
|
||||
|
||||
If installation fails with C++ compiler or library errors (common with `opentsne`), install Xcode Command Line Tools:
|
||||
|
||||
```bash
|
||||
# Install or update Command Line Tools
|
||||
xcode-select --install
|
||||
|
||||
# Or reset existing installation
|
||||
sudo xcode-select --reset
|
||||
```
|
||||
|
||||
### Using the Application
|
||||
|
||||
1. **Open your browser** to <http://127.0.0.1:8050>
|
||||
2. **Upload your data**:
|
||||
- Drag and drop an NDJSON file containing embeddings (see Data Format below)
|
||||
- Optionally upload a second file with prompts to compare against documents
|
||||
3. **Choose visualization settings**:
|
||||
- Select dimensionality reduction method (PCA, t-SNE, or UMAP)
|
||||
- Choose 2D or 3D visualization
|
||||
- Pick color coding (by category, subcategory, or tags)
|
||||
4. **Explore**:
|
||||
- Click points to view full content
|
||||
- Toggle prompt visibility
|
||||
- Rotate and zoom 3D plots
|
||||
|
||||
## Data Format
|
||||
|
||||
EmbeddingBuddy accepts newline-delimited JSON (NDJSON) files for both documents
|
||||
@@ -73,26 +140,18 @@ uv sync
|
||||
|
||||
2. **Run the application:**
|
||||
|
||||
**Development mode** (with auto-reload):
|
||||
|
||||
```bash
|
||||
uv run run_dev.py
|
||||
```
|
||||
# Production mode (no debug, no auto-reload)
|
||||
embeddingbuddy serve
|
||||
|
||||
**Production mode** (with Gunicorn WSGI server):
|
||||
# Development mode (debug + auto-reload on code changes)
|
||||
embeddingbuddy serve --dev
|
||||
|
||||
```bash
|
||||
# Install production dependencies
|
||||
uv sync --extra prod
|
||||
# Debug logging only (no auto-reload)
|
||||
embeddingbuddy serve --debug
|
||||
|
||||
# Run in production mode
|
||||
uv run run_prod.py
|
||||
```
|
||||
|
||||
**Legacy mode** (basic Dash server):
|
||||
|
||||
```bash
|
||||
uv run main.py
|
||||
# Custom host/port
|
||||
embeddingbuddy serve --host 0.0.0.0 --port 8080
|
||||
```
|
||||
|
||||
3. **Open your browser** to <http://127.0.0.1:8050>
|
||||
@@ -180,10 +239,8 @@ src/embeddingbuddy/
|
||||
│ └── interactions.py # User interaction callbacks
|
||||
└── utils/ # Utility functions
|
||||
|
||||
main.py # Application runner (at project root)
|
||||
main.py # Application runner (at project root)
|
||||
run_dev.py # Development server runner
|
||||
run_prod.py # Production server runner
|
||||
# CLI entry point
|
||||
embeddingbuddy serve # Main CLI command to start the server
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
10
main.py
10
main.py
@@ -1,10 +0,0 @@
|
||||
from src.embeddingbuddy.app import create_app, run_app
|
||||
|
||||
|
||||
def main():
|
||||
app = create_app()
|
||||
run_app(app)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "embeddingbuddy"
|
||||
version = "0.5.0"
|
||||
version = "0.8.3"
|
||||
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
@@ -17,6 +17,10 @@ dependencies = [
|
||||
"opensearch-py>=3.0.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
embeddingbuddy = "embeddingbuddy.cli:main"
|
||||
embeddingbuddy-serve = "embeddingbuddy.app:serve"
|
||||
|
||||
[project.optional-dependencies]
|
||||
test = [
|
||||
"pytest>=8.4.1",
|
||||
@@ -50,3 +54,6 @@ where = ["src"]
|
||||
|
||||
[tool.setuptools.package-dir]
|
||||
"" = "src"
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
embeddingbuddy = ["assets/**/*"]
|
||||
|
26
run_dev.py
26
run_dev.py
@@ -1,26 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Development runner with auto-reload enabled.
|
||||
This runs the Dash development server with hot reloading.
|
||||
"""
|
||||
import os
|
||||
from src.embeddingbuddy.app import create_app, run_app
|
||||
|
||||
def main():
|
||||
"""Run the application in development mode with auto-reload."""
|
||||
# Force development settings
|
||||
os.environ["EMBEDDINGBUDDY_ENV"] = "development"
|
||||
os.environ["EMBEDDINGBUDDY_DEBUG"] = "true"
|
||||
|
||||
print("🚀 Starting EmbeddingBuddy in development mode...")
|
||||
print("📁 Auto-reload enabled - changes will trigger restart")
|
||||
print("🌐 Server will be available at http://127.0.0.1:8050")
|
||||
print("⏹️ Press Ctrl+C to stop")
|
||||
|
||||
app = create_app()
|
||||
|
||||
# Run with development server (includes auto-reload when debug=True)
|
||||
run_app(app, debug=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
49
run_prod.py
49
run_prod.py
@@ -1,49 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Production runner using Gunicorn WSGI server.
|
||||
This provides better performance and stability for production deployments.
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from src.embeddingbuddy.config.settings import AppSettings
|
||||
|
||||
def main():
|
||||
"""Run the application in production mode with Gunicorn."""
|
||||
# Force production settings
|
||||
os.environ["EMBEDDINGBUDDY_ENV"] = "production"
|
||||
os.environ["EMBEDDINGBUDDY_DEBUG"] = "false"
|
||||
|
||||
print("🚀 Starting EmbeddingBuddy in production mode...")
|
||||
print(f"⚙️ Workers: {AppSettings.GUNICORN_WORKERS}")
|
||||
print(f"🌐 Server will be available at http://{AppSettings.GUNICORN_BIND}")
|
||||
print("⏹️ Press Ctrl+C to stop")
|
||||
|
||||
# Gunicorn command
|
||||
cmd = [
|
||||
"gunicorn",
|
||||
"--workers", str(AppSettings.GUNICORN_WORKERS),
|
||||
"--bind", AppSettings.GUNICORN_BIND,
|
||||
"--timeout", str(AppSettings.GUNICORN_TIMEOUT),
|
||||
"--keep-alive", str(AppSettings.GUNICORN_KEEPALIVE),
|
||||
"--access-logfile", "-",
|
||||
"--error-logfile", "-",
|
||||
"--log-level", "info",
|
||||
"wsgi:application"
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(cmd, check=True)
|
||||
except KeyboardInterrupt:
|
||||
print("\n🛑 Shutting down...")
|
||||
sys.exit(0)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"❌ Error running Gunicorn: {e}")
|
||||
sys.exit(1)
|
||||
except FileNotFoundError:
|
||||
print("❌ Gunicorn not found. Install it with: uv add gunicorn")
|
||||
print("💡 Or run in development mode with: python run_dev.py")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,26 +1,53 @@
|
||||
import dash
|
||||
import dash_bootstrap_components as dbc
|
||||
from .config.settings import AppSettings
|
||||
from .ui.layout import AppLayout
|
||||
from .ui.callbacks.data_processing import DataProcessingCallbacks
|
||||
from .ui.callbacks.visualization import VisualizationCallbacks
|
||||
from .ui.callbacks.interactions import InteractionCallbacks
|
||||
"""
|
||||
EmbeddingBuddy application factory and server functions.
|
||||
|
||||
This module contains the main application creation logic with imports
|
||||
moved inside functions to avoid loading heavy dependencies at module level.
|
||||
"""
|
||||
|
||||
|
||||
def create_app():
|
||||
"""Create and configure the Dash application instance."""
|
||||
import os
|
||||
import dash
|
||||
import dash_bootstrap_components as dbc
|
||||
from .ui.layout import AppLayout
|
||||
from .ui.callbacks.data_processing import DataProcessingCallbacks
|
||||
from .ui.callbacks.visualization import VisualizationCallbacks
|
||||
from .ui.callbacks.interactions import InteractionCallbacks
|
||||
|
||||
# Get the project root directory (two levels up from this file)
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
||||
assets_path = os.path.join(project_root, "assets")
|
||||
# Get the assets directory relative to this module
|
||||
module_dir = os.path.dirname(__file__)
|
||||
assets_path = os.path.join(module_dir, "assets")
|
||||
|
||||
app = dash.Dash(
|
||||
__name__,
|
||||
title="EmbeddingBuddy",
|
||||
external_stylesheets=[
|
||||
dbc.themes.BOOTSTRAP,
|
||||
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css",
|
||||
],
|
||||
assets_folder=assets_path,
|
||||
meta_tags=[
|
||||
{
|
||||
"name": "description",
|
||||
"content": "Interactive embedding visualization tool for exploring high-dimensional vectors through dimensionality reduction techniques like PCA, t-SNE, and UMAP.",
|
||||
},
|
||||
{"name": "author", "content": "EmbeddingBuddy"},
|
||||
{
|
||||
"name": "keywords",
|
||||
"content": "embeddings, visualization, dimensionality reduction, PCA, t-SNE, UMAP, machine learning, data science",
|
||||
},
|
||||
{"name": "viewport", "content": "width=device-width, initial-scale=1.0"},
|
||||
{
|
||||
"property": "og:title",
|
||||
"content": "EmbeddingBuddy - Interactive Embedding Visualization",
|
||||
},
|
||||
{
|
||||
"property": "og:description",
|
||||
"content": "Explore and visualize embedding vectors through interactive 2D/3D plots with multiple dimensionality reduction techniques.",
|
||||
},
|
||||
{"property": "og:type", "content": "website"},
|
||||
],
|
||||
)
|
||||
|
||||
# Allow callbacks to components that are dynamically created in tabs
|
||||
@@ -102,6 +129,9 @@ def _register_client_side_callbacks(app):
|
||||
|
||||
|
||||
def run_app(app=None, debug=None, host=None, port=None):
|
||||
"""Run the Dash application with specified settings."""
|
||||
from .config.settings import AppSettings
|
||||
|
||||
if app is None:
|
||||
app = create_app()
|
||||
|
||||
@@ -112,6 +142,75 @@ def run_app(app=None, debug=None, host=None, port=None):
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
def serve(host=None, port=None, dev=False, debug=False):
|
||||
"""Start the EmbeddingBuddy web server.
|
||||
|
||||
Args:
|
||||
host: Host to bind to (default: 127.0.0.1)
|
||||
port: Port to bind to (default: 8050)
|
||||
dev: Development mode - enable debug logging and auto-reload (default: False)
|
||||
debug: Enable debug logging only, no auto-reload (default: False)
|
||||
"""
|
||||
import os
|
||||
from .config.settings import AppSettings
|
||||
|
||||
# Determine actual values to use
|
||||
actual_host = host if host is not None else AppSettings.HOST
|
||||
actual_port = port if port is not None else AppSettings.PORT
|
||||
|
||||
# Determine mode
|
||||
# --dev takes precedence and enables both debug and auto-reload
|
||||
# --debug enables only debug logging
|
||||
# No flags = production mode (no debug, no auto-reload)
|
||||
use_reloader = dev
|
||||
use_debug = dev or debug
|
||||
|
||||
# Only print startup messages in main process (not in Flask reloader)
|
||||
if not os.environ.get("WERKZEUG_RUN_MAIN"):
|
||||
from importlib.metadata import version
|
||||
|
||||
try:
|
||||
pkg_version = version("embeddingbuddy")
|
||||
except Exception:
|
||||
pkg_version = "unknown"
|
||||
|
||||
mode = "development" if dev else ("debug" if debug else "production")
|
||||
print(f"Starting EmbeddingBuddy v{pkg_version} in {mode} mode...")
|
||||
print("Loading dependencies (this may take a few seconds)...")
|
||||
print(f"Server will start at http://{actual_host}:{actual_port}")
|
||||
if use_reloader:
|
||||
print("Auto-reload enabled - server will restart on code changes")
|
||||
|
||||
app = create_app()
|
||||
run_app(app)
|
||||
|
||||
# Suppress Flask development server warning in production mode
|
||||
if not use_debug and not use_reloader:
|
||||
import warnings
|
||||
import logging
|
||||
|
||||
# Suppress the werkzeug warning
|
||||
warnings.filterwarnings("ignore", message=".*development server.*")
|
||||
|
||||
# Set werkzeug logger to ERROR level to suppress the warning
|
||||
werkzeug_logger = logging.getLogger("werkzeug")
|
||||
werkzeug_logger.setLevel(logging.ERROR)
|
||||
|
||||
# Use Flask's built-in server with appropriate settings
|
||||
app.run(
|
||||
debug=use_debug, host=actual_host, port=actual_port, use_reloader=use_reloader
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
"""Legacy entry point - redirects to cli module.
|
||||
|
||||
This is kept for backward compatibility but the main CLI
|
||||
is now in embeddingbuddy.cli for faster startup.
|
||||
"""
|
||||
from .cli import main as cli_main
|
||||
|
||||
cli_main()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
2
src/embeddingbuddy/assets/fontawesome.css
vendored
Normal file
2
src/embeddingbuddy/assets/fontawesome.css
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/* Load Font Awesome from local assets */
|
||||
@import url("/assets/fontawesome/css/all.min.css");
|
165
src/embeddingbuddy/assets/fontawesome/LICENSE.txt
Normal file
165
src/embeddingbuddy/assets/fontawesome/LICENSE.txt
Normal file
@@ -0,0 +1,165 @@
|
||||
Fonticons, Inc. (https://fontawesome.com)
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Font Awesome Free License
|
||||
|
||||
Font Awesome Free is free, open source, and GPL friendly. You can use it for
|
||||
commercial projects, open source projects, or really almost whatever you want.
|
||||
Full Font Awesome Free license: https://fontawesome.com/license/free.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Icons: CC BY 4.0 License (https://creativecommons.org/licenses/by/4.0/)
|
||||
|
||||
The Font Awesome Free download is licensed under a Creative Commons
|
||||
Attribution 4.0 International License and applies to all icons packaged
|
||||
as SVG and JS file types.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Fonts: SIL OFL 1.1 License
|
||||
|
||||
In the Font Awesome Free download, the SIL OFL license applies to all icons
|
||||
packaged as web and desktop font files.
|
||||
|
||||
Copyright (c) 2023 Fonticons, Inc. (https://fontawesome.com)
|
||||
with Reserved Font Name: "Font Awesome".
|
||||
|
||||
This Font Software is licensed under the SIL Open Font License, Version 1.1.
|
||||
This license is copied below, and is also available with a FAQ at:
|
||||
http://scripts.sil.org/OFL
|
||||
|
||||
SIL OPEN FONT LICENSE
|
||||
Version 1.1 - 26 February 2007
|
||||
|
||||
PREAMBLE
|
||||
The goals of the Open Font License (OFL) are to stimulate worldwide
|
||||
development of collaborative font projects, to support the font creation
|
||||
efforts of academic and linguistic communities, and to provide a free and
|
||||
open framework in which fonts may be shared and improved in partnership
|
||||
with others.
|
||||
|
||||
The OFL allows the licensed fonts to be used, studied, modified and
|
||||
redistributed freely as long as they are not sold by themselves. The
|
||||
fonts, including any derivative works, can be bundled, embedded,
|
||||
redistributed and/or sold with any software provided that any reserved
|
||||
names are not used by derivative works. The fonts and derivatives,
|
||||
however, cannot be released under any other type of license. The
|
||||
requirement for fonts to remain under this license does not apply
|
||||
to any document created using the fonts or their derivatives.
|
||||
|
||||
DEFINITIONS
|
||||
"Font Software" refers to the set of files released by the Copyright
|
||||
Holder(s) under this license and clearly marked as such. This may
|
||||
include source files, build scripts and documentation.
|
||||
|
||||
"Reserved Font Name" refers to any names specified as such after the
|
||||
copyright statement(s).
|
||||
|
||||
"Original Version" refers to the collection of Font Software components as
|
||||
distributed by the Copyright Holder(s).
|
||||
|
||||
"Modified Version" refers to any derivative made by adding to, deleting,
|
||||
or substituting — in part or in whole — any of the components of the
|
||||
Original Version, by changing formats or by porting the Font Software to a
|
||||
new environment.
|
||||
|
||||
"Author" refers to any designer, engineer, programmer, technical
|
||||
writer or other person who contributed to the Font Software.
|
||||
|
||||
PERMISSION & CONDITIONS
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Font Software, to use, study, copy, merge, embed, modify,
|
||||
redistribute, and sell modified and unmodified copies of the Font
|
||||
Software, subject to the following conditions:
|
||||
|
||||
1) Neither the Font Software nor any of its individual components,
|
||||
in Original or Modified Versions, may be sold by itself.
|
||||
|
||||
2) Original or Modified Versions of the Font Software may be bundled,
|
||||
redistributed and/or sold with any software, provided that each copy
|
||||
contains the above copyright notice and this license. These can be
|
||||
included either as stand-alone text files, human-readable headers or
|
||||
in the appropriate machine-readable metadata fields within text or
|
||||
binary files as long as those fields can be easily viewed by the user.
|
||||
|
||||
3) No Modified Version of the Font Software may use the Reserved Font
|
||||
Name(s) unless explicit written permission is granted by the corresponding
|
||||
Copyright Holder. This restriction only applies to the primary font name as
|
||||
presented to the users.
|
||||
|
||||
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
|
||||
Software shall not be used to promote, endorse or advertise any
|
||||
Modified Version, except to acknowledge the contribution(s) of the
|
||||
Copyright Holder(s) and the Author(s) or with their explicit written
|
||||
permission.
|
||||
|
||||
5) The Font Software, modified or unmodified, in part or in whole,
|
||||
must be distributed entirely under this license, and must not be
|
||||
distributed under any other license. The requirement for fonts to
|
||||
remain under this license does not apply to any document created
|
||||
using the Font Software.
|
||||
|
||||
TERMINATION
|
||||
This license becomes null and void if any of the above conditions are
|
||||
not met.
|
||||
|
||||
DISCLAIMER
|
||||
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
|
||||
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
|
||||
OTHER DEALINGS IN THE FONT SOFTWARE.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Code: MIT License (https://opensource.org/licenses/MIT)
|
||||
|
||||
In the Font Awesome Free download, the MIT license applies to all non-font and
|
||||
non-icon files.
|
||||
|
||||
Copyright 2023 Fonticons, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without limitation the rights to use, copy,
|
||||
modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
and to permit persons to whom the Software is furnished to do so, subject to the
|
||||
following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Attribution
|
||||
|
||||
Attribution is required by MIT, SIL OFL, and CC BY licenses. Downloaded Font
|
||||
Awesome Free files already contain embedded comments with sufficient
|
||||
attribution, so you shouldn't need to do anything additional when using these
|
||||
files normally.
|
||||
|
||||
We've kept attribution comments terse, so we ask that you do not actively work
|
||||
to remove them from files, especially code. They're a great way for folks to
|
||||
learn about Font Awesome.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Brand Icons
|
||||
|
||||
All brand icons are trademarks of their respective owners. The use of these
|
||||
trademarks does not indicate endorsement of the trademark holder by Font
|
||||
Awesome, nor vice versa. **Please do not use brand logos for any purpose except
|
||||
to represent the company, product, or service to which they refer.**
|
9
src/embeddingbuddy/assets/fontawesome/css/all.min.css
vendored
Normal file
9
src/embeddingbuddy/assets/fontawesome/css/all.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
BIN
src/embeddingbuddy/assets/fontawesome/webfonts/fa-brands-400.ttf
Normal file
BIN
src/embeddingbuddy/assets/fontawesome/webfonts/fa-brands-400.ttf
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
src/embeddingbuddy/assets/fontawesome/webfonts/fa-solid-900.ttf
Normal file
BIN
src/embeddingbuddy/assets/fontawesome/webfonts/fa-solid-900.ttf
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
67
src/embeddingbuddy/cli.py
Normal file
67
src/embeddingbuddy/cli.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""
|
||||
Lightweight CLI entry point for EmbeddingBuddy.
|
||||
|
||||
This module provides a fast command-line interface that only imports
|
||||
heavy dependencies when actually needed by subcommands.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
def main():
|
||||
"""Main CLI entry point with minimal imports for fast help text."""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="embeddingbuddy",
|
||||
description="EmbeddingBuddy - Interactive embedding visualization tool",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
embeddingbuddy serve # Production mode (no debug, no auto-reload)
|
||||
embeddingbuddy serve --dev # Development mode (debug + auto-reload)
|
||||
embeddingbuddy serve --debug # Debug logging only (no auto-reload)
|
||||
embeddingbuddy serve --port 8080 # Custom port
|
||||
embeddingbuddy serve --host 0.0.0.0 # Bind to all interfaces
|
||||
""",
|
||||
)
|
||||
|
||||
subparsers = parser.add_subparsers(
|
||||
dest="command", help="Available commands", metavar="<command>"
|
||||
)
|
||||
|
||||
# Serve subcommand
|
||||
serve_parser = subparsers.add_parser(
|
||||
"serve",
|
||||
help="Start the web server",
|
||||
description="Start the EmbeddingBuddy web server for interactive visualization",
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--host", default=None, help="Host to bind to (default: 127.0.0.1)"
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--port", type=int, default=None, help="Port to bind to (default: 8050)"
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--dev",
|
||||
action="store_true",
|
||||
help="Development mode: enable debug logging and auto-reload",
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--debug", action="store_true", help="Enable debug logging (no auto-reload)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "serve":
|
||||
# Only import heavy dependencies when actually running serve
|
||||
from embeddingbuddy.app import serve
|
||||
|
||||
serve(host=args.host, port=args.port, dev=args.dev, debug=args.debug)
|
||||
else:
|
||||
# No command specified, show help
|
||||
parser.print_help()
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -69,7 +69,7 @@ class AppSettings:
|
||||
TEXT_PREVIEW_LENGTH = 100
|
||||
|
||||
# App Configuration
|
||||
DEBUG = os.getenv("EMBEDDINGBUDDY_DEBUG", "True").lower() == "true"
|
||||
DEBUG = os.getenv("EMBEDDINGBUDDY_DEBUG", "False").lower() == "true"
|
||||
HOST = os.getenv("EMBEDDINGBUDDY_HOST", "127.0.0.1")
|
||||
PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))
|
||||
|
||||
@@ -85,6 +85,9 @@ class AppSettings:
|
||||
GUNICORN_KEEPALIVE = int(os.getenv("GUNICORN_KEEPALIVE", "5"))
|
||||
|
||||
# OpenSearch Configuration
|
||||
OPENSEARCH_ENABLED = (
|
||||
os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "True").lower() == "true"
|
||||
)
|
||||
OPENSEARCH_DEFAULT_SIZE = 100
|
||||
OPENSEARCH_SAMPLE_SIZE = 5
|
||||
OPENSEARCH_CONNECTION_TIMEOUT = 30
|
||||
|
@@ -82,19 +82,23 @@ class DataProcessingCallbacks:
|
||||
)
|
||||
def render_tab_content(active_tab):
|
||||
from ...ui.components.datasource import DataSourceComponent
|
||||
from ...config.settings import AppSettings
|
||||
|
||||
datasource = DataSourceComponent()
|
||||
|
||||
if active_tab == "opensearch-tab":
|
||||
if active_tab == "opensearch-tab" and AppSettings.OPENSEARCH_ENABLED:
|
||||
return [datasource.create_opensearch_tab()]
|
||||
elif active_tab == "text-input-tab":
|
||||
return [datasource.create_text_input_tab()]
|
||||
else:
|
||||
return [datasource.create_file_upload_tab()]
|
||||
|
||||
# Register callbacks for both data and prompts sections
|
||||
self._register_opensearch_callbacks("data", self.opensearch_client_data)
|
||||
self._register_opensearch_callbacks("prompts", self.opensearch_client_prompts)
|
||||
# Register callbacks for both data and prompts sections (only if OpenSearch is enabled)
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
self._register_opensearch_callbacks("data", self.opensearch_client_data)
|
||||
self._register_opensearch_callbacks(
|
||||
"prompts", self.opensearch_client_prompts
|
||||
)
|
||||
|
||||
# Register collapsible section callbacks
|
||||
self._register_collapse_callbacks()
|
||||
@@ -656,14 +660,12 @@ class DataProcessingCallbacks:
|
||||
import os
|
||||
|
||||
try:
|
||||
# Get the project root directory (four levels up from this file)
|
||||
# Get the embeddingbuddy package directory (three levels up from this file)
|
||||
current_file = os.path.abspath(__file__)
|
||||
project_root = os.path.dirname(
|
||||
os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(current_file)))
|
||||
)
|
||||
package_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(current_file))
|
||||
)
|
||||
sample_file_path = os.path.join(project_root, "assets", "sample-txt.md")
|
||||
sample_file_path = os.path.join(package_dir, "assets", "sample-txt.md")
|
||||
|
||||
if os.path.exists(sample_file_path):
|
||||
with open(sample_file_path, "r", encoding="utf-8") as file:
|
||||
|
@@ -7,6 +7,16 @@ class InteractionCallbacks:
|
||||
self._register_callbacks()
|
||||
|
||||
def _register_callbacks(self):
|
||||
@callback(
|
||||
Output("about-modal", "is_open"),
|
||||
[Input("about-button", "n_clicks"), Input("about-modal-close", "n_clicks")],
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def toggle_about_modal(about_clicks, close_clicks):
|
||||
if about_clicks or close_clicks:
|
||||
return True if about_clicks else False
|
||||
return False
|
||||
|
||||
@callback(
|
||||
[
|
||||
Output("processed-data", "data", allow_duplicate=True),
|
||||
|
90
src/embeddingbuddy/ui/components/about.py
Normal file
90
src/embeddingbuddy/ui/components/about.py
Normal file
@@ -0,0 +1,90 @@
|
||||
from dash import html, dcc
|
||||
import dash_bootstrap_components as dbc
|
||||
|
||||
|
||||
class AboutComponent:
|
||||
def _get_about_content(self):
|
||||
return """
|
||||
# 🔍 Interactive Embedding Vector Visualization
|
||||
|
||||
EmbeddingBuddy is a web application for interactive exploration and
|
||||
visualization of embedding vectors through dimensionality reduction techniques
|
||||
(PCA, t-SNE, UMAP).
|
||||
|
||||
You have two ways to get started:
|
||||
|
||||
1. Generate embeddings directly in the browser if it supports WebGPU.
|
||||
2. Upload your NDJSON file containing embedding vectors and metadata.
|
||||
|
||||
## Generating Embeddings in Browser
|
||||
|
||||
1. Expand the "Generate Embeddings" section.
|
||||
2. Input your text data (one entry per line).
|
||||
1. Optionally you can use the built in sample data by clicking "Load Sample Data" button.
|
||||
3. Click "Generate Embeddings" to create vectors using a pre-trained model.
|
||||
|
||||
## NDJSON File Format
|
||||
|
||||
```json
|
||||
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, ...], "text": "Sample text content", "category": "news", "subcategory": "politics", "tags": ["election", "politics"]}
|
||||
{"id": "doc_002", "embedding": [0.2, -0.1, 0.9, ...], "text": "Another example", "category": "review", "subcategory": "product", "tags": ["tech", "gadget"]}
|
||||
```
|
||||
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- Drag-and-drop NDJSON file upload
|
||||
- Multiple dimensionality reduction algorithms
|
||||
- 2D/3D interactive plots with Plotly
|
||||
- Color coding by categories, subcategories, or tags
|
||||
- In-browser embedding generation
|
||||
- OpenSearch integration for data loading
|
||||
|
||||
## 🔧 Supported Algorithms
|
||||
|
||||
- **PCA** (Principal Component Analysis)
|
||||
- **t-SNE** (t-Distributed Stochastic Neighbor Embedding)
|
||||
- **UMAP** (Uniform Manifold Approximation and Projection)
|
||||
|
||||
---
|
||||
|
||||
📂 [View on GitHub](https://github.com/godber/EmbeddingBuddy)
|
||||
|
||||
*Built with: Python, Dash, Plotly, scikit-learn, OpenTSNE, UMAP*
|
||||
""".strip()
|
||||
|
||||
def create_about_modal(self):
|
||||
return dbc.Modal(
|
||||
[
|
||||
dbc.ModalHeader(
|
||||
dbc.ModalTitle("Welcome to EmbeddingBuddy"),
|
||||
close_button=True,
|
||||
),
|
||||
dbc.ModalBody(
|
||||
[dcc.Markdown(self._get_about_content(), className="mb-0")]
|
||||
),
|
||||
dbc.ModalFooter(
|
||||
[
|
||||
dbc.Button(
|
||||
"Close",
|
||||
id="about-modal-close",
|
||||
color="secondary",
|
||||
n_clicks=0,
|
||||
)
|
||||
]
|
||||
),
|
||||
],
|
||||
id="about-modal",
|
||||
is_open=True,
|
||||
size="lg",
|
||||
)
|
||||
|
||||
def create_about_button(self):
|
||||
return dbc.Button(
|
||||
[html.I(className="fas fa-info-circle me-2"), "About"],
|
||||
id="about-button",
|
||||
color="outline-info",
|
||||
size="sm",
|
||||
n_clicks=0,
|
||||
className="ms-2",
|
||||
)
|
@@ -1,6 +1,7 @@
|
||||
from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from embeddingbuddy.config.settings import AppSettings
|
||||
|
||||
|
||||
class DataSourceComponent:
|
||||
@@ -9,15 +10,18 @@ class DataSourceComponent:
|
||||
|
||||
def create_tabbed_interface(self):
|
||||
"""Create tabbed interface for different data sources."""
|
||||
tabs = [dbc.Tab(label="File Upload", tab_id="file-tab")]
|
||||
|
||||
# Only add OpenSearch tab if enabled
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
tabs.append(dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"))
|
||||
|
||||
return dbc.Card(
|
||||
[
|
||||
dbc.CardHeader(
|
||||
[
|
||||
dbc.Tabs(
|
||||
[
|
||||
dbc.Tab(label="File Upload", tab_id="file-tab"),
|
||||
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
|
||||
],
|
||||
tabs,
|
||||
id="data-source-tabs",
|
||||
active_tab="file-tab",
|
||||
)
|
||||
|
@@ -3,6 +3,7 @@ import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from .datasource import DataSourceComponent
|
||||
from .textinput import TextInputComponent
|
||||
from embeddingbuddy.config.settings import AppSettings
|
||||
|
||||
|
||||
class SidebarComponent:
|
||||
@@ -102,6 +103,10 @@ class SidebarComponent:
|
||||
)
|
||||
|
||||
def _create_data_sources_item(self):
|
||||
tooltip_text = "Load existing embeddings: upload files"
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
tooltip_text += " or read from OpenSearch"
|
||||
|
||||
return dbc.AccordionItem(
|
||||
[
|
||||
self.datasource_component.create_error_alert(),
|
||||
@@ -115,7 +120,7 @@ class SidebarComponent:
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="load-embeddings-info-icon",
|
||||
title="Load existing embeddings: upload files or read from OpenSearch",
|
||||
title=tooltip_text,
|
||||
),
|
||||
]
|
||||
),
|
||||
|
@@ -16,14 +16,14 @@ class TextInputComponent:
|
||||
"""Create the complete text input interface with model selection and processing options."""
|
||||
return html.Div(
|
||||
[
|
||||
# Model selection section
|
||||
self._create_model_selection(),
|
||||
html.Hr(),
|
||||
# Text input section
|
||||
self._create_text_input_area(),
|
||||
# Text action buttons
|
||||
self._create_text_action_buttons(),
|
||||
html.Hr(),
|
||||
# Model selection section
|
||||
self._create_model_selection(),
|
||||
html.Hr(),
|
||||
# Processing options
|
||||
self._create_processing_options(),
|
||||
html.Hr(),
|
||||
|
@@ -1,16 +1,19 @@
|
||||
from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .components.sidebar import SidebarComponent
|
||||
from .components.about import AboutComponent
|
||||
|
||||
|
||||
class AppLayout:
|
||||
def __init__(self):
|
||||
self.sidebar = SidebarComponent()
|
||||
self.about = AboutComponent()
|
||||
|
||||
def create_layout(self):
|
||||
return dbc.Container(
|
||||
[self._create_header(), self._create_main_content()]
|
||||
+ self._create_stores(),
|
||||
+ self._create_stores()
|
||||
+ [self.about.create_about_modal()],
|
||||
fluid=True,
|
||||
)
|
||||
|
||||
@@ -19,7 +22,19 @@ class AppLayout:
|
||||
[
|
||||
dbc.Col(
|
||||
[
|
||||
html.H1("EmbeddingBuddy", className="text-center mb-4"),
|
||||
html.Div(
|
||||
[
|
||||
html.H1(
|
||||
"EmbeddingBuddy",
|
||||
className="text-center mb-4 d-inline",
|
||||
),
|
||||
html.Div(
|
||||
[self.about.create_about_button()],
|
||||
className="float-end",
|
||||
),
|
||||
],
|
||||
className="d-flex justify-content-between align-items-center",
|
||||
),
|
||||
# Load Transformers.js from CDN
|
||||
html.Script(
|
||||
"""
|
||||
|
12
src/embeddingbuddy/wsgi.py
Normal file
12
src/embeddingbuddy/wsgi.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""
|
||||
WSGI entry point for production deployment.
|
||||
Use this with a production WSGI server like Gunicorn.
|
||||
"""
|
||||
|
||||
from embeddingbuddy.app import create_app
|
||||
|
||||
# Create the application instance
|
||||
application = create_app()
|
||||
|
||||
# For compatibility with different WSGI servers
|
||||
app = application
|
2
uv.lock
generated
2
uv.lock
generated
@@ -412,7 +412,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "embeddingbuddy"
|
||||
version = "0.5.0"
|
||||
version = "0.8.1"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "dash" },
|
||||
|
20
wsgi.py
20
wsgi.py
@@ -1,20 +0,0 @@
|
||||
"""
|
||||
WSGI entry point for production deployment.
|
||||
Use this with a production WSGI server like Gunicorn.
|
||||
"""
|
||||
from src.embeddingbuddy.app import create_app
|
||||
|
||||
# Create the application instance
|
||||
application = create_app()
|
||||
|
||||
# For compatibility with different WSGI servers
|
||||
app = application
|
||||
|
||||
if __name__ == "__main__":
|
||||
# This won't be used in production, but useful for testing
|
||||
from src.embeddingbuddy.config.settings import AppSettings
|
||||
application.run(
|
||||
host=AppSettings.HOST,
|
||||
port=AppSettings.PORT,
|
||||
debug=False
|
||||
)
|
Reference in New Issue
Block a user