Compare commits
1 Commits
v0.8.0
...
d35ef995a3
Author | SHA1 | Date | |
---|---|---|---|
d35ef995a3 |
@@ -4,9 +4,7 @@
|
||||
"Bash(mkdir:*)",
|
||||
"Bash(uv run:*)",
|
||||
"Bash(uv add:*)",
|
||||
"Bash(uv sync:*)",
|
||||
"Bash(tree:*)",
|
||||
"WebFetch(domain:www.dash-bootstrap-components.com)"
|
||||
"Bash(uv sync:*)"
|
||||
],
|
||||
"deny": [],
|
||||
"ask": [],
|
||||
|
@@ -1,52 +0,0 @@
|
||||
name: Bump Version and Release
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
bump_type:
|
||||
description: 'Version bump type'
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- patch
|
||||
- minor
|
||||
- major
|
||||
|
||||
jobs:
|
||||
bump-and-release:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
token: ${{ secrets.GITEA_TOKEN }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Bump version
|
||||
id: bump
|
||||
run: |
|
||||
python bump_version.py ${{ github.event.inputs.bump_type }}
|
||||
NEW_VERSION=$(grep -oP 'version = "\K[^"]+' pyproject.toml)
|
||||
echo "version=$NEW_VERSION" >> $GITHUB_OUTPUT
|
||||
echo "tag=v$NEW_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Commit and tag
|
||||
run: |
|
||||
git config user.name "gitea-actions[bot]"
|
||||
git config user.email "gitea-actions[bot]@users.noreply.gitea.io"
|
||||
git add pyproject.toml
|
||||
git commit -m "bump version to v${{ steps.bump.outputs.version }}"
|
||||
git tag v${{ steps.bump.outputs.version }}
|
||||
|
||||
- name: Push changes
|
||||
run: |
|
||||
git push origin main
|
||||
git push origin v${{ steps.bump.outputs.version }}
|
@@ -66,20 +66,27 @@ jobs:
|
||||
echo "## Installation" >> release-notes.md
|
||||
echo "" >> release-notes.md
|
||||
echo '```bash' >> release-notes.md
|
||||
echo 'pip install embeddingbuddy' >> release-notes.md
|
||||
echo 'embeddingbuddy serve' >> release-notes.md
|
||||
echo 'uv sync' >> release-notes.md
|
||||
echo 'uv run python main.py' >> release-notes.md
|
||||
echo '```' >> release-notes.md
|
||||
|
||||
- name: Create Release
|
||||
uses: akkuman/gitea-release-action@v1
|
||||
uses: actions/create-release@v1
|
||||
env:
|
||||
NODE_OPTIONS: '--experimental-fetch'
|
||||
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
with:
|
||||
token: ${{ secrets.GITEA_TOKEN }}
|
||||
tag_name: ${{ github.ref_name || github.event.inputs.version }}
|
||||
release_name: Release ${{ github.ref_name || github.event.inputs.version }}
|
||||
body_path: release-notes.md
|
||||
draft: false
|
||||
prerelease: false
|
||||
files: |-
|
||||
dist/*
|
||||
|
||||
- name: Upload Release Assets
|
||||
uses: actions/upload-release-asset@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
with:
|
||||
upload_url: ${{ steps.create_release.outputs.upload_url }}
|
||||
asset_path: dist/
|
||||
asset_name: embeddingbuddy-dist
|
||||
asset_content_type: application/zip
|
54
.github/workflows/docker-release.yml
vendored
54
.github/workflows/docker-release.yml
vendored
@@ -1,54 +0,0 @@
|
||||
name: Docker Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v[0-9]+.[0-9]+.[0-9]+'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=ref,event=tag
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=semver,pattern={{major}}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/amd64,linux/arm64
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
33
.github/workflows/pypi-release.yml
vendored
33
.github/workflows/pypi-release.yml
vendored
@@ -1,33 +0,0 @@
|
||||
name: PyPI Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v[0-9]+.[0-9]+.[0-9]+'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
pypi-publish:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write # For trusted publishing
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
|
||||
- name: Build package
|
||||
run: |
|
||||
uv build
|
||||
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
79
CLAUDE.md
79
CLAUDE.md
@@ -21,23 +21,26 @@ uv sync
|
||||
|
||||
**Run the application:**
|
||||
|
||||
Using the CLI (recommended):
|
||||
|
||||
Development mode (with auto-reload):
|
||||
```bash
|
||||
# Production mode (no debug, no auto-reload)
|
||||
embeddingbuddy serve
|
||||
|
||||
# Development mode (debug + auto-reload on code changes)
|
||||
embeddingbuddy serve --dev
|
||||
|
||||
# Debug logging only (no auto-reload)
|
||||
embeddingbuddy serve --debug
|
||||
|
||||
# With custom host/port
|
||||
embeddingbuddy serve --host 0.0.0.0 --port 8080
|
||||
uv run run_dev.py
|
||||
```
|
||||
|
||||
The app will be available at <http://127.0.0.1:8050> by default
|
||||
Production mode (with Gunicorn WSGI server):
|
||||
```bash
|
||||
# First install production dependencies
|
||||
uv sync --extra prod
|
||||
|
||||
# Then run in production mode
|
||||
uv run run_prod.py
|
||||
```
|
||||
|
||||
Legacy mode (basic Dash server):
|
||||
```bash
|
||||
uv run main.py
|
||||
```
|
||||
|
||||
The app will be available at http://127.0.0.1:8050
|
||||
|
||||
**Run tests:**
|
||||
|
||||
@@ -198,52 +201,6 @@ Uses modern Python stack with uv for dependency management:
|
||||
- **Testing:** pytest for test framework
|
||||
- **Dev Tools:** uv for package management
|
||||
|
||||
## CI/CD and Release Management
|
||||
|
||||
### Repository Setup
|
||||
|
||||
This project uses a **dual-repository workflow**:
|
||||
|
||||
- **Primary repository:** Gitea instance at `git.hawt.cloud` (read-write)
|
||||
- **Mirror repository:** GitHub (read-only mirror)
|
||||
|
||||
### Workflow Organization
|
||||
|
||||
**Gitea Workflows (`.gitea/workflows/`):**
|
||||
- **`bump-and-release.yml`** - Manual version bumping workflow
|
||||
- Runs `bump_version.py` to update version in `pyproject.toml`
|
||||
- Commits changes and creates git tag
|
||||
- Pushes to Gitea (main branch + tag)
|
||||
- Triggered manually via workflow_dispatch with choice of patch/minor/major bump
|
||||
- **`release.yml`** - Automated release creation
|
||||
- Triggered when version tags are pushed
|
||||
- Runs tests, builds packages
|
||||
- Creates Gitea release with artifacts
|
||||
- **`test.yml`** - Test suite execution
|
||||
- **`security.yml`** - Security scanning
|
||||
|
||||
**GitHub Workflows (`.github/workflows/`):**
|
||||
- **`docker-release.yml`** - Builds and publishes Docker images
|
||||
- **`pypi-release.yml`** - Publishes packages to PyPI
|
||||
- These workflows are read-only (no git commits/pushes) and create artifacts only
|
||||
|
||||
### Release Process
|
||||
|
||||
1. Run manual bump workflow on Gitea: **Actions → Bump Version and Release**
|
||||
2. Select version bump type (patch/minor/major)
|
||||
3. Workflow commits version change and pushes tag to Gitea
|
||||
4. Tag push triggers `release.yml` on Gitea (creates release)
|
||||
5. GitHub mirror receives tag and triggers artifact builds (Docker, PyPI)
|
||||
|
||||
### Version Management
|
||||
|
||||
Use `bump_version.py` for version updates:
|
||||
```bash
|
||||
python bump_version.py patch # 0.3.0 -> 0.3.1
|
||||
python bump_version.py minor # 0.3.0 -> 0.4.0
|
||||
python bump_version.py major # 0.3.0 -> 1.0.0
|
||||
```
|
||||
|
||||
## Development Guidelines
|
||||
|
||||
**When adding new features:**
|
||||
@@ -257,7 +214,7 @@ python bump_version.py major # 0.3.0 -> 1.0.0
|
||||
**Code Organization Principles:**
|
||||
|
||||
- Single responsibility principle
|
||||
- Clear module boundaries
|
||||
- Clear module boundaries
|
||||
- Testable, isolated components
|
||||
- Configuration over hardcoding
|
||||
- Error handling at appropriate layers
|
||||
|
39
Dockerfile
39
Dockerfile
@@ -2,9 +2,6 @@
|
||||
# Stage 1: Builder
|
||||
FROM python:3.11-slim as builder
|
||||
|
||||
# Create non-root user early in builder stage
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
|
||||
# Install system dependencies for building Python packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
@@ -23,17 +20,11 @@ COPY pyproject.toml uv.lock ./
|
||||
|
||||
# Copy source code (needed for editable install)
|
||||
COPY src/ src/
|
||||
COPY main.py .
|
||||
COPY wsgi.py .
|
||||
COPY run_prod.py .
|
||||
COPY assets/ assets/
|
||||
|
||||
# Change ownership of source files before building (lighter I/O)
|
||||
RUN chown -R appuser:appuser /app
|
||||
|
||||
# Create and set permissions for appuser home directory (needed for uv cache)
|
||||
RUN mkdir -p /home/appuser && chown -R appuser:appuser /home/appuser
|
||||
|
||||
# Switch to non-root user before building
|
||||
USER appuser
|
||||
|
||||
# Create virtual environment and install dependencies (including production extras)
|
||||
RUN uv venv .venv
|
||||
RUN uv sync --frozen --extra prod
|
||||
@@ -41,25 +32,23 @@ RUN uv sync --frozen --extra prod
|
||||
# Stage 2: Runtime
|
||||
FROM python:3.11-slim as runtime
|
||||
|
||||
# Create non-root user in runtime stage
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
|
||||
# Install runtime dependencies for compiled packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set working directory and change ownership (small directory)
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
RUN chown appuser:appuser /app
|
||||
|
||||
# Copy files from builder with correct ownership
|
||||
COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv
|
||||
COPY --from=builder --chown=appuser:appuser /app/src /app/src
|
||||
COPY --from=builder --chown=appuser:appuser /app/assets /app/assets
|
||||
# Copy virtual environment from builder stage
|
||||
COPY --from=builder /app/.venv /app/.venv
|
||||
|
||||
# Switch to non-root user
|
||||
USER appuser
|
||||
# Copy application files from builder stage
|
||||
COPY --from=builder /app/src /app/src
|
||||
COPY --from=builder /app/main.py /app/main.py
|
||||
COPY --from=builder /app/assets /app/assets
|
||||
COPY --from=builder /app/wsgi.py /app/wsgi.py
|
||||
COPY --from=builder /app/run_prod.py /app/run_prod.py
|
||||
|
||||
# Make sure the virtual environment is in PATH
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
@@ -80,5 +69,5 @@ EXPOSE 8050
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
||||
CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5)" || exit 1
|
||||
|
||||
# Run application in production mode (no debug, no auto-reload)
|
||||
CMD ["embeddingbuddy", "serve"]
|
||||
# Run application with Gunicorn in production
|
||||
CMD ["python", "run_prod.py"]
|
21
LICENSE
21
LICENSE
@@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Austin Godber - EmbeddingBuddy
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
125
README.md
125
README.md
@@ -28,61 +28,6 @@ documents and prompts to understand how queries relate to your content.
|
||||
- **Sidebar layout** with controls on left, large visualization area on right
|
||||
- **Real-time visualization** optimized for small to medium datasets
|
||||
|
||||
## Network Dependency
|
||||
|
||||
**Note:** The application loads the Transformers.js library (v3.0.0) from `cdn.jsdelivr.net` for client-side embedding generation. This requires an active internet connection and sends requests to a third-party CDN. The application will function without internet if you only use the file upload features for pre-computed embeddings.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Installation
|
||||
|
||||
**Option 1: Install with uv (recommended)**
|
||||
|
||||
```bash
|
||||
# Install as a CLI tool (no need to clone the repo)
|
||||
uv tool install embeddingbuddy
|
||||
|
||||
# Run the application
|
||||
embeddingbuddy serve
|
||||
```
|
||||
|
||||
**Option 2: Install with pip/pipx**
|
||||
|
||||
```bash
|
||||
# Install with pipx (isolated environment)
|
||||
pipx install embeddingbuddy
|
||||
|
||||
# Or install with pip
|
||||
pip install embeddingbuddy
|
||||
|
||||
# Run the application
|
||||
embeddingbuddy
|
||||
```
|
||||
|
||||
**Option 3: Run with Docker**
|
||||
|
||||
```bash
|
||||
# Pull and run the Docker image
|
||||
docker run -p 8050:8050 ghcr.io/godber/embedding-buddy:latest
|
||||
```
|
||||
|
||||
The application will be available at <http://127.0.0.1:8050>
|
||||
|
||||
### Using the Application
|
||||
|
||||
1. **Open your browser** to <http://127.0.0.1:8050>
|
||||
2. **Upload your data**:
|
||||
- Drag and drop an NDJSON file containing embeddings (see Data Format below)
|
||||
- Optionally upload a second file with prompts to compare against documents
|
||||
3. **Choose visualization settings**:
|
||||
- Select dimensionality reduction method (PCA, t-SNE, or UMAP)
|
||||
- Choose 2D or 3D visualization
|
||||
- Pick color coding (by category, subcategory, or tags)
|
||||
4. **Explore**:
|
||||
- Click points to view full content
|
||||
- Toggle prompt visibility
|
||||
- Rotate and zoom 3D plots
|
||||
|
||||
## Data Format
|
||||
|
||||
EmbeddingBuddy accepts newline-delimited JSON (NDJSON) files for both documents
|
||||
@@ -128,18 +73,26 @@ uv sync
|
||||
|
||||
2. **Run the application:**
|
||||
|
||||
**Development mode** (with auto-reload):
|
||||
|
||||
```bash
|
||||
# Production mode (no debug, no auto-reload)
|
||||
embeddingbuddy serve
|
||||
uv run run_dev.py
|
||||
```
|
||||
|
||||
# Development mode (debug + auto-reload on code changes)
|
||||
embeddingbuddy serve --dev
|
||||
**Production mode** (with Gunicorn WSGI server):
|
||||
|
||||
# Debug logging only (no auto-reload)
|
||||
embeddingbuddy serve --debug
|
||||
```bash
|
||||
# Install production dependencies
|
||||
uv sync --extra prod
|
||||
|
||||
# Custom host/port
|
||||
embeddingbuddy serve --host 0.0.0.0 --port 8080
|
||||
# Run in production mode
|
||||
uv run run_prod.py
|
||||
```
|
||||
|
||||
**Legacy mode** (basic Dash server):
|
||||
|
||||
```bash
|
||||
uv run main.py
|
||||
```
|
||||
|
||||
3. **Open your browser** to <http://127.0.0.1:8050>
|
||||
@@ -199,36 +152,22 @@ The application follows a modular architecture for improved maintainability and
|
||||
|
||||
```text
|
||||
src/embeddingbuddy/
|
||||
├── app.py # Main application entry point and factory
|
||||
├── config/ # Configuration management
|
||||
│ └── settings.py # Centralized app settings
|
||||
├── data/ # Data parsing and processing
|
||||
│ ├── parser.py # NDJSON parsing logic
|
||||
│ ├── processor.py # Data transformation utilities
|
||||
│ └── sources/ # Data source integrations
|
||||
│ └── opensearch.py # OpenSearch data source
|
||||
├── models/ # Data schemas and algorithms
|
||||
│ ├── schemas.py # Pydantic data models
|
||||
│ ├── reducers.py # Dimensionality reduction algorithms
|
||||
│ └── field_mapper.py # Field mapping utilities
|
||||
├── visualization/ # Plot creation and styling
|
||||
│ ├── plots.py # Plot factory and creation logic
|
||||
│ └── colors.py # Color mapping utilities
|
||||
├── ui/ # User interface components
|
||||
│ ├── layout.py # Main application layout
|
||||
│ ├── components/ # Reusable UI components
|
||||
│ │ ├── sidebar.py # Sidebar component
|
||||
│ │ ├── upload.py # Upload components
|
||||
│ │ ├── textinput.py # Text input components
|
||||
│ │ └── datasource.py # Data source components
|
||||
│ └── callbacks/ # Organized callback functions
|
||||
│ ├── data_processing.py # Data upload/processing callbacks
|
||||
│ ├── visualization.py # Plot update callbacks
|
||||
│ └── interactions.py # User interaction callbacks
|
||||
└── utils/ # Utility functions
|
||||
|
||||
# CLI entry point
|
||||
embeddingbuddy serve # Main CLI command to start the server
|
||||
├── config/ # Configuration management
|
||||
│ └── settings.py # Centralized app settings
|
||||
├── data/ # Data parsing and processing
|
||||
│ ├── parser.py # NDJSON parsing logic
|
||||
│ └── processor.py # Data transformation utilities
|
||||
├── models/ # Data schemas and algorithms
|
||||
│ ├── schemas.py # Pydantic data models
|
||||
│ └── reducers.py # Dimensionality reduction algorithms
|
||||
├── visualization/ # Plot creation and styling
|
||||
│ ├── plots.py # Plot factory and creation logic
|
||||
│ └── colors.py # Color mapping utilities
|
||||
├── ui/ # User interface components
|
||||
│ ├── layout.py # Main application layout
|
||||
│ ├── components/ # Reusable UI components
|
||||
│ └── callbacks/ # Organized callback functions
|
||||
└── utils/ # Utility functions
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
@@ -1,17 +0,0 @@
|
||||
/* CSS override for transparent hover boxes in Plotly plots */
|
||||
|
||||
/* Make hover boxes transparent while preserving text readability */
|
||||
.hovertext {
|
||||
fill-opacity: 0.8 !important;
|
||||
stroke-opacity: 1 !important;
|
||||
}
|
||||
|
||||
/* Alternative selector for different Plotly versions */
|
||||
g.hovertext > path {
|
||||
opacity: 0.8 !important;
|
||||
}
|
||||
|
||||
/* Ensure text remains fully visible */
|
||||
.hovertext text {
|
||||
opacity: 1 !important;
|
||||
}
|
@@ -45,12 +45,28 @@ class TransformersEmbedder {
|
||||
console.log('✅ Using globally loaded Transformers.js pipeline');
|
||||
}
|
||||
|
||||
this.extractor = await window.transformers.pipeline('feature-extraction', modelName);
|
||||
|
||||
// Show loading progress to user
|
||||
if (window.updateModelLoadingProgress) {
|
||||
window.updateModelLoadingProgress(0, `Loading ${modelName}...`);
|
||||
}
|
||||
|
||||
this.extractor = await window.transformers.pipeline('feature-extraction', modelName, {
|
||||
progress_callback: (data) => {
|
||||
if (window.updateModelLoadingProgress && data.progress !== undefined) {
|
||||
const progress = Math.round(data.progress);
|
||||
window.updateModelLoadingProgress(progress, data.status || 'Loading...');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
this.modelCache.set(modelName, this.extractor);
|
||||
this.currentModel = modelName;
|
||||
this.isLoading = false;
|
||||
|
||||
|
||||
if (window.updateModelLoadingProgress) {
|
||||
window.updateModelLoadingProgress(100, 'Model loaded successfully');
|
||||
}
|
||||
|
||||
return { success: true, model: modelName };
|
||||
} catch (error) {
|
||||
this.isLoading = false;
|
||||
@@ -100,8 +116,17 @@ class TransformersEmbedder {
|
||||
}
|
||||
});
|
||||
|
||||
// Update progress
|
||||
const progress = Math.min(100, ((i + batch.length) / texts.length) * 100);
|
||||
if (window.updateEmbeddingProgress) {
|
||||
window.updateEmbeddingProgress(progress, `Processing ${i + batch.length}/${texts.length} texts`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (window.updateEmbeddingProgress) {
|
||||
window.updateEmbeddingProgress(100, `Generated ${embeddings.length} embeddings successfully`);
|
||||
}
|
||||
|
||||
return embeddings;
|
||||
} catch (error) {
|
||||
console.error('Embedding generation error:', error);
|
||||
@@ -114,6 +139,30 @@ class TransformersEmbedder {
|
||||
window.transformersEmbedder = new TransformersEmbedder();
|
||||
console.log('📦 TransformersEmbedder instance created');
|
||||
|
||||
// Global progress update functions
|
||||
window.updateModelLoadingProgress = function(progress, status) {
|
||||
const progressBar = document.getElementById('model-loading-progress');
|
||||
const statusText = document.getElementById('model-loading-status');
|
||||
if (progressBar) {
|
||||
progressBar.style.width = progress + '%';
|
||||
progressBar.setAttribute('aria-valuenow', progress);
|
||||
}
|
||||
if (statusText) {
|
||||
statusText.textContent = status;
|
||||
}
|
||||
};
|
||||
|
||||
window.updateEmbeddingProgress = function(progress, status) {
|
||||
const progressBar = document.getElementById('embedding-progress');
|
||||
const statusText = document.getElementById('embedding-status');
|
||||
if (progressBar) {
|
||||
progressBar.style.width = progress + '%';
|
||||
progressBar.setAttribute('aria-valuenow', progress);
|
||||
}
|
||||
if (statusText) {
|
||||
statusText.textContent = status;
|
||||
}
|
||||
};
|
||||
|
||||
// Dash clientside callback functions
|
||||
window.dash_clientside = window.dash_clientside || {};
|
||||
@@ -121,28 +170,31 @@ console.log('🔧 Setting up window.dash_clientside.transformers');
|
||||
window.dash_clientside.transformers = {
|
||||
generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
|
||||
console.log('🚀 generateEmbeddings called with:', { nClicks, modelName, tokenizationMethod, textLength: textContent?.length });
|
||||
|
||||
|
||||
if (!nClicks || !textContent || textContent.trim().length === 0) {
|
||||
console.log('⚠️ Early return - missing required parameters');
|
||||
return window.dash_clientside.no_update;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// Initialize model if needed
|
||||
const initResult = await window.transformersEmbedder.initializeModel(modelName);
|
||||
if (!initResult.success) {
|
||||
return [
|
||||
{ error: `Model loading error: ${initResult.error}` },
|
||||
{ error: initResult.error },
|
||||
`❌ Model loading error: ${initResult.error}`,
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Tokenize text based on method
|
||||
let textChunks;
|
||||
const trimmedText = textContent.trim();
|
||||
|
||||
|
||||
switch (tokenizationMethod) {
|
||||
case 'sentence':
|
||||
// Simple sentence splitting - can be enhanced with proper NLP
|
||||
textChunks = trimmedText
|
||||
.split(/[.!?]+/)
|
||||
.map(s => s.trim())
|
||||
@@ -163,24 +215,28 @@ window.dash_clientside.transformers = {
|
||||
default:
|
||||
textChunks = [trimmedText];
|
||||
}
|
||||
|
||||
|
||||
if (textChunks.length === 0) {
|
||||
return [
|
||||
{ error: 'No valid text chunks found after tokenization' },
|
||||
'❌ Error: No valid text chunks found after tokenization',
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Generate embeddings
|
||||
const embeddings = await window.transformersEmbedder.generateEmbeddings(textChunks);
|
||||
|
||||
|
||||
if (!embeddings || embeddings.length !== textChunks.length) {
|
||||
return [
|
||||
{ error: 'Embedding generation failed' },
|
||||
{ error: 'Embedding generation failed - mismatch in text chunks and embeddings' },
|
||||
'❌ Error: Embedding generation failed',
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Create documents structure
|
||||
const documents = textChunks.map((text, i) => ({
|
||||
id: `text_input_${Date.now()}_${i}`,
|
||||
@@ -190,36 +246,33 @@ window.dash_clientside.transformers = {
|
||||
subcategory: subcategory || "Generated",
|
||||
tags: []
|
||||
}));
|
||||
|
||||
// Return the successful embeddings data
|
||||
const embeddingsData = {
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
};
|
||||
|
||||
console.log('✅ Embeddings generated successfully:', embeddingsData);
|
||||
|
||||
|
||||
return [
|
||||
embeddingsData,
|
||||
{
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
},
|
||||
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
|
||||
"success",
|
||||
false
|
||||
];
|
||||
|
||||
|
||||
} catch (error) {
|
||||
console.error('Client-side embedding error:', error);
|
||||
return [
|
||||
{ error: error.message },
|
||||
`❌ Error: ${error.message}`,
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
console.log('✅ Transformers.js client-side setup complete');
|
||||
console.log('Available:', {
|
||||
transformersEmbedder: !!window.transformersEmbedder,
|
||||
dashClientside: !!window.dash_clientside,
|
||||
transformersModule: !!window.dash_clientside?.transformers,
|
||||
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings,
|
||||
processAsync: typeof window.processEmbeddingsAsync
|
||||
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings
|
||||
});
|
2
assets/fontawesome.css
vendored
2
assets/fontawesome.css
vendored
@@ -1,2 +0,0 @@
|
||||
/* Load Font Awesome from local assets */
|
||||
@import url("/assets/fontawesome/css/all.min.css");
|
@@ -1,165 +0,0 @@
|
||||
Fonticons, Inc. (https://fontawesome.com)
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Font Awesome Free License
|
||||
|
||||
Font Awesome Free is free, open source, and GPL friendly. You can use it for
|
||||
commercial projects, open source projects, or really almost whatever you want.
|
||||
Full Font Awesome Free license: https://fontawesome.com/license/free.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Icons: CC BY 4.0 License (https://creativecommons.org/licenses/by/4.0/)
|
||||
|
||||
The Font Awesome Free download is licensed under a Creative Commons
|
||||
Attribution 4.0 International License and applies to all icons packaged
|
||||
as SVG and JS file types.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Fonts: SIL OFL 1.1 License
|
||||
|
||||
In the Font Awesome Free download, the SIL OFL license applies to all icons
|
||||
packaged as web and desktop font files.
|
||||
|
||||
Copyright (c) 2023 Fonticons, Inc. (https://fontawesome.com)
|
||||
with Reserved Font Name: "Font Awesome".
|
||||
|
||||
This Font Software is licensed under the SIL Open Font License, Version 1.1.
|
||||
This license is copied below, and is also available with a FAQ at:
|
||||
http://scripts.sil.org/OFL
|
||||
|
||||
SIL OPEN FONT LICENSE
|
||||
Version 1.1 - 26 February 2007
|
||||
|
||||
PREAMBLE
|
||||
The goals of the Open Font License (OFL) are to stimulate worldwide
|
||||
development of collaborative font projects, to support the font creation
|
||||
efforts of academic and linguistic communities, and to provide a free and
|
||||
open framework in which fonts may be shared and improved in partnership
|
||||
with others.
|
||||
|
||||
The OFL allows the licensed fonts to be used, studied, modified and
|
||||
redistributed freely as long as they are not sold by themselves. The
|
||||
fonts, including any derivative works, can be bundled, embedded,
|
||||
redistributed and/or sold with any software provided that any reserved
|
||||
names are not used by derivative works. The fonts and derivatives,
|
||||
however, cannot be released under any other type of license. The
|
||||
requirement for fonts to remain under this license does not apply
|
||||
to any document created using the fonts or their derivatives.
|
||||
|
||||
DEFINITIONS
|
||||
"Font Software" refers to the set of files released by the Copyright
|
||||
Holder(s) under this license and clearly marked as such. This may
|
||||
include source files, build scripts and documentation.
|
||||
|
||||
"Reserved Font Name" refers to any names specified as such after the
|
||||
copyright statement(s).
|
||||
|
||||
"Original Version" refers to the collection of Font Software components as
|
||||
distributed by the Copyright Holder(s).
|
||||
|
||||
"Modified Version" refers to any derivative made by adding to, deleting,
|
||||
or substituting — in part or in whole — any of the components of the
|
||||
Original Version, by changing formats or by porting the Font Software to a
|
||||
new environment.
|
||||
|
||||
"Author" refers to any designer, engineer, programmer, technical
|
||||
writer or other person who contributed to the Font Software.
|
||||
|
||||
PERMISSION & CONDITIONS
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Font Software, to use, study, copy, merge, embed, modify,
|
||||
redistribute, and sell modified and unmodified copies of the Font
|
||||
Software, subject to the following conditions:
|
||||
|
||||
1) Neither the Font Software nor any of its individual components,
|
||||
in Original or Modified Versions, may be sold by itself.
|
||||
|
||||
2) Original or Modified Versions of the Font Software may be bundled,
|
||||
redistributed and/or sold with any software, provided that each copy
|
||||
contains the above copyright notice and this license. These can be
|
||||
included either as stand-alone text files, human-readable headers or
|
||||
in the appropriate machine-readable metadata fields within text or
|
||||
binary files as long as those fields can be easily viewed by the user.
|
||||
|
||||
3) No Modified Version of the Font Software may use the Reserved Font
|
||||
Name(s) unless explicit written permission is granted by the corresponding
|
||||
Copyright Holder. This restriction only applies to the primary font name as
|
||||
presented to the users.
|
||||
|
||||
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
|
||||
Software shall not be used to promote, endorse or advertise any
|
||||
Modified Version, except to acknowledge the contribution(s) of the
|
||||
Copyright Holder(s) and the Author(s) or with their explicit written
|
||||
permission.
|
||||
|
||||
5) The Font Software, modified or unmodified, in part or in whole,
|
||||
must be distributed entirely under this license, and must not be
|
||||
distributed under any other license. The requirement for fonts to
|
||||
remain under this license does not apply to any document created
|
||||
using the Font Software.
|
||||
|
||||
TERMINATION
|
||||
This license becomes null and void if any of the above conditions are
|
||||
not met.
|
||||
|
||||
DISCLAIMER
|
||||
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
|
||||
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
|
||||
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
|
||||
OTHER DEALINGS IN THE FONT SOFTWARE.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Code: MIT License (https://opensource.org/licenses/MIT)
|
||||
|
||||
In the Font Awesome Free download, the MIT license applies to all non-font and
|
||||
non-icon files.
|
||||
|
||||
Copyright 2023 Fonticons, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without limitation the rights to use, copy,
|
||||
modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
and to permit persons to whom the Software is furnished to do so, subject to the
|
||||
following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Attribution
|
||||
|
||||
Attribution is required by MIT, SIL OFL, and CC BY licenses. Downloaded Font
|
||||
Awesome Free files already contain embedded comments with sufficient
|
||||
attribution, so you shouldn't need to do anything additional when using these
|
||||
files normally.
|
||||
|
||||
We've kept attribution comments terse, so we ask that you do not actively work
|
||||
to remove them from files, especially code. They're a great way for folks to
|
||||
learn about Font Awesome.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
# Brand Icons
|
||||
|
||||
All brand icons are trademarks of their respective owners. The use of these
|
||||
trademarks does not indicate endorsement of the trademark holder by Font
|
||||
Awesome, nor vice versa. **Please do not use brand logos for any purpose except
|
||||
to represent the company, product, or service to which they refer.**
|
9
assets/fontawesome/css/all.min.css
vendored
9
assets/fontawesome/css/all.min.css
vendored
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -104,28 +104,17 @@ window.dash_clientside = window.dash_clientside || {};
|
||||
window.dash_clientside.transformers = {
|
||||
generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
|
||||
console.log('🚀 Client-side generateEmbeddings called');
|
||||
|
||||
|
||||
if (!nClicks || !textContent || textContent.trim().length === 0) {
|
||||
console.log('⚠️ Missing required parameters');
|
||||
return window.dash_clientside.no_update;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// Ensure Transformers.js is loaded
|
||||
if (!window.transformersLibraryLoaded) {
|
||||
const loaded = await initializeTransformers();
|
||||
if (!loaded) {
|
||||
return [
|
||||
{ error: 'Failed to load Transformers.js' },
|
||||
false
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
// Tokenize text
|
||||
let textChunks;
|
||||
const trimmedText = textContent.trim();
|
||||
|
||||
|
||||
switch (tokenizationMethod) {
|
||||
case 'sentence':
|
||||
textChunks = trimmedText.split(/[.!?]+/).map(s => s.trim()).filter(s => s.length > 0);
|
||||
@@ -139,50 +128,45 @@ window.dash_clientside.transformers = {
|
||||
default:
|
||||
textChunks = [trimmedText];
|
||||
}
|
||||
|
||||
|
||||
if (textChunks.length === 0) {
|
||||
return [
|
||||
{ error: 'No valid text chunks after tokenization' },
|
||||
false
|
||||
];
|
||||
throw new Error('No valid text chunks after tokenization');
|
||||
}
|
||||
|
||||
|
||||
// Generate embeddings
|
||||
const embeddings = await window.simpleEmbedder.generateEmbeddings(textChunks, modelName);
|
||||
|
||||
|
||||
// Create documents
|
||||
const documents = textChunks.map((text, i) => ({
|
||||
id: `text_input_${Date.now()}_${i}`,
|
||||
text: text,
|
||||
embedding: embeddings[i],
|
||||
category: category || "Text Input",
|
||||
subcategory: subcategory || "Generated",
|
||||
subcategory: subcategory || "Generated",
|
||||
tags: []
|
||||
}));
|
||||
|
||||
// Return the successful embeddings data
|
||||
const embeddingsData = {
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
};
|
||||
|
||||
console.log('✅ Embeddings generated successfully:', embeddingsData);
|
||||
|
||||
|
||||
return [
|
||||
embeddingsData,
|
||||
{
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
},
|
||||
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
|
||||
"success",
|
||||
false
|
||||
];
|
||||
|
||||
|
||||
} catch (error) {
|
||||
console.error('❌ Error generating embeddings:', error);
|
||||
return [
|
||||
{ error: error.message },
|
||||
`❌ Error: ${error.message}`,
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
console.log('✅ Simple Transformers.js setup complete');
|
||||
console.log('Available functions:', Object.keys(window.dash_clientside.transformers));
|
Binary file not shown.
Before Width: | Height: | Size: 844 KiB After Width: | Height: | Size: 339 KiB |
File diff suppressed because one or more lines are too long
10
main.py
Normal file
10
main.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from src.embeddingbuddy.app import create_app, run_app
|
||||
|
||||
|
||||
def main():
|
||||
app = create_app()
|
||||
run_app(app)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
1
prompts-raw.ndjson
Normal file
1
prompts-raw.ndjson
Normal file
File diff suppressed because one or more lines are too long
64
prompts.ndjson
Normal file
64
prompts.ndjson
Normal file
File diff suppressed because one or more lines are too long
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "embeddingbuddy"
|
||||
version = "0.8.0"
|
||||
version = "0.4.0"
|
||||
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
@@ -17,10 +17,6 @@ dependencies = [
|
||||
"opensearch-py>=3.0.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
embeddingbuddy = "embeddingbuddy.cli:main"
|
||||
embeddingbuddy-serve = "embeddingbuddy.app:serve"
|
||||
|
||||
[project.optional-dependencies]
|
||||
test = [
|
||||
"pytest>=8.4.1",
|
||||
|
26
run_dev.py
Normal file
26
run_dev.py
Normal file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Development runner with auto-reload enabled.
|
||||
This runs the Dash development server with hot reloading.
|
||||
"""
|
||||
import os
|
||||
from src.embeddingbuddy.app import create_app, run_app
|
||||
|
||||
def main():
|
||||
"""Run the application in development mode with auto-reload."""
|
||||
# Force development settings
|
||||
os.environ["EMBEDDINGBUDDY_ENV"] = "development"
|
||||
os.environ["EMBEDDINGBUDDY_DEBUG"] = "true"
|
||||
|
||||
print("🚀 Starting EmbeddingBuddy in development mode...")
|
||||
print("📁 Auto-reload enabled - changes will trigger restart")
|
||||
print("🌐 Server will be available at http://127.0.0.1:8050")
|
||||
print("⏹️ Press Ctrl+C to stop")
|
||||
|
||||
app = create_app()
|
||||
|
||||
# Run with development server (includes auto-reload when debug=True)
|
||||
run_app(app, debug=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
49
run_prod.py
Normal file
49
run_prod.py
Normal file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Production runner using Gunicorn WSGI server.
|
||||
This provides better performance and stability for production deployments.
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from src.embeddingbuddy.config.settings import AppSettings
|
||||
|
||||
def main():
|
||||
"""Run the application in production mode with Gunicorn."""
|
||||
# Force production settings
|
||||
os.environ["EMBEDDINGBUDDY_ENV"] = "production"
|
||||
os.environ["EMBEDDINGBUDDY_DEBUG"] = "false"
|
||||
|
||||
print("🚀 Starting EmbeddingBuddy in production mode...")
|
||||
print(f"⚙️ Workers: {AppSettings.GUNICORN_WORKERS}")
|
||||
print(f"🌐 Server will be available at http://{AppSettings.GUNICORN_BIND}")
|
||||
print("⏹️ Press Ctrl+C to stop")
|
||||
|
||||
# Gunicorn command
|
||||
cmd = [
|
||||
"gunicorn",
|
||||
"--workers", str(AppSettings.GUNICORN_WORKERS),
|
||||
"--bind", AppSettings.GUNICORN_BIND,
|
||||
"--timeout", str(AppSettings.GUNICORN_TIMEOUT),
|
||||
"--keepalive", str(AppSettings.GUNICORN_KEEPALIVE),
|
||||
"--access-logfile", "-",
|
||||
"--error-logfile", "-",
|
||||
"--log-level", "info",
|
||||
"wsgi:application"
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(cmd, check=True)
|
||||
except KeyboardInterrupt:
|
||||
print("\n🛑 Shutting down...")
|
||||
sys.exit(0)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"❌ Error running Gunicorn: {e}")
|
||||
sys.exit(1)
|
||||
except FileNotFoundError:
|
||||
print("❌ Gunicorn not found. Install it with: uv add gunicorn")
|
||||
print("💡 Or run in development mode with: python run_dev.py")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,53 +1,21 @@
|
||||
"""
|
||||
EmbeddingBuddy application factory and server functions.
|
||||
|
||||
This module contains the main application creation logic with imports
|
||||
moved inside functions to avoid loading heavy dependencies at module level.
|
||||
"""
|
||||
import dash
|
||||
import dash_bootstrap_components as dbc
|
||||
from .config.settings import AppSettings
|
||||
from .ui.layout import AppLayout
|
||||
from .ui.callbacks.data_processing import DataProcessingCallbacks
|
||||
from .ui.callbacks.visualization import VisualizationCallbacks
|
||||
from .ui.callbacks.interactions import InteractionCallbacks
|
||||
|
||||
|
||||
def create_app():
|
||||
"""Create and configure the Dash application instance."""
|
||||
import os
|
||||
import dash
|
||||
import dash_bootstrap_components as dbc
|
||||
from .ui.layout import AppLayout
|
||||
from .ui.callbacks.data_processing import DataProcessingCallbacks
|
||||
from .ui.callbacks.visualization import VisualizationCallbacks
|
||||
from .ui.callbacks.interactions import InteractionCallbacks
|
||||
|
||||
# Get the project root directory (two levels up from this file)
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
||||
assets_path = os.path.join(project_root, "assets")
|
||||
|
||||
app = dash.Dash(
|
||||
__name__,
|
||||
title="EmbeddingBuddy",
|
||||
external_stylesheets=[
|
||||
dbc.themes.BOOTSTRAP,
|
||||
],
|
||||
assets_folder=assets_path,
|
||||
meta_tags=[
|
||||
{
|
||||
"name": "description",
|
||||
"content": "Interactive embedding visualization tool for exploring high-dimensional vectors through dimensionality reduction techniques like PCA, t-SNE, and UMAP.",
|
||||
},
|
||||
{"name": "author", "content": "EmbeddingBuddy"},
|
||||
{
|
||||
"name": "keywords",
|
||||
"content": "embeddings, visualization, dimensionality reduction, PCA, t-SNE, UMAP, machine learning, data science",
|
||||
},
|
||||
{"name": "viewport", "content": "width=device-width, initial-scale=1.0"},
|
||||
{
|
||||
"property": "og:title",
|
||||
"content": "EmbeddingBuddy - Interactive Embedding Visualization",
|
||||
},
|
||||
{
|
||||
"property": "og:description",
|
||||
"content": "Explore and visualize embedding vectors through interactive 2D/3D plots with multiple dimensionality reduction techniques.",
|
||||
},
|
||||
{"property": "og:type", "content": "website"},
|
||||
],
|
||||
__name__, external_stylesheets=[dbc.themes.BOOTSTRAP], assets_folder=assets_path
|
||||
)
|
||||
|
||||
# Allow callbacks to components that are dynamically created in tabs
|
||||
@@ -77,22 +45,22 @@ def _register_client_side_callbacks(app):
|
||||
if (!nClicks || !textContent || !textContent.trim()) {
|
||||
return window.dash_clientside.no_update;
|
||||
}
|
||||
|
||||
|
||||
console.log('🔍 Checking for Transformers.js...');
|
||||
console.log('window.dash_clientside:', typeof window.dash_clientside);
|
||||
console.log('window.dash_clientside.transformers:', typeof window.dash_clientside?.transformers);
|
||||
console.log('generateEmbeddings function:', typeof window.dash_clientside?.transformers?.generateEmbeddings);
|
||||
|
||||
if (typeof window.dash_clientside !== 'undefined' &&
|
||||
|
||||
if (typeof window.dash_clientside !== 'undefined' &&
|
||||
typeof window.dash_clientside.transformers !== 'undefined' &&
|
||||
typeof window.dash_clientside.transformers.generateEmbeddings === 'function') {
|
||||
|
||||
|
||||
console.log('✅ Calling Transformers.js generateEmbeddings...');
|
||||
return window.dash_clientside.transformers.generateEmbeddings(
|
||||
nClicks, textContent, modelName, tokenizationMethod, category, subcategory
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// More detailed error information
|
||||
let errorMsg = '❌ Transformers.js not available. ';
|
||||
if (typeof window.dash_clientside === 'undefined') {
|
||||
@@ -102,17 +70,21 @@ def _register_client_side_callbacks(app):
|
||||
} else if (typeof window.dash_clientside.transformers.generateEmbeddings !== 'function') {
|
||||
errorMsg += 'generateEmbeddings function not found.';
|
||||
}
|
||||
|
||||
|
||||
console.error(errorMsg);
|
||||
|
||||
|
||||
return [
|
||||
{ error: 'Transformers.js not loaded. Please refresh the page and try again.' },
|
||||
errorMsg + ' Please refresh the page.',
|
||||
'danger',
|
||||
false
|
||||
];
|
||||
}
|
||||
""",
|
||||
[
|
||||
Output("embeddings-generated-trigger", "data"),
|
||||
Output("text-input-status-immediate", "children"),
|
||||
Output("text-input-status-immediate", "color"),
|
||||
Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
|
||||
],
|
||||
[Input("generate-embeddings-btn", "n_clicks")],
|
||||
@@ -129,9 +101,6 @@ def _register_client_side_callbacks(app):
|
||||
|
||||
|
||||
def run_app(app=None, debug=None, host=None, port=None):
|
||||
"""Run the Dash application with specified settings."""
|
||||
from .config.settings import AppSettings
|
||||
|
||||
if app is None:
|
||||
app = create_app()
|
||||
|
||||
@@ -142,68 +111,6 @@ def run_app(app=None, debug=None, host=None, port=None):
|
||||
)
|
||||
|
||||
|
||||
def serve(host=None, port=None, dev=False, debug=False):
|
||||
"""Start the EmbeddingBuddy web server.
|
||||
|
||||
Args:
|
||||
host: Host to bind to (default: 127.0.0.1)
|
||||
port: Port to bind to (default: 8050)
|
||||
dev: Development mode - enable debug logging and auto-reload (default: False)
|
||||
debug: Enable debug logging only, no auto-reload (default: False)
|
||||
"""
|
||||
import os
|
||||
from .config.settings import AppSettings
|
||||
|
||||
# Determine actual values to use
|
||||
actual_host = host if host is not None else AppSettings.HOST
|
||||
actual_port = port if port is not None else AppSettings.PORT
|
||||
|
||||
# Determine mode
|
||||
# --dev takes precedence and enables both debug and auto-reload
|
||||
# --debug enables only debug logging
|
||||
# No flags = production mode (no debug, no auto-reload)
|
||||
use_reloader = dev
|
||||
use_debug = dev or debug
|
||||
|
||||
# Only print startup messages in main process (not in Flask reloader)
|
||||
if not os.environ.get("WERKZEUG_RUN_MAIN"):
|
||||
mode = "development" if dev else ("debug" if debug else "production")
|
||||
print(f"Starting EmbeddingBuddy in {mode} mode...")
|
||||
print("Loading dependencies (this may take a few seconds)...")
|
||||
print(f"Server will start at http://{actual_host}:{actual_port}")
|
||||
if use_reloader:
|
||||
print("Auto-reload enabled - server will restart on code changes")
|
||||
|
||||
app = create_app()
|
||||
|
||||
# Suppress Flask development server warning in production mode
|
||||
if not use_debug and not use_reloader:
|
||||
import warnings
|
||||
import logging
|
||||
|
||||
# Suppress the werkzeug warning
|
||||
warnings.filterwarnings("ignore", message=".*development server.*")
|
||||
|
||||
# Set werkzeug logger to ERROR level to suppress the warning
|
||||
werkzeug_logger = logging.getLogger("werkzeug")
|
||||
werkzeug_logger.setLevel(logging.ERROR)
|
||||
|
||||
# Use Flask's built-in server with appropriate settings
|
||||
app.run(
|
||||
debug=use_debug, host=actual_host, port=actual_port, use_reloader=use_reloader
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
"""Legacy entry point - redirects to cli module.
|
||||
|
||||
This is kept for backward compatibility but the main CLI
|
||||
is now in embeddingbuddy.cli for faster startup.
|
||||
"""
|
||||
from .cli import main as cli_main
|
||||
|
||||
cli_main()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
app = create_app()
|
||||
run_app(app)
|
||||
|
@@ -1,67 +0,0 @@
|
||||
"""
|
||||
Lightweight CLI entry point for EmbeddingBuddy.
|
||||
|
||||
This module provides a fast command-line interface that only imports
|
||||
heavy dependencies when actually needed by subcommands.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
def main():
|
||||
"""Main CLI entry point with minimal imports for fast help text."""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="embeddingbuddy",
|
||||
description="EmbeddingBuddy - Interactive embedding visualization tool",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
embeddingbuddy serve # Production mode (no debug, no auto-reload)
|
||||
embeddingbuddy serve --dev # Development mode (debug + auto-reload)
|
||||
embeddingbuddy serve --debug # Debug logging only (no auto-reload)
|
||||
embeddingbuddy serve --port 8080 # Custom port
|
||||
embeddingbuddy serve --host 0.0.0.0 # Bind to all interfaces
|
||||
""",
|
||||
)
|
||||
|
||||
subparsers = parser.add_subparsers(
|
||||
dest="command", help="Available commands", metavar="<command>"
|
||||
)
|
||||
|
||||
# Serve subcommand
|
||||
serve_parser = subparsers.add_parser(
|
||||
"serve",
|
||||
help="Start the web server",
|
||||
description="Start the EmbeddingBuddy web server for interactive visualization",
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--host", default=None, help="Host to bind to (default: 127.0.0.1)"
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--port", type=int, default=None, help="Port to bind to (default: 8050)"
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--dev",
|
||||
action="store_true",
|
||||
help="Development mode: enable debug logging and auto-reload",
|
||||
)
|
||||
serve_parser.add_argument(
|
||||
"--debug", action="store_true", help="Enable debug logging (no auto-reload)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "serve":
|
||||
# Only import heavy dependencies when actually running serve
|
||||
from embeddingbuddy.app import serve
|
||||
|
||||
serve(host=args.host, port=args.port, dev=args.dev, debug=args.debug)
|
||||
else:
|
||||
# No command specified, show help
|
||||
parser.print_help()
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -69,15 +69,13 @@ class AppSettings:
|
||||
TEXT_PREVIEW_LENGTH = 100
|
||||
|
||||
# App Configuration
|
||||
DEBUG = os.getenv("EMBEDDINGBUDDY_DEBUG", "False").lower() == "true"
|
||||
DEBUG = os.getenv("EMBEDDINGBUDDY_DEBUG", "True").lower() == "true"
|
||||
HOST = os.getenv("EMBEDDINGBUDDY_HOST", "127.0.0.1")
|
||||
PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))
|
||||
|
||||
|
||||
# Environment Configuration
|
||||
ENVIRONMENT = os.getenv(
|
||||
"EMBEDDINGBUDDY_ENV", "development"
|
||||
) # development, production
|
||||
|
||||
ENVIRONMENT = os.getenv("EMBEDDINGBUDDY_ENV", "development") # development, production
|
||||
|
||||
# WSGI Server Configuration (for production)
|
||||
GUNICORN_WORKERS = int(os.getenv("GUNICORN_WORKERS", "4"))
|
||||
GUNICORN_BIND = os.getenv("GUNICORN_BIND", f"{HOST}:{PORT}")
|
||||
@@ -85,9 +83,6 @@ class AppSettings:
|
||||
GUNICORN_KEEPALIVE = int(os.getenv("GUNICORN_KEEPALIVE", "5"))
|
||||
|
||||
# OpenSearch Configuration
|
||||
OPENSEARCH_ENABLED = (
|
||||
os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "True").lower() == "true"
|
||||
)
|
||||
OPENSEARCH_DEFAULT_SIZE = 100
|
||||
OPENSEARCH_SAMPLE_SIZE = 5
|
||||
OPENSEARCH_CONNECTION_TIMEOUT = 30
|
||||
|
@@ -82,23 +82,19 @@ class DataProcessingCallbacks:
|
||||
)
|
||||
def render_tab_content(active_tab):
|
||||
from ...ui.components.datasource import DataSourceComponent
|
||||
from ...config.settings import AppSettings
|
||||
|
||||
datasource = DataSourceComponent()
|
||||
|
||||
if active_tab == "opensearch-tab" and AppSettings.OPENSEARCH_ENABLED:
|
||||
if active_tab == "opensearch-tab":
|
||||
return [datasource.create_opensearch_tab()]
|
||||
elif active_tab == "text-input-tab":
|
||||
return [datasource.create_text_input_tab()]
|
||||
else:
|
||||
return [datasource.create_file_upload_tab()]
|
||||
|
||||
# Register callbacks for both data and prompts sections (only if OpenSearch is enabled)
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
self._register_opensearch_callbacks("data", self.opensearch_client_data)
|
||||
self._register_opensearch_callbacks(
|
||||
"prompts", self.opensearch_client_prompts
|
||||
)
|
||||
# Register callbacks for both data and prompts sections
|
||||
self._register_opensearch_callbacks("data", self.opensearch_client_data)
|
||||
self._register_opensearch_callbacks("prompts", self.opensearch_client_prompts)
|
||||
|
||||
# Register collapsible section callbacks
|
||||
self._register_collapse_callbacks()
|
||||
@@ -625,12 +621,6 @@ class DataProcessingCallbacks:
|
||||
if not embeddings_data:
|
||||
return no_update, no_update, no_update, no_update, no_update
|
||||
|
||||
# Check if this is a request trigger (contains textContent) vs actual embeddings data
|
||||
if isinstance(embeddings_data, dict) and "textContent" in embeddings_data:
|
||||
# This is a processing request trigger, not the actual results
|
||||
# The JavaScript will handle the async processing and update the UI directly
|
||||
return no_update, no_update, no_update, no_update, no_update
|
||||
|
||||
processed_data = self.processor.process_client_embeddings(embeddings_data)
|
||||
|
||||
if processed_data.error:
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import dash
|
||||
from dash import callback, Input, Output
|
||||
from dash import callback, Input, Output, State, html
|
||||
import dash_bootstrap_components as dbc
|
||||
|
||||
|
||||
class InteractionCallbacks:
|
||||
@@ -8,25 +9,74 @@ class InteractionCallbacks:
|
||||
|
||||
def _register_callbacks(self):
|
||||
@callback(
|
||||
Output("about-modal", "is_open"),
|
||||
[Input("about-button", "n_clicks"), Input("about-modal-close", "n_clicks")],
|
||||
prevent_initial_call=True,
|
||||
Output("point-details", "children"),
|
||||
Input("embedding-plot", "clickData"),
|
||||
[State("processed-data", "data"), State("processed-prompts", "data")],
|
||||
)
|
||||
def toggle_about_modal(about_clicks, close_clicks):
|
||||
if about_clicks or close_clicks:
|
||||
return True if about_clicks else False
|
||||
return False
|
||||
def display_click_data(clickData, data, prompts_data):
|
||||
if not clickData or not data:
|
||||
return "Click on a point to see details"
|
||||
|
||||
point_data = clickData["points"][0]
|
||||
trace_name = point_data.get("fullData", {}).get("name", "Documents")
|
||||
|
||||
if "pointIndex" in point_data:
|
||||
point_index = point_data["pointIndex"]
|
||||
elif "pointNumber" in point_data:
|
||||
point_index = point_data["pointNumber"]
|
||||
else:
|
||||
return "Could not identify clicked point"
|
||||
|
||||
if (
|
||||
trace_name.startswith("Prompts")
|
||||
and prompts_data
|
||||
and "prompts" in prompts_data
|
||||
):
|
||||
item = prompts_data["prompts"][point_index]
|
||||
item_type = "Prompt"
|
||||
else:
|
||||
item = data["documents"][point_index]
|
||||
item_type = "Document"
|
||||
|
||||
return self._create_detail_card(item, item_type)
|
||||
|
||||
@callback(
|
||||
[
|
||||
Output("processed-data", "data", allow_duplicate=True),
|
||||
Output("processed-prompts", "data", allow_duplicate=True),
|
||||
Output("point-details", "children", allow_duplicate=True),
|
||||
],
|
||||
Input("reset-button", "n_clicks"),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def reset_data(n_clicks):
|
||||
if n_clicks is None or n_clicks == 0:
|
||||
return dash.no_update, dash.no_update
|
||||
return dash.no_update, dash.no_update, dash.no_update
|
||||
|
||||
return None, None
|
||||
return None, None, "Click on a point to see details"
|
||||
|
||||
@staticmethod
|
||||
def _create_detail_card(item, item_type):
|
||||
return dbc.Card(
|
||||
[
|
||||
dbc.CardBody(
|
||||
[
|
||||
html.H5(f"{item_type}: {item['id']}", className="card-title"),
|
||||
html.P(f"Text: {item['text']}", className="card-text"),
|
||||
html.P(
|
||||
f"Category: {item.get('category', 'Unknown')}",
|
||||
className="card-text",
|
||||
),
|
||||
html.P(
|
||||
f"Subcategory: {item.get('subcategory', 'Unknown')}",
|
||||
className="card-text",
|
||||
),
|
||||
html.P(
|
||||
f"Tags: {', '.join(item.get('tags', [])) if item.get('tags') else 'None'}",
|
||||
className="card-text",
|
||||
),
|
||||
html.P(f"Type: {item_type}", className="card-text text-muted"),
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
|
@@ -1,90 +0,0 @@
|
||||
from dash import html, dcc
|
||||
import dash_bootstrap_components as dbc
|
||||
|
||||
|
||||
class AboutComponent:
|
||||
def _get_about_content(self):
|
||||
return """
|
||||
# 🔍 Interactive Embedding Vector Visualization
|
||||
|
||||
EmbeddingBuddy is a web application for interactive exploration and
|
||||
visualization of embedding vectors through dimensionality reduction techniques
|
||||
(PCA, t-SNE, UMAP).
|
||||
|
||||
You have two ways to get started:
|
||||
|
||||
1. Generate embeddings directly in the browser if it supports WebGPU.
|
||||
2. Upload your NDJSON file containing embedding vectors and metadata.
|
||||
|
||||
## Generating Embeddings in Browser
|
||||
|
||||
1. Expand the "Generate Embeddings" section.
|
||||
2. Input your text data (one entry per line).
|
||||
1. Optionally you can use the built in sample data by clicking "Load Sample Data" button.
|
||||
3. Click "Generate Embeddings" to create vectors using a pre-trained model.
|
||||
|
||||
## NDJSON File Format
|
||||
|
||||
```json
|
||||
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, ...], "text": "Sample text content", "category": "news", "subcategory": "politics", "tags": ["election", "politics"]}
|
||||
{"id": "doc_002", "embedding": [0.2, -0.1, 0.9, ...], "text": "Another example", "category": "review", "subcategory": "product", "tags": ["tech", "gadget"]}
|
||||
```
|
||||
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- Drag-and-drop NDJSON file upload
|
||||
- Multiple dimensionality reduction algorithms
|
||||
- 2D/3D interactive plots with Plotly
|
||||
- Color coding by categories, subcategories, or tags
|
||||
- In-browser embedding generation
|
||||
- OpenSearch integration for data loading
|
||||
|
||||
## 🔧 Supported Algorithms
|
||||
|
||||
- **PCA** (Principal Component Analysis)
|
||||
- **t-SNE** (t-Distributed Stochastic Neighbor Embedding)
|
||||
- **UMAP** (Uniform Manifold Approximation and Projection)
|
||||
|
||||
---
|
||||
|
||||
📂 [View on GitHub](https://github.com/godber/EmbeddingBuddy)
|
||||
|
||||
*Built with: Python, Dash, Plotly, scikit-learn, OpenTSNE, UMAP*
|
||||
""".strip()
|
||||
|
||||
def create_about_modal(self):
|
||||
return dbc.Modal(
|
||||
[
|
||||
dbc.ModalHeader(
|
||||
dbc.ModalTitle("Welcome to EmbeddingBuddy"),
|
||||
close_button=True,
|
||||
),
|
||||
dbc.ModalBody(
|
||||
[dcc.Markdown(self._get_about_content(), className="mb-0")]
|
||||
),
|
||||
dbc.ModalFooter(
|
||||
[
|
||||
dbc.Button(
|
||||
"Close",
|
||||
id="about-modal-close",
|
||||
color="secondary",
|
||||
n_clicks=0,
|
||||
)
|
||||
]
|
||||
),
|
||||
],
|
||||
id="about-modal",
|
||||
is_open=True,
|
||||
size="lg",
|
||||
)
|
||||
|
||||
def create_about_button(self):
|
||||
return dbc.Button(
|
||||
[html.I(className="fas fa-info-circle me-2"), "About"],
|
||||
id="about-button",
|
||||
color="outline-info",
|
||||
size="sm",
|
||||
n_clicks=0,
|
||||
className="ms-2",
|
||||
)
|
@@ -1,27 +1,26 @@
|
||||
from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from embeddingbuddy.config.settings import AppSettings
|
||||
from .textinput import TextInputComponent
|
||||
|
||||
|
||||
class DataSourceComponent:
|
||||
def __init__(self):
|
||||
self.upload_component = UploadComponent()
|
||||
self.text_input_component = TextInputComponent()
|
||||
|
||||
def create_tabbed_interface(self):
|
||||
"""Create tabbed interface for different data sources."""
|
||||
tabs = [dbc.Tab(label="File Upload", tab_id="file-tab")]
|
||||
|
||||
# Only add OpenSearch tab if enabled
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
tabs.append(dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"))
|
||||
|
||||
return dbc.Card(
|
||||
[
|
||||
dbc.CardHeader(
|
||||
[
|
||||
dbc.Tabs(
|
||||
tabs,
|
||||
[
|
||||
dbc.Tab(label="File Upload", tab_id="file-tab"),
|
||||
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
|
||||
dbc.Tab(label="Text Input", tab_id="text-input-tab"),
|
||||
],
|
||||
id="data-source-tabs",
|
||||
active_tab="file-tab",
|
||||
)
|
||||
@@ -212,6 +211,10 @@ class DataSourceComponent:
|
||||
]
|
||||
)
|
||||
|
||||
def create_text_input_tab(self):
|
||||
"""Create text input tab content for browser-based embedding generation."""
|
||||
return html.Div([self.text_input_component.create_text_input_interface()])
|
||||
|
||||
def _create_opensearch_section(self, section_type):
|
||||
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""
|
||||
section_id = section_type # 'data' or 'prompts'
|
||||
|
@@ -2,27 +2,31 @@ from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from .datasource import DataSourceComponent
|
||||
from .textinput import TextInputComponent
|
||||
from embeddingbuddy.config.settings import AppSettings
|
||||
|
||||
|
||||
class SidebarComponent:
|
||||
def __init__(self):
|
||||
self.upload_component = UploadComponent()
|
||||
self.datasource_component = DataSourceComponent()
|
||||
self.textinput_component = TextInputComponent()
|
||||
|
||||
def create_layout(self):
|
||||
return dbc.Col(
|
||||
[
|
||||
dbc.Accordion(
|
||||
[
|
||||
self._create_data_sources_item(),
|
||||
self._create_generate_embeddings_item(),
|
||||
self._create_visualization_controls_item(),
|
||||
],
|
||||
always_open=True,
|
||||
)
|
||||
html.H5("Data Sources", className="mb-3"),
|
||||
self.datasource_component.create_error_alert(),
|
||||
self.datasource_component.create_success_alert(),
|
||||
self.datasource_component.create_tabbed_interface(),
|
||||
html.H5("Visualization Controls", className="mb-3 mt-4"),
|
||||
]
|
||||
+ self._create_method_dropdown()
|
||||
+ self._create_color_dropdown()
|
||||
+ self._create_dimension_toggle()
|
||||
+ self._create_prompts_toggle()
|
||||
+ [
|
||||
html.H5("Point Details", className="mb-3"),
|
||||
html.Div(
|
||||
id="point-details", children="Click on a point to see details"
|
||||
),
|
||||
],
|
||||
width=3,
|
||||
style={"padding-right": "20px"},
|
||||
@@ -82,67 +86,3 @@ class SidebarComponent:
|
||||
style={"margin-bottom": "20px"},
|
||||
),
|
||||
]
|
||||
|
||||
def _create_generate_embeddings_item(self):
|
||||
return dbc.AccordionItem(
|
||||
[
|
||||
self.textinput_component.create_text_input_interface(),
|
||||
],
|
||||
title=html.Span(
|
||||
[
|
||||
"Generate Embeddings ",
|
||||
html.I(
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="generate-embeddings-info-icon",
|
||||
title="Create new embeddings from text input using various in-browser models",
|
||||
),
|
||||
]
|
||||
),
|
||||
item_id="generate-embeddings-accordion",
|
||||
)
|
||||
|
||||
def _create_data_sources_item(self):
|
||||
tooltip_text = "Load existing embeddings: upload files"
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
tooltip_text += " or read from OpenSearch"
|
||||
|
||||
return dbc.AccordionItem(
|
||||
[
|
||||
self.datasource_component.create_error_alert(),
|
||||
self.datasource_component.create_success_alert(),
|
||||
self.datasource_component.create_tabbed_interface(),
|
||||
],
|
||||
title=html.Span(
|
||||
[
|
||||
"Load Embeddings ",
|
||||
html.I(
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="load-embeddings-info-icon",
|
||||
title=tooltip_text,
|
||||
),
|
||||
]
|
||||
),
|
||||
item_id="data-sources-accordion",
|
||||
)
|
||||
|
||||
def _create_visualization_controls_item(self):
|
||||
return dbc.AccordionItem(
|
||||
self._create_method_dropdown()
|
||||
+ self._create_color_dropdown()
|
||||
+ self._create_dimension_toggle()
|
||||
+ self._create_prompts_toggle(),
|
||||
title=html.Span(
|
||||
[
|
||||
"Visualization Controls ",
|
||||
html.I(
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="visualization-controls-info-icon",
|
||||
title="Configure plot settings: select dimensionality reduction method, colors, and display options",
|
||||
),
|
||||
]
|
||||
),
|
||||
item_id="visualization-controls-accordion",
|
||||
)
|
||||
|
@@ -16,20 +16,23 @@ class TextInputComponent:
|
||||
"""Create the complete text input interface with model selection and processing options."""
|
||||
return html.Div(
|
||||
[
|
||||
# Model selection section
|
||||
self._create_model_selection(),
|
||||
html.Hr(),
|
||||
# Text input section
|
||||
self._create_text_input_area(),
|
||||
# Text action buttons
|
||||
self._create_text_action_buttons(),
|
||||
html.Hr(),
|
||||
# Model selection section
|
||||
self._create_model_selection(),
|
||||
html.Hr(),
|
||||
# Processing options
|
||||
self._create_processing_options(),
|
||||
html.Hr(),
|
||||
# Generation controls
|
||||
self._create_generation_controls(),
|
||||
html.Hr(),
|
||||
# Progress indicators
|
||||
self._create_progress_indicators(),
|
||||
html.Hr(),
|
||||
# Status and results
|
||||
self._create_status_section(),
|
||||
# Hidden components for data flow
|
||||
@@ -294,10 +297,65 @@ class TextInputComponent:
|
||||
]
|
||||
)
|
||||
|
||||
def _create_progress_indicators(self):
|
||||
"""Create progress bars for model loading and embedding generation."""
|
||||
return html.Div(
|
||||
[
|
||||
# Model loading progress
|
||||
html.Div(
|
||||
[
|
||||
html.H6("Model Loading Progress", className="mb-2"),
|
||||
dbc.Progress(
|
||||
id="model-loading-progress",
|
||||
value=0,
|
||||
striped=True,
|
||||
animated=True,
|
||||
className="mb-2",
|
||||
),
|
||||
html.Small(
|
||||
id="model-loading-status",
|
||||
children="No model loading in progress",
|
||||
className="text-muted",
|
||||
),
|
||||
],
|
||||
id="model-loading-section",
|
||||
style={"display": "none"},
|
||||
),
|
||||
html.Br(),
|
||||
# Embedding generation progress
|
||||
html.Div(
|
||||
[
|
||||
html.H6("Embedding Generation Progress", className="mb-2"),
|
||||
dbc.Progress(
|
||||
id="embedding-progress",
|
||||
value=0,
|
||||
striped=True,
|
||||
animated=True,
|
||||
className="mb-2",
|
||||
),
|
||||
html.Small(
|
||||
id="embedding-status",
|
||||
children="No embedding generation in progress",
|
||||
className="text-muted",
|
||||
),
|
||||
],
|
||||
id="embedding-progress-section",
|
||||
style={"display": "none"},
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
def _create_status_section(self):
|
||||
"""Create status alerts and results preview."""
|
||||
return html.Div(
|
||||
[
|
||||
# Immediate status (from client-side)
|
||||
dbc.Alert(
|
||||
id="text-input-status-immediate",
|
||||
children="Ready to generate embeddings",
|
||||
color="light",
|
||||
className="mb-3",
|
||||
),
|
||||
# Server-side status
|
||||
dbc.Alert(
|
||||
id="text-input-status",
|
||||
|
@@ -5,75 +5,39 @@ import dash_bootstrap_components as dbc
|
||||
class UploadComponent:
|
||||
@staticmethod
|
||||
def create_data_upload():
|
||||
return html.Div(
|
||||
[
|
||||
dcc.Upload(
|
||||
id="upload-data",
|
||||
children=html.Div(
|
||||
[
|
||||
"Upload Data ",
|
||||
html.I(
|
||||
className="fas fa-info-circle",
|
||||
style={"color": "#6c757d", "fontSize": "14px"},
|
||||
id="data-upload-info",
|
||||
),
|
||||
]
|
||||
),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
},
|
||||
multiple=False,
|
||||
),
|
||||
dbc.Tooltip(
|
||||
"Click here or drag and drop NDJSON files containing document embeddings",
|
||||
target="data-upload-info",
|
||||
placement="top",
|
||||
),
|
||||
]
|
||||
return dcc.Upload(
|
||||
id="upload-data",
|
||||
children=html.Div(["Drag and Drop or ", html.A("Select Files")]),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
},
|
||||
multiple=False,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def create_prompts_upload():
|
||||
return html.Div(
|
||||
[
|
||||
dcc.Upload(
|
||||
id="upload-prompts",
|
||||
children=html.Div(
|
||||
[
|
||||
"Upload Prompts ",
|
||||
html.I(
|
||||
className="fas fa-info-circle",
|
||||
style={"color": "#6c757d", "fontSize": "14px"},
|
||||
id="prompts-upload-info",
|
||||
),
|
||||
]
|
||||
),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
"borderColor": "#28a745",
|
||||
},
|
||||
multiple=False,
|
||||
),
|
||||
dbc.Tooltip(
|
||||
"Click here or drag and drop NDJSON files containing prompt embeddings",
|
||||
target="prompts-upload-info",
|
||||
placement="top",
|
||||
),
|
||||
]
|
||||
return dcc.Upload(
|
||||
id="upload-prompts",
|
||||
children=html.Div(["Drag and Drop Prompts or ", html.A("Select Files")]),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
"borderColor": "#28a745",
|
||||
},
|
||||
multiple=False,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
@@ -1,19 +1,16 @@
|
||||
from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .components.sidebar import SidebarComponent
|
||||
from .components.about import AboutComponent
|
||||
|
||||
|
||||
class AppLayout:
|
||||
def __init__(self):
|
||||
self.sidebar = SidebarComponent()
|
||||
self.about = AboutComponent()
|
||||
|
||||
def create_layout(self):
|
||||
return dbc.Container(
|
||||
[self._create_header(), self._create_main_content()]
|
||||
+ self._create_stores()
|
||||
+ [self.about.create_about_modal()],
|
||||
+ self._create_stores(),
|
||||
fluid=True,
|
||||
)
|
||||
|
||||
@@ -22,19 +19,7 @@ class AppLayout:
|
||||
[
|
||||
dbc.Col(
|
||||
[
|
||||
html.Div(
|
||||
[
|
||||
html.H1(
|
||||
"EmbeddingBuddy",
|
||||
className="text-center mb-4 d-inline",
|
||||
),
|
||||
html.Div(
|
||||
[self.about.create_about_button()],
|
||||
className="float-end",
|
||||
),
|
||||
],
|
||||
className="d-flex justify-content-between align-items-center",
|
||||
),
|
||||
html.H1("EmbeddingBuddy", className="text-center mb-4"),
|
||||
# Load Transformers.js from CDN
|
||||
html.Script(
|
||||
"""
|
||||
|
@@ -38,9 +38,9 @@ class PlotFactory:
|
||||
if dimensions == "3d":
|
||||
fig = px.scatter_3d(
|
||||
df,
|
||||
x="x",
|
||||
y="y",
|
||||
z="z",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
z="dim_3",
|
||||
color=color_values,
|
||||
hover_data=hover_fields,
|
||||
title=f"3D Embedding Visualization - {method} (colored by {color_by})",
|
||||
@@ -49,8 +49,8 @@ class PlotFactory:
|
||||
else:
|
||||
fig = px.scatter(
|
||||
df,
|
||||
x="x",
|
||||
y="y",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
color=color_values,
|
||||
hover_data=hover_fields,
|
||||
title=f"2D Embedding Visualization - {method} (colored by {color_by})",
|
||||
@@ -77,17 +77,17 @@ class PlotFactory:
|
||||
if dimensions == "3d":
|
||||
doc_fig = px.scatter_3d(
|
||||
doc_df,
|
||||
x="x",
|
||||
y="y",
|
||||
z="z",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
z="dim_3",
|
||||
color=doc_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
else:
|
||||
doc_fig = px.scatter(
|
||||
doc_df,
|
||||
x="x",
|
||||
y="y",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
color=doc_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
@@ -114,17 +114,17 @@ class PlotFactory:
|
||||
if dimensions == "3d":
|
||||
prompt_fig = px.scatter_3d(
|
||||
prompt_df,
|
||||
x="x",
|
||||
y="y",
|
||||
z="z",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
z="dim_3",
|
||||
color=prompt_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
else:
|
||||
prompt_fig = px.scatter(
|
||||
prompt_df,
|
||||
x="x",
|
||||
y="y",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
color=prompt_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
@@ -168,11 +168,11 @@ class PlotFactory:
|
||||
"category": doc.category,
|
||||
"subcategory": doc.subcategory,
|
||||
"tags_str": ", ".join(doc.tags) if doc.tags else "None",
|
||||
"x": coordinates[i, 0],
|
||||
"y": coordinates[i, 1],
|
||||
"dim_1": coordinates[i, 0],
|
||||
"dim_2": coordinates[i, 1],
|
||||
}
|
||||
if dimensions == "3d":
|
||||
row["z"] = coordinates[i, 2]
|
||||
row["dim_3"] = coordinates[i, 2]
|
||||
df_data.append(row)
|
||||
|
||||
return pd.DataFrame(df_data)
|
||||
|
@@ -1,12 +0,0 @@
|
||||
"""
|
||||
WSGI entry point for production deployment.
|
||||
Use this with a production WSGI server like Gunicorn.
|
||||
"""
|
||||
|
||||
from embeddingbuddy.app import create_app
|
||||
|
||||
# Create the application instance
|
||||
application = create_app()
|
||||
|
||||
# For compatibility with different WSGI servers
|
||||
app = application
|
2
uv.lock
generated
2
uv.lock
generated
@@ -412,7 +412,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "embeddingbuddy"
|
||||
version = "0.6.4"
|
||||
version = "0.3.0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "dash" },
|
||||
|
20
wsgi.py
Normal file
20
wsgi.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""
|
||||
WSGI entry point for production deployment.
|
||||
Use this with a production WSGI server like Gunicorn.
|
||||
"""
|
||||
from src.embeddingbuddy.app import create_app
|
||||
|
||||
# Create the application instance
|
||||
application = create_app()
|
||||
|
||||
# For compatibility with different WSGI servers
|
||||
app = application
|
||||
|
||||
if __name__ == "__main__":
|
||||
# This won't be used in production, but useful for testing
|
||||
from src.embeddingbuddy.config.settings import AppSettings
|
||||
application.run(
|
||||
host=AppSettings.HOST,
|
||||
port=AppSettings.PORT,
|
||||
debug=False
|
||||
)
|
Reference in New Issue
Block a user