Compare commits: cb6ab1bfbe ... main (11 commits)

Commits (SHA1):
- e022b26399
- c29160c9e9
- bd3ee6e35a
- 6936bc5d97
- 9a2e257b0d
- 9c3ff6e799
- 781d055e60
- 0f5cea2850
- 1bd70705e7
- 6610b9c196
- f4095cc0cb
@@ -4,7 +4,9 @@
      "Bash(mkdir:*)",
      "Bash(uv run:*)",
      "Bash(uv add:*)",
      "Bash(uv sync:*)"
      "Bash(uv sync:*)",
      "Bash(tree:*)",
      "WebFetch(domain:www.dash-bootstrap-components.com)"
    ],
    "deny": [],
    "ask": [],
.dockerignore (new file, 76 lines)
@@ -0,0 +1,76 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
ENV/
env.bak/
venv.bak/
.venv/

# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/
coverage.xml
*.cover

# Development tools
.mypy_cache/
.ruff_cache/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Git
.git/
.gitignore

# Documentation
*.md
!README.md

# Docker
Dockerfile*
docker-compose*.yml
.dockerignore

# Data files (may contain sensitive information)
*.ndjson
*.ldjson
*.json

# Reports
*-report.json
bandit-report.json
safety-report.json

# Screenshots
*.png
*.jpg
*.jpeg
*.gif

# Logs
*.log

# Temporary files
*.tmp
*.temp
CLAUDE.md (22 lines changed)
@@ -21,11 +21,29 @@ uv sync
**Run the application:**

Development mode (with auto-reload):

```bash
uv run python main.py
uv run run_dev.py
```

The app will be available at http://127.0.0.1:8050
Production mode (with Gunicorn WSGI server):

```bash
# First install production dependencies
uv sync --extra prod

# Then run in production mode
uv run run_prod.py
```

Legacy mode (basic Dash server):

```bash
uv run main.py
```

The app will be available at <http://127.0.0.1:8050>

**Run tests:**
Dockerfile (new file, 78 lines)
@@ -0,0 +1,78 @@
# Two-stage Dockerfile for EmbeddingBuddy
# Stage 1: Builder
FROM python:3.11-slim as builder

# Install system dependencies for building Python packages
RUN apt-get update && apt-get install -y \
    build-essential \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*

# Install uv for dependency management
RUN pip install uv

# Set working directory
WORKDIR /app

# Copy dependency files
COPY pyproject.toml uv.lock ./

# Copy source code (needed for editable install)
COPY src/ src/
COPY main.py .
COPY wsgi.py .
COPY run_prod.py .
COPY assets/ assets/

# Create virtual environment and install dependencies (including production extras)
RUN uv venv .venv
RUN uv sync --frozen --extra prod

# Stage 2: Runtime
FROM python:3.11-slim as runtime

# Install runtime dependencies for compiled packages
RUN apt-get update && apt-get install -y \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy virtual environment from builder stage
COPY --from=builder /app/.venv /app/.venv

# Copy application files from builder stage
COPY --from=builder /app/src /app/src
COPY --from=builder /app/main.py /app/main.py
COPY --from=builder /app/assets /app/assets
COPY --from=builder /app/wsgi.py /app/wsgi.py
COPY --from=builder /app/run_prod.py /app/run_prod.py

# Make sure the virtual environment is in PATH
ENV PATH="/app/.venv/bin:$PATH"

# Set Python path
ENV PYTHONPATH="/app/src:$PYTHONPATH"

# Environment variables for production
ENV EMBEDDINGBUDDY_HOST=0.0.0.0
ENV EMBEDDINGBUDDY_PORT=8050
ENV EMBEDDINGBUDDY_DEBUG=false
ENV EMBEDDINGBUDDY_ENV=production

# Expose port
EXPOSE 8050

# Create non-root user
RUN groupadd -r appuser && useradd -r -g appuser appuser
RUN chown -R appuser:appuser /app
USER appuser

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD python -c "import requests; requests.get('http://localhost:8050/', timeout=5)" || exit 1

# Run application with Gunicorn in production
CMD ["python", "run_prod.py"]
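The HEALTHCHECK instruction above packs the probe into a one-liner. Expanded for readability, it is roughly the following sketch; a connection error or timeout makes the command exit non-zero, which Docker counts toward the unhealthy threshold.

```python
# Sketch: the container health probe from the HEALTHCHECK line above, written out.
# A connection error or timeout raises, so the command exits non-zero and the check
# fails; the HTTP status code itself is not inspected.
import requests

requests.get("http://localhost:8050/", timeout=5)
```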
README.md (114 lines changed)
@@ -12,7 +12,7 @@ EmbeddingBuddy provides an intuitive web interface for analyzing high-dimensiona
embedding vectors by applying various dimensionality reduction algorithms and
visualizing the results in interactive 2D and 3D plots. The application features
a clean, modular architecture that makes it easy to test, maintain, and extend
with new features. It supports dual dataset visualization, allowing you to compare
documents and prompts to understand how queries relate to your content.

## Features
@@ -73,17 +73,77 @@ uv sync

2. **Run the application:**

**Development mode** (with auto-reload):

```bash
uv run python main.py
uv run run_dev.py
```

3. **Open your browser** to http://127.0.0.1:8050
**Production mode** (with Gunicorn WSGI server):

```bash
# Install production dependencies
uv sync --extra prod

# Run in production mode
uv run run_prod.py
```

**Legacy mode** (basic Dash server):

```bash
uv run main.py
```

3. **Open your browser** to <http://127.0.0.1:8050>

4. **Test with sample data**:
- Upload `sample_data.ndjson` (documents)
- Upload `sample_prompts.ndjson` (prompts) to see dual visualization
- Use the "Show prompts" toggle to compare how prompts relate to documents

## Docker

You can also run EmbeddingBuddy using Docker:

### Basic Usage

```bash
# Run in the background
docker compose up -d
```

The application will be available at <http://127.0.0.1:8050>

### With OpenSearch

To run with OpenSearch for enhanced search capabilities:

```bash
# Run in the background with OpenSearch
docker compose --profile opensearch up -d
```

This will start both the EmbeddingBuddy application and an OpenSearch instance.
OpenSearch will be available at <http://127.0.0.1:9200>

### Docker Commands

```bash
# Stop all services
docker compose down

# Stop and remove volumes
docker compose down -v

# View logs
docker compose logs embeddingbuddy
docker compose logs opensearch

# Rebuild containers
docker compose build
```

## Development

### Project Structure
@@ -92,22 +152,38 @@ The application follows a modular architecture for improved maintainability and

```text
src/embeddingbuddy/
├── config/ # Configuration management
│ └── settings.py # Centralized app settings
├── data/ # Data parsing and processing
│ ├── parser.py # NDJSON parsing logic
│ └── processor.py # Data transformation utilities
├── models/ # Data schemas and algorithms
│ ├── schemas.py # Pydantic data models
│ └── reducers.py # Dimensionality reduction algorithms
├── visualization/ # Plot creation and styling
│ ├── plots.py # Plot factory and creation logic
│ └── colors.py # Color mapping utilities
├── ui/ # User interface components
│ ├── layout.py # Main application layout
│ ├── components/ # Reusable UI components
│ └── callbacks/ # Organized callback functions
└── utils/ # Utility functions
├── app.py # Main application entry point and factory
├── config/ # Configuration management
│ └── settings.py # Centralized app settings
├── data/ # Data parsing and processing
│ ├── parser.py # NDJSON parsing logic
│ ├── processor.py # Data transformation utilities
│ └── sources/ # Data source integrations
│ └── opensearch.py # OpenSearch data source
├── models/ # Data schemas and algorithms
│ ├── schemas.py # Pydantic data models
│ ├── reducers.py # Dimensionality reduction algorithms
│ └── field_mapper.py # Field mapping utilities
├── visualization/ # Plot creation and styling
│ ├── plots.py # Plot factory and creation logic
│ └── colors.py # Color mapping utilities
├── ui/ # User interface components
│ ├── layout.py # Main application layout
│ ├── components/ # Reusable UI components
│ │ ├── sidebar.py # Sidebar component
│ │ ├── upload.py # Upload components
│ │ ├── textinput.py # Text input components
│ │ └── datasource.py # Data source components
│ └── callbacks/ # Organized callback functions
│ ├── data_processing.py # Data upload/processing callbacks
│ ├── visualization.py # Plot update callbacks
│ └── interactions.py # User interaction callbacks
└── utils/ # Utility functions

main.py # Application runner (at project root)
run_dev.py # Development server runner
run_prod.py # Production server runner
```

### Testing
assets/custom.css (new file, 17 lines)
@@ -0,0 +1,17 @@
/* CSS override for transparent hover boxes in Plotly plots */

/* Make hover boxes transparent while preserving text readability */
.hovertext {
    fill-opacity: 0.8 !important;
    stroke-opacity: 1 !important;
}

/* Alternative selector for different Plotly versions */
g.hovertext > path {
    opacity: 0.8 !important;
}

/* Ensure text remains fully visible */
.hovertext text {
    opacity: 1 !important;
}
@@ -45,28 +45,12 @@ class TransformersEmbedder {
|
||||
console.log('✅ Using globally loaded Transformers.js pipeline');
|
||||
}
|
||||
|
||||
// Show loading progress to user
|
||||
if (window.updateModelLoadingProgress) {
|
||||
window.updateModelLoadingProgress(0, `Loading ${modelName}...`);
|
||||
}
|
||||
|
||||
this.extractor = await window.transformers.pipeline('feature-extraction', modelName, {
|
||||
progress_callback: (data) => {
|
||||
if (window.updateModelLoadingProgress && data.progress !== undefined) {
|
||||
const progress = Math.round(data.progress);
|
||||
window.updateModelLoadingProgress(progress, data.status || 'Loading...');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
this.extractor = await window.transformers.pipeline('feature-extraction', modelName);
|
||||
|
||||
this.modelCache.set(modelName, this.extractor);
|
||||
this.currentModel = modelName;
|
||||
this.isLoading = false;
|
||||
|
||||
if (window.updateModelLoadingProgress) {
|
||||
window.updateModelLoadingProgress(100, 'Model loaded successfully');
|
||||
}
|
||||
|
||||
|
||||
return { success: true, model: modelName };
|
||||
} catch (error) {
|
||||
this.isLoading = false;
|
||||
@@ -116,17 +100,8 @@ class TransformersEmbedder {
|
||||
}
|
||||
});
|
||||
|
||||
// Update progress
|
||||
const progress = Math.min(100, ((i + batch.length) / texts.length) * 100);
|
||||
if (window.updateEmbeddingProgress) {
|
||||
window.updateEmbeddingProgress(progress, `Processing ${i + batch.length}/${texts.length} texts`);
|
||||
}
|
||||
}
|
||||
|
||||
if (window.updateEmbeddingProgress) {
|
||||
window.updateEmbeddingProgress(100, `Generated ${embeddings.length} embeddings successfully`);
|
||||
}
|
||||
|
||||
|
||||
return embeddings;
|
||||
} catch (error) {
|
||||
console.error('Embedding generation error:', error);
|
||||
@@ -139,30 +114,6 @@ class TransformersEmbedder {
|
||||
window.transformersEmbedder = new TransformersEmbedder();
|
||||
console.log('📦 TransformersEmbedder instance created');
|
||||
|
||||
// Global progress update functions
|
||||
window.updateModelLoadingProgress = function(progress, status) {
|
||||
const progressBar = document.getElementById('model-loading-progress');
|
||||
const statusText = document.getElementById('model-loading-status');
|
||||
if (progressBar) {
|
||||
progressBar.style.width = progress + '%';
|
||||
progressBar.setAttribute('aria-valuenow', progress);
|
||||
}
|
||||
if (statusText) {
|
||||
statusText.textContent = status;
|
||||
}
|
||||
};
|
||||
|
||||
window.updateEmbeddingProgress = function(progress, status) {
|
||||
const progressBar = document.getElementById('embedding-progress');
|
||||
const statusText = document.getElementById('embedding-status');
|
||||
if (progressBar) {
|
||||
progressBar.style.width = progress + '%';
|
||||
progressBar.setAttribute('aria-valuenow', progress);
|
||||
}
|
||||
if (statusText) {
|
||||
statusText.textContent = status;
|
||||
}
|
||||
};
|
||||
|
||||
// Dash clientside callback functions
|
||||
window.dash_clientside = window.dash_clientside || {};
|
||||
@@ -170,31 +121,28 @@ console.log('🔧 Setting up window.dash_clientside.transformers');
|
||||
window.dash_clientside.transformers = {
|
||||
generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
|
||||
console.log('🚀 generateEmbeddings called with:', { nClicks, modelName, tokenizationMethod, textLength: textContent?.length });
|
||||
|
||||
|
||||
if (!nClicks || !textContent || textContent.trim().length === 0) {
|
||||
console.log('⚠️ Early return - missing required parameters');
|
||||
return window.dash_clientside.no_update;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// Initialize model if needed
|
||||
const initResult = await window.transformersEmbedder.initializeModel(modelName);
|
||||
if (!initResult.success) {
|
||||
return [
|
||||
{ error: initResult.error },
|
||||
`❌ Model loading error: ${initResult.error}`,
|
||||
"danger",
|
||||
{ error: `Model loading error: ${initResult.error}` },
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Tokenize text based on method
|
||||
let textChunks;
|
||||
const trimmedText = textContent.trim();
|
||||
|
||||
|
||||
switch (tokenizationMethod) {
|
||||
case 'sentence':
|
||||
// Simple sentence splitting - can be enhanced with proper NLP
|
||||
textChunks = trimmedText
|
||||
.split(/[.!?]+/)
|
||||
.map(s => s.trim())
|
||||
@@ -215,28 +163,24 @@ window.dash_clientside.transformers = {
|
||||
default:
|
||||
textChunks = [trimmedText];
|
||||
}
|
||||
|
||||
|
||||
if (textChunks.length === 0) {
|
||||
return [
|
||||
{ error: 'No valid text chunks found after tokenization' },
|
||||
'❌ Error: No valid text chunks found after tokenization',
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Generate embeddings
|
||||
const embeddings = await window.transformersEmbedder.generateEmbeddings(textChunks);
|
||||
|
||||
|
||||
if (!embeddings || embeddings.length !== textChunks.length) {
|
||||
return [
|
||||
{ error: 'Embedding generation failed - mismatch in text chunks and embeddings' },
|
||||
'❌ Error: Embedding generation failed',
|
||||
"danger",
|
||||
{ error: 'Embedding generation failed' },
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Create documents structure
|
||||
const documents = textChunks.map((text, i) => ({
|
||||
id: `text_input_${Date.now()}_${i}`,
|
||||
@@ -246,33 +190,36 @@ window.dash_clientside.transformers = {
|
||||
subcategory: subcategory || "Generated",
|
||||
tags: []
|
||||
}));
|
||||
|
||||
|
||||
// Return the successful embeddings data
|
||||
const embeddingsData = {
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
};
|
||||
|
||||
console.log('✅ Embeddings generated successfully:', embeddingsData);
|
||||
|
||||
return [
|
||||
{
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
},
|
||||
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
|
||||
"success",
|
||||
embeddingsData,
|
||||
false
|
||||
];
|
||||
|
||||
|
||||
} catch (error) {
|
||||
console.error('Client-side embedding error:', error);
|
||||
return [
|
||||
{ error: error.message },
|
||||
`❌ Error: ${error.message}`,
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
console.log('✅ Transformers.js client-side setup complete');
|
||||
console.log('Available:', {
|
||||
transformersEmbedder: !!window.transformersEmbedder,
|
||||
dashClientside: !!window.dash_clientside,
|
||||
transformersModule: !!window.dash_clientside?.transformers,
|
||||
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings
|
||||
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings,
|
||||
processAsync: typeof window.processEmbeddingsAsync
|
||||
});
|
@@ -104,17 +104,28 @@ window.dash_clientside = window.dash_clientside || {};
|
||||
window.dash_clientside.transformers = {
|
||||
generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
|
||||
console.log('🚀 Client-side generateEmbeddings called');
|
||||
|
||||
|
||||
if (!nClicks || !textContent || textContent.trim().length === 0) {
|
||||
console.log('⚠️ Missing required parameters');
|
||||
return window.dash_clientside.no_update;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// Ensure Transformers.js is loaded
|
||||
if (!window.transformersLibraryLoaded) {
|
||||
const loaded = await initializeTransformers();
|
||||
if (!loaded) {
|
||||
return [
|
||||
{ error: 'Failed to load Transformers.js' },
|
||||
false
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
// Tokenize text
|
||||
let textChunks;
|
||||
const trimmedText = textContent.trim();
|
||||
|
||||
|
||||
switch (tokenizationMethod) {
|
||||
case 'sentence':
|
||||
textChunks = trimmedText.split(/[.!?]+/).map(s => s.trim()).filter(s => s.length > 0);
|
||||
@@ -128,45 +139,50 @@ window.dash_clientside.transformers = {
|
||||
default:
|
||||
textChunks = [trimmedText];
|
||||
}
|
||||
|
||||
|
||||
if (textChunks.length === 0) {
|
||||
throw new Error('No valid text chunks after tokenization');
|
||||
return [
|
||||
{ error: 'No valid text chunks after tokenization' },
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Generate embeddings
|
||||
const embeddings = await window.simpleEmbedder.generateEmbeddings(textChunks, modelName);
|
||||
|
||||
|
||||
// Create documents
|
||||
const documents = textChunks.map((text, i) => ({
|
||||
id: `text_input_${Date.now()}_${i}`,
|
||||
text: text,
|
||||
embedding: embeddings[i],
|
||||
category: category || "Text Input",
|
||||
subcategory: subcategory || "Generated",
|
||||
subcategory: subcategory || "Generated",
|
||||
tags: []
|
||||
}));
|
||||
|
||||
|
||||
// Return the successful embeddings data
|
||||
const embeddingsData = {
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
};
|
||||
|
||||
console.log('✅ Embeddings generated successfully:', embeddingsData);
|
||||
|
||||
return [
|
||||
{
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
},
|
||||
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
|
||||
"success",
|
||||
embeddingsData,
|
||||
false
|
||||
];
|
||||
|
||||
|
||||
} catch (error) {
|
||||
console.error('❌ Error generating embeddings:', error);
|
||||
return [
|
||||
{ error: error.message },
|
||||
`❌ Error: ${error.message}`,
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
console.log('✅ Simple Transformers.js setup complete');
|
||||
console.log('Available functions:', Object.keys(window.dash_clientside.transformers));
|
bump_version.py (new executable file, 133 lines)
@@ -0,0 +1,133 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Version bump script for EmbeddingBuddy.
|
||||
Automatically updates version in pyproject.toml following semantic versioning.
|
||||
"""
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def get_current_version(pyproject_path: Path) -> str:
|
||||
"""Extract current version from pyproject.toml."""
|
||||
content = pyproject_path.read_text()
|
||||
match = re.search(r'version\s*=\s*"([^"]+)"', content)
|
||||
if not match:
|
||||
raise ValueError("Could not find version in pyproject.toml")
|
||||
return match.group(1)
|
||||
|
||||
|
||||
def parse_version(version_str: str) -> tuple[int, int, int]:
|
||||
"""Parse semantic version string into major, minor, patch tuple."""
|
||||
match = re.match(r'(\d+)\.(\d+)\.(\d+)', version_str)
|
||||
if not match:
|
||||
raise ValueError(f"Invalid version format: {version_str}")
|
||||
return int(match.group(1)), int(match.group(2)), int(match.group(3))
|
||||
|
||||
|
||||
def bump_version(current: str, bump_type: str) -> str:
|
||||
"""Bump version based on type (major, minor, patch)."""
|
||||
major, minor, patch = parse_version(current)
|
||||
|
||||
if bump_type == "major":
|
||||
return f"{major + 1}.0.0"
|
||||
elif bump_type == "minor":
|
||||
return f"{major}.{minor + 1}.0"
|
||||
elif bump_type == "patch":
|
||||
return f"{major}.{minor}.{patch + 1}"
|
||||
else:
|
||||
raise ValueError(f"Invalid bump type: {bump_type}")
|
||||
|
||||
|
||||
def update_version_in_file(pyproject_path: Path, new_version: str) -> None:
|
||||
"""Update version in pyproject.toml file."""
|
||||
content = pyproject_path.read_text()
|
||||
updated_content = re.sub(
|
||||
r'version\s*=\s*"[^"]+"',
|
||||
f'version = "{new_version}"',
|
||||
content
|
||||
)
|
||||
pyproject_path.write_text(updated_content)
|
||||
|
||||
|
||||
def main():
|
||||
"""Main version bump function."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Bump version in pyproject.toml",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
python bump_version.py patch # 0.3.0 -> 0.3.1
|
||||
python bump_version.py minor # 0.3.0 -> 0.4.0
|
||||
python bump_version.py major # 0.3.0 -> 1.0.0
|
||||
python bump_version.py --set 1.2.3 # Set specific version
|
||||
|
||||
Semantic versioning guide:
|
||||
- patch: Bug fixes, no API changes
|
||||
- minor: New features, backward compatible
|
||||
- major: Breaking changes, not backward compatible
|
||||
"""
|
||||
)
|
||||
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument(
|
||||
"bump_type",
|
||||
nargs="?",
|
||||
choices=["major", "minor", "patch"],
|
||||
help="Type of version bump"
|
||||
)
|
||||
group.add_argument(
|
||||
"--set",
|
||||
dest="set_version",
|
||||
help="Set specific version (e.g., 1.2.3)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="Show what would be changed without making changes"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Find pyproject.toml
|
||||
pyproject_path = Path("pyproject.toml")
|
||||
if not pyproject_path.exists():
|
||||
print("❌ pyproject.toml not found in current directory")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
current_version = get_current_version(pyproject_path)
|
||||
print(f"📦 Current version: {current_version}")
|
||||
|
||||
if args.set_version:
|
||||
# Validate the set version format
|
||||
parse_version(args.set_version)
|
||||
new_version = args.set_version
|
||||
else:
|
||||
new_version = bump_version(current_version, args.bump_type)
|
||||
|
||||
print(f"🚀 New version: {new_version}")
|
||||
|
||||
if args.dry_run:
|
||||
print("🔍 Dry run - no changes made")
|
||||
else:
|
||||
update_version_in_file(pyproject_path, new_version)
|
||||
print("✅ Version updated in pyproject.toml")
|
||||
print()
|
||||
print("💡 Next steps:")
|
||||
print(" 1. Review changes: git diff")
|
||||
print(" 2. Commit changes: git add . && git commit -m 'bump version to {}'".format(new_version))
|
||||
print(" 3. Tag release: git tag v{}".format(new_version))
|
||||
|
||||
except ValueError as e:
|
||||
print(f"❌ Error: {e}")
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
print(f"❌ Unexpected error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
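As a worked example of what the script above does to pyproject.toml, here is the same regex substitution applied to an in-memory string; this is a sketch, and the version numbers simply mirror the 0.3.0 to 0.5.0 bump elsewhere in this comparison.

```python
# Sketch: the substitution bump_version.py performs on pyproject.toml,
# demonstrated on a string instead of the file.
import re

pyproject = 'name = "embeddingbuddy"\nversion = "0.3.0"\n'
updated = re.sub(r'version\s*=\s*"[^"]+"', 'version = "0.5.0"', pyproject)
assert 'version = "0.5.0"' in updated
```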
docker-compose.yml (new file, 69 lines)
@@ -0,0 +1,69 @@
services:
  opensearch:
    image: opensearchproject/opensearch:2
    container_name: embeddingbuddy-opensearch
    profiles:
      - opensearch
    environment:
      - cluster.name=embeddingbuddy-cluster
      - node.name=embeddingbuddy-node
      - discovery.type=single-node
      - bootstrap.memory_lock=true
      - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
      - "DISABLE_INSTALL_DEMO_CONFIG=true"
      - "DISABLE_SECURITY_PLUGIN=true"
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
    volumes:
      - opensearch-data:/usr/share/opensearch/data
    ports:
      - "9200:9200"
      - "9600:9600"
    networks:
      - embeddingbuddy
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:9200/_cluster/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s

  embeddingbuddy:
    build: .
    container_name: embeddingbuddy-app
    environment:
      - EMBEDDINGBUDDY_HOST=0.0.0.0
      - EMBEDDINGBUDDY_PORT=8050
      - EMBEDDINGBUDDY_DEBUG=false
      - OPENSEARCH_HOST=opensearch
      - OPENSEARCH_PORT=9200
      - OPENSEARCH_SCHEME=http
      - OPENSEARCH_VERIFY_CERTS=false
    ports:
      - "8050:8050"
    networks:
      - embeddingbuddy
    depends_on:
      opensearch:
        condition: service_healthy
        required: false
    healthcheck:
      test: ["CMD-SHELL", "python -c 'import requests; requests.get(\"http://localhost:8050/\", timeout=5)'"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    restart: unless-stopped

volumes:
  opensearch-data:
    driver: local

networks:
  embeddingbuddy:
    driver: bridge
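For orientation, a minimal sketch of how a client could reach the OpenSearch service defined above using the same environment variables; the OPENSEARCH_* names come from the compose file, while the client code itself is illustrative rather than taken from this change set.

```python
# Sketch: connecting to the compose-defined OpenSearch service with opensearch-py.
# The OPENSEARCH_* variable names match docker-compose.yml; defaults are illustrative.
import os
from opensearchpy import OpenSearch

client = OpenSearch(
    hosts=[{
        "host": os.getenv("OPENSEARCH_HOST", "localhost"),
        "port": int(os.getenv("OPENSEARCH_PORT", "9200")),
    }],
    use_ssl=os.getenv("OPENSEARCH_SCHEME", "http") == "https",
    verify_certs=os.getenv("OPENSEARCH_VERIFY_CERTS", "false").lower() == "true",
)
print(client.cluster.health())  # a single-node cluster should report green or yellow
```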
(Binary image file changed, not shown: size 339 KiB before, 844 KiB after.)
pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "embeddingbuddy"
version = "0.3.0"
version = "0.5.0"
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
readme = "README.md"
requires-python = ">=3.11"
@@ -12,7 +12,6 @@ dependencies = [
    "scikit-learn>=1.3.2",
    "dash-bootstrap-components>=1.5.0",
    "umap-learn>=0.5.8",
    "numba>=0.56.4",
    "openTSNE>=1.0.0",
    "mypy>=1.17.1",
    "opensearch-py>=3.0.0",
@@ -32,11 +31,14 @@ security = [
    "safety>=2.3.0",
    "pip-audit>=2.6.0",
]
prod = [
    "gunicorn>=21.2.0",
]
dev = [
    "embeddingbuddy[test,lint,security]",
]
all = [
    "embeddingbuddy[test,lint,security]",
    "embeddingbuddy[test,lint,security,prod]",
]

[build-system]
run_dev.py (new file, 26 lines)
@@ -0,0 +1,26 @@
#!/usr/bin/env python3
"""
Development runner with auto-reload enabled.
This runs the Dash development server with hot reloading.
"""
import os
from src.embeddingbuddy.app import create_app, run_app

def main():
    """Run the application in development mode with auto-reload."""
    # Force development settings
    os.environ["EMBEDDINGBUDDY_ENV"] = "development"
    os.environ["EMBEDDINGBUDDY_DEBUG"] = "true"

    print("🚀 Starting EmbeddingBuddy in development mode...")
    print("📁 Auto-reload enabled - changes will trigger restart")
    print("🌐 Server will be available at http://127.0.0.1:8050")
    print("⏹️ Press Ctrl+C to stop")

    app = create_app()

    # Run with development server (includes auto-reload when debug=True)
    run_app(app, debug=True)

if __name__ == "__main__":
    main()
run_prod.py (new file, 49 lines)
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Production runner using Gunicorn WSGI server.
|
||||
This provides better performance and stability for production deployments.
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from src.embeddingbuddy.config.settings import AppSettings
|
||||
|
||||
def main():
|
||||
"""Run the application in production mode with Gunicorn."""
|
||||
# Force production settings
|
||||
os.environ["EMBEDDINGBUDDY_ENV"] = "production"
|
||||
os.environ["EMBEDDINGBUDDY_DEBUG"] = "false"
|
||||
|
||||
print("🚀 Starting EmbeddingBuddy in production mode...")
|
||||
print(f"⚙️ Workers: {AppSettings.GUNICORN_WORKERS}")
|
||||
print(f"🌐 Server will be available at http://{AppSettings.GUNICORN_BIND}")
|
||||
print("⏹️ Press Ctrl+C to stop")
|
||||
|
||||
# Gunicorn command
|
||||
cmd = [
|
||||
"gunicorn",
|
||||
"--workers", str(AppSettings.GUNICORN_WORKERS),
|
||||
"--bind", AppSettings.GUNICORN_BIND,
|
||||
"--timeout", str(AppSettings.GUNICORN_TIMEOUT),
|
||||
"--keep-alive", str(AppSettings.GUNICORN_KEEPALIVE),
|
||||
"--access-logfile", "-",
|
||||
"--error-logfile", "-",
|
||||
"--log-level", "info",
|
||||
"wsgi:application"
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(cmd, check=True)
|
||||
except KeyboardInterrupt:
|
||||
print("\n🛑 Shutting down...")
|
||||
sys.exit(0)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"❌ Error running Gunicorn: {e}")
|
||||
sys.exit(1)
|
||||
except FileNotFoundError:
|
||||
print("❌ Gunicorn not found. Install it with: uv add gunicorn")
|
||||
print("💡 Or run in development mode with: python run_dev.py")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -15,7 +15,12 @@ def create_app():
|
||||
assets_path = os.path.join(project_root, "assets")
|
||||
|
||||
app = dash.Dash(
|
||||
__name__, external_stylesheets=[dbc.themes.BOOTSTRAP], assets_folder=assets_path
|
||||
__name__,
|
||||
external_stylesheets=[
|
||||
dbc.themes.BOOTSTRAP,
|
||||
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css",
|
||||
],
|
||||
assets_folder=assets_path,
|
||||
)
|
||||
|
||||
# Allow callbacks to components that are dynamically created in tabs
|
||||
@@ -45,22 +50,22 @@ def _register_client_side_callbacks(app):
|
||||
if (!nClicks || !textContent || !textContent.trim()) {
|
||||
return window.dash_clientside.no_update;
|
||||
}
|
||||
|
||||
|
||||
console.log('🔍 Checking for Transformers.js...');
|
||||
console.log('window.dash_clientside:', typeof window.dash_clientside);
|
||||
console.log('window.dash_clientside.transformers:', typeof window.dash_clientside?.transformers);
|
||||
console.log('generateEmbeddings function:', typeof window.dash_clientside?.transformers?.generateEmbeddings);
|
||||
|
||||
if (typeof window.dash_clientside !== 'undefined' &&
|
||||
|
||||
if (typeof window.dash_clientside !== 'undefined' &&
|
||||
typeof window.dash_clientside.transformers !== 'undefined' &&
|
||||
typeof window.dash_clientside.transformers.generateEmbeddings === 'function') {
|
||||
|
||||
|
||||
console.log('✅ Calling Transformers.js generateEmbeddings...');
|
||||
return window.dash_clientside.transformers.generateEmbeddings(
|
||||
nClicks, textContent, modelName, tokenizationMethod, category, subcategory
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// More detailed error information
|
||||
let errorMsg = '❌ Transformers.js not available. ';
|
||||
if (typeof window.dash_clientside === 'undefined') {
|
||||
@@ -70,21 +75,17 @@ def _register_client_side_callbacks(app):
|
||||
} else if (typeof window.dash_clientside.transformers.generateEmbeddings !== 'function') {
|
||||
errorMsg += 'generateEmbeddings function not found.';
|
||||
}
|
||||
|
||||
|
||||
console.error(errorMsg);
|
||||
|
||||
|
||||
return [
|
||||
{ error: 'Transformers.js not loaded. Please refresh the page and try again.' },
|
||||
errorMsg + ' Please refresh the page.',
|
||||
'danger',
|
||||
false
|
||||
];
|
||||
}
|
||||
""",
|
||||
[
|
||||
Output("embeddings-generated-trigger", "data"),
|
||||
Output("text-input-status-immediate", "children"),
|
||||
Output("text-input-status-immediate", "color"),
|
||||
Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
|
||||
],
|
||||
[Input("generate-embeddings-btn", "n_clicks")],
|
||||
|
src/embeddingbuddy/config/settings.py
@@ -73,6 +73,17 @@ class AppSettings:
    HOST = os.getenv("EMBEDDINGBUDDY_HOST", "127.0.0.1")
    PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))

    # Environment Configuration
    ENVIRONMENT = os.getenv(
        "EMBEDDINGBUDDY_ENV", "development"
    )  # development, production

    # WSGI Server Configuration (for production)
    GUNICORN_WORKERS = int(os.getenv("GUNICORN_WORKERS", "4"))
    GUNICORN_BIND = os.getenv("GUNICORN_BIND", f"{HOST}:{PORT}")
    GUNICORN_TIMEOUT = int(os.getenv("GUNICORN_TIMEOUT", "120"))
    GUNICORN_KEEPALIVE = int(os.getenv("GUNICORN_KEEPALIVE", "5"))

    # OpenSearch Configuration
    OPENSEARCH_DEFAULT_SIZE = 100
    OPENSEARCH_SAMPLE_SIZE = 5
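A small check of how these settings compose, as a sketch that assumes only the os.getenv pattern from the hunk above: when GUNICORN_BIND is unset, the bind address falls back to the EMBEDDINGBUDDY_HOST and EMBEDDINGBUDDY_PORT values.

```python
# Sketch: GUNICORN_BIND falls back to EMBEDDINGBUDDY_HOST:EMBEDDINGBUDDY_PORT when unset.
import os

os.environ.pop("GUNICORN_BIND", None)
os.environ["EMBEDDINGBUDDY_HOST"] = "0.0.0.0"
os.environ["EMBEDDINGBUDDY_PORT"] = "8050"

host = os.getenv("EMBEDDINGBUDDY_HOST", "127.0.0.1")
port = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))
bind = os.getenv("GUNICORN_BIND", f"{host}:{port}")
assert bind == "0.0.0.0:8050"  # Gunicorn would listen on all interfaces, port 8050
```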
src/embeddingbuddy/ui/callbacks/data_processing.py
@@ -621,6 +621,12 @@ class DataProcessingCallbacks:
        if not embeddings_data:
            return no_update, no_update, no_update, no_update, no_update

        # Check if this is a request trigger (contains textContent) vs actual embeddings data
        if isinstance(embeddings_data, dict) and "textContent" in embeddings_data:
            # This is a processing request trigger, not the actual results
            # The JavaScript will handle the async processing and update the UI directly
            return no_update, no_update, no_update, no_update, no_update

        processed_data = self.processor.process_client_embeddings(embeddings_data)

        if processed_data.error:
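To make the new guard concrete, a hedged sketch of the two payload shapes the callback distinguishes; apart from textContent, documents, and embeddings, the keys shown are illustrative.

```python
# Sketch: the two kinds of payloads the callback above can receive.
request_trigger = {"textContent": "some raw text", "modelName": "example-model"}  # illustrative keys
final_results = {
    "documents": [{"id": "text_input_0", "text": "some raw text"}],
    "embeddings": [[0.1, 0.2, 0.3]],
}


def is_request_trigger(payload) -> bool:
    # The same test used above: a dict still carrying raw text is a trigger, not results.
    return isinstance(payload, dict) and "textContent" in payload


assert is_request_trigger(request_trigger)
assert not is_request_trigger(final_results)
```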
@@ -1,6 +1,5 @@
|
||||
import dash
|
||||
from dash import callback, Input, Output, State, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from dash import callback, Input, Output
|
||||
|
||||
|
||||
class InteractionCallbacks:
|
||||
@@ -8,75 +7,16 @@ class InteractionCallbacks:
|
||||
self._register_callbacks()
|
||||
|
||||
def _register_callbacks(self):
|
||||
@callback(
|
||||
Output("point-details", "children"),
|
||||
Input("embedding-plot", "clickData"),
|
||||
[State("processed-data", "data"), State("processed-prompts", "data")],
|
||||
)
|
||||
def display_click_data(clickData, data, prompts_data):
|
||||
if not clickData or not data:
|
||||
return "Click on a point to see details"
|
||||
|
||||
point_data = clickData["points"][0]
|
||||
trace_name = point_data.get("fullData", {}).get("name", "Documents")
|
||||
|
||||
if "pointIndex" in point_data:
|
||||
point_index = point_data["pointIndex"]
|
||||
elif "pointNumber" in point_data:
|
||||
point_index = point_data["pointNumber"]
|
||||
else:
|
||||
return "Could not identify clicked point"
|
||||
|
||||
if (
|
||||
trace_name.startswith("Prompts")
|
||||
and prompts_data
|
||||
and "prompts" in prompts_data
|
||||
):
|
||||
item = prompts_data["prompts"][point_index]
|
||||
item_type = "Prompt"
|
||||
else:
|
||||
item = data["documents"][point_index]
|
||||
item_type = "Document"
|
||||
|
||||
return self._create_detail_card(item, item_type)
|
||||
|
||||
@callback(
|
||||
[
|
||||
Output("processed-data", "data", allow_duplicate=True),
|
||||
Output("processed-prompts", "data", allow_duplicate=True),
|
||||
Output("point-details", "children", allow_duplicate=True),
|
||||
],
|
||||
Input("reset-button", "n_clicks"),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def reset_data(n_clicks):
|
||||
if n_clicks is None or n_clicks == 0:
|
||||
return dash.no_update, dash.no_update, dash.no_update
|
||||
return dash.no_update, dash.no_update
|
||||
|
||||
return None, None, "Click on a point to see details"
|
||||
|
||||
@staticmethod
|
||||
def _create_detail_card(item, item_type):
|
||||
return dbc.Card(
|
||||
[
|
||||
dbc.CardBody(
|
||||
[
|
||||
html.H5(f"{item_type}: {item['id']}", className="card-title"),
|
||||
html.P(f"Text: {item['text']}", className="card-text"),
|
||||
html.P(
|
||||
f"Category: {item.get('category', 'Unknown')}",
|
||||
className="card-text",
|
||||
),
|
||||
html.P(
|
||||
f"Subcategory: {item.get('subcategory', 'Unknown')}",
|
||||
className="card-text",
|
||||
),
|
||||
html.P(
|
||||
f"Tags: {', '.join(item.get('tags', [])) if item.get('tags') else 'None'}",
|
||||
className="card-text",
|
||||
),
|
||||
html.P(f"Type: {item_type}", className="card-text text-muted"),
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
return None, None
|
||||
|
@@ -1,13 +1,11 @@
|
||||
from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from .textinput import TextInputComponent
|
||||
|
||||
|
||||
class DataSourceComponent:
|
||||
def __init__(self):
|
||||
self.upload_component = UploadComponent()
|
||||
self.text_input_component = TextInputComponent()
|
||||
|
||||
def create_tabbed_interface(self):
|
||||
"""Create tabbed interface for different data sources."""
|
||||
@@ -19,7 +17,6 @@ class DataSourceComponent:
|
||||
[
|
||||
dbc.Tab(label="File Upload", tab_id="file-tab"),
|
||||
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
|
||||
dbc.Tab(label="Text Input", tab_id="text-input-tab"),
|
||||
],
|
||||
id="data-source-tabs",
|
||||
active_tab="file-tab",
|
||||
@@ -211,10 +208,6 @@ class DataSourceComponent:
|
||||
]
|
||||
)
|
||||
|
||||
def create_text_input_tab(self):
|
||||
"""Create text input tab content for browser-based embedding generation."""
|
||||
return html.Div([self.text_input_component.create_text_input_interface()])
|
||||
|
||||
def _create_opensearch_section(self, section_type):
|
||||
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""
|
||||
section_id = section_type # 'data' or 'prompts'
|
||||
|
@@ -2,31 +2,26 @@ from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from .datasource import DataSourceComponent
|
||||
from .textinput import TextInputComponent
|
||||
|
||||
|
||||
class SidebarComponent:
|
||||
def __init__(self):
|
||||
self.upload_component = UploadComponent()
|
||||
self.datasource_component = DataSourceComponent()
|
||||
self.textinput_component = TextInputComponent()
|
||||
|
||||
def create_layout(self):
|
||||
return dbc.Col(
|
||||
[
|
||||
html.H5("Data Sources", className="mb-3"),
|
||||
self.datasource_component.create_error_alert(),
|
||||
self.datasource_component.create_success_alert(),
|
||||
self.datasource_component.create_tabbed_interface(),
|
||||
html.H5("Visualization Controls", className="mb-3 mt-4"),
|
||||
]
|
||||
+ self._create_method_dropdown()
|
||||
+ self._create_color_dropdown()
|
||||
+ self._create_dimension_toggle()
|
||||
+ self._create_prompts_toggle()
|
||||
+ [
|
||||
html.H5("Point Details", className="mb-3"),
|
||||
html.Div(
|
||||
id="point-details", children="Click on a point to see details"
|
||||
),
|
||||
dbc.Accordion(
|
||||
[
|
||||
self._create_data_sources_item(),
|
||||
self._create_generate_embeddings_item(),
|
||||
self._create_visualization_controls_item(),
|
||||
],
|
||||
always_open=True,
|
||||
)
|
||||
],
|
||||
width=3,
|
||||
style={"padding-right": "20px"},
|
||||
@@ -86,3 +81,63 @@ class SidebarComponent:
|
||||
style={"margin-bottom": "20px"},
|
||||
),
|
||||
]
|
||||
|
||||
def _create_generate_embeddings_item(self):
|
||||
return dbc.AccordionItem(
|
||||
[
|
||||
self.textinput_component.create_text_input_interface(),
|
||||
],
|
||||
title=html.Span(
|
||||
[
|
||||
"Generate Embeddings ",
|
||||
html.I(
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="generate-embeddings-info-icon",
|
||||
title="Create new embeddings from text input using various in-browser models",
|
||||
),
|
||||
]
|
||||
),
|
||||
item_id="generate-embeddings-accordion",
|
||||
)
|
||||
|
||||
def _create_data_sources_item(self):
|
||||
return dbc.AccordionItem(
|
||||
[
|
||||
self.datasource_component.create_error_alert(),
|
||||
self.datasource_component.create_success_alert(),
|
||||
self.datasource_component.create_tabbed_interface(),
|
||||
],
|
||||
title=html.Span(
|
||||
[
|
||||
"Load Embeddings ",
|
||||
html.I(
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="load-embeddings-info-icon",
|
||||
title="Load existing embeddings: upload files or read from OpenSearch",
|
||||
),
|
||||
]
|
||||
),
|
||||
item_id="data-sources-accordion",
|
||||
)
|
||||
|
||||
def _create_visualization_controls_item(self):
|
||||
return dbc.AccordionItem(
|
||||
self._create_method_dropdown()
|
||||
+ self._create_color_dropdown()
|
||||
+ self._create_dimension_toggle()
|
||||
+ self._create_prompts_toggle(),
|
||||
title=html.Span(
|
||||
[
|
||||
"Visualization Controls ",
|
||||
html.I(
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="visualization-controls-info-icon",
|
||||
title="Configure plot settings: select dimensionality reduction method, colors, and display options",
|
||||
),
|
||||
]
|
||||
),
|
||||
item_id="visualization-controls-accordion",
|
||||
)
|
||||
|
@@ -30,9 +30,6 @@ class TextInputComponent:
|
||||
# Generation controls
|
||||
self._create_generation_controls(),
|
||||
html.Hr(),
|
||||
# Progress indicators
|
||||
self._create_progress_indicators(),
|
||||
html.Hr(),
|
||||
# Status and results
|
||||
self._create_status_section(),
|
||||
# Hidden components for data flow
|
||||
@@ -297,65 +294,10 @@ class TextInputComponent:
|
||||
]
|
||||
)
|
||||
|
||||
def _create_progress_indicators(self):
|
||||
"""Create progress bars for model loading and embedding generation."""
|
||||
return html.Div(
|
||||
[
|
||||
# Model loading progress
|
||||
html.Div(
|
||||
[
|
||||
html.H6("Model Loading Progress", className="mb-2"),
|
||||
dbc.Progress(
|
||||
id="model-loading-progress",
|
||||
value=0,
|
||||
striped=True,
|
||||
animated=True,
|
||||
className="mb-2",
|
||||
),
|
||||
html.Small(
|
||||
id="model-loading-status",
|
||||
children="No model loading in progress",
|
||||
className="text-muted",
|
||||
),
|
||||
],
|
||||
id="model-loading-section",
|
||||
style={"display": "none"},
|
||||
),
|
||||
html.Br(),
|
||||
# Embedding generation progress
|
||||
html.Div(
|
||||
[
|
||||
html.H6("Embedding Generation Progress", className="mb-2"),
|
||||
dbc.Progress(
|
||||
id="embedding-progress",
|
||||
value=0,
|
||||
striped=True,
|
||||
animated=True,
|
||||
className="mb-2",
|
||||
),
|
||||
html.Small(
|
||||
id="embedding-status",
|
||||
children="No embedding generation in progress",
|
||||
className="text-muted",
|
||||
),
|
||||
],
|
||||
id="embedding-progress-section",
|
||||
style={"display": "none"},
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
def _create_status_section(self):
|
||||
"""Create status alerts and results preview."""
|
||||
return html.Div(
|
||||
[
|
||||
# Immediate status (from client-side)
|
||||
dbc.Alert(
|
||||
id="text-input-status-immediate",
|
||||
children="Ready to generate embeddings",
|
||||
color="light",
|
||||
className="mb-3",
|
||||
),
|
||||
# Server-side status
|
||||
dbc.Alert(
|
||||
id="text-input-status",
|
||||
|
@@ -5,39 +5,75 @@ import dash_bootstrap_components as dbc
|
||||
class UploadComponent:
|
||||
@staticmethod
|
||||
def create_data_upload():
|
||||
return dcc.Upload(
|
||||
id="upload-data",
|
||||
children=html.Div(["Drag and Drop or ", html.A("Select Files")]),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
},
|
||||
multiple=False,
|
||||
return html.Div(
|
||||
[
|
||||
dcc.Upload(
|
||||
id="upload-data",
|
||||
children=html.Div(
|
||||
[
|
||||
"Upload Data ",
|
||||
html.I(
|
||||
className="fas fa-info-circle",
|
||||
style={"color": "#6c757d", "fontSize": "14px"},
|
||||
id="data-upload-info",
|
||||
),
|
||||
]
|
||||
),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
},
|
||||
multiple=False,
|
||||
),
|
||||
dbc.Tooltip(
|
||||
"Click here or drag and drop NDJSON files containing document embeddings",
|
||||
target="data-upload-info",
|
||||
placement="top",
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def create_prompts_upload():
|
||||
return dcc.Upload(
|
||||
id="upload-prompts",
|
||||
children=html.Div(["Drag and Drop Prompts or ", html.A("Select Files")]),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
"borderColor": "#28a745",
|
||||
},
|
||||
multiple=False,
|
||||
return html.Div(
|
||||
[
|
||||
dcc.Upload(
|
||||
id="upload-prompts",
|
||||
children=html.Div(
|
||||
[
|
||||
"Upload Prompts ",
|
||||
html.I(
|
||||
className="fas fa-info-circle",
|
||||
style={"color": "#6c757d", "fontSize": "14px"},
|
||||
id="prompts-upload-info",
|
||||
),
|
||||
]
|
||||
),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
"borderColor": "#28a745",
|
||||
},
|
||||
multiple=False,
|
||||
),
|
||||
dbc.Tooltip(
|
||||
"Click here or drag and drop NDJSON files containing prompt embeddings",
|
||||
target="prompts-upload-info",
|
||||
placement="top",
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
@@ -38,9 +38,9 @@ class PlotFactory:
|
||||
if dimensions == "3d":
|
||||
fig = px.scatter_3d(
|
||||
df,
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
z="dim_3",
|
||||
x="x",
|
||||
y="y",
|
||||
z="z",
|
||||
color=color_values,
|
||||
hover_data=hover_fields,
|
||||
title=f"3D Embedding Visualization - {method} (colored by {color_by})",
|
||||
@@ -49,8 +49,8 @@ class PlotFactory:
|
||||
else:
|
||||
fig = px.scatter(
|
||||
df,
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
x="x",
|
||||
y="y",
|
||||
color=color_values,
|
||||
hover_data=hover_fields,
|
||||
title=f"2D Embedding Visualization - {method} (colored by {color_by})",
|
||||
@@ -77,17 +77,17 @@ class PlotFactory:
|
||||
if dimensions == "3d":
|
||||
doc_fig = px.scatter_3d(
|
||||
doc_df,
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
z="dim_3",
|
||||
x="x",
|
||||
y="y",
|
||||
z="z",
|
||||
color=doc_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
else:
|
||||
doc_fig = px.scatter(
|
||||
doc_df,
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
x="x",
|
||||
y="y",
|
||||
color=doc_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
@@ -114,17 +114,17 @@ class PlotFactory:
|
||||
if dimensions == "3d":
|
||||
prompt_fig = px.scatter_3d(
|
||||
prompt_df,
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
z="dim_3",
|
||||
x="x",
|
||||
y="y",
|
||||
z="z",
|
||||
color=prompt_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
else:
|
||||
prompt_fig = px.scatter(
|
||||
prompt_df,
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
x="x",
|
||||
y="y",
|
||||
color=prompt_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
@@ -168,11 +168,11 @@
                "category": doc.category,
                "subcategory": doc.subcategory,
                "tags_str": ", ".join(doc.tags) if doc.tags else "None",
                "dim_1": coordinates[i, 0],
                "dim_2": coordinates[i, 1],
                "x": coordinates[i, 0],
                "y": coordinates[i, 1],
            }
            if dimensions == "3d":
                row["dim_3"] = coordinates[i, 2]
                row["z"] = coordinates[i, 2]
            df_data.append(row)

        return pd.DataFrame(df_data)
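As context for the dim_1/dim_2/dim_3 to x/y/z renaming, a minimal sketch of how such a DataFrame feeds Plotly Express; the data is made up and this is not the project's PlotFactory code.

```python
# Sketch: a DataFrame with x/y/z columns plotted with Plotly Express,
# mirroring the renamed columns in the hunks above. The values are fabricated.
import pandas as pd
import plotly.express as px

df = pd.DataFrame(
    {
        "x": [0.1, 0.5, -0.3],
        "y": [1.2, -0.7, 0.4],
        "z": [0.0, 0.9, -1.1],
        "category": ["news", "news", "prompts"],
    }
)

fig = px.scatter_3d(df, x="x", y="y", z="z", color="category",
                    title="3D Embedding Visualization (sketch)")
fig.show()
```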
uv.lock (generated, 25 lines changed)
@@ -412,13 +412,12 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "embeddingbuddy"
|
||||
version = "0.3.0"
|
||||
version = "0.5.0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "dash" },
|
||||
{ name = "dash-bootstrap-components" },
|
||||
{ name = "mypy" },
|
||||
{ name = "numba" },
|
||||
{ name = "numpy" },
|
||||
{ name = "opensearch-py" },
|
||||
{ name = "opentsne" },
|
||||
@@ -431,6 +430,7 @@ dependencies = [
|
||||
[package.optional-dependencies]
|
||||
all = [
|
||||
{ name = "bandit" },
|
||||
{ name = "gunicorn" },
|
||||
{ name = "mypy" },
|
||||
{ name = "pip-audit" },
|
||||
{ name = "pytest" },
|
||||
@@ -451,6 +451,9 @@ lint = [
|
||||
{ name = "mypy" },
|
||||
{ name = "ruff" },
|
||||
]
|
||||
prod = [
|
||||
{ name = "gunicorn" },
|
||||
]
|
||||
security = [
|
||||
{ name = "bandit" },
|
||||
{ name = "pip-audit" },
|
||||
@@ -466,11 +469,11 @@ requires-dist = [
|
||||
{ name = "bandit", extras = ["toml"], marker = "extra == 'security'", specifier = ">=1.7.5" },
|
||||
{ name = "dash", specifier = ">=2.17.1" },
|
||||
{ name = "dash-bootstrap-components", specifier = ">=1.5.0" },
|
||||
{ name = "embeddingbuddy", extras = ["test", "lint", "security"], marker = "extra == 'all'" },
|
||||
{ name = "embeddingbuddy", extras = ["test", "lint", "security"], marker = "extra == 'dev'" },
|
||||
{ name = "embeddingbuddy", extras = ["test", "lint", "security", "prod"], marker = "extra == 'all'" },
|
||||
{ name = "gunicorn", marker = "extra == 'prod'", specifier = ">=21.2.0" },
|
||||
{ name = "mypy", specifier = ">=1.17.1" },
|
||||
{ name = "mypy", marker = "extra == 'lint'", specifier = ">=1.5.0" },
|
||||
{ name = "numba", specifier = ">=0.56.4" },
|
||||
{ name = "numpy", specifier = ">=1.24.4" },
|
||||
{ name = "opensearch-py", specifier = ">=3.0.0" },
|
||||
{ name = "opentsne", specifier = ">=1.0.0" },
|
||||
@@ -484,7 +487,7 @@ requires-dist = [
|
||||
{ name = "scikit-learn", specifier = ">=1.3.2" },
|
||||
{ name = "umap-learn", specifier = ">=0.5.8" },
|
||||
]
|
||||
provides-extras = ["test", "lint", "security", "dev", "all"]
|
||||
provides-extras = ["test", "lint", "security", "prod", "dev", "all"]
|
||||
|
||||
[[package]]
|
||||
name = "events"
|
||||
@@ -520,6 +523,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/68/9d4508e893976286d2ead7f8f571314af6c2037af34853a30fd769c02e9d/flask-3.1.1-py3-none-any.whl", hash = "sha256:07aae2bb5eaf77993ef57e357491839f5fd9f4dc281593a81a9e4d79a24f295c", size = 103305, upload-time = "2025-05-13T15:01:15.591Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gunicorn"
|
||||
version = "23.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "packaging" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031, upload-time = "2024-08-10T20:25:27.378Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/7d/6dac2a6e1eba33ee43f318edbed4ff29151a49b5d37f080aad1e6469bca4/gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d", size = 85029, upload-time = "2024-08-10T20:25:24.996Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.16.0"
|
||||
|
wsgi.py (new file, 20 lines)
@@ -0,0 +1,20 @@
"""
WSGI entry point for production deployment.
Use this with a production WSGI server like Gunicorn.
"""
from src.embeddingbuddy.app import create_app

# Create the application instance
application = create_app()

# For compatibility with different WSGI servers
app = application

if __name__ == "__main__":
    # This won't be used in production, but useful for testing
    from src.embeddingbuddy.config.settings import AppSettings
    application.run(
        host=AppSettings.HOST,
        port=AppSettings.PORT,
        debug=False
    )
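For reference, a small sketch of what the "wsgi:application" target used by run_prod.py means to a WSGI server such as Gunicorn: import the wsgi module, then serve its application attribute. This is illustrative, not Gunicorn's actual source.

```python
# Sketch: how a WSGI server resolves the "wsgi:application" string.
import importlib

module_name, attr_name = "wsgi:application".split(":")
module = importlib.import_module(module_name)  # imports wsgi.py above
application = getattr(module, attr_name)       # the object the server will call
```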