Compare commits
1 Commits
v0.6.2
...
d35ef995a3
Author | SHA1 | Date | |
---|---|---|---|
d35ef995a3 |
@@ -4,9 +4,7 @@
|
||||
"Bash(mkdir:*)",
|
||||
"Bash(uv run:*)",
|
||||
"Bash(uv add:*)",
|
||||
"Bash(uv sync:*)",
|
||||
"Bash(tree:*)",
|
||||
"WebFetch(domain:www.dash-bootstrap-components.com)"
|
||||
"Bash(uv sync:*)"
|
||||
],
|
||||
"deny": [],
|
||||
"ask": [],
|
||||
|
@@ -71,15 +71,22 @@ jobs:
|
||||
echo '```' >> release-notes.md
|
||||
|
||||
- name: Create Release
|
||||
uses: akkuman/gitea-release-action@v1
|
||||
uses: actions/create-release@v1
|
||||
env:
|
||||
NODE_OPTIONS: '--experimental-fetch'
|
||||
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
with:
|
||||
token: ${{ secrets.GITEA_TOKEN }}
|
||||
tag_name: ${{ github.ref_name || github.event.inputs.version }}
|
||||
release_name: Release ${{ github.ref_name || github.event.inputs.version }}
|
||||
body_path: release-notes.md
|
||||
draft: false
|
||||
prerelease: false
|
||||
files: |-
|
||||
dist/*
|
||||
|
||||
- name: Upload Release Assets
|
||||
uses: actions/upload-release-asset@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
with:
|
||||
upload_url: ${{ steps.create_release.outputs.upload_url }}
|
||||
asset_path: dist/
|
||||
asset_name: embeddingbuddy-dist
|
||||
asset_content_type: application/zip
|
54
.github/workflows/docker-release.yml
vendored
54
.github/workflows/docker-release.yml
vendored
@@ -1,54 +0,0 @@
|
||||
name: Docker Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v[0-9]+.[0-9]+.[0-9]+'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Container Registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=ref,event=tag
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=semver,pattern={{major}}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/amd64,linux/arm64
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
@@ -22,13 +22,11 @@ uv sync
|
||||
**Run the application:**
|
||||
|
||||
Development mode (with auto-reload):
|
||||
|
||||
```bash
|
||||
uv run run_dev.py
|
||||
```
|
||||
|
||||
Production mode (with Gunicorn WSGI server):
|
||||
|
||||
```bash
|
||||
# First install production dependencies
|
||||
uv sync --extra prod
|
||||
@@ -38,12 +36,11 @@ uv run run_prod.py
|
||||
```
|
||||
|
||||
Legacy mode (basic Dash server):
|
||||
|
||||
```bash
|
||||
uv run main.py
|
||||
```
|
||||
|
||||
The app will be available at <http://127.0.0.1:8050>
|
||||
The app will be available at http://127.0.0.1:8050
|
||||
|
||||
**Run tests:**
|
||||
|
||||
|
35
Dockerfile
35
Dockerfile
@@ -2,9 +2,6 @@
|
||||
# Stage 1: Builder
|
||||
FROM python:3.11-slim as builder
|
||||
|
||||
# Create non-root user early in builder stage
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
|
||||
# Install system dependencies for building Python packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
@@ -28,15 +25,6 @@ COPY wsgi.py .
|
||||
COPY run_prod.py .
|
||||
COPY assets/ assets/
|
||||
|
||||
# Change ownership of source files before building (lighter I/O)
|
||||
RUN chown -R appuser:appuser /app
|
||||
|
||||
# Create and set permissions for appuser home directory (needed for uv cache)
|
||||
RUN mkdir -p /home/appuser && chown -R appuser:appuser /home/appuser
|
||||
|
||||
# Switch to non-root user before building
|
||||
USER appuser
|
||||
|
||||
# Create virtual environment and install dependencies (including production extras)
|
||||
RUN uv venv .venv
|
||||
RUN uv sync --frozen --extra prod
|
||||
@@ -44,28 +32,23 @@ RUN uv sync --frozen --extra prod
|
||||
# Stage 2: Runtime
|
||||
FROM python:3.11-slim as runtime
|
||||
|
||||
# Create non-root user in runtime stage
|
||||
RUN groupadd -r appuser && useradd -r -g appuser appuser
|
||||
|
||||
# Install runtime dependencies for compiled packages
|
||||
RUN apt-get update && apt-get install -y \
|
||||
libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set working directory and change ownership (small directory)
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
RUN chown appuser:appuser /app
|
||||
|
||||
# Copy files from builder with correct ownership
|
||||
COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv
|
||||
COPY --from=builder --chown=appuser:appuser /app/src /app/src
|
||||
COPY --from=builder --chown=appuser:appuser /app/main.py /app/main.py
|
||||
COPY --from=builder --chown=appuser:appuser /app/assets /app/assets
|
||||
COPY --from=builder --chown=appuser:appuser /app/wsgi.py /app/wsgi.py
|
||||
COPY --from=builder --chown=appuser:appuser /app/run_prod.py /app/run_prod.py
|
||||
# Copy virtual environment from builder stage
|
||||
COPY --from=builder /app/.venv /app/.venv
|
||||
|
||||
# Switch to non-root user
|
||||
USER appuser
|
||||
# Copy application files from builder stage
|
||||
COPY --from=builder /app/src /app/src
|
||||
COPY --from=builder /app/main.py /app/main.py
|
||||
COPY --from=builder /app/assets /app/assets
|
||||
COPY --from=builder /app/wsgi.py /app/wsgi.py
|
||||
COPY --from=builder /app/run_prod.py /app/run_prod.py
|
||||
|
||||
# Make sure the virtual environment is in PATH
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
|
21
LICENSE
21
LICENSE
@@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Austin Godber - EmbeddingBuddy
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
48
README.md
48
README.md
@@ -152,38 +152,22 @@ The application follows a modular architecture for improved maintainability and
|
||||
|
||||
```text
|
||||
src/embeddingbuddy/
|
||||
├── app.py # Main application entry point and factory
|
||||
├── config/ # Configuration management
|
||||
│ └── settings.py # Centralized app settings
|
||||
├── data/ # Data parsing and processing
|
||||
│ ├── parser.py # NDJSON parsing logic
|
||||
│ ├── processor.py # Data transformation utilities
|
||||
│ └── sources/ # Data source integrations
|
||||
│ └── opensearch.py # OpenSearch data source
|
||||
├── models/ # Data schemas and algorithms
|
||||
│ ├── schemas.py # Pydantic data models
|
||||
│ ├── reducers.py # Dimensionality reduction algorithms
|
||||
│ └── field_mapper.py # Field mapping utilities
|
||||
├── visualization/ # Plot creation and styling
|
||||
│ ├── plots.py # Plot factory and creation logic
|
||||
│ └── colors.py # Color mapping utilities
|
||||
├── ui/ # User interface components
|
||||
│ ├── layout.py # Main application layout
|
||||
│ ├── components/ # Reusable UI components
|
||||
│ │ ├── sidebar.py # Sidebar component
|
||||
│ │ ├── upload.py # Upload components
|
||||
│ │ ├── textinput.py # Text input components
|
||||
│ │ └── datasource.py # Data source components
|
||||
│ └── callbacks/ # Organized callback functions
|
||||
│ ├── data_processing.py # Data upload/processing callbacks
|
||||
│ ├── visualization.py # Plot update callbacks
|
||||
│ └── interactions.py # User interaction callbacks
|
||||
└── utils/ # Utility functions
|
||||
|
||||
main.py # Application runner (at project root)
|
||||
main.py # Application runner (at project root)
|
||||
run_dev.py # Development server runner
|
||||
run_prod.py # Production server runner
|
||||
├── config/ # Configuration management
|
||||
│ └── settings.py # Centralized app settings
|
||||
├── data/ # Data parsing and processing
|
||||
│ ├── parser.py # NDJSON parsing logic
|
||||
│ └── processor.py # Data transformation utilities
|
||||
├── models/ # Data schemas and algorithms
|
||||
│ ├── schemas.py # Pydantic data models
|
||||
│ └── reducers.py # Dimensionality reduction algorithms
|
||||
├── visualization/ # Plot creation and styling
|
||||
│ ├── plots.py # Plot factory and creation logic
|
||||
│ └── colors.py # Color mapping utilities
|
||||
├── ui/ # User interface components
|
||||
│ ├── layout.py # Main application layout
|
||||
│ ├── components/ # Reusable UI components
|
||||
│ └── callbacks/ # Organized callback functions
|
||||
└── utils/ # Utility functions
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
@@ -1,17 +0,0 @@
|
||||
/* CSS override for transparent hover boxes in Plotly plots */
|
||||
|
||||
/* Make hover boxes transparent while preserving text readability */
|
||||
.hovertext {
|
||||
fill-opacity: 0.8 !important;
|
||||
stroke-opacity: 1 !important;
|
||||
}
|
||||
|
||||
/* Alternative selector for different Plotly versions */
|
||||
g.hovertext > path {
|
||||
opacity: 0.8 !important;
|
||||
}
|
||||
|
||||
/* Ensure text remains fully visible */
|
||||
.hovertext text {
|
||||
opacity: 1 !important;
|
||||
}
|
@@ -45,12 +45,28 @@ class TransformersEmbedder {
|
||||
console.log('✅ Using globally loaded Transformers.js pipeline');
|
||||
}
|
||||
|
||||
this.extractor = await window.transformers.pipeline('feature-extraction', modelName);
|
||||
|
||||
// Show loading progress to user
|
||||
if (window.updateModelLoadingProgress) {
|
||||
window.updateModelLoadingProgress(0, `Loading ${modelName}...`);
|
||||
}
|
||||
|
||||
this.extractor = await window.transformers.pipeline('feature-extraction', modelName, {
|
||||
progress_callback: (data) => {
|
||||
if (window.updateModelLoadingProgress && data.progress !== undefined) {
|
||||
const progress = Math.round(data.progress);
|
||||
window.updateModelLoadingProgress(progress, data.status || 'Loading...');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
this.modelCache.set(modelName, this.extractor);
|
||||
this.currentModel = modelName;
|
||||
this.isLoading = false;
|
||||
|
||||
|
||||
if (window.updateModelLoadingProgress) {
|
||||
window.updateModelLoadingProgress(100, 'Model loaded successfully');
|
||||
}
|
||||
|
||||
return { success: true, model: modelName };
|
||||
} catch (error) {
|
||||
this.isLoading = false;
|
||||
@@ -100,8 +116,17 @@ class TransformersEmbedder {
|
||||
}
|
||||
});
|
||||
|
||||
// Update progress
|
||||
const progress = Math.min(100, ((i + batch.length) / texts.length) * 100);
|
||||
if (window.updateEmbeddingProgress) {
|
||||
window.updateEmbeddingProgress(progress, `Processing ${i + batch.length}/${texts.length} texts`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (window.updateEmbeddingProgress) {
|
||||
window.updateEmbeddingProgress(100, `Generated ${embeddings.length} embeddings successfully`);
|
||||
}
|
||||
|
||||
return embeddings;
|
||||
} catch (error) {
|
||||
console.error('Embedding generation error:', error);
|
||||
@@ -114,6 +139,30 @@ class TransformersEmbedder {
|
||||
window.transformersEmbedder = new TransformersEmbedder();
|
||||
console.log('📦 TransformersEmbedder instance created');
|
||||
|
||||
// Global progress update functions
|
||||
window.updateModelLoadingProgress = function(progress, status) {
|
||||
const progressBar = document.getElementById('model-loading-progress');
|
||||
const statusText = document.getElementById('model-loading-status');
|
||||
if (progressBar) {
|
||||
progressBar.style.width = progress + '%';
|
||||
progressBar.setAttribute('aria-valuenow', progress);
|
||||
}
|
||||
if (statusText) {
|
||||
statusText.textContent = status;
|
||||
}
|
||||
};
|
||||
|
||||
window.updateEmbeddingProgress = function(progress, status) {
|
||||
const progressBar = document.getElementById('embedding-progress');
|
||||
const statusText = document.getElementById('embedding-status');
|
||||
if (progressBar) {
|
||||
progressBar.style.width = progress + '%';
|
||||
progressBar.setAttribute('aria-valuenow', progress);
|
||||
}
|
||||
if (statusText) {
|
||||
statusText.textContent = status;
|
||||
}
|
||||
};
|
||||
|
||||
// Dash clientside callback functions
|
||||
window.dash_clientside = window.dash_clientside || {};
|
||||
@@ -121,28 +170,31 @@ console.log('🔧 Setting up window.dash_clientside.transformers');
|
||||
window.dash_clientside.transformers = {
|
||||
generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
|
||||
console.log('🚀 generateEmbeddings called with:', { nClicks, modelName, tokenizationMethod, textLength: textContent?.length });
|
||||
|
||||
|
||||
if (!nClicks || !textContent || textContent.trim().length === 0) {
|
||||
console.log('⚠️ Early return - missing required parameters');
|
||||
return window.dash_clientside.no_update;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// Initialize model if needed
|
||||
const initResult = await window.transformersEmbedder.initializeModel(modelName);
|
||||
if (!initResult.success) {
|
||||
return [
|
||||
{ error: `Model loading error: ${initResult.error}` },
|
||||
{ error: initResult.error },
|
||||
`❌ Model loading error: ${initResult.error}`,
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Tokenize text based on method
|
||||
let textChunks;
|
||||
const trimmedText = textContent.trim();
|
||||
|
||||
|
||||
switch (tokenizationMethod) {
|
||||
case 'sentence':
|
||||
// Simple sentence splitting - can be enhanced with proper NLP
|
||||
textChunks = trimmedText
|
||||
.split(/[.!?]+/)
|
||||
.map(s => s.trim())
|
||||
@@ -163,24 +215,28 @@ window.dash_clientside.transformers = {
|
||||
default:
|
||||
textChunks = [trimmedText];
|
||||
}
|
||||
|
||||
|
||||
if (textChunks.length === 0) {
|
||||
return [
|
||||
{ error: 'No valid text chunks found after tokenization' },
|
||||
'❌ Error: No valid text chunks found after tokenization',
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Generate embeddings
|
||||
const embeddings = await window.transformersEmbedder.generateEmbeddings(textChunks);
|
||||
|
||||
|
||||
if (!embeddings || embeddings.length !== textChunks.length) {
|
||||
return [
|
||||
{ error: 'Embedding generation failed' },
|
||||
{ error: 'Embedding generation failed - mismatch in text chunks and embeddings' },
|
||||
'❌ Error: Embedding generation failed',
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
// Create documents structure
|
||||
const documents = textChunks.map((text, i) => ({
|
||||
id: `text_input_${Date.now()}_${i}`,
|
||||
@@ -190,36 +246,33 @@ window.dash_clientside.transformers = {
|
||||
subcategory: subcategory || "Generated",
|
||||
tags: []
|
||||
}));
|
||||
|
||||
// Return the successful embeddings data
|
||||
const embeddingsData = {
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
};
|
||||
|
||||
console.log('✅ Embeddings generated successfully:', embeddingsData);
|
||||
|
||||
|
||||
return [
|
||||
embeddingsData,
|
||||
{
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
},
|
||||
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
|
||||
"success",
|
||||
false
|
||||
];
|
||||
|
||||
|
||||
} catch (error) {
|
||||
console.error('Client-side embedding error:', error);
|
||||
return [
|
||||
{ error: error.message },
|
||||
`❌ Error: ${error.message}`,
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
console.log('✅ Transformers.js client-side setup complete');
|
||||
console.log('Available:', {
|
||||
transformersEmbedder: !!window.transformersEmbedder,
|
||||
dashClientside: !!window.dash_clientside,
|
||||
transformersModule: !!window.dash_clientside?.transformers,
|
||||
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings,
|
||||
processAsync: typeof window.processEmbeddingsAsync
|
||||
generateFunction: typeof window.dash_clientside?.transformers?.generateEmbeddings
|
||||
});
|
@@ -104,28 +104,17 @@ window.dash_clientside = window.dash_clientside || {};
|
||||
window.dash_clientside.transformers = {
|
||||
generateEmbeddings: async function(nClicks, textContent, modelName, tokenizationMethod, category, subcategory) {
|
||||
console.log('🚀 Client-side generateEmbeddings called');
|
||||
|
||||
|
||||
if (!nClicks || !textContent || textContent.trim().length === 0) {
|
||||
console.log('⚠️ Missing required parameters');
|
||||
return window.dash_clientside.no_update;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// Ensure Transformers.js is loaded
|
||||
if (!window.transformersLibraryLoaded) {
|
||||
const loaded = await initializeTransformers();
|
||||
if (!loaded) {
|
||||
return [
|
||||
{ error: 'Failed to load Transformers.js' },
|
||||
false
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
// Tokenize text
|
||||
let textChunks;
|
||||
const trimmedText = textContent.trim();
|
||||
|
||||
|
||||
switch (tokenizationMethod) {
|
||||
case 'sentence':
|
||||
textChunks = trimmedText.split(/[.!?]+/).map(s => s.trim()).filter(s => s.length > 0);
|
||||
@@ -139,50 +128,45 @@ window.dash_clientside.transformers = {
|
||||
default:
|
||||
textChunks = [trimmedText];
|
||||
}
|
||||
|
||||
|
||||
if (textChunks.length === 0) {
|
||||
return [
|
||||
{ error: 'No valid text chunks after tokenization' },
|
||||
false
|
||||
];
|
||||
throw new Error('No valid text chunks after tokenization');
|
||||
}
|
||||
|
||||
|
||||
// Generate embeddings
|
||||
const embeddings = await window.simpleEmbedder.generateEmbeddings(textChunks, modelName);
|
||||
|
||||
|
||||
// Create documents
|
||||
const documents = textChunks.map((text, i) => ({
|
||||
id: `text_input_${Date.now()}_${i}`,
|
||||
text: text,
|
||||
embedding: embeddings[i],
|
||||
category: category || "Text Input",
|
||||
subcategory: subcategory || "Generated",
|
||||
subcategory: subcategory || "Generated",
|
||||
tags: []
|
||||
}));
|
||||
|
||||
// Return the successful embeddings data
|
||||
const embeddingsData = {
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
};
|
||||
|
||||
console.log('✅ Embeddings generated successfully:', embeddingsData);
|
||||
|
||||
|
||||
return [
|
||||
embeddingsData,
|
||||
{
|
||||
documents: documents,
|
||||
embeddings: embeddings
|
||||
},
|
||||
`✅ Generated embeddings for ${documents.length} text chunks using ${modelName}`,
|
||||
"success",
|
||||
false
|
||||
];
|
||||
|
||||
|
||||
} catch (error) {
|
||||
console.error('❌ Error generating embeddings:', error);
|
||||
return [
|
||||
{ error: error.message },
|
||||
`❌ Error: ${error.message}`,
|
||||
"danger",
|
||||
false
|
||||
];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
console.log('✅ Simple Transformers.js setup complete');
|
||||
console.log('Available functions:', Object.keys(window.dash_clientside.transformers));
|
Binary file not shown.
Before Width: | Height: | Size: 844 KiB After Width: | Height: | Size: 339 KiB |
File diff suppressed because one or more lines are too long
1
prompts-raw.ndjson
Normal file
1
prompts-raw.ndjson
Normal file
File diff suppressed because one or more lines are too long
64
prompts.ndjson
Normal file
64
prompts.ndjson
Normal file
File diff suppressed because one or more lines are too long
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "embeddingbuddy"
|
||||
version = "0.6.2"
|
||||
version = "0.4.0"
|
||||
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
@@ -11,16 +11,10 @@ def main():
|
||||
# Force development settings
|
||||
os.environ["EMBEDDINGBUDDY_ENV"] = "development"
|
||||
os.environ["EMBEDDINGBUDDY_DEBUG"] = "true"
|
||||
|
||||
# Check for OpenSearch disable flag (optional for testing)
|
||||
# Set EMBEDDINGBUDDY_OPENSEARCH_ENABLED=false to test without OpenSearch
|
||||
opensearch_status = os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "true")
|
||||
opensearch_enabled = opensearch_status.lower() == "true"
|
||||
|
||||
print("🚀 Starting EmbeddingBuddy in development mode...")
|
||||
print("📁 Auto-reload enabled - changes will trigger restart")
|
||||
print("🌐 Server will be available at http://127.0.0.1:8050")
|
||||
print(f"🔍 OpenSearch: {'Enabled' if opensearch_enabled else 'Disabled'}")
|
||||
print("⏹️ Press Ctrl+C to stop")
|
||||
|
||||
app = create_app()
|
||||
|
@@ -13,9 +13,6 @@ def main():
|
||||
# Force production settings
|
||||
os.environ["EMBEDDINGBUDDY_ENV"] = "production"
|
||||
os.environ["EMBEDDINGBUDDY_DEBUG"] = "false"
|
||||
# Disable OpenSearch by default in production (can be overridden by setting env var)
|
||||
if "EMBEDDINGBUDDY_OPENSEARCH_ENABLED" not in os.environ:
|
||||
os.environ["EMBEDDINGBUDDY_OPENSEARCH_ENABLED"] = "false"
|
||||
|
||||
print("🚀 Starting EmbeddingBuddy in production mode...")
|
||||
print(f"⚙️ Workers: {AppSettings.GUNICORN_WORKERS}")
|
||||
@@ -28,7 +25,7 @@ def main():
|
||||
"--workers", str(AppSettings.GUNICORN_WORKERS),
|
||||
"--bind", AppSettings.GUNICORN_BIND,
|
||||
"--timeout", str(AppSettings.GUNICORN_TIMEOUT),
|
||||
"--keep-alive", str(AppSettings.GUNICORN_KEEPALIVE),
|
||||
"--keepalive", str(AppSettings.GUNICORN_KEEPALIVE),
|
||||
"--access-logfile", "-",
|
||||
"--error-logfile", "-",
|
||||
"--log-level", "info",
|
||||
|
@@ -15,34 +15,7 @@ def create_app():
|
||||
assets_path = os.path.join(project_root, "assets")
|
||||
|
||||
app = dash.Dash(
|
||||
__name__,
|
||||
title="EmbeddingBuddy",
|
||||
external_stylesheets=[
|
||||
dbc.themes.BOOTSTRAP,
|
||||
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css",
|
||||
],
|
||||
assets_folder=assets_path,
|
||||
meta_tags=[
|
||||
{
|
||||
"name": "description",
|
||||
"content": "Interactive embedding visualization tool for exploring high-dimensional vectors through dimensionality reduction techniques like PCA, t-SNE, and UMAP.",
|
||||
},
|
||||
{"name": "author", "content": "EmbeddingBuddy"},
|
||||
{
|
||||
"name": "keywords",
|
||||
"content": "embeddings, visualization, dimensionality reduction, PCA, t-SNE, UMAP, machine learning, data science",
|
||||
},
|
||||
{"name": "viewport", "content": "width=device-width, initial-scale=1.0"},
|
||||
{
|
||||
"property": "og:title",
|
||||
"content": "EmbeddingBuddy - Interactive Embedding Visualization",
|
||||
},
|
||||
{
|
||||
"property": "og:description",
|
||||
"content": "Explore and visualize embedding vectors through interactive 2D/3D plots with multiple dimensionality reduction techniques.",
|
||||
},
|
||||
{"property": "og:type", "content": "website"},
|
||||
],
|
||||
__name__, external_stylesheets=[dbc.themes.BOOTSTRAP], assets_folder=assets_path
|
||||
)
|
||||
|
||||
# Allow callbacks to components that are dynamically created in tabs
|
||||
@@ -72,22 +45,22 @@ def _register_client_side_callbacks(app):
|
||||
if (!nClicks || !textContent || !textContent.trim()) {
|
||||
return window.dash_clientside.no_update;
|
||||
}
|
||||
|
||||
|
||||
console.log('🔍 Checking for Transformers.js...');
|
||||
console.log('window.dash_clientside:', typeof window.dash_clientside);
|
||||
console.log('window.dash_clientside.transformers:', typeof window.dash_clientside?.transformers);
|
||||
console.log('generateEmbeddings function:', typeof window.dash_clientside?.transformers?.generateEmbeddings);
|
||||
|
||||
if (typeof window.dash_clientside !== 'undefined' &&
|
||||
|
||||
if (typeof window.dash_clientside !== 'undefined' &&
|
||||
typeof window.dash_clientside.transformers !== 'undefined' &&
|
||||
typeof window.dash_clientside.transformers.generateEmbeddings === 'function') {
|
||||
|
||||
|
||||
console.log('✅ Calling Transformers.js generateEmbeddings...');
|
||||
return window.dash_clientside.transformers.generateEmbeddings(
|
||||
nClicks, textContent, modelName, tokenizationMethod, category, subcategory
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// More detailed error information
|
||||
let errorMsg = '❌ Transformers.js not available. ';
|
||||
if (typeof window.dash_clientside === 'undefined') {
|
||||
@@ -97,17 +70,21 @@ def _register_client_side_callbacks(app):
|
||||
} else if (typeof window.dash_clientside.transformers.generateEmbeddings !== 'function') {
|
||||
errorMsg += 'generateEmbeddings function not found.';
|
||||
}
|
||||
|
||||
|
||||
console.error(errorMsg);
|
||||
|
||||
|
||||
return [
|
||||
{ error: 'Transformers.js not loaded. Please refresh the page and try again.' },
|
||||
errorMsg + ' Please refresh the page.',
|
||||
'danger',
|
||||
false
|
||||
];
|
||||
}
|
||||
""",
|
||||
[
|
||||
Output("embeddings-generated-trigger", "data"),
|
||||
Output("text-input-status-immediate", "children"),
|
||||
Output("text-input-status-immediate", "color"),
|
||||
Output("generate-embeddings-btn", "disabled", allow_duplicate=True),
|
||||
],
|
||||
[Input("generate-embeddings-btn", "n_clicks")],
|
||||
|
@@ -72,12 +72,10 @@ class AppSettings:
|
||||
DEBUG = os.getenv("EMBEDDINGBUDDY_DEBUG", "True").lower() == "true"
|
||||
HOST = os.getenv("EMBEDDINGBUDDY_HOST", "127.0.0.1")
|
||||
PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))
|
||||
|
||||
|
||||
# Environment Configuration
|
||||
ENVIRONMENT = os.getenv(
|
||||
"EMBEDDINGBUDDY_ENV", "development"
|
||||
) # development, production
|
||||
|
||||
ENVIRONMENT = os.getenv("EMBEDDINGBUDDY_ENV", "development") # development, production
|
||||
|
||||
# WSGI Server Configuration (for production)
|
||||
GUNICORN_WORKERS = int(os.getenv("GUNICORN_WORKERS", "4"))
|
||||
GUNICORN_BIND = os.getenv("GUNICORN_BIND", f"{HOST}:{PORT}")
|
||||
@@ -85,9 +83,6 @@ class AppSettings:
|
||||
GUNICORN_KEEPALIVE = int(os.getenv("GUNICORN_KEEPALIVE", "5"))
|
||||
|
||||
# OpenSearch Configuration
|
||||
OPENSEARCH_ENABLED = (
|
||||
os.getenv("EMBEDDINGBUDDY_OPENSEARCH_ENABLED", "True").lower() == "true"
|
||||
)
|
||||
OPENSEARCH_DEFAULT_SIZE = 100
|
||||
OPENSEARCH_SAMPLE_SIZE = 5
|
||||
OPENSEARCH_CONNECTION_TIMEOUT = 30
|
||||
|
@@ -82,23 +82,19 @@ class DataProcessingCallbacks:
|
||||
)
|
||||
def render_tab_content(active_tab):
|
||||
from ...ui.components.datasource import DataSourceComponent
|
||||
from ...config.settings import AppSettings
|
||||
|
||||
datasource = DataSourceComponent()
|
||||
|
||||
if active_tab == "opensearch-tab" and AppSettings.OPENSEARCH_ENABLED:
|
||||
if active_tab == "opensearch-tab":
|
||||
return [datasource.create_opensearch_tab()]
|
||||
elif active_tab == "text-input-tab":
|
||||
return [datasource.create_text_input_tab()]
|
||||
else:
|
||||
return [datasource.create_file_upload_tab()]
|
||||
|
||||
# Register callbacks for both data and prompts sections (only if OpenSearch is enabled)
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
self._register_opensearch_callbacks("data", self.opensearch_client_data)
|
||||
self._register_opensearch_callbacks(
|
||||
"prompts", self.opensearch_client_prompts
|
||||
)
|
||||
# Register callbacks for both data and prompts sections
|
||||
self._register_opensearch_callbacks("data", self.opensearch_client_data)
|
||||
self._register_opensearch_callbacks("prompts", self.opensearch_client_prompts)
|
||||
|
||||
# Register collapsible section callbacks
|
||||
self._register_collapse_callbacks()
|
||||
@@ -625,12 +621,6 @@ class DataProcessingCallbacks:
|
||||
if not embeddings_data:
|
||||
return no_update, no_update, no_update, no_update, no_update
|
||||
|
||||
# Check if this is a request trigger (contains textContent) vs actual embeddings data
|
||||
if isinstance(embeddings_data, dict) and "textContent" in embeddings_data:
|
||||
# This is a processing request trigger, not the actual results
|
||||
# The JavaScript will handle the async processing and update the UI directly
|
||||
return no_update, no_update, no_update, no_update, no_update
|
||||
|
||||
processed_data = self.processor.process_client_embeddings(embeddings_data)
|
||||
|
||||
if processed_data.error:
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import dash
|
||||
from dash import callback, Input, Output
|
||||
from dash import callback, Input, Output, State, html
|
||||
import dash_bootstrap_components as dbc
|
||||
|
||||
|
||||
class InteractionCallbacks:
|
||||
@@ -8,25 +9,74 @@ class InteractionCallbacks:
|
||||
|
||||
def _register_callbacks(self):
|
||||
@callback(
|
||||
Output("about-modal", "is_open"),
|
||||
[Input("about-button", "n_clicks"), Input("about-modal-close", "n_clicks")],
|
||||
prevent_initial_call=True,
|
||||
Output("point-details", "children"),
|
||||
Input("embedding-plot", "clickData"),
|
||||
[State("processed-data", "data"), State("processed-prompts", "data")],
|
||||
)
|
||||
def toggle_about_modal(about_clicks, close_clicks):
|
||||
if about_clicks or close_clicks:
|
||||
return True if about_clicks else False
|
||||
return False
|
||||
def display_click_data(clickData, data, prompts_data):
|
||||
if not clickData or not data:
|
||||
return "Click on a point to see details"
|
||||
|
||||
point_data = clickData["points"][0]
|
||||
trace_name = point_data.get("fullData", {}).get("name", "Documents")
|
||||
|
||||
if "pointIndex" in point_data:
|
||||
point_index = point_data["pointIndex"]
|
||||
elif "pointNumber" in point_data:
|
||||
point_index = point_data["pointNumber"]
|
||||
else:
|
||||
return "Could not identify clicked point"
|
||||
|
||||
if (
|
||||
trace_name.startswith("Prompts")
|
||||
and prompts_data
|
||||
and "prompts" in prompts_data
|
||||
):
|
||||
item = prompts_data["prompts"][point_index]
|
||||
item_type = "Prompt"
|
||||
else:
|
||||
item = data["documents"][point_index]
|
||||
item_type = "Document"
|
||||
|
||||
return self._create_detail_card(item, item_type)
|
||||
|
||||
@callback(
|
||||
[
|
||||
Output("processed-data", "data", allow_duplicate=True),
|
||||
Output("processed-prompts", "data", allow_duplicate=True),
|
||||
Output("point-details", "children", allow_duplicate=True),
|
||||
],
|
||||
Input("reset-button", "n_clicks"),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def reset_data(n_clicks):
|
||||
if n_clicks is None or n_clicks == 0:
|
||||
return dash.no_update, dash.no_update
|
||||
return dash.no_update, dash.no_update, dash.no_update
|
||||
|
||||
return None, None
|
||||
return None, None, "Click on a point to see details"
|
||||
|
||||
@staticmethod
|
||||
def _create_detail_card(item, item_type):
|
||||
return dbc.Card(
|
||||
[
|
||||
dbc.CardBody(
|
||||
[
|
||||
html.H5(f"{item_type}: {item['id']}", className="card-title"),
|
||||
html.P(f"Text: {item['text']}", className="card-text"),
|
||||
html.P(
|
||||
f"Category: {item.get('category', 'Unknown')}",
|
||||
className="card-text",
|
||||
),
|
||||
html.P(
|
||||
f"Subcategory: {item.get('subcategory', 'Unknown')}",
|
||||
className="card-text",
|
||||
),
|
||||
html.P(
|
||||
f"Tags: {', '.join(item.get('tags', [])) if item.get('tags') else 'None'}",
|
||||
className="card-text",
|
||||
),
|
||||
html.P(f"Type: {item_type}", className="card-text text-muted"),
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
|
@@ -1,90 +0,0 @@
|
||||
from dash import html, dcc
|
||||
import dash_bootstrap_components as dbc
|
||||
|
||||
|
||||
class AboutComponent:
|
||||
def _get_about_content(self):
|
||||
return """
|
||||
# 🔍 Interactive Embedding Vector Visualization
|
||||
|
||||
EmbeddingBuddy is a web application for interactive exploration and
|
||||
visualization of embedding vectors through dimensionality reduction techniques
|
||||
(PCA, t-SNE, UMAP).
|
||||
|
||||
You have two ways to get started:
|
||||
|
||||
1. Generate embeddings directly in the browser if it supports WebGPU.
|
||||
2. Upload your NDJSON file containing embedding vectors and metadata.
|
||||
|
||||
## Generating Embeddings in Browser
|
||||
|
||||
1. Expand the "Generate Embeddings" section.
|
||||
2. Input your text data (one entry per line).
|
||||
1. Optionally you can use the built in sample data by clicking "Load Sample Data" button.
|
||||
3. Click "Generate Embeddings" to create vectors using a pre-trained model.
|
||||
|
||||
## NDJSON File Format
|
||||
|
||||
```json
|
||||
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, ...], "text": "Sample text content", "category": "news", "subcategory": "politics", "tags": ["election", "politics"]}
|
||||
{"id": "doc_002", "embedding": [0.2, -0.1, 0.9, ...], "text": "Another example", "category": "review", "subcategory": "product", "tags": ["tech", "gadget"]}
|
||||
```
|
||||
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- Drag-and-drop NDJSON file upload
|
||||
- Multiple dimensionality reduction algorithms
|
||||
- 2D/3D interactive plots with Plotly
|
||||
- Color coding by categories, subcategories, or tags
|
||||
- In-browser embedding generation
|
||||
- OpenSearch integration for data loading
|
||||
|
||||
## 🔧 Supported Algorithms
|
||||
|
||||
- **PCA** (Principal Component Analysis)
|
||||
- **t-SNE** (t-Distributed Stochastic Neighbor Embedding)
|
||||
- **UMAP** (Uniform Manifold Approximation and Projection)
|
||||
|
||||
---
|
||||
|
||||
📂 [View on GitHub](https://github.com/godber/EmbeddingBuddy)
|
||||
|
||||
*Built with: Python, Dash, Plotly, scikit-learn, OpenTSNE, UMAP*
|
||||
""".strip()
|
||||
|
||||
def create_about_modal(self):
|
||||
return dbc.Modal(
|
||||
[
|
||||
dbc.ModalHeader(
|
||||
dbc.ModalTitle("Welcome to EmbeddingBuddy"),
|
||||
close_button=True,
|
||||
),
|
||||
dbc.ModalBody(
|
||||
[dcc.Markdown(self._get_about_content(), className="mb-0")]
|
||||
),
|
||||
dbc.ModalFooter(
|
||||
[
|
||||
dbc.Button(
|
||||
"Close",
|
||||
id="about-modal-close",
|
||||
color="secondary",
|
||||
n_clicks=0,
|
||||
)
|
||||
]
|
||||
),
|
||||
],
|
||||
id="about-modal",
|
||||
is_open=True,
|
||||
size="lg",
|
||||
)
|
||||
|
||||
def create_about_button(self):
|
||||
return dbc.Button(
|
||||
[html.I(className="fas fa-info-circle me-2"), "About"],
|
||||
id="about-button",
|
||||
color="outline-info",
|
||||
size="sm",
|
||||
n_clicks=0,
|
||||
className="ms-2",
|
||||
)
|
@@ -1,27 +1,26 @@
|
||||
from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from embeddingbuddy.config.settings import AppSettings
|
||||
from .textinput import TextInputComponent
|
||||
|
||||
|
||||
class DataSourceComponent:
|
||||
def __init__(self):
|
||||
self.upload_component = UploadComponent()
|
||||
self.text_input_component = TextInputComponent()
|
||||
|
||||
def create_tabbed_interface(self):
|
||||
"""Create tabbed interface for different data sources."""
|
||||
tabs = [dbc.Tab(label="File Upload", tab_id="file-tab")]
|
||||
|
||||
# Only add OpenSearch tab if enabled
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
tabs.append(dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"))
|
||||
|
||||
return dbc.Card(
|
||||
[
|
||||
dbc.CardHeader(
|
||||
[
|
||||
dbc.Tabs(
|
||||
tabs,
|
||||
[
|
||||
dbc.Tab(label="File Upload", tab_id="file-tab"),
|
||||
dbc.Tab(label="OpenSearch", tab_id="opensearch-tab"),
|
||||
dbc.Tab(label="Text Input", tab_id="text-input-tab"),
|
||||
],
|
||||
id="data-source-tabs",
|
||||
active_tab="file-tab",
|
||||
)
|
||||
@@ -212,6 +211,10 @@ class DataSourceComponent:
|
||||
]
|
||||
)
|
||||
|
||||
def create_text_input_tab(self):
|
||||
"""Create text input tab content for browser-based embedding generation."""
|
||||
return html.Div([self.text_input_component.create_text_input_interface()])
|
||||
|
||||
def _create_opensearch_section(self, section_type):
|
||||
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""
|
||||
section_id = section_type # 'data' or 'prompts'
|
||||
|
@@ -2,27 +2,31 @@ from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .upload import UploadComponent
|
||||
from .datasource import DataSourceComponent
|
||||
from .textinput import TextInputComponent
|
||||
from embeddingbuddy.config.settings import AppSettings
|
||||
|
||||
|
||||
class SidebarComponent:
|
||||
def __init__(self):
|
||||
self.upload_component = UploadComponent()
|
||||
self.datasource_component = DataSourceComponent()
|
||||
self.textinput_component = TextInputComponent()
|
||||
|
||||
def create_layout(self):
|
||||
return dbc.Col(
|
||||
[
|
||||
dbc.Accordion(
|
||||
[
|
||||
self._create_data_sources_item(),
|
||||
self._create_generate_embeddings_item(),
|
||||
self._create_visualization_controls_item(),
|
||||
],
|
||||
always_open=True,
|
||||
)
|
||||
html.H5("Data Sources", className="mb-3"),
|
||||
self.datasource_component.create_error_alert(),
|
||||
self.datasource_component.create_success_alert(),
|
||||
self.datasource_component.create_tabbed_interface(),
|
||||
html.H5("Visualization Controls", className="mb-3 mt-4"),
|
||||
]
|
||||
+ self._create_method_dropdown()
|
||||
+ self._create_color_dropdown()
|
||||
+ self._create_dimension_toggle()
|
||||
+ self._create_prompts_toggle()
|
||||
+ [
|
||||
html.H5("Point Details", className="mb-3"),
|
||||
html.Div(
|
||||
id="point-details", children="Click on a point to see details"
|
||||
),
|
||||
],
|
||||
width=3,
|
||||
style={"padding-right": "20px"},
|
||||
@@ -82,67 +86,3 @@ class SidebarComponent:
|
||||
style={"margin-bottom": "20px"},
|
||||
),
|
||||
]
|
||||
|
||||
def _create_generate_embeddings_item(self):
|
||||
return dbc.AccordionItem(
|
||||
[
|
||||
self.textinput_component.create_text_input_interface(),
|
||||
],
|
||||
title=html.Span(
|
||||
[
|
||||
"Generate Embeddings ",
|
||||
html.I(
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="generate-embeddings-info-icon",
|
||||
title="Create new embeddings from text input using various in-browser models",
|
||||
),
|
||||
]
|
||||
),
|
||||
item_id="generate-embeddings-accordion",
|
||||
)
|
||||
|
||||
def _create_data_sources_item(self):
|
||||
tooltip_text = "Load existing embeddings: upload files"
|
||||
if AppSettings.OPENSEARCH_ENABLED:
|
||||
tooltip_text += " or read from OpenSearch"
|
||||
|
||||
return dbc.AccordionItem(
|
||||
[
|
||||
self.datasource_component.create_error_alert(),
|
||||
self.datasource_component.create_success_alert(),
|
||||
self.datasource_component.create_tabbed_interface(),
|
||||
],
|
||||
title=html.Span(
|
||||
[
|
||||
"Load Embeddings ",
|
||||
html.I(
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="load-embeddings-info-icon",
|
||||
title=tooltip_text,
|
||||
),
|
||||
]
|
||||
),
|
||||
item_id="data-sources-accordion",
|
||||
)
|
||||
|
||||
def _create_visualization_controls_item(self):
|
||||
return dbc.AccordionItem(
|
||||
self._create_method_dropdown()
|
||||
+ self._create_color_dropdown()
|
||||
+ self._create_dimension_toggle()
|
||||
+ self._create_prompts_toggle(),
|
||||
title=html.Span(
|
||||
[
|
||||
"Visualization Controls ",
|
||||
html.I(
|
||||
className="fas fa-info-circle text-muted",
|
||||
style={"cursor": "pointer"},
|
||||
id="visualization-controls-info-icon",
|
||||
title="Configure plot settings: select dimensionality reduction method, colors, and display options",
|
||||
),
|
||||
]
|
||||
),
|
||||
item_id="visualization-controls-accordion",
|
||||
)
|
||||
|
@@ -16,20 +16,23 @@ class TextInputComponent:
|
||||
"""Create the complete text input interface with model selection and processing options."""
|
||||
return html.Div(
|
||||
[
|
||||
# Model selection section
|
||||
self._create_model_selection(),
|
||||
html.Hr(),
|
||||
# Text input section
|
||||
self._create_text_input_area(),
|
||||
# Text action buttons
|
||||
self._create_text_action_buttons(),
|
||||
html.Hr(),
|
||||
# Model selection section
|
||||
self._create_model_selection(),
|
||||
html.Hr(),
|
||||
# Processing options
|
||||
self._create_processing_options(),
|
||||
html.Hr(),
|
||||
# Generation controls
|
||||
self._create_generation_controls(),
|
||||
html.Hr(),
|
||||
# Progress indicators
|
||||
self._create_progress_indicators(),
|
||||
html.Hr(),
|
||||
# Status and results
|
||||
self._create_status_section(),
|
||||
# Hidden components for data flow
|
||||
@@ -294,10 +297,65 @@ class TextInputComponent:
|
||||
]
|
||||
)
|
||||
|
||||
def _create_progress_indicators(self):
|
||||
"""Create progress bars for model loading and embedding generation."""
|
||||
return html.Div(
|
||||
[
|
||||
# Model loading progress
|
||||
html.Div(
|
||||
[
|
||||
html.H6("Model Loading Progress", className="mb-2"),
|
||||
dbc.Progress(
|
||||
id="model-loading-progress",
|
||||
value=0,
|
||||
striped=True,
|
||||
animated=True,
|
||||
className="mb-2",
|
||||
),
|
||||
html.Small(
|
||||
id="model-loading-status",
|
||||
children="No model loading in progress",
|
||||
className="text-muted",
|
||||
),
|
||||
],
|
||||
id="model-loading-section",
|
||||
style={"display": "none"},
|
||||
),
|
||||
html.Br(),
|
||||
# Embedding generation progress
|
||||
html.Div(
|
||||
[
|
||||
html.H6("Embedding Generation Progress", className="mb-2"),
|
||||
dbc.Progress(
|
||||
id="embedding-progress",
|
||||
value=0,
|
||||
striped=True,
|
||||
animated=True,
|
||||
className="mb-2",
|
||||
),
|
||||
html.Small(
|
||||
id="embedding-status",
|
||||
children="No embedding generation in progress",
|
||||
className="text-muted",
|
||||
),
|
||||
],
|
||||
id="embedding-progress-section",
|
||||
style={"display": "none"},
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
def _create_status_section(self):
|
||||
"""Create status alerts and results preview."""
|
||||
return html.Div(
|
||||
[
|
||||
# Immediate status (from client-side)
|
||||
dbc.Alert(
|
||||
id="text-input-status-immediate",
|
||||
children="Ready to generate embeddings",
|
||||
color="light",
|
||||
className="mb-3",
|
||||
),
|
||||
# Server-side status
|
||||
dbc.Alert(
|
||||
id="text-input-status",
|
||||
|
@@ -5,75 +5,39 @@ import dash_bootstrap_components as dbc
|
||||
class UploadComponent:
|
||||
@staticmethod
|
||||
def create_data_upload():
|
||||
return html.Div(
|
||||
[
|
||||
dcc.Upload(
|
||||
id="upload-data",
|
||||
children=html.Div(
|
||||
[
|
||||
"Upload Data ",
|
||||
html.I(
|
||||
className="fas fa-info-circle",
|
||||
style={"color": "#6c757d", "fontSize": "14px"},
|
||||
id="data-upload-info",
|
||||
),
|
||||
]
|
||||
),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
},
|
||||
multiple=False,
|
||||
),
|
||||
dbc.Tooltip(
|
||||
"Click here or drag and drop NDJSON files containing document embeddings",
|
||||
target="data-upload-info",
|
||||
placement="top",
|
||||
),
|
||||
]
|
||||
return dcc.Upload(
|
||||
id="upload-data",
|
||||
children=html.Div(["Drag and Drop or ", html.A("Select Files")]),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
},
|
||||
multiple=False,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def create_prompts_upload():
|
||||
return html.Div(
|
||||
[
|
||||
dcc.Upload(
|
||||
id="upload-prompts",
|
||||
children=html.Div(
|
||||
[
|
||||
"Upload Prompts ",
|
||||
html.I(
|
||||
className="fas fa-info-circle",
|
||||
style={"color": "#6c757d", "fontSize": "14px"},
|
||||
id="prompts-upload-info",
|
||||
),
|
||||
]
|
||||
),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
"borderColor": "#28a745",
|
||||
},
|
||||
multiple=False,
|
||||
),
|
||||
dbc.Tooltip(
|
||||
"Click here or drag and drop NDJSON files containing prompt embeddings",
|
||||
target="prompts-upload-info",
|
||||
placement="top",
|
||||
),
|
||||
]
|
||||
return dcc.Upload(
|
||||
id="upload-prompts",
|
||||
children=html.Div(["Drag and Drop Prompts or ", html.A("Select Files")]),
|
||||
style={
|
||||
"width": "100%",
|
||||
"height": "60px",
|
||||
"lineHeight": "60px",
|
||||
"borderWidth": "1px",
|
||||
"borderStyle": "dashed",
|
||||
"borderRadius": "5px",
|
||||
"textAlign": "center",
|
||||
"margin-bottom": "20px",
|
||||
"borderColor": "#28a745",
|
||||
},
|
||||
multiple=False,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
@@ -1,19 +1,16 @@
|
||||
from dash import dcc, html
|
||||
import dash_bootstrap_components as dbc
|
||||
from .components.sidebar import SidebarComponent
|
||||
from .components.about import AboutComponent
|
||||
|
||||
|
||||
class AppLayout:
|
||||
def __init__(self):
|
||||
self.sidebar = SidebarComponent()
|
||||
self.about = AboutComponent()
|
||||
|
||||
def create_layout(self):
|
||||
return dbc.Container(
|
||||
[self._create_header(), self._create_main_content()]
|
||||
+ self._create_stores()
|
||||
+ [self.about.create_about_modal()],
|
||||
+ self._create_stores(),
|
||||
fluid=True,
|
||||
)
|
||||
|
||||
@@ -22,19 +19,7 @@ class AppLayout:
|
||||
[
|
||||
dbc.Col(
|
||||
[
|
||||
html.Div(
|
||||
[
|
||||
html.H1(
|
||||
"EmbeddingBuddy",
|
||||
className="text-center mb-4 d-inline",
|
||||
),
|
||||
html.Div(
|
||||
[self.about.create_about_button()],
|
||||
className="float-end",
|
||||
),
|
||||
],
|
||||
className="d-flex justify-content-between align-items-center",
|
||||
),
|
||||
html.H1("EmbeddingBuddy", className="text-center mb-4"),
|
||||
# Load Transformers.js from CDN
|
||||
html.Script(
|
||||
"""
|
||||
|
@@ -38,9 +38,9 @@ class PlotFactory:
|
||||
if dimensions == "3d":
|
||||
fig = px.scatter_3d(
|
||||
df,
|
||||
x="x",
|
||||
y="y",
|
||||
z="z",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
z="dim_3",
|
||||
color=color_values,
|
||||
hover_data=hover_fields,
|
||||
title=f"3D Embedding Visualization - {method} (colored by {color_by})",
|
||||
@@ -49,8 +49,8 @@ class PlotFactory:
|
||||
else:
|
||||
fig = px.scatter(
|
||||
df,
|
||||
x="x",
|
||||
y="y",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
color=color_values,
|
||||
hover_data=hover_fields,
|
||||
title=f"2D Embedding Visualization - {method} (colored by {color_by})",
|
||||
@@ -77,17 +77,17 @@ class PlotFactory:
|
||||
if dimensions == "3d":
|
||||
doc_fig = px.scatter_3d(
|
||||
doc_df,
|
||||
x="x",
|
||||
y="y",
|
||||
z="z",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
z="dim_3",
|
||||
color=doc_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
else:
|
||||
doc_fig = px.scatter(
|
||||
doc_df,
|
||||
x="x",
|
||||
y="y",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
color=doc_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
@@ -114,17 +114,17 @@ class PlotFactory:
|
||||
if dimensions == "3d":
|
||||
prompt_fig = px.scatter_3d(
|
||||
prompt_df,
|
||||
x="x",
|
||||
y="y",
|
||||
z="z",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
z="dim_3",
|
||||
color=prompt_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
else:
|
||||
prompt_fig = px.scatter(
|
||||
prompt_df,
|
||||
x="x",
|
||||
y="y",
|
||||
x="dim_1",
|
||||
y="dim_2",
|
||||
color=prompt_color_values,
|
||||
hover_data=hover_fields,
|
||||
)
|
||||
@@ -168,11 +168,11 @@ class PlotFactory:
|
||||
"category": doc.category,
|
||||
"subcategory": doc.subcategory,
|
||||
"tags_str": ", ".join(doc.tags) if doc.tags else "None",
|
||||
"x": coordinates[i, 0],
|
||||
"y": coordinates[i, 1],
|
||||
"dim_1": coordinates[i, 0],
|
||||
"dim_2": coordinates[i, 1],
|
||||
}
|
||||
if dimensions == "3d":
|
||||
row["z"] = coordinates[i, 2]
|
||||
row["dim_3"] = coordinates[i, 2]
|
||||
df_data.append(row)
|
||||
|
||||
return pd.DataFrame(df_data)
|
||||
|
Reference in New Issue
Block a user