Files
EmbeddingBuddy/src/embeddingbuddy/config/settings.py
Austin Godber cdaaffd735
Some checks failed
Security Scan / security (pull_request) Successful in 44s
Security Scan / dependency-check (pull_request) Successful in 49s
Test Suite / lint (pull_request) Failing after 40s
Test Suite / test (3.11) (pull_request) Successful in 1m39s
Test Suite / build (pull_request) Has been skipped
add in browser embedding generation
2025-09-06 07:16:30 -07:00

174 lines
5.3 KiB
Python

from typing import Dict, Any
import os
class AppSettings:
    """Central collection of application constants.

    Every value is a class attribute, so settings can be read directly from
    the class (``AppSettings.PORT``) without creating an instance. The three
    values in the "App Configuration" section are read from environment
    variables once, when the class body is executed.
    """

    # UI Configuration
    # NOTE(review): "margin-bottom" is hyphenated while every other key is
    # camelCase -- confirm the front end accepts it before normalizing.
    UPLOAD_STYLE = {
        "width": "100%",
        "height": "60px",
        "lineHeight": "60px",
        "borderWidth": "1px",
        "borderStyle": "dashed",
        "borderRadius": "5px",
        "textAlign": "center",
        "margin-bottom": "20px",
    }

    # Same as UPLOAD_STYLE with an added border color.
    PROMPTS_UPLOAD_STYLE = {**UPLOAD_STYLE, "borderColor": "#28a745"}

    PLOT_CONFIG = {"responsive": True, "displayModeBar": True}

    PLOT_STYLE = {"height": "85vh", "width": "100%"}

    PLOT_LAYOUT_CONFIG = {
        "height": None,
        "autosize": True,
        "margin": {"l": 0, "r": 0, "t": 50, "b": 0},
    }

    # Dimensionality Reduction Settings
    DEFAULT_N_COMPONENTS_3D = 3
    DEFAULT_N_COMPONENTS_2D = 2
    DEFAULT_RANDOM_STATE = 42

    # Available Methods
    REDUCTION_METHODS = [
        {"label": "PCA", "value": "pca"},
        {"label": "t-SNE", "value": "tsne"},
        {"label": "UMAP", "value": "umap"},
    ]

    COLOR_OPTIONS = [
        {"label": "Category", "value": "category"},
        {"label": "Subcategory", "value": "subcategory"},
        {"label": "Tags", "value": "tags"},
    ]

    DIMENSION_OPTIONS = [
        {"label": "2D", "value": "2d"},
        {"label": "3D", "value": "3d"},
    ]

    # Default Values
    DEFAULT_METHOD = "pca"
    DEFAULT_COLOR_BY = "category"
    DEFAULT_DIMENSIONS = "3d"
    DEFAULT_SHOW_PROMPTS = ["show"]

    # Plot Marker Settings
    DOCUMENT_MARKER_SIZE_2D = 8
    DOCUMENT_MARKER_SIZE_3D = 5
    PROMPT_MARKER_SIZE_2D = 10
    PROMPT_MARKER_SIZE_3D = 6
    DOCUMENT_MARKER_SYMBOL = "circle"
    PROMPT_MARKER_SYMBOL = "diamond"
    DOCUMENT_OPACITY = 1.0
    PROMPT_OPACITY = 0.8

    # Text Processing
    TEXT_PREVIEW_LENGTH = 100

    # App Configuration
    # DEBUG is true only when the variable (case-insensitively) equals "true".
    # NOTE(review): debug is enabled when the variable is unset -- confirm
    # this default is intended outside local development.
    DEBUG = os.getenv("EMBEDDINGBUDDY_DEBUG", "True").lower() == "true"
    HOST = os.getenv("EMBEDDINGBUDDY_HOST", "127.0.0.1")
    # int() raises ValueError here if the variable is set to a non-integer.
    PORT = int(os.getenv("EMBEDDINGBUDDY_PORT", "8050"))

    # OpenSearch Configuration
    OPENSEARCH_DEFAULT_SIZE = 100
    OPENSEARCH_SAMPLE_SIZE = 5
    OPENSEARCH_CONNECTION_TIMEOUT = 30
    OPENSEARCH_VERIFY_CERTS = True

    # Text Input / Transformers.js Configuration
    DEFAULT_EMBEDDING_MODEL = "Xenova/all-mpnet-base-v2"
    MAX_TEXT_LENGTH = 50000  # Characters (browser memory limits)
    DEFAULT_TOKENIZATION_METHOD = "sentence"
    MAX_BATCH_SIZE = 8  # Process in smaller batches for memory management

    # Available Transformers.js compatible models.
    # Every entry carries the same keys; exactly one entry has "default": True
    # and its name matches DEFAULT_EMBEDDING_MODEL.
    AVAILABLE_MODELS = [
        {
            "name": "Xenova/all-mpnet-base-v2",
            "label": "All-MPNet-Base-v2 (Quality, 768d)",
            "description": "Higher quality embeddings with better semantic understanding",
            "dimensions": 768,
            "size": "109 MB",
            "context_length": 512,
            "multilingual": False,
            "default": True,
        },
        {
            "name": "Xenova/all-MiniLM-L6-v2",
            "label": "All-MiniLM-L6-v2 (Fast, 384d)",
            "description": "Lightweight model, good for quick testing and general purpose",
            "dimensions": 384,
            "size": "23 MB",
            "context_length": 512,
            "multilingual": False,
            "default": False,
        },
        {
            "name": "Xenova/paraphrase-multilingual-MiniLM-L12-v2",
            "label": "Multilingual MiniLM (50+ languages)",
            "description": "Support for multiple languages with good performance",
            "dimensions": 384,
            "size": "127 MB",
            "context_length": 512,
            "multilingual": True,
            "default": False,
        },
        {
            "name": "Xenova/bge-small-en-v1.5",
            "label": "BGE Small English (High quality, 384d)",
            "description": "Beijing Academy of AI model with excellent performance on retrieval tasks",
            "dimensions": 384,
            "size": "67 MB",
            "context_length": 512,
            "multilingual": False,
            "default": False,
        },
        {
            "name": "Xenova/gte-small",
            "label": "GTE Small (General Text Embeddings, 384d)",
            "description": "Alibaba's general text embedding model, balanced performance",
            "dimensions": 384,
            "size": "67 MB",
            "context_length": 512,
            "multilingual": False,
            "default": False,
        },
    ]

    # Browser compatibility requirements
    SUPPORTED_BROWSERS = {
        "chrome": ">=88",
        "firefox": ">=92",
        "safari": ">=15.4",
        "edge": ">=88",
    }

    # Bootstrap Theme
    EXTERNAL_STYLESHEETS = [
        "https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css"
    ]

    @classmethod
    def get_plot_marker_config(
        cls, dimensions: str, is_prompt: bool = False
    ) -> Dict[str, Any]:
        """Return the marker settings for one kind of plotted point.

        Args:
            dimensions: ``"3d"`` selects the 3D marker size; any other value
                selects the 2D marker size. The comparison is case-sensitive.
            is_prompt: When true, use the prompt marker constants; otherwise
                use the document marker constants.

        Returns:
            A dict with the keys ``"size"``, ``"symbol"`` and ``"opacity"``.
        """
        use_3d = dimensions == "3d"

        if is_prompt:
            size = cls.PROMPT_MARKER_SIZE_3D if use_3d else cls.PROMPT_MARKER_SIZE_2D
            symbol = cls.PROMPT_MARKER_SYMBOL
            opacity = cls.PROMPT_OPACITY
        else:
            size = (
                cls.DOCUMENT_MARKER_SIZE_3D if use_3d else cls.DOCUMENT_MARKER_SIZE_2D
            )
            symbol = cls.DOCUMENT_MARKER_SYMBOL
            opacity = cls.DOCUMENT_OPACITY

        return {"size": size, "symbol": symbol, "opacity": opacity}