add in browser embedding generation #4
@@ -3,7 +3,8 @@
|
||||
"allow": [
|
||||
"Bash(mkdir:*)",
|
||||
"Bash(uv run:*)",
|
||||
"Bash(uv add:*)"
|
||||
"Bash(uv add:*)",
|
||||
"Bash(uv sync:*)"
|
||||
],
|
||||
"deny": [],
|
||||
"ask": [],
|
||||
|
@@ -9,14 +9,13 @@ from .ui.callbacks.interactions import InteractionCallbacks
|
||||
|
||||
def create_app():
|
||||
import os
|
||||
|
||||
# Get the project root directory (two levels up from this file)
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
||||
assets_path = os.path.join(project_root, 'assets')
|
||||
assets_path = os.path.join(project_root, "assets")
|
||||
|
||||
app = dash.Dash(
|
||||
__name__,
|
||||
external_stylesheets=[dbc.themes.BOOTSTRAP],
|
||||
assets_folder=assets_path
|
||||
__name__, external_stylesheets=[dbc.themes.BOOTSTRAP], assets_folder=assets_path
|
||||
)
|
||||
|
||||
# Allow callbacks to components that are dynamically created in tabs
|
||||
|
@@ -562,7 +562,7 @@ class DataProcessingCallbacks:
|
||||
if not ctx.triggered:
|
||||
return no_update
|
||||
|
||||
button_id = ctx.triggered[0]['prop_id'].split('.')[0]
|
||||
button_id = ctx.triggered[0]["prop_id"].split(".")[0]
|
||||
|
||||
if button_id == "clear-text-btn" and clear_clicks:
|
||||
return ""
|
||||
@@ -652,11 +652,15 @@ class DataProcessingCallbacks:
|
||||
try:
|
||||
# Get the project root directory (four levels up from this file)
|
||||
current_file = os.path.abspath(__file__)
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(current_file)))))
|
||||
sample_file_path = os.path.join(project_root, 'assets', 'sample-txt.md')
|
||||
project_root = os.path.dirname(
|
||||
os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(current_file)))
|
||||
)
|
||||
)
|
||||
sample_file_path = os.path.join(project_root, "assets", "sample-txt.md")
|
||||
|
||||
if os.path.exists(sample_file_path):
|
||||
with open(sample_file_path, 'r', encoding='utf-8') as file:
|
||||
with open(sample_file_path, "r", encoding="utf-8") as file:
|
||||
return file.read()
|
||||
else:
|
||||
# Fallback sample text if file doesn't exist
|
||||
@@ -678,7 +682,7 @@ A pinch of saffron adds a beautiful color and aroma to traditional paella.
|
||||
If the soup is too salty, add a peeled potato to absorb excess sodium.
|
||||
Let the bread dough rise for at least an hour in a warm, draft-free spot."""
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
# Return a simple fallback if there's any error
|
||||
return "This is sample text for testing embedding generation. You can replace this with your own text."
|
||||
|
||||
|
@@ -27,7 +27,7 @@ class AppLayout:
|
||||
window.transformersPipeline = pipeline;
|
||||
console.log('✅ Transformers.js pipeline loaded globally');
|
||||
""",
|
||||
type="module"
|
||||
type="module",
|
||||
),
|
||||
],
|
||||
width=12,
|
||||
|
@@ -1,6 +1,5 @@
|
||||
"""Tests for client-side embedding processing functionality."""
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from src.embeddingbuddy.data.processor import DataProcessor
|
||||
@@ -23,20 +22,17 @@ class TestClientEmbeddingsProcessing:
|
||||
"text": "First test document",
|
||||
"category": "Text Input",
|
||||
"subcategory": "Generated",
|
||||
"tags": []
|
||||
"tags": [],
|
||||
},
|
||||
{
|
||||
"id": "text_input_1",
|
||||
"text": "Second test document",
|
||||
"category": "Text Input",
|
||||
"subcategory": "Generated",
|
||||
"tags": []
|
||||
}
|
||||
"tags": [],
|
||||
},
|
||||
],
|
||||
"embeddings": [
|
||||
[0.1, 0.2, 0.3, 0.4],
|
||||
[0.5, 0.6, 0.7, 0.8]
|
||||
]
|
||||
"embeddings": [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
|
||||
}
|
||||
|
||||
result = self.processor.process_client_embeddings(client_data)
|
||||
@@ -79,12 +75,15 @@ class TestClientEmbeddingsProcessing:
|
||||
"""Test processing with mismatched document and embedding counts."""
|
||||
client_data = {
|
||||
"documents": [
|
||||
{"id": "test", "text": "Test document", "category": "Test", "subcategory": "Test", "tags": []}
|
||||
{
|
||||
"id": "test",
|
||||
"text": "Test document",
|
||||
"category": "Test",
|
||||
"subcategory": "Test",
|
||||
"tags": [],
|
||||
}
|
||||
],
|
||||
"embeddings": [
|
||||
[0.1, 0.2, 0.3, 0.4],
|
||||
[0.5, 0.6, 0.7, 0.8]
|
||||
]
|
||||
"embeddings": [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
|
||||
}
|
||||
|
||||
result = self.processor.process_client_embeddings(client_data)
|
||||
@@ -98,12 +97,15 @@ class TestClientEmbeddingsProcessing:
|
||||
client_data = {
|
||||
"documents": [
|
||||
{"text": ""}, # Empty text should be skipped
|
||||
{"id": "test2", "text": "Valid document", "category": "Test", "subcategory": "Test", "tags": []}
|
||||
{
|
||||
"id": "test2",
|
||||
"text": "Valid document",
|
||||
"category": "Test",
|
||||
"subcategory": "Test",
|
||||
"tags": [],
|
||||
},
|
||||
],
|
||||
"embeddings": [
|
||||
[0.1, 0.2, 0.3, 0.4],
|
||||
[0.5, 0.6, 0.7, 0.8]
|
||||
]
|
||||
"embeddings": [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
|
||||
}
|
||||
|
||||
result = self.processor.process_client_embeddings(client_data)
|
||||
@@ -117,11 +119,14 @@ class TestClientEmbeddingsProcessing:
|
||||
"""Test automatic ID generation for documents without IDs."""
|
||||
client_data = {
|
||||
"documents": [
|
||||
{"text": "Document without ID", "category": "Test", "subcategory": "Test", "tags": []}
|
||||
{
|
||||
"text": "Document without ID",
|
||||
"category": "Test",
|
||||
"subcategory": "Test",
|
||||
"tags": [],
|
||||
}
|
||||
],
|
||||
"embeddings": [
|
||||
[0.1, 0.2, 0.3, 0.4]
|
||||
]
|
||||
"embeddings": [[0.1, 0.2, 0.3, 0.4]],
|
||||
}
|
||||
|
||||
result = self.processor.process_client_embeddings(client_data)
|
||||
@@ -135,9 +140,15 @@ class TestClientEmbeddingsProcessing:
|
||||
"""Test processing with invalid embedding format."""
|
||||
client_data = {
|
||||
"documents": [
|
||||
{"id": "test", "text": "Test document", "category": "Test", "subcategory": "Test", "tags": []}
|
||||
{
|
||||
"id": "test",
|
||||
"text": "Test document",
|
||||
"category": "Test",
|
||||
"subcategory": "Test",
|
||||
"tags": [],
|
||||
}
|
||||
],
|
||||
"embeddings": 0.5 # Scalar instead of array
|
||||
"embeddings": 0.5, # Scalar instead of array
|
||||
}
|
||||
|
||||
result = self.processor.process_client_embeddings(client_data)
|
||||
|
Reference in New Issue
Block a user