minor formatting
All checks were successful
Security Scan / security (pull_request) Successful in 43s
Security Scan / dependency-check (pull_request) Successful in 45s
Test Suite / lint (pull_request) Successful in 30s
Test Suite / test (3.11) (pull_request) Successful in 1m29s
Test Suite / build (pull_request) Successful in 39s

This commit is contained in:
2025-09-06 07:23:26 -07:00
parent cdaaffd735
commit bced5e07ce
5 changed files with 76 additions and 61 deletions

View File

@@ -1,6 +1,5 @@
"""Tests for client-side embedding processing functionality."""
import pytest
import numpy as np
from src.embeddingbuddy.data.processor import DataProcessor
@@ -23,33 +22,30 @@ class TestClientEmbeddingsProcessing:
"text": "First test document",
"category": "Text Input",
"subcategory": "Generated",
"tags": []
"tags": [],
},
{
"id": "text_input_1",
"id": "text_input_1",
"text": "Second test document",
"category": "Text Input",
"subcategory": "Generated",
"tags": []
}
"tags": [],
},
],
"embeddings": [
[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8]
]
"embeddings": [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
}
result = self.processor.process_client_embeddings(client_data)
assert isinstance(result, ProcessedData)
assert result.error is None
assert len(result.documents) == 2
assert result.embeddings.shape == (2, 4)
# Check document content
assert result.documents[0].text == "First test document"
assert result.documents[1].text == "Second test document"
# Check embeddings match
np.testing.assert_array_equal(result.embeddings[0], [0.1, 0.2, 0.3, 0.4])
np.testing.assert_array_equal(result.embeddings[1], [0.5, 0.6, 0.7, 0.8])
@@ -57,9 +53,9 @@ class TestClientEmbeddingsProcessing:
def test_process_client_embeddings_with_error(self):
"""Test processing client data with error."""
client_data = {"error": "Transformers.js not loaded"}
result = self.processor.process_client_embeddings(client_data)
assert isinstance(result, ProcessedData)
assert result.error == "Transformers.js not loaded"
assert len(result.documents) == 0
@@ -68,9 +64,9 @@ class TestClientEmbeddingsProcessing:
def test_process_client_embeddings_missing_data(self):
"""Test processing with missing documents or embeddings."""
client_data = {"documents": []}
result = self.processor.process_client_embeddings(client_data)
assert isinstance(result, ProcessedData)
assert "No documents or embeddings in client data" in result.error
assert len(result.documents) == 0
@@ -79,16 +75,19 @@ class TestClientEmbeddingsProcessing:
"""Test processing with mismatched document and embedding counts."""
client_data = {
"documents": [
{"id": "test", "text": "Test document", "category": "Test", "subcategory": "Test", "tags": []}
{
"id": "test",
"text": "Test document",
"category": "Test",
"subcategory": "Test",
"tags": [],
}
],
"embeddings": [
[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8]
]
"embeddings": [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
}
result = self.processor.process_client_embeddings(client_data)
assert isinstance(result, ProcessedData)
assert "Mismatch between number of documents and embeddings" in result.error
assert len(result.documents) == 0
@@ -98,16 +97,19 @@ class TestClientEmbeddingsProcessing:
client_data = {
"documents": [
{"text": ""}, # Empty text should be skipped
{"id": "test2", "text": "Valid document", "category": "Test", "subcategory": "Test", "tags": []}
{
"id": "test2",
"text": "Valid document",
"category": "Test",
"subcategory": "Test",
"tags": [],
},
],
"embeddings": [
[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8]
]
"embeddings": [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
}
result = self.processor.process_client_embeddings(client_data)
assert isinstance(result, ProcessedData)
assert result.error is None
assert len(result.documents) == 1 # Only valid document should be processed
@@ -117,15 +119,18 @@ class TestClientEmbeddingsProcessing:
"""Test automatic ID generation for documents without IDs."""
client_data = {
"documents": [
{"text": "Document without ID", "category": "Test", "subcategory": "Test", "tags": []}
{
"text": "Document without ID",
"category": "Test",
"subcategory": "Test",
"tags": [],
}
],
"embeddings": [
[0.1, 0.2, 0.3, 0.4]
]
"embeddings": [[0.1, 0.2, 0.3, 0.4]],
}
result = self.processor.process_client_embeddings(client_data)
assert isinstance(result, ProcessedData)
assert result.error is None
assert len(result.documents) == 1
@@ -135,13 +140,19 @@ class TestClientEmbeddingsProcessing:
"""Test processing with invalid embedding format."""
client_data = {
"documents": [
{"id": "test", "text": "Test document", "category": "Test", "subcategory": "Test", "tags": []}
{
"id": "test",
"text": "Test document",
"category": "Test",
"subcategory": "Test",
"tags": [],
}
],
"embeddings": 0.5 # Scalar instead of array
"embeddings": 0.5, # Scalar instead of array
}
result = self.processor.process_client_embeddings(client_data)
assert isinstance(result, ProcessedData)
assert result.error is not None # Should have some error
assert len(result.documents) == 0
assert len(result.documents) == 0