@@ -6,62 +6,64 @@ from src.embeddingbuddy.models.schemas import Document
|
||||
|
||||
|
||||
class TestNDJSONParser:
    """Tests for ``NDJSONParser.parse_text`` on single-record NDJSON input."""

    def test_parse_text_basic(self):
        """A record with id, text and embedding parses into one document."""
        text_content = (
            '{"id": "test1", "text": "Hello world", "embedding": [0.1, 0.2, 0.3]}'
        )
        documents = NDJSONParser.parse_text(text_content)

        assert len(documents) == 1
        assert documents[0].id == "test1"
        assert documents[0].text == "Hello world"
        assert documents[0].embedding == [0.1, 0.2, 0.3]

    def test_parse_text_with_metadata(self):
        """Extra ``category`` and ``tags`` keys are exposed on the document."""
        text_content = '{"id": "test1", "text": "Hello", "embedding": [0.1, 0.2], "category": "greeting", "tags": ["test"]}'
        documents = NDJSONParser.parse_text(text_content)

        # Check the count first so an empty result fails with a clear
        # assertion instead of an IndexError on the lookups below.
        assert len(documents) == 1
        assert documents[0].category == "greeting"
        assert documents[0].tags == ["test"]

    def test_parse_text_missing_id(self):
        """A record without an ``id`` key still produces a document with an id."""
        text_content = '{"text": "Hello", "embedding": [0.1, 0.2]}'
        documents = NDJSONParser.parse_text(text_content)

        assert len(documents) == 1
        assert documents[0].id is not None  # Should be auto-generated
|
||||
|
||||
|
||||
class TestDataProcessor:
    """Tests for ``DataProcessor`` embedding extraction and data combination."""

    def test_extract_embeddings(self):
        """Embeddings of two documents are stacked into a (2, 2) array."""
        documents = [
            Document(id="1", text="test1", embedding=[0.1, 0.2]),
            Document(id="2", text="test2", embedding=[0.3, 0.4]),
        ]

        processor = DataProcessor()
        embeddings = processor._extract_embeddings(documents)

        assert embeddings.shape == (2, 2)
        assert np.allclose(embeddings[0], [0.1, 0.2])
        assert np.allclose(embeddings[1], [0.3, 0.4])

    def test_combine_data(self):
        """Combining one document and one prompt yields two embedding rows."""
        from src.embeddingbuddy.models.schemas import ProcessedData

        doc_data = ProcessedData(
            documents=[Document(id="1", text="doc", embedding=[0.1, 0.2])],
            embeddings=np.array([[0.1, 0.2]]),
        )

        prompt_data = ProcessedData(
            documents=[Document(id="p1", text="prompt", embedding=[0.3, 0.4])],
            embeddings=np.array([[0.3, 0.4]]),
        )

        processor = DataProcessor()
        all_embeddings, documents, prompts = processor.combine_data(
            doc_data, prompt_data
        )

        assert all_embeddings.shape == (2, 2)
        assert len(documents) == 1
        assert len(prompts) == 1
        # NOTE(review): the visible hunk ends here; the full file may contain
        # further assertions for this test — confirm against the repository.
|
||||
@@ -70,4 +72,4 @@ class TestDataProcessor:
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file directly reports
    # failure to the shell instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__]))
|
||||
|
@@ -1,89 +1,90 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from src.embeddingbuddy.models.reducers import ReducerFactory, PCAReducer, TSNEReducer, UMAPReducer
|
||||
from src.embeddingbuddy.models.reducers import (
|
||||
ReducerFactory,
|
||||
PCAReducer,
|
||||
TSNEReducer,
|
||||
UMAPReducer,
|
||||
)
|
||||
|
||||
|
||||
class TestReducerFactory:
    """Tests for ``ReducerFactory`` reducer creation and method listing."""

    def test_create_pca_reducer(self):
        """The ``"pca"`` method yields a ``PCAReducer`` with the given size."""
        reducer = ReducerFactory.create_reducer("pca", n_components=2)
        assert isinstance(reducer, PCAReducer)
        assert reducer.n_components == 2

    def test_create_tsne_reducer(self):
        """The ``"tsne"`` method yields a ``TSNEReducer`` with the given size."""
        reducer = ReducerFactory.create_reducer("tsne", n_components=3)
        assert isinstance(reducer, TSNEReducer)
        assert reducer.n_components == 3

    def test_create_umap_reducer(self):
        """The ``"umap"`` method yields a ``UMAPReducer`` with the given size."""
        reducer = ReducerFactory.create_reducer("umap", n_components=2)
        assert isinstance(reducer, UMAPReducer)
        assert reducer.n_components == 2

    def test_invalid_method(self):
        """An unrecognised method name raises ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown reduction method"):
            ReducerFactory.create_reducer("invalid_method")

    def test_available_methods(self):
        """All three supported method names are reported as available."""
        methods = ReducerFactory.get_available_methods()
        assert "pca" in methods
        assert "tsne" in methods
        assert "umap" in methods
|
||||
|
||||
|
||||
class TestPCAReducer:
    """Tests for ``PCAReducer`` output shape and result metadata."""

    def test_fit_transform(self):
        """Reducing 100 x 512 input to 2 components gives a 100 x 2 result."""
        # Seeded generator keeps the input identical across runs.
        embeddings = np.random.default_rng(0).random((100, 512))
        reducer = PCAReducer(n_components=2)

        result = reducer.fit_transform(embeddings)

        assert result.reduced_embeddings.shape == (100, 2)
        assert result.variance_explained is not None
        assert result.method == "PCA"
        assert result.n_components == 2

    def test_method_name(self):
        """The reducer reports its method name as ``"PCA"``."""
        reducer = PCAReducer()
        assert reducer.get_method_name() == "PCA"
|
||||
|
||||
|
||||
class TestTSNEReducer:
    """Tests for ``TSNEReducer`` output shape and result metadata."""

    def test_fit_transform_small_dataset(self):
        """Reducing 30 x 10 input to 2 components gives a 30 x 2 result."""
        # Small dataset for faster testing; seeded so the input is reproducible.
        embeddings = np.random.default_rng(0).random((30, 10))
        reducer = TSNEReducer(n_components=2)

        result = reducer.fit_transform(embeddings)

        assert result.reduced_embeddings.shape == (30, 2)
        assert result.variance_explained is None  # t-SNE doesn't provide this
        assert result.method == "t-SNE"
        assert result.n_components == 2

    def test_method_name(self):
        """The reducer reports its method name as ``"t-SNE"``."""
        reducer = TSNEReducer()
        assert reducer.get_method_name() == "t-SNE"
|
||||
|
||||
|
||||
class TestUMAPReducer:
    """Tests for ``UMAPReducer`` output shape and result metadata."""

    def test_fit_transform(self):
        """Reducing 50 x 10 input to 2 components gives a 50 x 2 result."""
        # Seeded generator keeps the input identical across runs.
        embeddings = np.random.default_rng(0).random((50, 10))
        reducer = UMAPReducer(n_components=2)

        result = reducer.fit_transform(embeddings)

        assert result.reduced_embeddings.shape == (50, 2)
        assert result.variance_explained is None  # UMAP doesn't provide this
        assert result.method == "UMAP"
        assert result.n_components == 2

    def test_method_name(self):
        """The reducer reports its method name as ``"UMAP"``."""
        reducer = UMAPReducer()
        assert reducer.get_method_name() == "UMAP"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file directly reports
    # failure to the shell instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__]))
|
||||
|
Reference in New Issue
Block a user