Add CI workflows (#1)

Reviewed-on: #1
2025-08-13 21:03:42 -07:00
parent 809dbeb783
commit 1ec7e2c38c
24 changed files with 2069 additions and 532 deletions
--- a/tests/test_data_processing.py
+++ b/tests/test_data_processing.py
@@ -6,62 +6,64 @@ from src.embeddingbuddy.models.schemas import Document


 class TestNDJSONParser:
-    
    def test_parse_text_basic(self):
-        text_content = '{"id": "test1", "text": "Hello world", "embedding": [0.1, 0.2, 0.3]}'
+        text_content = (
+            '{"id": "test1", "text": "Hello world", "embedding": [0.1, 0.2, 0.3]}'
+        )
        documents = NDJSONParser.parse_text(text_content)
-        
+
        assert len(documents) == 1
        assert documents[0].id == "test1"
        assert documents[0].text == "Hello world"
        assert documents[0].embedding == [0.1, 0.2, 0.3]
-    
+
    def test_parse_text_with_metadata(self):
        text_content = '{"id": "test1", "text": "Hello", "embedding": [0.1, 0.2], "category": "greeting", "tags": ["test"]}'
        documents = NDJSONParser.parse_text(text_content)
-        
+
        assert documents[0].category == "greeting"
        assert documents[0].tags == ["test"]
-    
+
    def test_parse_text_missing_id(self):
        text_content = '{"text": "Hello", "embedding": [0.1, 0.2]}'
        documents = NDJSONParser.parse_text(text_content)
-        
+
        assert len(documents) == 1
        assert documents[0].id is not None  # Should be auto-generated


 class TestDataProcessor:
-    
    def test_extract_embeddings(self):
        documents = [
            Document(id="1", text="test1", embedding=[0.1, 0.2]),
-            Document(id="2", text="test2", embedding=[0.3, 0.4])
+            Document(id="2", text="test2", embedding=[0.3, 0.4]),
        ]
-        
+
        processor = DataProcessor()
        embeddings = processor._extract_embeddings(documents)
-        
+
        assert embeddings.shape == (2, 2)
        assert np.allclose(embeddings[0], [0.1, 0.2])
        assert np.allclose(embeddings[1], [0.3, 0.4])
-    
+
    def test_combine_data(self):
        from src.embeddingbuddy.models.schemas import ProcessedData
-        
+
        doc_data = ProcessedData(
            documents=[Document(id="1", text="doc", embedding=[0.1, 0.2])],
-            embeddings=np.array([[0.1, 0.2]])
+            embeddings=np.array([[0.1, 0.2]]),
        )
-        
+
        prompt_data = ProcessedData(
            documents=[Document(id="p1", text="prompt", embedding=[0.3, 0.4])],
-            embeddings=np.array([[0.3, 0.4]])
+            embeddings=np.array([[0.3, 0.4]]),
        )
-        
+
        processor = DataProcessor()
-        all_embeddings, documents, prompts = processor.combine_data(doc_data, prompt_data)
-        
+        all_embeddings, documents, prompts = processor.combine_data(
+            doc_data, prompt_data
+        )
+
        assert all_embeddings.shape == (2, 2)
        assert len(documents) == 1
        assert len(prompts) == 1
@@ -70,4 +72,4 @@ class TestDataProcessor:


 if __name__ == "__main__":
-    pytest.main([__file__])
+    pytest.main([__file__])