fixed refactored code
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 38s
Security Scan / security (pull_request) Successful in 41s
Test Suite / lint (pull_request) Failing after 28s
Test Suite / test (3.11) (pull_request) Successful in 1m27s
Test Suite / build (pull_request) Has been skipped
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 38s
Security Scan / security (pull_request) Successful in 41s
Test Suite / lint (pull_request) Failing after 28s
Test Suite / test (3.11) (pull_request) Successful in 1m27s
Test Suite / build (pull_request) Has been skipped
This commit is contained in:
2
example/bad/bad_binary_content.ndjson
Normal file
2
example/bad/bad_binary_content.ndjson
Normal file
@@ -0,0 +1,2 @@
|
||||
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, 0.2], "text": "Binary junk at start"}
|
||||
{"id": "doc_002", "embedding": [0.5, 0.1, -0.2, 0.8], "text": "Normal line"}<7D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
6
example/bad/bad_empty_lines.ndjson
Normal file
6
example/bad/bad_empty_lines.ndjson
Normal file
@@ -0,0 +1,6 @@
|
||||
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, 0.2], "text": "First line"}
|
||||
|
||||
{"id": "doc_002", "embedding": [0.5, 0.1, -0.2, 0.8], "text": "After empty line"}
|
||||
|
||||
|
||||
{"id": "doc_003", "embedding": [0.3, 0.4, 0.1, -0.1], "text": "After multiple empty lines"}
|
4
example/bad/bad_inconsistent_dimensions.ndjson
Normal file
4
example/bad/bad_inconsistent_dimensions.ndjson
Normal file
@@ -0,0 +1,4 @@
|
||||
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, 0.2], "text": "4D embedding"}
|
||||
{"id": "doc_002", "embedding": [0.5, 0.1, -0.2], "text": "3D embedding"}
|
||||
{"id": "doc_003", "embedding": [0.3, 0.4, 0.1, -0.1, 0.8], "text": "5D embedding"}
|
||||
{"id": "doc_004", "embedding": [0.2, 0.1], "text": "2D embedding"}
|
8
example/bad/bad_invalid_embeddings.ndjson
Normal file
8
example/bad/bad_invalid_embeddings.ndjson
Normal file
@@ -0,0 +1,8 @@
|
||||
{"id": "doc_001", "embedding": "not_an_array", "text": "Embedding as string"}
|
||||
{"id": "doc_002", "embedding": [0.1, "text", 0.7, 0.2], "text": "Mixed types in embedding"}
|
||||
{"id": "doc_003", "embedding": [], "text": "Empty embedding array"}
|
||||
{"id": "doc_004", "embedding": [0.1], "text": "Single dimension embedding"}
|
||||
{"id": "doc_005", "embedding": null, "text": "Null embedding"}
|
||||
{"id": "doc_006", "embedding": [0.1, 0.2, null, 0.4], "text": "Null value in embedding"}
|
||||
{"id": "doc_007", "embedding": [0.1, 0.2, "NaN", 0.4], "text": "String NaN in embedding"}
|
||||
{"id": "doc_008", "embedding": [0.1, 0.2, Infinity, 0.4], "text": "Infinity in embedding"}
|
5
example/bad/bad_malformed_json.ndjson
Normal file
5
example/bad/bad_malformed_json.ndjson
Normal file
@@ -0,0 +1,5 @@
|
||||
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, "text": "Valid line"}
|
||||
{"id": "doc_002", "embedding": [0.5, 0.1, -0.2, 0.8], "text": "Missing closing brace"
|
||||
{"id": "doc_003" "embedding": [0.3, 0.4, 0.1, -0.1], "text": "Missing colon after id"}
|
||||
{id: "doc_004", "embedding": [0.2, 0.1, 0.3, 0.4], "text": "Unquoted key"}
|
||||
{"id": "doc_005", "embedding": [0.1, 0.2, 0.3, 0.4], "text": "Valid line again"}
|
3
example/bad/bad_missing_embedding.ndjson
Normal file
3
example/bad/bad_missing_embedding.ndjson
Normal file
@@ -0,0 +1,3 @@
|
||||
{"id": "doc_001", "text": "Sample text without embedding field", "category": "test"}
|
||||
{"id": "doc_002", "text": "Another text without embedding", "category": "test"}
|
||||
{"id": "doc_003", "text": "Third text missing embedding", "category": "test"}
|
3
example/bad/bad_missing_text.ndjson
Normal file
3
example/bad/bad_missing_text.ndjson
Normal file
@@ -0,0 +1,3 @@
|
||||
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, 0.2], "category": "test"}
|
||||
{"id": "doc_002", "embedding": [0.5, 0.1, -0.2, 0.8], "category": "test"}
|
||||
{"id": "doc_003", "embedding": [0.3, 0.4, 0.1, -0.1], "category": "test"}
|
4
example/bad/bad_not_ndjson.json
Normal file
4
example/bad/bad_not_ndjson.json
Normal file
@@ -0,0 +1,4 @@
|
||||
[
|
||||
{"id": "doc_001", "embedding": [0.1, -0.3, 0.7, 0.2], "text": "Regular JSON array"},
|
||||
{"id": "doc_002", "embedding": [0.5, 0.1, -0.2, 0.8], "text": "Instead of NDJSON"}
|
||||
]
|
20
example/sample_data.ndjson
Normal file
20
example/sample_data.ndjson
Normal file
@@ -0,0 +1,20 @@
|
||||
{"id": "doc_001", "embedding": [0.2, -0.1, 0.8, 0.3, -0.5, 0.7, 0.1, -0.3], "text": "Machine learning algorithms are transforming healthcare by enabling predictive analytics and personalized medicine.", "category": "technology", "subcategory": "healthcare", "tags": ["ai", "medicine", "prediction"]}
|
||||
{"id": "doc_002", "embedding": [0.1, 0.4, -0.2, 0.6, 0.3, -0.4, 0.8, 0.2], "text": "Climate change poses significant challenges to global food security and agricultural sustainability.", "category": "environment", "subcategory": "agriculture", "tags": ["climate", "food", "sustainability"]}
|
||||
{"id": "doc_003", "embedding": [-0.3, 0.7, 0.1, -0.2, 0.9, 0.4, -0.1, 0.5], "text": "The rise of electric vehicles is reshaping the automotive industry and urban transportation systems.", "category": "technology", "subcategory": "automotive", "tags": ["electric", "transport", "urban"]}
|
||||
{"id": "doc_004", "embedding": [0.5, -0.6, 0.3, 0.8, -0.2, 0.1, 0.7, -0.4], "text": "Renewable energy sources like solar and wind are becoming increasingly cost-competitive with fossil fuels.", "category": "environment", "subcategory": "energy", "tags": ["renewable", "solar", "wind"]}
|
||||
{"id": "doc_005", "embedding": [0.8, 0.2, -0.5, 0.1, 0.6, -0.3, 0.4, 0.9], "text": "Financial markets are experiencing volatility due to geopolitical tensions and inflation concerns.", "category": "finance", "subcategory": "markets", "tags": ["volatility", "inflation", "geopolitics"]}
|
||||
{"id": "doc_006", "embedding": [-0.1, 0.5, 0.7, -0.4, 0.2, 0.8, -0.6, 0.3], "text": "Quantum computing research is advancing rapidly with potential applications in cryptography and drug discovery.", "category": "technology", "subcategory": "research", "tags": ["quantum", "cryptography", "research"]}
|
||||
{"id": "doc_007", "embedding": [0.4, -0.3, 0.6, 0.7, -0.8, 0.2, 0.5, -0.1], "text": "Ocean pollution from plastic waste is threatening marine ecosystems and biodiversity worldwide.", "category": "environment", "subcategory": "marine", "tags": ["pollution", "plastic", "marine"]}
|
||||
{"id": "doc_008", "embedding": [0.3, 0.8, -0.2, 0.5, 0.1, -0.7, 0.6, 0.4], "text": "Artificial intelligence is revolutionizing customer service through chatbots and automated support systems.", "category": "technology", "subcategory": "customer_service", "tags": ["ai", "chatbots", "automation"]}
|
||||
{"id": "doc_009", "embedding": [-0.5, 0.3, 0.9, -0.1, 0.7, 0.4, -0.2, 0.8], "text": "Global supply chains are being redesigned for resilience after pandemic-related disruptions.", "category": "business", "subcategory": "logistics", "tags": ["supply_chain", "pandemic", "resilience"]}
|
||||
{"id": "doc_010", "embedding": [0.7, -0.4, 0.2, 0.9, -0.3, 0.6, 0.1, -0.8], "text": "Space exploration missions are expanding our understanding of the solar system and potential for life.", "category": "science", "subcategory": "space", "tags": ["space", "exploration", "life"]}
|
||||
{"id": "doc_011", "embedding": [-0.2, 0.6, 0.4, -0.7, 0.8, 0.3, -0.5, 0.1], "text": "Cryptocurrency adoption is growing among institutional investors despite regulatory uncertainties.", "category": "finance", "subcategory": "crypto", "tags": ["cryptocurrency", "institutional", "regulation"]}
|
||||
{"id": "doc_012", "embedding": [0.6, 0.1, -0.8, 0.4, 0.5, -0.2, 0.9, -0.3], "text": "Remote work technologies are transforming traditional office environments and work-life balance.", "category": "technology", "subcategory": "workplace", "tags": ["remote", "work", "balance"]}
|
||||
{"id": "doc_013", "embedding": [0.1, -0.7, 0.5, 0.8, -0.4, 0.3, 0.2, 0.6], "text": "Gene therapy breakthroughs are offering new hope for treating previously incurable genetic diseases.", "category": "science", "subcategory": "medicine", "tags": ["gene_therapy", "genetics", "medicine"]}
|
||||
{"id": "doc_014", "embedding": [-0.4, 0.2, 0.7, -0.1, 0.9, -0.6, 0.3, 0.5], "text": "Urban planning is evolving to create more sustainable and livable cities for growing populations.", "category": "environment", "subcategory": "urban", "tags": ["urban_planning", "sustainability", "cities"]}
|
||||
{"id": "doc_015", "embedding": [0.9, -0.1, 0.3, 0.6, -0.5, 0.8, -0.2, 0.4], "text": "Social media platforms are implementing new policies to combat misinformation and protect user privacy.", "category": "technology", "subcategory": "social_media", "tags": ["social_media", "misinformation", "privacy"]}
|
||||
{"id": "doc_016", "embedding": [-0.3, 0.8, -0.1, 0.4, 0.7, -0.5, 0.6, -0.9], "text": "Educational technology is personalizing learning experiences and improving student outcomes.", "category": "education", "subcategory": "technology", "tags": ["education", "personalization", "technology"]}
|
||||
{"id": "doc_017", "embedding": [0.5, 0.3, -0.6, 0.2, 0.8, 0.1, -0.4, 0.7], "text": "Biodiversity conservation efforts are critical for maintaining ecosystem balance and preventing species extinction.", "category": "environment", "subcategory": "conservation", "tags": ["biodiversity", "conservation", "extinction"]}
|
||||
{"id": "doc_018", "embedding": [0.2, -0.8, 0.4, 0.7, -0.1, 0.5, 0.9, -0.3], "text": "Healthcare systems are adopting telemedicine to improve access and reduce costs for patients.", "category": "technology", "subcategory": "healthcare", "tags": ["telemedicine", "healthcare", "access"]}
|
||||
{"id": "doc_019", "embedding": [-0.7, 0.4, 0.8, -0.2, 0.3, 0.6, -0.1, 0.9], "text": "Autonomous vehicles are being tested extensively with promises of safer and more efficient transportation.", "category": "technology", "subcategory": "automotive", "tags": ["autonomous", "safety", "efficiency"]}
|
||||
{"id": "doc_020", "embedding": [0.4, 0.7, -0.3, 0.9, -0.6, 0.2, 0.5, -0.1], "text": "Mental health awareness is increasing with new approaches to therapy and workplace wellness programs.", "category": "health", "subcategory": "mental", "tags": ["mental_health", "therapy", "wellness"]}
|
10
example/sample_prompts.ndjson
Normal file
10
example/sample_prompts.ndjson
Normal file
@@ -0,0 +1,10 @@
|
||||
{"id": "prompt_001", "embedding": [0.15, -0.28, 0.65, 0.42, -0.11, 0.33, 0.78, -0.52], "text": "Find articles about machine learning applications", "category": "search", "subcategory": "technology", "tags": ["AI", "research"]}
|
||||
{"id": "prompt_002", "embedding": [0.72, 0.18, -0.35, 0.51, 0.09, -0.44, 0.27, 0.63], "text": "Show me product reviews for smartphones", "category": "search", "subcategory": "product", "tags": ["mobile", "reviews"]}
|
||||
{"id": "prompt_003", "embedding": [-0.21, 0.59, 0.34, -0.67, 0.45, 0.12, -0.38, 0.76], "text": "What are the latest political developments?", "category": "search", "subcategory": "news", "tags": ["politics", "current events"]}
|
||||
{"id": "prompt_004", "embedding": [0.48, -0.15, 0.72, 0.31, -0.58, 0.24, 0.67, -0.39], "text": "Summarize recent tech industry trends", "category": "analysis", "subcategory": "technology", "tags": ["tech", "trends", "summary"]}
|
||||
{"id": "prompt_005", "embedding": [-0.33, 0.47, -0.62, 0.28, 0.71, -0.18, 0.54, 0.35], "text": "Compare different smartphone models", "category": "analysis", "subcategory": "product", "tags": ["comparison", "mobile", "evaluation"]}
|
||||
{"id": "prompt_006", "embedding": [0.64, 0.21, 0.39, -0.45, 0.13, 0.58, -0.27, 0.74], "text": "Analyze voter sentiment on recent policies", "category": "analysis", "subcategory": "politics", "tags": ["sentiment", "politics", "analysis"]}
|
||||
{"id": "prompt_007", "embedding": [0.29, -0.43, 0.56, 0.68, -0.22, 0.37, 0.14, -0.61], "text": "Generate a summary of machine learning research", "category": "generation", "subcategory": "technology", "tags": ["AI", "research", "summary"]}
|
||||
{"id": "prompt_008", "embedding": [-0.17, 0.52, -0.48, 0.36, 0.74, -0.29, 0.61, 0.18], "text": "Create a product recommendation report", "category": "generation", "subcategory": "product", "tags": ["recommendation", "report", "analysis"]}
|
||||
{"id": "prompt_009", "embedding": [0.55, 0.08, 0.41, -0.37, 0.26, 0.69, -0.14, 0.58], "text": "Write a news brief on election updates", "category": "generation", "subcategory": "news", "tags": ["election", "news", "brief"]}
|
||||
{"id": "prompt_010", "embedding": [0.23, -0.59, 0.47, 0.61, -0.35, 0.18, 0.72, -0.26], "text": "Explain how neural networks work", "category": "explanation", "subcategory": "technology", "tags": ["AI", "education", "neural networks"]}
|
Reference in New Issue
Block a user