diff --git a/src/embeddingbuddy/models/field_mapper.py b/src/embeddingbuddy/models/field_mapper.py index 938b25a..73faada 100644 --- a/src/embeddingbuddy/models/field_mapper.py +++ b/src/embeddingbuddy/models/field_mapper.py @@ -39,7 +39,6 @@ class FieldMapper: vector_fields = [vf["name"] for vf in field_analysis.get("vector_fields", [])] text_fields = field_analysis.get("text_fields", []) keyword_fields = field_analysis.get("keyword_fields", []) - numeric_fields = field_analysis.get("numeric_fields", []) # Helper function to create ordered suggestions def create_ordered_suggestions(primary_candidates, all_available_fields): @@ -57,8 +56,16 @@ class FieldMapper: suggestions = {} - # Embedding field suggestions (vector fields first, then all fields) + # Embedding field suggestions (vector fields first, then name-based candidates, then all fields) embedding_candidates = vector_fields.copy() + # Add fields that likely contain embeddings based on name + embedding_name_candidates = [f for f in all_fields if any( + keyword in f.lower() for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"] + )] + # Add name-based candidates that aren't already in vector_fields + for candidate in embedding_name_candidates: + if candidate not in embedding_candidates: + embedding_candidates.append(candidate) suggestions["embedding"] = create_ordered_suggestions(embedding_candidates, all_fields) # Text field suggestions (text fields first, then all fields) diff --git a/src/embeddingbuddy/ui/callbacks/data_processing.py b/src/embeddingbuddy/ui/callbacks/data_processing.py index e228ac7..6fd9b99 100644 --- a/src/embeddingbuddy/ui/callbacks/data_processing.py +++ b/src/embeddingbuddy/ui/callbacks/data_processing.py @@ -8,7 +8,8 @@ from ...config.settings import AppSettings class DataProcessingCallbacks: def __init__(self): self.processor = DataProcessor() - self.opensearch_client = OpenSearchClient() + self.opensearch_client_data = OpenSearchClient() # For data/documents + self.opensearch_client_prompts = OpenSearchClient() # For prompts self._register_callbacks() def _register_callbacks(self): @@ -89,10 +90,21 @@ class DataProcessingCallbacks: else: return [datasource.create_file_upload_tab()] + # Register callbacks for both data and prompts sections + self._register_opensearch_callbacks("data", self.opensearch_client_data) + self._register_opensearch_callbacks("prompts", self.opensearch_client_prompts) + + # Register collapsible section callbacks + self._register_collapse_callbacks() + + + def _register_opensearch_callbacks(self, section_type, opensearch_client): + """Register callbacks for a specific section (data or prompts).""" + @callback( - Output("auth-collapse", "is_open"), - [Input("auth-toggle", "n_clicks")], - [State("auth-collapse", "is_open")], + Output(f"{section_type}-auth-collapse", "is_open"), + [Input(f"{section_type}-auth-toggle", "n_clicks")], + [State(f"{section_type}-auth-collapse", "is_open")], prevent_initial_call=True, ) def toggle_auth(n_clicks, is_open): @@ -101,8 +113,8 @@ class DataProcessingCallbacks: return is_open @callback( - Output("auth-toggle", "children"), - [Input("auth-collapse", "is_open")], + Output(f"{section_type}-auth-toggle", "children"), + [Input(f"{section_type}-auth-collapse", "is_open")], prevent_initial_call=False, ) def update_auth_button_text(is_open): @@ -110,36 +122,34 @@ class DataProcessingCallbacks: @callback( [ - Output("connection-status", "children"), - Output("field-mapping-section", "children"), - Output("field-mapping-section", "style"), - Output("load-data-section", "style"), - Output("load-opensearch-data-btn", "disabled"), - Output("embedding-field-dropdown", "options"), - Output("text-field-dropdown", "options"), - Output("id-field-dropdown", "options"), - Output("category-field-dropdown", "options"), - Output("subcategory-field-dropdown", "options"), - Output("tags-field-dropdown", "options"), + Output(f"{section_type}-connection-status", "children"), + Output(f"{section_type}-field-mapping-section", "children"), + Output(f"{section_type}-field-mapping-section", "style"), + Output(f"{section_type}-load-data-section", "style"), + Output(f"{section_type}-load-opensearch-data-btn", "disabled"), + Output(f"{section_type}-embedding-field-dropdown", "options"), + Output(f"{section_type}-text-field-dropdown", "options"), + Output(f"{section_type}-id-field-dropdown", "options"), + Output(f"{section_type}-category-field-dropdown", "options"), + Output(f"{section_type}-subcategory-field-dropdown", "options"), + Output(f"{section_type}-tags-field-dropdown", "options"), ], - [Input("test-connection-btn", "n_clicks")], + [Input(f"{section_type}-test-connection-btn", "n_clicks")], [ - State("opensearch-url", "value"), - State("opensearch-index", "value"), - State("opensearch-username", "value"), - State("opensearch-password", "value"), - State("opensearch-api-key", "value"), + State(f"{section_type}-opensearch-url", "value"), + State(f"{section_type}-opensearch-index", "value"), + State(f"{section_type}-opensearch-username", "value"), + State(f"{section_type}-opensearch-password", "value"), + State(f"{section_type}-opensearch-api-key", "value"), ], prevent_initial_call=True, ) - def test_opensearch_connection( - n_clicks, url, index_name, username, password, api_key - ): + def test_opensearch_connection(n_clicks, url, index_name, username, password, api_key): if not n_clicks or not url or not index_name: return no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update # Test connection - success, message = self.opensearch_client.connect( + success, message = opensearch_client.connect( url=url, username=username, password=password, @@ -163,9 +173,7 @@ class DataProcessingCallbacks: ) # Analyze fields - success, field_analysis, analysis_message = ( - self.opensearch_client.analyze_fields(index_name) - ) + success, field_analysis, analysis_message = opensearch_client.analyze_fields(index_name) if not success: return ( @@ -186,11 +194,8 @@ class DataProcessingCallbacks: field_suggestions = FieldMapper.suggest_mappings(field_analysis) from ...ui.components.datasource import DataSourceComponent - datasource = DataSourceComponent() - field_mapping_ui = datasource.create_field_mapping_interface( - field_suggestions - ) + field_mapping_ui = datasource.create_field_mapping_interface(field_suggestions, section_type) return ( self._create_status_alert(f"✅ {message}", "success"), @@ -206,55 +211,55 @@ class DataProcessingCallbacks: [{"label": field, "value": field} for field in field_suggestions.get("tags", [])], ) + # Determine output target based on section type + output_target = "processed-data" if section_type == "data" else "processed-prompts" + @callback( [ - Output("processed-data", "data", allow_duplicate=True), + Output(output_target, "data", allow_duplicate=True), Output("opensearch-success-alert", "children", allow_duplicate=True), Output("opensearch-success-alert", "is_open", allow_duplicate=True), Output("opensearch-error-alert", "children", allow_duplicate=True), Output("opensearch-error-alert", "is_open", allow_duplicate=True), ], - [Input("load-opensearch-data-btn", "n_clicks")], + [Input(f"{section_type}-load-opensearch-data-btn", "n_clicks")], [ - State("opensearch-index", "value"), - State("embedding-field-dropdown", "value"), - State("text-field-dropdown", "value"), - State("id-field-dropdown", "value"), - State("category-field-dropdown", "value"), - State("subcategory-field-dropdown", "value"), - State("tags-field-dropdown", "value"), + State(f"{section_type}-opensearch-index", "value"), + State(f"{section_type}-opensearch-query-size", "value"), + State(f"{section_type}-embedding-field-dropdown-ui", "value"), + State(f"{section_type}-text-field-dropdown-ui", "value"), + State(f"{section_type}-id-field-dropdown-ui", "value"), + State(f"{section_type}-category-field-dropdown-ui", "value"), + State(f"{section_type}-subcategory-field-dropdown-ui", "value"), + State(f"{section_type}-tags-field-dropdown-ui", "value"), ], prevent_initial_call=True, ) - def load_opensearch_data( - n_clicks, - index_name, - embedding_field, - text_field, - id_field, - category_field, - subcategory_field, - tags_field, - ): + def load_opensearch_data(n_clicks, index_name, query_size, embedding_field, text_field, + id_field, category_field, subcategory_field, tags_field): if not n_clicks or not index_name or not embedding_field or not text_field: return no_update, no_update, no_update, no_update, no_update try: + # Validate and set query size + if not query_size or query_size < 1: + query_size = AppSettings.OPENSEARCH_DEFAULT_SIZE + elif query_size > 1000: + query_size = 1000 # Cap at reasonable maximum + # Create field mapping - field_mapping = FieldMapper.create_mapping_from_dict( - { - "embedding": embedding_field, - "text": text_field, - "id": id_field, - "category": category_field, - "subcategory": subcategory_field, - "tags": tags_field, - } - ) + field_mapping = FieldMapper.create_mapping_from_dict({ + "embedding": embedding_field, + "text": text_field, + "id": id_field, + "category": category_field, + "subcategory": subcategory_field, + "tags": tags_field + }) # Fetch data from OpenSearch - success, raw_documents, message = self.opensearch_client.fetch_data( - index_name, size=AppSettings.OPENSEARCH_DEFAULT_SIZE + success, raw_documents, message = opensearch_client.fetch_data( + index_name, size=query_size ) if not success: @@ -262,92 +267,141 @@ class DataProcessingCallbacks: no_update, "", False, - f"❌ Failed to fetch data: {message}", - True, + f"❌ Failed to fetch {section_type}: {message}", + True ) # Process the data - processed_data = self.processor.process_opensearch_data( - raw_documents, field_mapping - ) + processed_data = self.processor.process_opensearch_data(raw_documents, field_mapping) if processed_data.error: return ( {"error": processed_data.error}, "", False, - f"❌ Data processing error: {processed_data.error}", - True, + f"❌ {section_type.title()} processing error: {processed_data.error}", + True ) - success_message = f"✅ Successfully loaded {len(processed_data.documents)} documents from OpenSearch" + success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch" - return ( - { - "documents": [ - self._document_to_dict(doc) - for doc in processed_data.documents - ], - "embeddings": processed_data.embeddings.tolist(), - }, - success_message, - True, - "", - False, - ) + # Format for appropriate target (data vs prompts) + if section_type == "data": + return ( + { + "documents": [ + self._document_to_dict(doc) for doc in processed_data.documents + ], + "embeddings": processed_data.embeddings.tolist(), + }, + success_message, + True, + "", + False + ) + else: # prompts + return ( + { + "prompts": [ + self._document_to_dict(doc) for doc in processed_data.documents + ], + "embeddings": processed_data.embeddings.tolist(), + }, + success_message, + True, + "", + False + ) except Exception as e: return (no_update, "", False, f"❌ Unexpected error: {str(e)}", True) # Sync callbacks to update hidden dropdowns from UI dropdowns @callback( - Output("embedding-field-dropdown", "value"), - Input("embedding-field-dropdown-ui", "value"), + Output(f"{section_type}-embedding-field-dropdown", "value"), + Input(f"{section_type}-embedding-field-dropdown-ui", "value"), prevent_initial_call=True, ) def sync_embedding_dropdown(value): return value @callback( - Output("text-field-dropdown", "value"), - Input("text-field-dropdown-ui", "value"), + Output(f"{section_type}-text-field-dropdown", "value"), + Input(f"{section_type}-text-field-dropdown-ui", "value"), prevent_initial_call=True, ) def sync_text_dropdown(value): return value @callback( - Output("id-field-dropdown", "value"), - Input("id-field-dropdown-ui", "value"), + Output(f"{section_type}-id-field-dropdown", "value"), + Input(f"{section_type}-id-field-dropdown-ui", "value"), prevent_initial_call=True, ) def sync_id_dropdown(value): return value @callback( - Output("category-field-dropdown", "value"), - Input("category-field-dropdown-ui", "value"), + Output(f"{section_type}-category-field-dropdown", "value"), + Input(f"{section_type}-category-field-dropdown-ui", "value"), prevent_initial_call=True, ) def sync_category_dropdown(value): return value @callback( - Output("subcategory-field-dropdown", "value"), - Input("subcategory-field-dropdown-ui", "value"), + Output(f"{section_type}-subcategory-field-dropdown", "value"), + Input(f"{section_type}-subcategory-field-dropdown-ui", "value"), prevent_initial_call=True, ) def sync_subcategory_dropdown(value): return value @callback( - Output("tags-field-dropdown", "value"), - Input("tags-field-dropdown-ui", "value"), + Output(f"{section_type}-tags-field-dropdown", "value"), + Input(f"{section_type}-tags-field-dropdown-ui", "value"), prevent_initial_call=True, ) def sync_tags_dropdown(value): return value + def _register_collapse_callbacks(self): + """Register callbacks for collapsible sections.""" + + # Data section collapse callback + @callback( + [ + Output("data-collapse", "is_open"), + Output("data-collapse-icon", "className"), + ], + [Input("data-collapse-toggle", "n_clicks")], + [State("data-collapse", "is_open")], + prevent_initial_call=True, + ) + def toggle_data_collapse(n_clicks, is_open): + if n_clicks: + new_state = not is_open + icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2" + return new_state, icon_class + return is_open, "fas fa-chevron-down me-2" + + # Prompts section collapse callback + @callback( + [ + Output("prompts-collapse", "is_open"), + Output("prompts-collapse-icon", "className"), + ], + [Input("prompts-collapse-toggle", "n_clicks")], + [State("prompts-collapse", "is_open")], + prevent_initial_call=True, + ) + def toggle_prompts_collapse(n_clicks, is_open): + if n_clicks: + new_state = not is_open + icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2" + return new_state, icon_class + return is_open, "fas fa-chevron-down me-2" + @staticmethod def _document_to_dict(doc): return { diff --git a/src/embeddingbuddy/ui/components/datasource.py b/src/embeddingbuddy/ui/components/datasource.py index 922e2fe..93b375d 100644 --- a/src/embeddingbuddy/ui/components/datasource.py +++ b/src/embeddingbuddy/ui/components/datasource.py @@ -39,139 +39,200 @@ class DataSourceComponent: ) def create_opensearch_tab(self): - """Create OpenSearch tab content.""" + """Create OpenSearch tab content with separate Data and Prompts sections.""" return html.Div( [ - # Connection section - html.H6("Connection", className="mb-2"), - dbc.Row( - [ - dbc.Col( - [ - dbc.Label("OpenSearch URL:"), - dbc.Input( - id="opensearch-url", - type="text", - placeholder="https://opensearch.example.com:9200", - className="mb-2", - ), - ], - width=12, - ), - ] - ), - dbc.Row( - [ - dbc.Col( - [ - dbc.Label("Index Name:"), - dbc.Input( - id="opensearch-index", - type="text", - placeholder="my-embeddings-index", - className="mb-2", - ), - ], - width=6, - ), - dbc.Col( - [ - dbc.Button( - "Test Connection", - id="test-connection-btn", - color="primary", - size="sm", - className="mt-4", - ), - ], - width=6, - className="d-flex align-items-end", - ), - ] - ), - # Authentication section (collapsible) - dbc.Collapse( - [ - html.Hr(), - html.H6("Authentication (Optional)", className="mb-2"), - dbc.Row( - [ - dbc.Col( - [ - dbc.Label("Username:"), - dbc.Input( - id="opensearch-username", - type="text", - className="mb-2", - ), - ], - width=6, - ), - dbc.Col( - [ - dbc.Label("Password:"), - dbc.Input( - id="opensearch-password", - type="password", - className="mb-2", - ), - ], - width=6, - ), - ] - ), - dbc.Label("OR"), - dbc.Input( - id="opensearch-api-key", - type="text", - placeholder="API Key", - className="mb-2", - ), - ], - id="auth-collapse", - is_open=False, - ), - dbc.Button( - "Show Authentication", - id="auth-toggle", - color="link", - size="sm", - className="p-0 mb-3", - ), - # Connection status - html.Div(id="connection-status", className="mb-3"), - # Field mapping section (hidden initially) - html.Div(id="field-mapping-section", style={"display": "none"}), - - # Hidden dropdowns to prevent callback errors - html.Div([ - dcc.Dropdown(id="embedding-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="text-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="id-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="category-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="subcategory-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="tags-field-dropdown", style={"display": "none"}), - ], style={"display": "none"}), - # Load data button (hidden initially) - html.Div( - [ + # Data Section + dbc.Card([ + dbc.CardHeader([ dbc.Button( - "Load Data", - id="load-opensearch-data-btn", - color="success", - className="mb-2", - disabled=True, + [ + html.I(className="fas fa-chevron-down me-2", id="data-collapse-icon"), + "📄 Documents/Data" + ], + id="data-collapse-toggle", + color="link", + className="text-start p-0 w-100 text-decoration-none", + style={"border": "none", "font-size": "1.25rem", "font-weight": "500"} ), - ], - id="load-data-section", - style={"display": "none"}, - ), - # OpenSearch status/results - html.Div(id="opensearch-status", className="mb-3"), + ]), + dbc.Collapse([ + dbc.CardBody([ + self._create_opensearch_section("data") + ]) + ], id="data-collapse", is_open=True) + ], className="mb-4"), + + # Prompts Section + dbc.Card([ + dbc.CardHeader([ + dbc.Button( + [ + html.I(className="fas fa-chevron-down me-2", id="prompts-collapse-icon"), + "💬 Prompts" + ], + id="prompts-collapse-toggle", + color="link", + className="text-start p-0 w-100 text-decoration-none", + style={"border": "none", "font-size": "1.25rem", "font-weight": "500"} + ), + ]), + dbc.Collapse([ + dbc.CardBody([ + self._create_opensearch_section("prompts") + ]) + ], id="prompts-collapse", is_open=True) + ], className="mb-4"), + + # Hidden dropdowns to prevent callback errors (for both sections) + html.Div([ + # Data dropdowns (hidden sync targets) + dcc.Dropdown(id="data-embedding-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="data-text-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="data-id-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="data-category-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="data-subcategory-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="data-tags-field-dropdown", style={"display": "none"}), + # Data UI dropdowns (hidden placeholders) + dcc.Dropdown(id="data-embedding-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="data-text-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="data-id-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="data-category-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="data-subcategory-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="data-tags-field-dropdown-ui", style={"display": "none"}), + # Prompts dropdowns (hidden sync targets) + dcc.Dropdown(id="prompts-embedding-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="prompts-text-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="prompts-id-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="prompts-category-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="prompts-subcategory-field-dropdown", style={"display": "none"}), + dcc.Dropdown(id="prompts-tags-field-dropdown", style={"display": "none"}), + # Prompts UI dropdowns (hidden placeholders) + dcc.Dropdown(id="prompts-embedding-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="prompts-text-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="prompts-id-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="prompts-category-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="prompts-subcategory-field-dropdown-ui", style={"display": "none"}), + dcc.Dropdown(id="prompts-tags-field-dropdown-ui", style={"display": "none"}), + ], style={"display": "none"}), ] ) - def create_field_mapping_interface(self, field_suggestions): + def _create_opensearch_section(self, section_type): + """Create a complete OpenSearch section for either 'data' or 'prompts'.""" + section_id = section_type # 'data' or 'prompts' + + return html.Div([ + # Connection section + html.H6("Connection", className="mb-2"), + dbc.Row([ + dbc.Col([ + dbc.Label("OpenSearch URL:"), + dbc.Input( + id=f"{section_id}-opensearch-url", + type="text", + placeholder="https://opensearch.example.com:9200", + className="mb-2", + ), + ], width=12), + ]), + + dbc.Row([ + dbc.Col([ + dbc.Label("Index Name:"), + dbc.Input( + id=f"{section_id}-opensearch-index", + type="text", + placeholder="my-embeddings-index", + className="mb-2", + ), + ], width=6), + dbc.Col([ + dbc.Label("Query Size:"), + dbc.Input( + id=f"{section_id}-opensearch-query-size", + type="number", + value=100, + min=1, + max=1000, + placeholder="100", + className="mb-2", + ), + ], width=6), + ]), + + dbc.Row([ + dbc.Col([ + dbc.Button( + "Test Connection", + id=f"{section_id}-test-connection-btn", + color="primary", + className="mb-3", + ), + ], width=12), + ]), + + # Authentication section (collapsible) + dbc.Collapse([ + html.Hr(), + html.H6("Authentication (Optional)", className="mb-2"), + dbc.Row([ + dbc.Col([ + dbc.Label("Username:"), + dbc.Input( + id=f"{section_id}-opensearch-username", + type="text", + className="mb-2", + ), + ], width=6), + dbc.Col([ + dbc.Label("Password:"), + dbc.Input( + id=f"{section_id}-opensearch-password", + type="password", + className="mb-2", + ), + ], width=6), + ]), + dbc.Label("OR"), + dbc.Input( + id=f"{section_id}-opensearch-api-key", + type="text", + placeholder="API Key", + className="mb-2", + ), + ], id=f"{section_id}-auth-collapse", is_open=False), + + dbc.Button( + "Show Authentication", + id=f"{section_id}-auth-toggle", + color="link", + size="sm", + className="p-0 mb-3", + ), + + # Connection status + html.Div(id=f"{section_id}-connection-status", className="mb-3"), + + # Field mapping section (hidden initially) + html.Div(id=f"{section_id}-field-mapping-section", style={"display": "none"}), + + # Load data button (hidden initially) + html.Div([ + dbc.Button( + f"Load {section_type.title()}", + id=f"{section_id}-load-opensearch-data-btn", + color="success", + className="mb-2", + disabled=True, + ), + ], id=f"{section_id}-load-data-section", style={"display": "none"}), + + # OpenSearch status/results + html.Div(id=f"{section_id}-opensearch-status", className="mb-3"), + ]) + + def create_field_mapping_interface(self, field_suggestions, section_type="data"): """Create field mapping interface based on detected fields.""" return html.Div( [ @@ -190,7 +251,7 @@ class DataSourceComponent: "Embedding Field (required):", className="fw-bold" ), dcc.Dropdown( - id="embedding-field-dropdown-ui", + id=f"{section_type}-embedding-field-dropdown-ui", options=[ {"label": field, "value": field} for field in field_suggestions.get("embedding", []) @@ -208,7 +269,7 @@ class DataSourceComponent: "Text Field (required):", className="fw-bold" ), dcc.Dropdown( - id="text-field-dropdown-ui", + id=f"{section_type}-text-field-dropdown-ui", options=[ {"label": field, "value": field} for field in field_suggestions.get("text", []) @@ -230,7 +291,7 @@ class DataSourceComponent: [ dbc.Label("ID Field:"), dcc.Dropdown( - id="id-field-dropdown-ui", + id=f"{section_type}-id-field-dropdown-ui", options=[ {"label": field, "value": field} for field in field_suggestions.get("id", []) @@ -246,7 +307,7 @@ class DataSourceComponent: [ dbc.Label("Category Field:"), dcc.Dropdown( - id="category-field-dropdown-ui", + id=f"{section_type}-category-field-dropdown-ui", options=[ {"label": field, "value": field} for field in field_suggestions.get("category", []) @@ -266,7 +327,7 @@ class DataSourceComponent: [ dbc.Label("Subcategory Field:"), dcc.Dropdown( - id="subcategory-field-dropdown-ui", + id=f"{section_type}-subcategory-field-dropdown-ui", options=[ {"label": field, "value": field} for field in field_suggestions.get("subcategory", []) @@ -282,7 +343,7 @@ class DataSourceComponent: [ dbc.Label("Tags Field:"), dcc.Dropdown( - id="tags-field-dropdown-ui", + id=f"{section_type}-tags-field-dropdown-ui", options=[ {"label": field, "value": field} for field in field_suggestions.get("tags", []) diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 30787f6..b862a59 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -117,6 +117,27 @@ class TestFieldMapper: assert suggestions["category"][0] in ["category", "type"] # category-like field should be first assert suggestions["tags"][0] == "tags" # tags field should be first + def test_suggest_mappings_name_based_embedding(self): + """Test that fields named 'embedding' are prioritized even without vector type.""" + field_analysis = { + "vector_fields": [], # No explicit vector fields detected + "text_fields": ["content", "description"], + "keyword_fields": ["doc_id", "category", "type", "tags"], + "numeric_fields": ["count"], + "all_fields": ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"], + } + + suggestions = FieldMapper.suggest_mappings(field_analysis) + + # Check that 'embedding' field is prioritized despite not being detected as vector type + assert suggestions["embedding"][0] == "embedding", "Field named 'embedding' should be first priority" + + # Check that all fields are still available + all_fields = ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"] + for field_type in ["embedding", "text", "id", "category", "subcategory", "tags"]: + for field in all_fields: + assert field in suggestions[field_type], f"Field '{field}' missing from {field_type} suggestions" + def test_validate_mapping_success(self): mapping = FieldMapping( embedding_field="embedding", text_field="text", id_field="doc_id"