From 1b6845774bb51f4b2ca565c3796ef14b0f570f12 Mon Sep 17 00:00:00 2001 From: Austin Godber Date: Thu, 14 Aug 2025 19:02:17 -0700 Subject: [PATCH] fix formatting and bump version to v0.3.0 --- pyproject.toml | 2 +- src/embeddingbuddy/app.py | 2 +- src/embeddingbuddy/models/field_mapper.py | 66 ++- .../ui/callbacks/data_processing.py | 127 +++-- .../ui/components/datasource.py | 494 +++++++++++------- tests/test_opensearch.py | 90 +++- uv.lock | 2 +- 7 files changed, 537 insertions(+), 246 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 741cde7..d3fbf7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "embeddingbuddy" -version = "0.2.0" +version = "0.3.0" description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques." readme = "README.md" requires-python = ">=3.11" diff --git a/src/embeddingbuddy/app.py b/src/embeddingbuddy/app.py index df5eaa6..d850d03 100644 --- a/src/embeddingbuddy/app.py +++ b/src/embeddingbuddy/app.py @@ -9,7 +9,7 @@ from .ui.callbacks.interactions import InteractionCallbacks def create_app(): app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) - + # Allow callbacks to components that are dynamically created in tabs app.config.suppress_callback_exceptions = True diff --git a/src/embeddingbuddy/models/field_mapper.py b/src/embeddingbuddy/models/field_mapper.py index 73faada..b2dc91d 100644 --- a/src/embeddingbuddy/models/field_mapper.py +++ b/src/embeddingbuddy/models/field_mapper.py @@ -25,7 +25,7 @@ class FieldMapper: def suggest_mappings(field_analysis: Dict) -> Dict[str, List[str]]: """ Suggest field mappings based on field analysis. - + Each dropdown will show ALL available fields, but ordered by relevance with the most likely candidates first. @@ -59,42 +59,70 @@ class FieldMapper: # Embedding field suggestions (vector fields first, then name-based candidates, then all fields) embedding_candidates = vector_fields.copy() # Add fields that likely contain embeddings based on name - embedding_name_candidates = [f for f in all_fields if any( - keyword in f.lower() for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"] - )] + embedding_name_candidates = [ + f + for f in all_fields + if any( + keyword in f.lower() + for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"] + ) + ] # Add name-based candidates that aren't already in vector_fields for candidate in embedding_name_candidates: if candidate not in embedding_candidates: embedding_candidates.append(candidate) - suggestions["embedding"] = create_ordered_suggestions(embedding_candidates, all_fields) + suggestions["embedding"] = create_ordered_suggestions( + embedding_candidates, all_fields + ) # Text field suggestions (text fields first, then all fields) text_candidates = text_fields.copy() suggestions["text"] = create_ordered_suggestions(text_candidates, all_fields) # ID field suggestions (ID-like fields first, then all fields) - id_candidates = [f for f in keyword_fields if any( - keyword in f.lower() for keyword in ["id", "_id", "doc", "document"] - )] + id_candidates = [ + f + for f in keyword_fields + if any(keyword in f.lower() for keyword in ["id", "_id", "doc", "document"]) + ] id_candidates.append("_id") # _id is always available suggestions["id"] = create_ordered_suggestions(id_candidates, all_fields) # Category field suggestions (category-like fields first, then all fields) - category_candidates = [f for f in keyword_fields if any( - keyword in f.lower() for keyword in ["category", "class", "type", "label"] - )] - suggestions["category"] = create_ordered_suggestions(category_candidates, all_fields) + category_candidates = [ + f + for f in keyword_fields + if any( + keyword in f.lower() + for keyword in ["category", "class", "type", "label"] + ) + ] + suggestions["category"] = create_ordered_suggestions( + category_candidates, all_fields + ) # Subcategory field suggestions (subcategory-like fields first, then all fields) - subcategory_candidates = [f for f in keyword_fields if any( - keyword in f.lower() for keyword in ["subcategory", "subclass", "subtype", "subtopic"] - )] - suggestions["subcategory"] = create_ordered_suggestions(subcategory_candidates, all_fields) + subcategory_candidates = [ + f + for f in keyword_fields + if any( + keyword in f.lower() + for keyword in ["subcategory", "subclass", "subtype", "subtopic"] + ) + ] + suggestions["subcategory"] = create_ordered_suggestions( + subcategory_candidates, all_fields + ) # Tags field suggestions (tag-like fields first, then all fields) - tags_candidates = [f for f in keyword_fields if any( - keyword in f.lower() for keyword in ["tag", "tags", "keyword", "keywords"] - )] + tags_candidates = [ + f + for f in keyword_fields + if any( + keyword in f.lower() + for keyword in ["tag", "tags", "keyword", "keywords"] + ) + ] suggestions["tags"] = create_ordered_suggestions(tags_candidates, all_fields) return suggestions diff --git a/src/embeddingbuddy/ui/callbacks/data_processing.py b/src/embeddingbuddy/ui/callbacks/data_processing.py index 6fd9b99..f739bc7 100644 --- a/src/embeddingbuddy/ui/callbacks/data_processing.py +++ b/src/embeddingbuddy/ui/callbacks/data_processing.py @@ -97,10 +97,9 @@ class DataProcessingCallbacks: # Register collapsible section callbacks self._register_collapse_callbacks() - def _register_opensearch_callbacks(self, section_type, opensearch_client): """Register callbacks for a specific section (data or prompts).""" - + @callback( Output(f"{section_type}-auth-collapse", "is_open"), [Input(f"{section_type}-auth-toggle", "n_clicks")], @@ -144,9 +143,23 @@ class DataProcessingCallbacks: ], prevent_initial_call=True, ) - def test_opensearch_connection(n_clicks, url, index_name, username, password, api_key): + def test_opensearch_connection( + n_clicks, url, index_name, username, password, api_key + ): if not n_clicks or not url or not index_name: - return no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update + return ( + no_update, + no_update, + no_update, + no_update, + no_update, + no_update, + no_update, + no_update, + no_update, + no_update, + no_update, + ) # Test connection success, message = opensearch_client.connect( @@ -173,7 +186,9 @@ class DataProcessingCallbacks: ) # Analyze fields - success, field_analysis, analysis_message = opensearch_client.analyze_fields(index_name) + success, field_analysis, analysis_message = ( + opensearch_client.analyze_fields(index_name) + ) if not success: return ( @@ -194,8 +209,11 @@ class DataProcessingCallbacks: field_suggestions = FieldMapper.suggest_mappings(field_analysis) from ...ui.components.datasource import DataSourceComponent + datasource = DataSourceComponent() - field_mapping_ui = datasource.create_field_mapping_interface(field_suggestions, section_type) + field_mapping_ui = datasource.create_field_mapping_interface( + field_suggestions, section_type + ) return ( self._create_status_alert(f"✅ {message}", "success"), @@ -203,16 +221,36 @@ class DataProcessingCallbacks: {"display": "block"}, {"display": "block"}, False, - [{"label": field, "value": field} for field in field_suggestions.get("embedding", [])], - [{"label": field, "value": field} for field in field_suggestions.get("text", [])], - [{"label": field, "value": field} for field in field_suggestions.get("id", [])], - [{"label": field, "value": field} for field in field_suggestions.get("category", [])], - [{"label": field, "value": field} for field in field_suggestions.get("subcategory", [])], - [{"label": field, "value": field} for field in field_suggestions.get("tags", [])], + [ + {"label": field, "value": field} + for field in field_suggestions.get("embedding", []) + ], + [ + {"label": field, "value": field} + for field in field_suggestions.get("text", []) + ], + [ + {"label": field, "value": field} + for field in field_suggestions.get("id", []) + ], + [ + {"label": field, "value": field} + for field in field_suggestions.get("category", []) + ], + [ + {"label": field, "value": field} + for field in field_suggestions.get("subcategory", []) + ], + [ + {"label": field, "value": field} + for field in field_suggestions.get("tags", []) + ], ) # Determine output target based on section type - output_target = "processed-data" if section_type == "data" else "processed-prompts" + output_target = ( + "processed-data" if section_type == "data" else "processed-prompts" + ) @callback( [ @@ -235,8 +273,17 @@ class DataProcessingCallbacks: ], prevent_initial_call=True, ) - def load_opensearch_data(n_clicks, index_name, query_size, embedding_field, text_field, - id_field, category_field, subcategory_field, tags_field): + def load_opensearch_data( + n_clicks, + index_name, + query_size, + embedding_field, + text_field, + id_field, + category_field, + subcategory_field, + tags_field, + ): if not n_clicks or not index_name or not embedding_field or not text_field: return no_update, no_update, no_update, no_update, no_update @@ -248,14 +295,16 @@ class DataProcessingCallbacks: query_size = 1000 # Cap at reasonable maximum # Create field mapping - field_mapping = FieldMapper.create_mapping_from_dict({ - "embedding": embedding_field, - "text": text_field, - "id": id_field, - "category": category_field, - "subcategory": subcategory_field, - "tags": tags_field - }) + field_mapping = FieldMapper.create_mapping_from_dict( + { + "embedding": embedding_field, + "text": text_field, + "id": id_field, + "category": category_field, + "subcategory": subcategory_field, + "tags": tags_field, + } + ) # Fetch data from OpenSearch success, raw_documents, message = opensearch_client.fetch_data( @@ -268,11 +317,13 @@ class DataProcessingCallbacks: "", False, f"❌ Failed to fetch {section_type}: {message}", - True + True, ) # Process the data - processed_data = self.processor.process_opensearch_data(raw_documents, field_mapping) + processed_data = self.processor.process_opensearch_data( + raw_documents, field_mapping + ) if processed_data.error: return ( @@ -280,7 +331,7 @@ class DataProcessingCallbacks: "", False, f"❌ {section_type.title()} processing error: {processed_data.error}", - True + True, ) success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch" @@ -290,27 +341,29 @@ class DataProcessingCallbacks: return ( { "documents": [ - self._document_to_dict(doc) for doc in processed_data.documents + self._document_to_dict(doc) + for doc in processed_data.documents ], "embeddings": processed_data.embeddings.tolist(), }, success_message, True, "", - False + False, ) else: # prompts return ( { "prompts": [ - self._document_to_dict(doc) for doc in processed_data.documents + self._document_to_dict(doc) + for doc in processed_data.documents ], "embeddings": processed_data.embeddings.tolist(), }, success_message, True, "", - False + False, ) except Exception as e: @@ -367,7 +420,7 @@ class DataProcessingCallbacks: def _register_collapse_callbacks(self): """Register callbacks for collapsible sections.""" - + # Data section collapse callback @callback( [ @@ -381,7 +434,11 @@ class DataProcessingCallbacks: def toggle_data_collapse(n_clicks, is_open): if n_clicks: new_state = not is_open - icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2" + icon_class = ( + "fas fa-chevron-down me-2" + if new_state + else "fas fa-chevron-right me-2" + ) return new_state, icon_class return is_open, "fas fa-chevron-down me-2" @@ -398,7 +455,11 @@ class DataProcessingCallbacks: def toggle_prompts_collapse(n_clicks, is_open): if n_clicks: new_state = not is_open - icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2" + icon_class = ( + "fas fa-chevron-down me-2" + if new_state + else "fas fa-chevron-right me-2" + ) return new_state, icon_class return is_open, "fas fa-chevron-down me-2" diff --git a/src/embeddingbuddy/ui/components/datasource.py b/src/embeddingbuddy/ui/components/datasource.py index 93b375d..2d4c630 100644 --- a/src/embeddingbuddy/ui/components/datasource.py +++ b/src/embeddingbuddy/ui/components/datasource.py @@ -43,194 +43,314 @@ class DataSourceComponent: return html.Div( [ # Data Section - dbc.Card([ - dbc.CardHeader([ - dbc.Button( + dbc.Card( + [ + dbc.CardHeader( [ - html.I(className="fas fa-chevron-down me-2", id="data-collapse-icon"), - "📄 Documents/Data" - ], - id="data-collapse-toggle", - color="link", - className="text-start p-0 w-100 text-decoration-none", - style={"border": "none", "font-size": "1.25rem", "font-weight": "500"} + dbc.Button( + [ + html.I( + className="fas fa-chevron-down me-2", + id="data-collapse-icon", + ), + "📄 Documents/Data", + ], + id="data-collapse-toggle", + color="link", + className="text-start p-0 w-100 text-decoration-none", + style={ + "border": "none", + "font-size": "1.25rem", + "font-weight": "500", + }, + ), + ] ), - ]), - dbc.Collapse([ - dbc.CardBody([ - self._create_opensearch_section("data") - ]) - ], id="data-collapse", is_open=True) - ], className="mb-4"), - + dbc.Collapse( + [dbc.CardBody([self._create_opensearch_section("data")])], + id="data-collapse", + is_open=True, + ), + ], + className="mb-4", + ), # Prompts Section - dbc.Card([ - dbc.CardHeader([ - dbc.Button( + dbc.Card( + [ + dbc.CardHeader( [ - html.I(className="fas fa-chevron-down me-2", id="prompts-collapse-icon"), - "💬 Prompts" - ], - id="prompts-collapse-toggle", - color="link", - className="text-start p-0 w-100 text-decoration-none", - style={"border": "none", "font-size": "1.25rem", "font-weight": "500"} + dbc.Button( + [ + html.I( + className="fas fa-chevron-down me-2", + id="prompts-collapse-icon", + ), + "💬 Prompts", + ], + id="prompts-collapse-toggle", + color="link", + className="text-start p-0 w-100 text-decoration-none", + style={ + "border": "none", + "font-size": "1.25rem", + "font-weight": "500", + }, + ), + ] ), - ]), - dbc.Collapse([ - dbc.CardBody([ - self._create_opensearch_section("prompts") - ]) - ], id="prompts-collapse", is_open=True) - ], className="mb-4"), - + dbc.Collapse( + [ + dbc.CardBody( + [self._create_opensearch_section("prompts")] + ) + ], + id="prompts-collapse", + is_open=True, + ), + ], + className="mb-4", + ), # Hidden dropdowns to prevent callback errors (for both sections) - html.Div([ - # Data dropdowns (hidden sync targets) - dcc.Dropdown(id="data-embedding-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="data-text-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="data-id-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="data-category-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="data-subcategory-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="data-tags-field-dropdown", style={"display": "none"}), - # Data UI dropdowns (hidden placeholders) - dcc.Dropdown(id="data-embedding-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="data-text-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="data-id-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="data-category-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="data-subcategory-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="data-tags-field-dropdown-ui", style={"display": "none"}), - # Prompts dropdowns (hidden sync targets) - dcc.Dropdown(id="prompts-embedding-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="prompts-text-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="prompts-id-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="prompts-category-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="prompts-subcategory-field-dropdown", style={"display": "none"}), - dcc.Dropdown(id="prompts-tags-field-dropdown", style={"display": "none"}), - # Prompts UI dropdowns (hidden placeholders) - dcc.Dropdown(id="prompts-embedding-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="prompts-text-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="prompts-id-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="prompts-category-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="prompts-subcategory-field-dropdown-ui", style={"display": "none"}), - dcc.Dropdown(id="prompts-tags-field-dropdown-ui", style={"display": "none"}), - ], style={"display": "none"}), + html.Div( + [ + # Data dropdowns (hidden sync targets) + dcc.Dropdown( + id="data-embedding-field-dropdown", + style={"display": "none"}, + ), + dcc.Dropdown( + id="data-text-field-dropdown", style={"display": "none"} + ), + dcc.Dropdown( + id="data-id-field-dropdown", style={"display": "none"} + ), + dcc.Dropdown( + id="data-category-field-dropdown", style={"display": "none"} + ), + dcc.Dropdown( + id="data-subcategory-field-dropdown", + style={"display": "none"}, + ), + dcc.Dropdown( + id="data-tags-field-dropdown", style={"display": "none"} + ), + # Data UI dropdowns (hidden placeholders) + dcc.Dropdown( + id="data-embedding-field-dropdown-ui", + style={"display": "none"}, + ), + dcc.Dropdown( + id="data-text-field-dropdown-ui", style={"display": "none"} + ), + dcc.Dropdown( + id="data-id-field-dropdown-ui", style={"display": "none"} + ), + dcc.Dropdown( + id="data-category-field-dropdown-ui", + style={"display": "none"}, + ), + dcc.Dropdown( + id="data-subcategory-field-dropdown-ui", + style={"display": "none"}, + ), + dcc.Dropdown( + id="data-tags-field-dropdown-ui", style={"display": "none"} + ), + # Prompts dropdowns (hidden sync targets) + dcc.Dropdown( + id="prompts-embedding-field-dropdown", + style={"display": "none"}, + ), + dcc.Dropdown( + id="prompts-text-field-dropdown", style={"display": "none"} + ), + dcc.Dropdown( + id="prompts-id-field-dropdown", style={"display": "none"} + ), + dcc.Dropdown( + id="prompts-category-field-dropdown", + style={"display": "none"}, + ), + dcc.Dropdown( + id="prompts-subcategory-field-dropdown", + style={"display": "none"}, + ), + dcc.Dropdown( + id="prompts-tags-field-dropdown", style={"display": "none"} + ), + # Prompts UI dropdowns (hidden placeholders) + dcc.Dropdown( + id="prompts-embedding-field-dropdown-ui", + style={"display": "none"}, + ), + dcc.Dropdown( + id="prompts-text-field-dropdown-ui", + style={"display": "none"}, + ), + dcc.Dropdown( + id="prompts-id-field-dropdown-ui", style={"display": "none"} + ), + dcc.Dropdown( + id="prompts-category-field-dropdown-ui", + style={"display": "none"}, + ), + dcc.Dropdown( + id="prompts-subcategory-field-dropdown-ui", + style={"display": "none"}, + ), + dcc.Dropdown( + id="prompts-tags-field-dropdown-ui", + style={"display": "none"}, + ), + ], + style={"display": "none"}, + ), ] ) def _create_opensearch_section(self, section_type): """Create a complete OpenSearch section for either 'data' or 'prompts'.""" section_id = section_type # 'data' or 'prompts' - - return html.Div([ - # Connection section - html.H6("Connection", className="mb-2"), - dbc.Row([ - dbc.Col([ - dbc.Label("OpenSearch URL:"), - dbc.Input( - id=f"{section_id}-opensearch-url", - type="text", - placeholder="https://opensearch.example.com:9200", - className="mb-2", - ), - ], width=12), - ]), - - dbc.Row([ - dbc.Col([ - dbc.Label("Index Name:"), - dbc.Input( - id=f"{section_id}-opensearch-index", - type="text", - placeholder="my-embeddings-index", - className="mb-2", - ), - ], width=6), - dbc.Col([ - dbc.Label("Query Size:"), - dbc.Input( - id=f"{section_id}-opensearch-query-size", - type="number", - value=100, - min=1, - max=1000, - placeholder="100", - className="mb-2", - ), - ], width=6), - ]), - - dbc.Row([ - dbc.Col([ - dbc.Button( - "Test Connection", - id=f"{section_id}-test-connection-btn", - color="primary", - className="mb-3", - ), - ], width=12), - ]), - - # Authentication section (collapsible) - dbc.Collapse([ - html.Hr(), - html.H6("Authentication (Optional)", className="mb-2"), - dbc.Row([ - dbc.Col([ - dbc.Label("Username:"), + + return html.Div( + [ + # Connection section + html.H6("Connection", className="mb-2"), + dbc.Row( + [ + dbc.Col( + [ + dbc.Label("OpenSearch URL:"), + dbc.Input( + id=f"{section_id}-opensearch-url", + type="text", + placeholder="https://opensearch.example.com:9200", + className="mb-2", + ), + ], + width=12, + ), + ] + ), + dbc.Row( + [ + dbc.Col( + [ + dbc.Label("Index Name:"), + dbc.Input( + id=f"{section_id}-opensearch-index", + type="text", + placeholder="my-embeddings-index", + className="mb-2", + ), + ], + width=6, + ), + dbc.Col( + [ + dbc.Label("Query Size:"), + dbc.Input( + id=f"{section_id}-opensearch-query-size", + type="number", + value=100, + min=1, + max=1000, + placeholder="100", + className="mb-2", + ), + ], + width=6, + ), + ] + ), + dbc.Row( + [ + dbc.Col( + [ + dbc.Button( + "Test Connection", + id=f"{section_id}-test-connection-btn", + color="primary", + className="mb-3", + ), + ], + width=12, + ), + ] + ), + # Authentication section (collapsible) + dbc.Collapse( + [ + html.Hr(), + html.H6("Authentication (Optional)", className="mb-2"), + dbc.Row( + [ + dbc.Col( + [ + dbc.Label("Username:"), + dbc.Input( + id=f"{section_id}-opensearch-username", + type="text", + className="mb-2", + ), + ], + width=6, + ), + dbc.Col( + [ + dbc.Label("Password:"), + dbc.Input( + id=f"{section_id}-opensearch-password", + type="password", + className="mb-2", + ), + ], + width=6, + ), + ] + ), + dbc.Label("OR"), dbc.Input( - id=f"{section_id}-opensearch-username", + id=f"{section_id}-opensearch-api-key", type="text", + placeholder="API Key", className="mb-2", ), - ], width=6), - dbc.Col([ - dbc.Label("Password:"), - dbc.Input( - id=f"{section_id}-opensearch-password", - type="password", - className="mb-2", - ), - ], width=6), - ]), - dbc.Label("OR"), - dbc.Input( - id=f"{section_id}-opensearch-api-key", - type="text", - placeholder="API Key", - className="mb-2", + ], + id=f"{section_id}-auth-collapse", + is_open=False, ), - ], id=f"{section_id}-auth-collapse", is_open=False), - - dbc.Button( - "Show Authentication", - id=f"{section_id}-auth-toggle", - color="link", - size="sm", - className="p-0 mb-3", - ), - - # Connection status - html.Div(id=f"{section_id}-connection-status", className="mb-3"), - - # Field mapping section (hidden initially) - html.Div(id=f"{section_id}-field-mapping-section", style={"display": "none"}), - - # Load data button (hidden initially) - html.Div([ dbc.Button( - f"Load {section_type.title()}", - id=f"{section_id}-load-opensearch-data-btn", - color="success", - className="mb-2", - disabled=True, + "Show Authentication", + id=f"{section_id}-auth-toggle", + color="link", + size="sm", + className="p-0 mb-3", ), - ], id=f"{section_id}-load-data-section", style={"display": "none"}), - - # OpenSearch status/results - html.Div(id=f"{section_id}-opensearch-status", className="mb-3"), - ]) + # Connection status + html.Div(id=f"{section_id}-connection-status", className="mb-3"), + # Field mapping section (hidden initially) + html.Div( + id=f"{section_id}-field-mapping-section", style={"display": "none"} + ), + # Load data button (hidden initially) + html.Div( + [ + dbc.Button( + f"Load {section_type.title()}", + id=f"{section_id}-load-opensearch-data-btn", + color="success", + className="mb-2", + disabled=True, + ), + ], + id=f"{section_id}-load-data-section", + style={"display": "none"}, + ), + # OpenSearch status/results + html.Div(id=f"{section_id}-opensearch-status", className="mb-3"), + ] + ) def create_field_mapping_interface(self, field_suggestions, section_type="data"): """Create field mapping interface based on detected fields.""" @@ -254,9 +374,13 @@ class DataSourceComponent: id=f"{section_type}-embedding-field-dropdown-ui", options=[ {"label": field, "value": field} - for field in field_suggestions.get("embedding", []) + for field in field_suggestions.get( + "embedding", [] + ) ], - value=field_suggestions.get("embedding", [None])[0], # Default to first suggestion + value=field_suggestions.get("embedding", [None])[ + 0 + ], # Default to first suggestion placeholder="Select embedding field...", className="mb-2", ), @@ -274,7 +398,9 @@ class DataSourceComponent: {"label": field, "value": field} for field in field_suggestions.get("text", []) ], - value=field_suggestions.get("text", [None])[0], # Default to first suggestion + value=field_suggestions.get("text", [None])[ + 0 + ], # Default to first suggestion placeholder="Select text field...", className="mb-2", ), @@ -296,7 +422,9 @@ class DataSourceComponent: {"label": field, "value": field} for field in field_suggestions.get("id", []) ], - value=field_suggestions.get("id", [None])[0], # Default to first suggestion + value=field_suggestions.get("id", [None])[ + 0 + ], # Default to first suggestion placeholder="Select ID field...", className="mb-2", ), @@ -310,9 +438,13 @@ class DataSourceComponent: id=f"{section_type}-category-field-dropdown-ui", options=[ {"label": field, "value": field} - for field in field_suggestions.get("category", []) + for field in field_suggestions.get( + "category", [] + ) ], - value=field_suggestions.get("category", [None])[0], # Default to first suggestion + value=field_suggestions.get("category", [None])[ + 0 + ], # Default to first suggestion placeholder="Select category field...", className="mb-2", ), @@ -330,9 +462,13 @@ class DataSourceComponent: id=f"{section_type}-subcategory-field-dropdown-ui", options=[ {"label": field, "value": field} - for field in field_suggestions.get("subcategory", []) + for field in field_suggestions.get( + "subcategory", [] + ) ], - value=field_suggestions.get("subcategory", [None])[0], # Default to first suggestion + value=field_suggestions.get("subcategory", [None])[ + 0 + ], # Default to first suggestion placeholder="Select subcategory field...", className="mb-2", ), @@ -348,7 +484,9 @@ class DataSourceComponent: {"label": field, "value": field} for field in field_suggestions.get("tags", []) ], - value=field_suggestions.get("tags", [None])[0], # Default to first suggestion + value=field_suggestions.get("tags", [None])[ + 0 + ], # Default to first suggestion placeholder="Select tags field...", className="mb-2", ), diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index b862a59..4f8b869 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -99,22 +99,57 @@ class TestFieldMapper: "text_fields": ["content", "description"], "keyword_fields": ["doc_id", "category", "type", "tags"], "numeric_fields": ["count"], - "all_fields": ["embedding", "content", "description", "doc_id", "category", "type", "tags", "count"], + "all_fields": [ + "embedding", + "content", + "description", + "doc_id", + "category", + "type", + "tags", + "count", + ], } suggestions = FieldMapper.suggest_mappings(field_analysis) # Check that all dropdowns contain all fields - all_fields = ["embedding", "content", "description", "doc_id", "category", "type", "tags", "count"] - for field_type in ["embedding", "text", "id", "category", "subcategory", "tags"]: + all_fields = [ + "embedding", + "content", + "description", + "doc_id", + "category", + "type", + "tags", + "count", + ] + for field_type in [ + "embedding", + "text", + "id", + "category", + "subcategory", + "tags", + ]: for field in all_fields: - assert field in suggestions[field_type], f"Field '{field}' missing from {field_type} suggestions" + assert field in suggestions[field_type], ( + f"Field '{field}' missing from {field_type} suggestions" + ) # Check that best candidates are first - assert suggestions["embedding"][0] == "embedding" # vector field should be first - assert suggestions["text"][0] in ["content", "description"] # text fields should be first + assert ( + suggestions["embedding"][0] == "embedding" + ) # vector field should be first + assert suggestions["text"][0] in [ + "content", + "description", + ] # text fields should be first assert suggestions["id"][0] == "doc_id" # ID-like field should be first - assert suggestions["category"][0] in ["category", "type"] # category-like field should be first + assert suggestions["category"][0] in [ + "category", + "type", + ] # category-like field should be first assert suggestions["tags"][0] == "tags" # tags field should be first def test_suggest_mappings_name_based_embedding(self): @@ -124,19 +159,48 @@ class TestFieldMapper: "text_fields": ["content", "description"], "keyword_fields": ["doc_id", "category", "type", "tags"], "numeric_fields": ["count"], - "all_fields": ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"], + "all_fields": [ + "content", + "description", + "doc_id", + "category", + "embedding", + "type", + "tags", + "count", + ], } suggestions = FieldMapper.suggest_mappings(field_analysis) # Check that 'embedding' field is prioritized despite not being detected as vector type - assert suggestions["embedding"][0] == "embedding", "Field named 'embedding' should be first priority" - + assert suggestions["embedding"][0] == "embedding", ( + "Field named 'embedding' should be first priority" + ) + # Check that all fields are still available - all_fields = ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"] - for field_type in ["embedding", "text", "id", "category", "subcategory", "tags"]: + all_fields = [ + "content", + "description", + "doc_id", + "category", + "embedding", + "type", + "tags", + "count", + ] + for field_type in [ + "embedding", + "text", + "id", + "category", + "subcategory", + "tags", + ]: for field in all_fields: - assert field in suggestions[field_type], f"Field '{field}' missing from {field_type} suggestions" + assert field in suggestions[field_type], ( + f"Field '{field}' missing from {field_type} suggestions" + ) def test_validate_mapping_success(self): mapping = FieldMapping( diff --git a/uv.lock b/uv.lock index f6632ce..0bff82e 100644 --- a/uv.lock +++ b/uv.lock @@ -412,7 +412,7 @@ wheels = [ [[package]] name = "embeddingbuddy" -version = "0.2.0" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "dash" },