opensearch load improvements
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 44s
Test Suite / lint (pull_request) Failing after 32s
Security Scan / security (pull_request) Successful in 45s
Test Suite / test (3.11) (pull_request) Successful in 1m31s
Test Suite / build (pull_request) Has been skipped

This commit is contained in:
2025-08-14 14:30:52 -07:00
parent 9cf2f0e6fa
commit 09e3c86f0a
4 changed files with 375 additions and 232 deletions

View File

@@ -39,7 +39,6 @@ class FieldMapper:
vector_fields = [vf["name"] for vf in field_analysis.get("vector_fields", [])] vector_fields = [vf["name"] for vf in field_analysis.get("vector_fields", [])]
text_fields = field_analysis.get("text_fields", []) text_fields = field_analysis.get("text_fields", [])
keyword_fields = field_analysis.get("keyword_fields", []) keyword_fields = field_analysis.get("keyword_fields", [])
numeric_fields = field_analysis.get("numeric_fields", [])
# Helper function to create ordered suggestions # Helper function to create ordered suggestions
def create_ordered_suggestions(primary_candidates, all_available_fields): def create_ordered_suggestions(primary_candidates, all_available_fields):
@@ -57,8 +56,16 @@ class FieldMapper:
suggestions = {} suggestions = {}
# Embedding field suggestions (vector fields first, then all fields) # Embedding field suggestions (vector fields first, then name-based candidates, then all fields)
embedding_candidates = vector_fields.copy() embedding_candidates = vector_fields.copy()
# Add fields that likely contain embeddings based on name
embedding_name_candidates = [f for f in all_fields if any(
keyword in f.lower() for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"]
)]
# Add name-based candidates that aren't already in vector_fields
for candidate in embedding_name_candidates:
if candidate not in embedding_candidates:
embedding_candidates.append(candidate)
suggestions["embedding"] = create_ordered_suggestions(embedding_candidates, all_fields) suggestions["embedding"] = create_ordered_suggestions(embedding_candidates, all_fields)
# Text field suggestions (text fields first, then all fields) # Text field suggestions (text fields first, then all fields)

View File

@@ -8,7 +8,8 @@ from ...config.settings import AppSettings
class DataProcessingCallbacks: class DataProcessingCallbacks:
def __init__(self): def __init__(self):
self.processor = DataProcessor() self.processor = DataProcessor()
self.opensearch_client = OpenSearchClient() self.opensearch_client_data = OpenSearchClient() # For data/documents
self.opensearch_client_prompts = OpenSearchClient() # For prompts
self._register_callbacks() self._register_callbacks()
def _register_callbacks(self): def _register_callbacks(self):
@@ -89,10 +90,21 @@ class DataProcessingCallbacks:
else: else:
return [datasource.create_file_upload_tab()] return [datasource.create_file_upload_tab()]
# Register callbacks for both data and prompts sections
self._register_opensearch_callbacks("data", self.opensearch_client_data)
self._register_opensearch_callbacks("prompts", self.opensearch_client_prompts)
# Register collapsible section callbacks
self._register_collapse_callbacks()
def _register_opensearch_callbacks(self, section_type, opensearch_client):
"""Register callbacks for a specific section (data or prompts)."""
@callback( @callback(
Output("auth-collapse", "is_open"), Output(f"{section_type}-auth-collapse", "is_open"),
[Input("auth-toggle", "n_clicks")], [Input(f"{section_type}-auth-toggle", "n_clicks")],
[State("auth-collapse", "is_open")], [State(f"{section_type}-auth-collapse", "is_open")],
prevent_initial_call=True, prevent_initial_call=True,
) )
def toggle_auth(n_clicks, is_open): def toggle_auth(n_clicks, is_open):
@@ -101,8 +113,8 @@ class DataProcessingCallbacks:
return is_open return is_open
@callback( @callback(
Output("auth-toggle", "children"), Output(f"{section_type}-auth-toggle", "children"),
[Input("auth-collapse", "is_open")], [Input(f"{section_type}-auth-collapse", "is_open")],
prevent_initial_call=False, prevent_initial_call=False,
) )
def update_auth_button_text(is_open): def update_auth_button_text(is_open):
@@ -110,36 +122,34 @@ class DataProcessingCallbacks:
@callback( @callback(
[ [
Output("connection-status", "children"), Output(f"{section_type}-connection-status", "children"),
Output("field-mapping-section", "children"), Output(f"{section_type}-field-mapping-section", "children"),
Output("field-mapping-section", "style"), Output(f"{section_type}-field-mapping-section", "style"),
Output("load-data-section", "style"), Output(f"{section_type}-load-data-section", "style"),
Output("load-opensearch-data-btn", "disabled"), Output(f"{section_type}-load-opensearch-data-btn", "disabled"),
Output("embedding-field-dropdown", "options"), Output(f"{section_type}-embedding-field-dropdown", "options"),
Output("text-field-dropdown", "options"), Output(f"{section_type}-text-field-dropdown", "options"),
Output("id-field-dropdown", "options"), Output(f"{section_type}-id-field-dropdown", "options"),
Output("category-field-dropdown", "options"), Output(f"{section_type}-category-field-dropdown", "options"),
Output("subcategory-field-dropdown", "options"), Output(f"{section_type}-subcategory-field-dropdown", "options"),
Output("tags-field-dropdown", "options"), Output(f"{section_type}-tags-field-dropdown", "options"),
], ],
[Input("test-connection-btn", "n_clicks")], [Input(f"{section_type}-test-connection-btn", "n_clicks")],
[ [
State("opensearch-url", "value"), State(f"{section_type}-opensearch-url", "value"),
State("opensearch-index", "value"), State(f"{section_type}-opensearch-index", "value"),
State("opensearch-username", "value"), State(f"{section_type}-opensearch-username", "value"),
State("opensearch-password", "value"), State(f"{section_type}-opensearch-password", "value"),
State("opensearch-api-key", "value"), State(f"{section_type}-opensearch-api-key", "value"),
], ],
prevent_initial_call=True, prevent_initial_call=True,
) )
def test_opensearch_connection( def test_opensearch_connection(n_clicks, url, index_name, username, password, api_key):
n_clicks, url, index_name, username, password, api_key
):
if not n_clicks or not url or not index_name: if not n_clicks or not url or not index_name:
return no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update return no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update
# Test connection # Test connection
success, message = self.opensearch_client.connect( success, message = opensearch_client.connect(
url=url, url=url,
username=username, username=username,
password=password, password=password,
@@ -163,9 +173,7 @@ class DataProcessingCallbacks:
) )
# Analyze fields # Analyze fields
success, field_analysis, analysis_message = ( success, field_analysis, analysis_message = opensearch_client.analyze_fields(index_name)
self.opensearch_client.analyze_fields(index_name)
)
if not success: if not success:
return ( return (
@@ -186,11 +194,8 @@ class DataProcessingCallbacks:
field_suggestions = FieldMapper.suggest_mappings(field_analysis) field_suggestions = FieldMapper.suggest_mappings(field_analysis)
from ...ui.components.datasource import DataSourceComponent from ...ui.components.datasource import DataSourceComponent
datasource = DataSourceComponent() datasource = DataSourceComponent()
field_mapping_ui = datasource.create_field_mapping_interface( field_mapping_ui = datasource.create_field_mapping_interface(field_suggestions, section_type)
field_suggestions
)
return ( return (
self._create_status_alert(f"{message}", "success"), self._create_status_alert(f"{message}", "success"),
@@ -206,55 +211,55 @@ class DataProcessingCallbacks:
[{"label": field, "value": field} for field in field_suggestions.get("tags", [])], [{"label": field, "value": field} for field in field_suggestions.get("tags", [])],
) )
# Determine output target based on section type
output_target = "processed-data" if section_type == "data" else "processed-prompts"
@callback( @callback(
[ [
Output("processed-data", "data", allow_duplicate=True), Output(output_target, "data", allow_duplicate=True),
Output("opensearch-success-alert", "children", allow_duplicate=True), Output("opensearch-success-alert", "children", allow_duplicate=True),
Output("opensearch-success-alert", "is_open", allow_duplicate=True), Output("opensearch-success-alert", "is_open", allow_duplicate=True),
Output("opensearch-error-alert", "children", allow_duplicate=True), Output("opensearch-error-alert", "children", allow_duplicate=True),
Output("opensearch-error-alert", "is_open", allow_duplicate=True), Output("opensearch-error-alert", "is_open", allow_duplicate=True),
], ],
[Input("load-opensearch-data-btn", "n_clicks")], [Input(f"{section_type}-load-opensearch-data-btn", "n_clicks")],
[ [
State("opensearch-index", "value"), State(f"{section_type}-opensearch-index", "value"),
State("embedding-field-dropdown", "value"), State(f"{section_type}-opensearch-query-size", "value"),
State("text-field-dropdown", "value"), State(f"{section_type}-embedding-field-dropdown-ui", "value"),
State("id-field-dropdown", "value"), State(f"{section_type}-text-field-dropdown-ui", "value"),
State("category-field-dropdown", "value"), State(f"{section_type}-id-field-dropdown-ui", "value"),
State("subcategory-field-dropdown", "value"), State(f"{section_type}-category-field-dropdown-ui", "value"),
State("tags-field-dropdown", "value"), State(f"{section_type}-subcategory-field-dropdown-ui", "value"),
State(f"{section_type}-tags-field-dropdown-ui", "value"),
], ],
prevent_initial_call=True, prevent_initial_call=True,
) )
def load_opensearch_data( def load_opensearch_data(n_clicks, index_name, query_size, embedding_field, text_field,
n_clicks, id_field, category_field, subcategory_field, tags_field):
index_name,
embedding_field,
text_field,
id_field,
category_field,
subcategory_field,
tags_field,
):
if not n_clicks or not index_name or not embedding_field or not text_field: if not n_clicks or not index_name or not embedding_field or not text_field:
return no_update, no_update, no_update, no_update, no_update return no_update, no_update, no_update, no_update, no_update
try: try:
# Validate and set query size
if not query_size or query_size < 1:
query_size = AppSettings.OPENSEARCH_DEFAULT_SIZE
elif query_size > 1000:
query_size = 1000 # Cap at reasonable maximum
# Create field mapping # Create field mapping
field_mapping = FieldMapper.create_mapping_from_dict( field_mapping = FieldMapper.create_mapping_from_dict({
{
"embedding": embedding_field, "embedding": embedding_field,
"text": text_field, "text": text_field,
"id": id_field, "id": id_field,
"category": category_field, "category": category_field,
"subcategory": subcategory_field, "subcategory": subcategory_field,
"tags": tags_field, "tags": tags_field
} })
)
# Fetch data from OpenSearch # Fetch data from OpenSearch
success, raw_documents, message = self.opensearch_client.fetch_data( success, raw_documents, message = opensearch_client.fetch_data(
index_name, size=AppSettings.OPENSEARCH_DEFAULT_SIZE index_name, size=query_size
) )
if not success: if not success:
@@ -262,38 +267,50 @@ class DataProcessingCallbacks:
no_update, no_update,
"", "",
False, False,
f"❌ Failed to fetch data: {message}", f"❌ Failed to fetch {section_type}: {message}",
True, True
) )
# Process the data # Process the data
processed_data = self.processor.process_opensearch_data( processed_data = self.processor.process_opensearch_data(raw_documents, field_mapping)
raw_documents, field_mapping
)
if processed_data.error: if processed_data.error:
return ( return (
{"error": processed_data.error}, {"error": processed_data.error},
"", "",
False, False,
f"Data processing error: {processed_data.error}", f"{section_type.title()} processing error: {processed_data.error}",
True, True
) )
success_message = f"✅ Successfully loaded {len(processed_data.documents)} documents from OpenSearch" success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch"
# Format for appropriate target (data vs prompts)
if section_type == "data":
return ( return (
{ {
"documents": [ "documents": [
self._document_to_dict(doc) self._document_to_dict(doc) for doc in processed_data.documents
for doc in processed_data.documents
], ],
"embeddings": processed_data.embeddings.tolist(), "embeddings": processed_data.embeddings.tolist(),
}, },
success_message, success_message,
True, True,
"", "",
False, False
)
else: # prompts
return (
{
"prompts": [
self._document_to_dict(doc) for doc in processed_data.documents
],
"embeddings": processed_data.embeddings.tolist(),
},
success_message,
True,
"",
False
) )
except Exception as e: except Exception as e:
@@ -301,53 +318,90 @@ class DataProcessingCallbacks:
# Sync callbacks to update hidden dropdowns from UI dropdowns # Sync callbacks to update hidden dropdowns from UI dropdowns
@callback( @callback(
Output("embedding-field-dropdown", "value"), Output(f"{section_type}-embedding-field-dropdown", "value"),
Input("embedding-field-dropdown-ui", "value"), Input(f"{section_type}-embedding-field-dropdown-ui", "value"),
prevent_initial_call=True, prevent_initial_call=True,
) )
def sync_embedding_dropdown(value): def sync_embedding_dropdown(value):
return value return value
@callback( @callback(
Output("text-field-dropdown", "value"), Output(f"{section_type}-text-field-dropdown", "value"),
Input("text-field-dropdown-ui", "value"), Input(f"{section_type}-text-field-dropdown-ui", "value"),
prevent_initial_call=True, prevent_initial_call=True,
) )
def sync_text_dropdown(value): def sync_text_dropdown(value):
return value return value
@callback( @callback(
Output("id-field-dropdown", "value"), Output(f"{section_type}-id-field-dropdown", "value"),
Input("id-field-dropdown-ui", "value"), Input(f"{section_type}-id-field-dropdown-ui", "value"),
prevent_initial_call=True, prevent_initial_call=True,
) )
def sync_id_dropdown(value): def sync_id_dropdown(value):
return value return value
@callback( @callback(
Output("category-field-dropdown", "value"), Output(f"{section_type}-category-field-dropdown", "value"),
Input("category-field-dropdown-ui", "value"), Input(f"{section_type}-category-field-dropdown-ui", "value"),
prevent_initial_call=True, prevent_initial_call=True,
) )
def sync_category_dropdown(value): def sync_category_dropdown(value):
return value return value
@callback( @callback(
Output("subcategory-field-dropdown", "value"), Output(f"{section_type}-subcategory-field-dropdown", "value"),
Input("subcategory-field-dropdown-ui", "value"), Input(f"{section_type}-subcategory-field-dropdown-ui", "value"),
prevent_initial_call=True, prevent_initial_call=True,
) )
def sync_subcategory_dropdown(value): def sync_subcategory_dropdown(value):
return value return value
@callback( @callback(
Output("tags-field-dropdown", "value"), Output(f"{section_type}-tags-field-dropdown", "value"),
Input("tags-field-dropdown-ui", "value"), Input(f"{section_type}-tags-field-dropdown-ui", "value"),
prevent_initial_call=True, prevent_initial_call=True,
) )
def sync_tags_dropdown(value): def sync_tags_dropdown(value):
return value return value
def _register_collapse_callbacks(self):
    """Register the expand/collapse callbacks for the Data and Prompts cards.

    One callback is registered per section ("data" and "prompts"). Each is
    triggered by ``n_clicks`` on ``<section>-collapse-toggle``, reads the
    current ``is_open`` of ``<section>-collapse`` as state, and returns the
    new ``is_open`` value together with the ``className`` for
    ``<section>-collapse-icon``.
    """
    icon_open = "fas fa-chevron-down me-2"
    icon_closed = "fas fa-chevron-right me-2"

    # Both sections behave identically and differ only in their id prefix,
    # so register them from one template instead of two copy-pasted bodies.
    # The section name is consumed by the decorator arguments at definition
    # time; the callback body itself does not close over the loop variable.
    for section in ("data", "prompts"):

        @callback(
            [
                Output(f"{section}-collapse", "is_open"),
                Output(f"{section}-collapse-icon", "className"),
            ],
            [Input(f"{section}-collapse-toggle", "n_clicks")],
            [State(f"{section}-collapse", "is_open")],
            prevent_initial_call=True,
        )
        def toggle_collapse(n_clicks, is_open):
            # Flip only on a real click; otherwise keep the current state.
            new_state = (not is_open) if n_clicks else is_open
            # Derive the chevron from the state actually returned so the
            # icon can never disagree with the collapse (the previous
            # fallback always reported the "open" chevron).
            return new_state, icon_open if new_state else icon_closed
@staticmethod @staticmethod
def _document_to_dict(doc): def _document_to_dict(doc):
return { return {

View File

@@ -39,139 +39,200 @@ class DataSourceComponent:
) )
def create_opensearch_tab(self): def create_opensearch_tab(self):
"""Create OpenSearch tab content.""" """Create OpenSearch tab content with separate Data and Prompts sections."""
return html.Div( return html.Div(
[ [
# Data Section
dbc.Card([
dbc.CardHeader([
dbc.Button(
[
html.I(className="fas fa-chevron-down me-2", id="data-collapse-icon"),
"📄 Documents/Data"
],
id="data-collapse-toggle",
color="link",
className="text-start p-0 w-100 text-decoration-none",
style={"border": "none", "font-size": "1.25rem", "font-weight": "500"}
),
]),
dbc.Collapse([
dbc.CardBody([
self._create_opensearch_section("data")
])
], id="data-collapse", is_open=True)
], className="mb-4"),
# Prompts Section
dbc.Card([
dbc.CardHeader([
dbc.Button(
[
html.I(className="fas fa-chevron-down me-2", id="prompts-collapse-icon"),
"💬 Prompts"
],
id="prompts-collapse-toggle",
color="link",
className="text-start p-0 w-100 text-decoration-none",
style={"border": "none", "font-size": "1.25rem", "font-weight": "500"}
),
]),
dbc.Collapse([
dbc.CardBody([
self._create_opensearch_section("prompts")
])
], id="prompts-collapse", is_open=True)
], className="mb-4"),
# Hidden dropdowns to prevent callback errors (for both sections)
html.Div([
# Data dropdowns (hidden sync targets)
dcc.Dropdown(id="data-embedding-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="data-text-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="data-id-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="data-category-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="data-subcategory-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="data-tags-field-dropdown", style={"display": "none"}),
# Data UI dropdowns (hidden placeholders)
dcc.Dropdown(id="data-embedding-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="data-text-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="data-id-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="data-category-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="data-subcategory-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="data-tags-field-dropdown-ui", style={"display": "none"}),
# Prompts dropdowns (hidden sync targets)
dcc.Dropdown(id="prompts-embedding-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="prompts-text-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="prompts-id-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="prompts-category-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="prompts-subcategory-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="prompts-tags-field-dropdown", style={"display": "none"}),
# Prompts UI dropdowns (hidden placeholders)
dcc.Dropdown(id="prompts-embedding-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="prompts-text-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="prompts-id-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="prompts-category-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="prompts-subcategory-field-dropdown-ui", style={"display": "none"}),
dcc.Dropdown(id="prompts-tags-field-dropdown-ui", style={"display": "none"}),
], style={"display": "none"}),
]
)
def _create_opensearch_section(self, section_type):
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""
section_id = section_type # 'data' or 'prompts'
return html.Div([
# Connection section # Connection section
html.H6("Connection", className="mb-2"), html.H6("Connection", className="mb-2"),
dbc.Row( dbc.Row([
[ dbc.Col([
dbc.Col(
[
dbc.Label("OpenSearch URL:"), dbc.Label("OpenSearch URL:"),
dbc.Input( dbc.Input(
id="opensearch-url", id=f"{section_id}-opensearch-url",
type="text", type="text",
placeholder="https://opensearch.example.com:9200", placeholder="https://opensearch.example.com:9200",
className="mb-2", className="mb-2",
), ),
], ], width=12),
width=12, ]),
),
] dbc.Row([
), dbc.Col([
dbc.Row(
[
dbc.Col(
[
dbc.Label("Index Name:"), dbc.Label("Index Name:"),
dbc.Input( dbc.Input(
id="opensearch-index", id=f"{section_id}-opensearch-index",
type="text", type="text",
placeholder="my-embeddings-index", placeholder="my-embeddings-index",
className="mb-2", className="mb-2",
), ),
], ], width=6),
width=6, dbc.Col([
dbc.Label("Query Size:"),
dbc.Input(
id=f"{section_id}-opensearch-query-size",
type="number",
value=100,
min=1,
max=1000,
placeholder="100",
className="mb-2",
), ),
dbc.Col( ], width=6),
[ ]),
dbc.Row([
dbc.Col([
dbc.Button( dbc.Button(
"Test Connection", "Test Connection",
id="test-connection-btn", id=f"{section_id}-test-connection-btn",
color="primary", color="primary",
size="sm", className="mb-3",
className="mt-4",
),
],
width=6,
className="d-flex align-items-end",
),
]
), ),
], width=12),
]),
# Authentication section (collapsible) # Authentication section (collapsible)
dbc.Collapse( dbc.Collapse([
[
html.Hr(), html.Hr(),
html.H6("Authentication (Optional)", className="mb-2"), html.H6("Authentication (Optional)", className="mb-2"),
dbc.Row( dbc.Row([
[ dbc.Col([
dbc.Col(
[
dbc.Label("Username:"), dbc.Label("Username:"),
dbc.Input( dbc.Input(
id="opensearch-username", id=f"{section_id}-opensearch-username",
type="text", type="text",
className="mb-2", className="mb-2",
), ),
], ], width=6),
width=6, dbc.Col([
),
dbc.Col(
[
dbc.Label("Password:"), dbc.Label("Password:"),
dbc.Input( dbc.Input(
id="opensearch-password", id=f"{section_id}-opensearch-password",
type="password", type="password",
className="mb-2", className="mb-2",
), ),
], ], width=6),
width=6, ]),
),
]
),
dbc.Label("OR"), dbc.Label("OR"),
dbc.Input( dbc.Input(
id="opensearch-api-key", id=f"{section_id}-opensearch-api-key",
type="text", type="text",
placeholder="API Key", placeholder="API Key",
className="mb-2", className="mb-2",
), ),
], ], id=f"{section_id}-auth-collapse", is_open=False),
id="auth-collapse",
is_open=False,
),
dbc.Button( dbc.Button(
"Show Authentication", "Show Authentication",
id="auth-toggle", id=f"{section_id}-auth-toggle",
color="link", color="link",
size="sm", size="sm",
className="p-0 mb-3", className="p-0 mb-3",
), ),
# Connection status
html.Div(id="connection-status", className="mb-3"),
# Field mapping section (hidden initially)
html.Div(id="field-mapping-section", style={"display": "none"}),
# Hidden dropdowns to prevent callback errors # Connection status
html.Div([ html.Div(id=f"{section_id}-connection-status", className="mb-3"),
dcc.Dropdown(id="embedding-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="text-field-dropdown", style={"display": "none"}), # Field mapping section (hidden initially)
dcc.Dropdown(id="id-field-dropdown", style={"display": "none"}), html.Div(id=f"{section_id}-field-mapping-section", style={"display": "none"}),
dcc.Dropdown(id="category-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="subcategory-field-dropdown", style={"display": "none"}),
dcc.Dropdown(id="tags-field-dropdown", style={"display": "none"}),
], style={"display": "none"}),
# Load data button (hidden initially) # Load data button (hidden initially)
html.Div( html.Div([
[
dbc.Button( dbc.Button(
"Load Data", f"Load {section_type.title()}",
id="load-opensearch-data-btn", id=f"{section_id}-load-opensearch-data-btn",
color="success", color="success",
className="mb-2", className="mb-2",
disabled=True, disabled=True,
), ),
], ], id=f"{section_id}-load-data-section", style={"display": "none"}),
id="load-data-section",
style={"display": "none"},
),
# OpenSearch status/results
html.Div(id="opensearch-status", className="mb-3"),
]
)
def create_field_mapping_interface(self, field_suggestions): # OpenSearch status/results
html.Div(id=f"{section_id}-opensearch-status", className="mb-3"),
])
def create_field_mapping_interface(self, field_suggestions, section_type="data"):
"""Create field mapping interface based on detected fields.""" """Create field mapping interface based on detected fields."""
return html.Div( return html.Div(
[ [
@@ -190,7 +251,7 @@ class DataSourceComponent:
"Embedding Field (required):", className="fw-bold" "Embedding Field (required):", className="fw-bold"
), ),
dcc.Dropdown( dcc.Dropdown(
id="embedding-field-dropdown-ui", id=f"{section_type}-embedding-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("embedding", []) for field in field_suggestions.get("embedding", [])
@@ -208,7 +269,7 @@ class DataSourceComponent:
"Text Field (required):", className="fw-bold" "Text Field (required):", className="fw-bold"
), ),
dcc.Dropdown( dcc.Dropdown(
id="text-field-dropdown-ui", id=f"{section_type}-text-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("text", []) for field in field_suggestions.get("text", [])
@@ -230,7 +291,7 @@ class DataSourceComponent:
[ [
dbc.Label("ID Field:"), dbc.Label("ID Field:"),
dcc.Dropdown( dcc.Dropdown(
id="id-field-dropdown-ui", id=f"{section_type}-id-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("id", []) for field in field_suggestions.get("id", [])
@@ -246,7 +307,7 @@ class DataSourceComponent:
[ [
dbc.Label("Category Field:"), dbc.Label("Category Field:"),
dcc.Dropdown( dcc.Dropdown(
id="category-field-dropdown-ui", id=f"{section_type}-category-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("category", []) for field in field_suggestions.get("category", [])
@@ -266,7 +327,7 @@ class DataSourceComponent:
[ [
dbc.Label("Subcategory Field:"), dbc.Label("Subcategory Field:"),
dcc.Dropdown( dcc.Dropdown(
id="subcategory-field-dropdown-ui", id=f"{section_type}-subcategory-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("subcategory", []) for field in field_suggestions.get("subcategory", [])
@@ -282,7 +343,7 @@ class DataSourceComponent:
[ [
dbc.Label("Tags Field:"), dbc.Label("Tags Field:"),
dcc.Dropdown( dcc.Dropdown(
id="tags-field-dropdown-ui", id=f"{section_type}-tags-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("tags", []) for field in field_suggestions.get("tags", [])

View File

@@ -117,6 +117,27 @@ class TestFieldMapper:
assert suggestions["category"][0] in ["category", "type"] # category-like field should be first assert suggestions["category"][0] in ["category", "type"] # category-like field should be first
assert suggestions["tags"][0] == "tags" # tags field should be first assert suggestions["tags"][0] == "tags" # tags field should be first
def test_suggest_mappings_name_based_embedding(self):
    """A field literally named 'embedding' ranks first without any detected vector field.

    Also checks that every known field is still offered in each suggestion list.
    """
    expected_fields = [
        "content",
        "description",
        "doc_id",
        "category",
        "embedding",
        "type",
        "tags",
        "count",
    ]
    analysis = {
        "vector_fields": [],  # nothing was detected as a vector type
        "text_fields": ["content", "description"],
        "keyword_fields": ["doc_id", "category", "type", "tags"],
        "numeric_fields": ["count"],
        # Pass a copy so the expectations below stay independent of the input.
        "all_fields": list(expected_fields),
    }

    result = FieldMapper.suggest_mappings(analysis)

    # The name-based match must win the top slot for the embedding mapping.
    top_embedding = result["embedding"][0]
    assert top_embedding == "embedding", "Field named 'embedding' should be first priority"

    # Prioritisation must not drop any field from any suggestion list.
    mapping_kinds = ("embedding", "text", "id", "category", "subcategory", "tags")
    for kind in mapping_kinds:
        offered = result[kind]
        for name in expected_fields:
            assert name in offered, f"Field '{name}' missing from {kind} suggestions"
def test_validate_mapping_success(self): def test_validate_mapping_success(self):
mapping = FieldMapping( mapping = FieldMapping(
embedding_field="embedding", text_field="text", id_field="doc_id" embedding_field="embedding", text_field="text", id_field="doc_id"