fix formatting and bump version to v0.3.0
All checks were successful
Security Scan / dependency-check (pull_request) Successful in 44s
Test Suite / lint (pull_request) Successful in 34s
Test Suite / build (pull_request) Successful in 38s
Security Scan / security (pull_request) Successful in 49s
Test Suite / test (3.11) (pull_request) Successful in 1m32s

This commit is contained in:
2025-08-14 19:02:17 -07:00
parent 09e3c86f0a
commit 1b6845774b
7 changed files with 537 additions and 246 deletions

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "embeddingbuddy" name = "embeddingbuddy"
version = "0.2.0" version = "0.3.0"
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques." description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"

View File

@@ -59,42 +59,70 @@ class FieldMapper:
# Embedding field suggestions (vector fields first, then name-based candidates, then all fields) # Embedding field suggestions (vector fields first, then name-based candidates, then all fields)
embedding_candidates = vector_fields.copy() embedding_candidates = vector_fields.copy()
# Add fields that likely contain embeddings based on name # Add fields that likely contain embeddings based on name
embedding_name_candidates = [f for f in all_fields if any( embedding_name_candidates = [
keyword in f.lower() for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"] f
)] for f in all_fields
if any(
keyword in f.lower()
for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"]
)
]
# Add name-based candidates that aren't already in vector_fields # Add name-based candidates that aren't already in vector_fields
for candidate in embedding_name_candidates: for candidate in embedding_name_candidates:
if candidate not in embedding_candidates: if candidate not in embedding_candidates:
embedding_candidates.append(candidate) embedding_candidates.append(candidate)
suggestions["embedding"] = create_ordered_suggestions(embedding_candidates, all_fields) suggestions["embedding"] = create_ordered_suggestions(
embedding_candidates, all_fields
)
# Text field suggestions (text fields first, then all fields) # Text field suggestions (text fields first, then all fields)
text_candidates = text_fields.copy() text_candidates = text_fields.copy()
suggestions["text"] = create_ordered_suggestions(text_candidates, all_fields) suggestions["text"] = create_ordered_suggestions(text_candidates, all_fields)
# ID field suggestions (ID-like fields first, then all fields) # ID field suggestions (ID-like fields first, then all fields)
id_candidates = [f for f in keyword_fields if any( id_candidates = [
keyword in f.lower() for keyword in ["id", "_id", "doc", "document"] f
)] for f in keyword_fields
if any(keyword in f.lower() for keyword in ["id", "_id", "doc", "document"])
]
id_candidates.append("_id") # _id is always available id_candidates.append("_id") # _id is always available
suggestions["id"] = create_ordered_suggestions(id_candidates, all_fields) suggestions["id"] = create_ordered_suggestions(id_candidates, all_fields)
# Category field suggestions (category-like fields first, then all fields) # Category field suggestions (category-like fields first, then all fields)
category_candidates = [f for f in keyword_fields if any( category_candidates = [
keyword in f.lower() for keyword in ["category", "class", "type", "label"] f
)] for f in keyword_fields
suggestions["category"] = create_ordered_suggestions(category_candidates, all_fields) if any(
keyword in f.lower()
for keyword in ["category", "class", "type", "label"]
)
]
suggestions["category"] = create_ordered_suggestions(
category_candidates, all_fields
)
# Subcategory field suggestions (subcategory-like fields first, then all fields) # Subcategory field suggestions (subcategory-like fields first, then all fields)
subcategory_candidates = [f for f in keyword_fields if any( subcategory_candidates = [
keyword in f.lower() for keyword in ["subcategory", "subclass", "subtype", "subtopic"] f
)] for f in keyword_fields
suggestions["subcategory"] = create_ordered_suggestions(subcategory_candidates, all_fields) if any(
keyword in f.lower()
for keyword in ["subcategory", "subclass", "subtype", "subtopic"]
)
]
suggestions["subcategory"] = create_ordered_suggestions(
subcategory_candidates, all_fields
)
# Tags field suggestions (tag-like fields first, then all fields) # Tags field suggestions (tag-like fields first, then all fields)
tags_candidates = [f for f in keyword_fields if any( tags_candidates = [
keyword in f.lower() for keyword in ["tag", "tags", "keyword", "keywords"] f
)] for f in keyword_fields
if any(
keyword in f.lower()
for keyword in ["tag", "tags", "keyword", "keywords"]
)
]
suggestions["tags"] = create_ordered_suggestions(tags_candidates, all_fields) suggestions["tags"] = create_ordered_suggestions(tags_candidates, all_fields)
return suggestions return suggestions

View File

@@ -97,7 +97,6 @@ class DataProcessingCallbacks:
# Register collapsible section callbacks # Register collapsible section callbacks
self._register_collapse_callbacks() self._register_collapse_callbacks()
def _register_opensearch_callbacks(self, section_type, opensearch_client): def _register_opensearch_callbacks(self, section_type, opensearch_client):
"""Register callbacks for a specific section (data or prompts).""" """Register callbacks for a specific section (data or prompts)."""
@@ -144,9 +143,23 @@ class DataProcessingCallbacks:
], ],
prevent_initial_call=True, prevent_initial_call=True,
) )
def test_opensearch_connection(n_clicks, url, index_name, username, password, api_key): def test_opensearch_connection(
n_clicks, url, index_name, username, password, api_key
):
if not n_clicks or not url or not index_name: if not n_clicks or not url or not index_name:
return no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update return (
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
)
# Test connection # Test connection
success, message = opensearch_client.connect( success, message = opensearch_client.connect(
@@ -173,7 +186,9 @@ class DataProcessingCallbacks:
) )
# Analyze fields # Analyze fields
success, field_analysis, analysis_message = opensearch_client.analyze_fields(index_name) success, field_analysis, analysis_message = (
opensearch_client.analyze_fields(index_name)
)
if not success: if not success:
return ( return (
@@ -194,8 +209,11 @@ class DataProcessingCallbacks:
field_suggestions = FieldMapper.suggest_mappings(field_analysis) field_suggestions = FieldMapper.suggest_mappings(field_analysis)
from ...ui.components.datasource import DataSourceComponent from ...ui.components.datasource import DataSourceComponent
datasource = DataSourceComponent() datasource = DataSourceComponent()
field_mapping_ui = datasource.create_field_mapping_interface(field_suggestions, section_type) field_mapping_ui = datasource.create_field_mapping_interface(
field_suggestions, section_type
)
return ( return (
self._create_status_alert(f"{message}", "success"), self._create_status_alert(f"{message}", "success"),
@@ -203,16 +221,36 @@ class DataProcessingCallbacks:
{"display": "block"}, {"display": "block"},
{"display": "block"}, {"display": "block"},
False, False,
[{"label": field, "value": field} for field in field_suggestions.get("embedding", [])], [
[{"label": field, "value": field} for field in field_suggestions.get("text", [])], {"label": field, "value": field}
[{"label": field, "value": field} for field in field_suggestions.get("id", [])], for field in field_suggestions.get("embedding", [])
[{"label": field, "value": field} for field in field_suggestions.get("category", [])], ],
[{"label": field, "value": field} for field in field_suggestions.get("subcategory", [])], [
[{"label": field, "value": field} for field in field_suggestions.get("tags", [])], {"label": field, "value": field}
for field in field_suggestions.get("text", [])
],
[
{"label": field, "value": field}
for field in field_suggestions.get("id", [])
],
[
{"label": field, "value": field}
for field in field_suggestions.get("category", [])
],
[
{"label": field, "value": field}
for field in field_suggestions.get("subcategory", [])
],
[
{"label": field, "value": field}
for field in field_suggestions.get("tags", [])
],
) )
# Determine output target based on section type # Determine output target based on section type
output_target = "processed-data" if section_type == "data" else "processed-prompts" output_target = (
"processed-data" if section_type == "data" else "processed-prompts"
)
@callback( @callback(
[ [
@@ -235,8 +273,17 @@ class DataProcessingCallbacks:
], ],
prevent_initial_call=True, prevent_initial_call=True,
) )
def load_opensearch_data(n_clicks, index_name, query_size, embedding_field, text_field, def load_opensearch_data(
id_field, category_field, subcategory_field, tags_field): n_clicks,
index_name,
query_size,
embedding_field,
text_field,
id_field,
category_field,
subcategory_field,
tags_field,
):
if not n_clicks or not index_name or not embedding_field or not text_field: if not n_clicks or not index_name or not embedding_field or not text_field:
return no_update, no_update, no_update, no_update, no_update return no_update, no_update, no_update, no_update, no_update
@@ -248,14 +295,16 @@ class DataProcessingCallbacks:
query_size = 1000 # Cap at reasonable maximum query_size = 1000 # Cap at reasonable maximum
# Create field mapping # Create field mapping
field_mapping = FieldMapper.create_mapping_from_dict({ field_mapping = FieldMapper.create_mapping_from_dict(
{
"embedding": embedding_field, "embedding": embedding_field,
"text": text_field, "text": text_field,
"id": id_field, "id": id_field,
"category": category_field, "category": category_field,
"subcategory": subcategory_field, "subcategory": subcategory_field,
"tags": tags_field "tags": tags_field,
}) }
)
# Fetch data from OpenSearch # Fetch data from OpenSearch
success, raw_documents, message = opensearch_client.fetch_data( success, raw_documents, message = opensearch_client.fetch_data(
@@ -268,11 +317,13 @@ class DataProcessingCallbacks:
"", "",
False, False,
f"❌ Failed to fetch {section_type}: {message}", f"❌ Failed to fetch {section_type}: {message}",
True True,
) )
# Process the data # Process the data
processed_data = self.processor.process_opensearch_data(raw_documents, field_mapping) processed_data = self.processor.process_opensearch_data(
raw_documents, field_mapping
)
if processed_data.error: if processed_data.error:
return ( return (
@@ -280,7 +331,7 @@ class DataProcessingCallbacks:
"", "",
False, False,
f"{section_type.title()} processing error: {processed_data.error}", f"{section_type.title()} processing error: {processed_data.error}",
True True,
) )
success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch" success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch"
@@ -290,27 +341,29 @@ class DataProcessingCallbacks:
return ( return (
{ {
"documents": [ "documents": [
self._document_to_dict(doc) for doc in processed_data.documents self._document_to_dict(doc)
for doc in processed_data.documents
], ],
"embeddings": processed_data.embeddings.tolist(), "embeddings": processed_data.embeddings.tolist(),
}, },
success_message, success_message,
True, True,
"", "",
False False,
) )
else: # prompts else: # prompts
return ( return (
{ {
"prompts": [ "prompts": [
self._document_to_dict(doc) for doc in processed_data.documents self._document_to_dict(doc)
for doc in processed_data.documents
], ],
"embeddings": processed_data.embeddings.tolist(), "embeddings": processed_data.embeddings.tolist(),
}, },
success_message, success_message,
True, True,
"", "",
False False,
) )
except Exception as e: except Exception as e:
@@ -381,7 +434,11 @@ class DataProcessingCallbacks:
def toggle_data_collapse(n_clicks, is_open): def toggle_data_collapse(n_clicks, is_open):
if n_clicks: if n_clicks:
new_state = not is_open new_state = not is_open
icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2" icon_class = (
"fas fa-chevron-down me-2"
if new_state
else "fas fa-chevron-right me-2"
)
return new_state, icon_class return new_state, icon_class
return is_open, "fas fa-chevron-down me-2" return is_open, "fas fa-chevron-down me-2"
@@ -398,7 +455,11 @@ class DataProcessingCallbacks:
def toggle_prompts_collapse(n_clicks, is_open): def toggle_prompts_collapse(n_clicks, is_open):
if n_clicks: if n_clicks:
new_state = not is_open new_state = not is_open
icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2" icon_class = (
"fas fa-chevron-down me-2"
if new_state
else "fas fa-chevron-right me-2"
)
return new_state, icon_class return new_state, icon_class
return is_open, "fas fa-chevron-down me-2" return is_open, "fas fa-chevron-down me-2"

View File

@@ -43,78 +43,168 @@ class DataSourceComponent:
return html.Div( return html.Div(
[ [
# Data Section # Data Section
dbc.Card([ dbc.Card(
dbc.CardHeader([ [
dbc.CardHeader(
[
dbc.Button( dbc.Button(
[ [
html.I(className="fas fa-chevron-down me-2", id="data-collapse-icon"), html.I(
"📄 Documents/Data" className="fas fa-chevron-down me-2",
id="data-collapse-icon",
),
"📄 Documents/Data",
], ],
id="data-collapse-toggle", id="data-collapse-toggle",
color="link", color="link",
className="text-start p-0 w-100 text-decoration-none", className="text-start p-0 w-100 text-decoration-none",
style={"border": "none", "font-size": "1.25rem", "font-weight": "500"} style={
"border": "none",
"font-size": "1.25rem",
"font-weight": "500",
},
),
]
),
dbc.Collapse(
[dbc.CardBody([self._create_opensearch_section("data")])],
id="data-collapse",
is_open=True,
),
],
className="mb-4",
), ),
]),
dbc.Collapse([
dbc.CardBody([
self._create_opensearch_section("data")
])
], id="data-collapse", is_open=True)
], className="mb-4"),
# Prompts Section # Prompts Section
dbc.Card([ dbc.Card(
dbc.CardHeader([ [
dbc.CardHeader(
[
dbc.Button( dbc.Button(
[ [
html.I(className="fas fa-chevron-down me-2", id="prompts-collapse-icon"), html.I(
"💬 Prompts" className="fas fa-chevron-down me-2",
id="prompts-collapse-icon",
),
"💬 Prompts",
], ],
id="prompts-collapse-toggle", id="prompts-collapse-toggle",
color="link", color="link",
className="text-start p-0 w-100 text-decoration-none", className="text-start p-0 w-100 text-decoration-none",
style={"border": "none", "font-size": "1.25rem", "font-weight": "500"} style={
"border": "none",
"font-size": "1.25rem",
"font-weight": "500",
},
),
]
),
dbc.Collapse(
[
dbc.CardBody(
[self._create_opensearch_section("prompts")]
)
],
id="prompts-collapse",
is_open=True,
),
],
className="mb-4",
), ),
]),
dbc.Collapse([
dbc.CardBody([
self._create_opensearch_section("prompts")
])
], id="prompts-collapse", is_open=True)
], className="mb-4"),
# Hidden dropdowns to prevent callback errors (for both sections) # Hidden dropdowns to prevent callback errors (for both sections)
html.Div([ html.Div(
[
# Data dropdowns (hidden sync targets) # Data dropdowns (hidden sync targets)
dcc.Dropdown(id="data-embedding-field-dropdown", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="data-text-field-dropdown", style={"display": "none"}), id="data-embedding-field-dropdown",
dcc.Dropdown(id="data-id-field-dropdown", style={"display": "none"}), style={"display": "none"},
dcc.Dropdown(id="data-category-field-dropdown", style={"display": "none"}), ),
dcc.Dropdown(id="data-subcategory-field-dropdown", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="data-tags-field-dropdown", style={"display": "none"}), id="data-text-field-dropdown", style={"display": "none"}
),
dcc.Dropdown(
id="data-id-field-dropdown", style={"display": "none"}
),
dcc.Dropdown(
id="data-category-field-dropdown", style={"display": "none"}
),
dcc.Dropdown(
id="data-subcategory-field-dropdown",
style={"display": "none"},
),
dcc.Dropdown(
id="data-tags-field-dropdown", style={"display": "none"}
),
# Data UI dropdowns (hidden placeholders) # Data UI dropdowns (hidden placeholders)
dcc.Dropdown(id="data-embedding-field-dropdown-ui", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="data-text-field-dropdown-ui", style={"display": "none"}), id="data-embedding-field-dropdown-ui",
dcc.Dropdown(id="data-id-field-dropdown-ui", style={"display": "none"}), style={"display": "none"},
dcc.Dropdown(id="data-category-field-dropdown-ui", style={"display": "none"}), ),
dcc.Dropdown(id="data-subcategory-field-dropdown-ui", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="data-tags-field-dropdown-ui", style={"display": "none"}), id="data-text-field-dropdown-ui", style={"display": "none"}
),
dcc.Dropdown(
id="data-id-field-dropdown-ui", style={"display": "none"}
),
dcc.Dropdown(
id="data-category-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="data-subcategory-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="data-tags-field-dropdown-ui", style={"display": "none"}
),
# Prompts dropdowns (hidden sync targets) # Prompts dropdowns (hidden sync targets)
dcc.Dropdown(id="prompts-embedding-field-dropdown", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="prompts-text-field-dropdown", style={"display": "none"}), id="prompts-embedding-field-dropdown",
dcc.Dropdown(id="prompts-id-field-dropdown", style={"display": "none"}), style={"display": "none"},
dcc.Dropdown(id="prompts-category-field-dropdown", style={"display": "none"}), ),
dcc.Dropdown(id="prompts-subcategory-field-dropdown", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="prompts-tags-field-dropdown", style={"display": "none"}), id="prompts-text-field-dropdown", style={"display": "none"}
),
dcc.Dropdown(
id="prompts-id-field-dropdown", style={"display": "none"}
),
dcc.Dropdown(
id="prompts-category-field-dropdown",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-subcategory-field-dropdown",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-tags-field-dropdown", style={"display": "none"}
),
# Prompts UI dropdowns (hidden placeholders) # Prompts UI dropdowns (hidden placeholders)
dcc.Dropdown(id="prompts-embedding-field-dropdown-ui", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="prompts-text-field-dropdown-ui", style={"display": "none"}), id="prompts-embedding-field-dropdown-ui",
dcc.Dropdown(id="prompts-id-field-dropdown-ui", style={"display": "none"}), style={"display": "none"},
dcc.Dropdown(id="prompts-category-field-dropdown-ui", style={"display": "none"}), ),
dcc.Dropdown(id="prompts-subcategory-field-dropdown-ui", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="prompts-tags-field-dropdown-ui", style={"display": "none"}), id="prompts-text-field-dropdown-ui",
], style={"display": "none"}), style={"display": "none"},
),
dcc.Dropdown(
id="prompts-id-field-dropdown-ui", style={"display": "none"}
),
dcc.Dropdown(
id="prompts-category-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-subcategory-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-tags-field-dropdown-ui",
style={"display": "none"},
),
],
style={"display": "none"},
),
] ]
) )
@@ -122,11 +212,14 @@ class DataSourceComponent:
"""Create a complete OpenSearch section for either 'data' or 'prompts'.""" """Create a complete OpenSearch section for either 'data' or 'prompts'."""
section_id = section_type # 'data' or 'prompts' section_id = section_type # 'data' or 'prompts'
return html.Div([ return html.Div(
[
# Connection section # Connection section
html.H6("Connection", className="mb-2"), html.H6("Connection", className="mb-2"),
dbc.Row([ dbc.Row(
dbc.Col([ [
dbc.Col(
[
dbc.Label("OpenSearch URL:"), dbc.Label("OpenSearch URL:"),
dbc.Input( dbc.Input(
id=f"{section_id}-opensearch-url", id=f"{section_id}-opensearch-url",
@@ -134,11 +227,15 @@ class DataSourceComponent:
placeholder="https://opensearch.example.com:9200", placeholder="https://opensearch.example.com:9200",
className="mb-2", className="mb-2",
), ),
], width=12), ],
]), width=12,
),
dbc.Row([ ]
dbc.Col([ ),
dbc.Row(
[
dbc.Col(
[
dbc.Label("Index Name:"), dbc.Label("Index Name:"),
dbc.Input( dbc.Input(
id=f"{section_id}-opensearch-index", id=f"{section_id}-opensearch-index",
@@ -146,8 +243,11 @@ class DataSourceComponent:
placeholder="my-embeddings-index", placeholder="my-embeddings-index",
className="mb-2", className="mb-2",
), ),
], width=6), ],
dbc.Col([ width=6,
),
dbc.Col(
[
dbc.Label("Query Size:"), dbc.Label("Query Size:"),
dbc.Input( dbc.Input(
id=f"{section_id}-opensearch-query-size", id=f"{section_id}-opensearch-query-size",
@@ -158,42 +258,57 @@ class DataSourceComponent:
placeholder="100", placeholder="100",
className="mb-2", className="mb-2",
), ),
], width=6), ],
]), width=6,
),
dbc.Row([ ]
dbc.Col([ ),
dbc.Row(
[
dbc.Col(
[
dbc.Button( dbc.Button(
"Test Connection", "Test Connection",
id=f"{section_id}-test-connection-btn", id=f"{section_id}-test-connection-btn",
color="primary", color="primary",
className="mb-3", className="mb-3",
), ),
], width=12), ],
]), width=12,
),
]
),
# Authentication section (collapsible) # Authentication section (collapsible)
dbc.Collapse([ dbc.Collapse(
[
html.Hr(), html.Hr(),
html.H6("Authentication (Optional)", className="mb-2"), html.H6("Authentication (Optional)", className="mb-2"),
dbc.Row([ dbc.Row(
dbc.Col([ [
dbc.Col(
[
dbc.Label("Username:"), dbc.Label("Username:"),
dbc.Input( dbc.Input(
id=f"{section_id}-opensearch-username", id=f"{section_id}-opensearch-username",
type="text", type="text",
className="mb-2", className="mb-2",
), ),
], width=6), ],
dbc.Col([ width=6,
),
dbc.Col(
[
dbc.Label("Password:"), dbc.Label("Password:"),
dbc.Input( dbc.Input(
id=f"{section_id}-opensearch-password", id=f"{section_id}-opensearch-password",
type="password", type="password",
className="mb-2", className="mb-2",
), ),
], width=6), ],
]), width=6,
),
]
),
dbc.Label("OR"), dbc.Label("OR"),
dbc.Input( dbc.Input(
id=f"{section_id}-opensearch-api-key", id=f"{section_id}-opensearch-api-key",
@@ -201,8 +316,10 @@ class DataSourceComponent:
placeholder="API Key", placeholder="API Key",
className="mb-2", className="mb-2",
), ),
], id=f"{section_id}-auth-collapse", is_open=False), ],
id=f"{section_id}-auth-collapse",
is_open=False,
),
dbc.Button( dbc.Button(
"Show Authentication", "Show Authentication",
id=f"{section_id}-auth-toggle", id=f"{section_id}-auth-toggle",
@@ -210,15 +327,15 @@ class DataSourceComponent:
size="sm", size="sm",
className="p-0 mb-3", className="p-0 mb-3",
), ),
# Connection status # Connection status
html.Div(id=f"{section_id}-connection-status", className="mb-3"), html.Div(id=f"{section_id}-connection-status", className="mb-3"),
# Field mapping section (hidden initially) # Field mapping section (hidden initially)
html.Div(id=f"{section_id}-field-mapping-section", style={"display": "none"}), html.Div(
id=f"{section_id}-field-mapping-section", style={"display": "none"}
),
# Load data button (hidden initially) # Load data button (hidden initially)
html.Div([ html.Div(
[
dbc.Button( dbc.Button(
f"Load {section_type.title()}", f"Load {section_type.title()}",
id=f"{section_id}-load-opensearch-data-btn", id=f"{section_id}-load-opensearch-data-btn",
@@ -226,11 +343,14 @@ class DataSourceComponent:
className="mb-2", className="mb-2",
disabled=True, disabled=True,
), ),
], id=f"{section_id}-load-data-section", style={"display": "none"}), ],
id=f"{section_id}-load-data-section",
style={"display": "none"},
),
# OpenSearch status/results # OpenSearch status/results
html.Div(id=f"{section_id}-opensearch-status", className="mb-3"), html.Div(id=f"{section_id}-opensearch-status", className="mb-3"),
]) ]
)
def create_field_mapping_interface(self, field_suggestions, section_type="data"): def create_field_mapping_interface(self, field_suggestions, section_type="data"):
"""Create field mapping interface based on detected fields.""" """Create field mapping interface based on detected fields."""
@@ -254,9 +374,13 @@ class DataSourceComponent:
id=f"{section_type}-embedding-field-dropdown-ui", id=f"{section_type}-embedding-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("embedding", []) for field in field_suggestions.get(
"embedding", []
)
], ],
value=field_suggestions.get("embedding", [None])[0], # Default to first suggestion value=field_suggestions.get("embedding", [None])[
0
], # Default to first suggestion
placeholder="Select embedding field...", placeholder="Select embedding field...",
className="mb-2", className="mb-2",
), ),
@@ -274,7 +398,9 @@ class DataSourceComponent:
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("text", []) for field in field_suggestions.get("text", [])
], ],
value=field_suggestions.get("text", [None])[0], # Default to first suggestion value=field_suggestions.get("text", [None])[
0
], # Default to first suggestion
placeholder="Select text field...", placeholder="Select text field...",
className="mb-2", className="mb-2",
), ),
@@ -296,7 +422,9 @@ class DataSourceComponent:
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("id", []) for field in field_suggestions.get("id", [])
], ],
value=field_suggestions.get("id", [None])[0], # Default to first suggestion value=field_suggestions.get("id", [None])[
0
], # Default to first suggestion
placeholder="Select ID field...", placeholder="Select ID field...",
className="mb-2", className="mb-2",
), ),
@@ -310,9 +438,13 @@ class DataSourceComponent:
id=f"{section_type}-category-field-dropdown-ui", id=f"{section_type}-category-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("category", []) for field in field_suggestions.get(
"category", []
)
], ],
value=field_suggestions.get("category", [None])[0], # Default to first suggestion value=field_suggestions.get("category", [None])[
0
], # Default to first suggestion
placeholder="Select category field...", placeholder="Select category field...",
className="mb-2", className="mb-2",
), ),
@@ -330,9 +462,13 @@ class DataSourceComponent:
id=f"{section_type}-subcategory-field-dropdown-ui", id=f"{section_type}-subcategory-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("subcategory", []) for field in field_suggestions.get(
"subcategory", []
)
], ],
value=field_suggestions.get("subcategory", [None])[0], # Default to first suggestion value=field_suggestions.get("subcategory", [None])[
0
], # Default to first suggestion
placeholder="Select subcategory field...", placeholder="Select subcategory field...",
className="mb-2", className="mb-2",
), ),
@@ -348,7 +484,9 @@ class DataSourceComponent:
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("tags", []) for field in field_suggestions.get("tags", [])
], ],
value=field_suggestions.get("tags", [None])[0], # Default to first suggestion value=field_suggestions.get("tags", [None])[
0
], # Default to first suggestion
placeholder="Select tags field...", placeholder="Select tags field...",
className="mb-2", className="mb-2",
), ),

View File

@@ -99,22 +99,57 @@ class TestFieldMapper:
"text_fields": ["content", "description"], "text_fields": ["content", "description"],
"keyword_fields": ["doc_id", "category", "type", "tags"], "keyword_fields": ["doc_id", "category", "type", "tags"],
"numeric_fields": ["count"], "numeric_fields": ["count"],
"all_fields": ["embedding", "content", "description", "doc_id", "category", "type", "tags", "count"], "all_fields": [
"embedding",
"content",
"description",
"doc_id",
"category",
"type",
"tags",
"count",
],
} }
suggestions = FieldMapper.suggest_mappings(field_analysis) suggestions = FieldMapper.suggest_mappings(field_analysis)
# Check that all dropdowns contain all fields # Check that all dropdowns contain all fields
all_fields = ["embedding", "content", "description", "doc_id", "category", "type", "tags", "count"] all_fields = [
for field_type in ["embedding", "text", "id", "category", "subcategory", "tags"]: "embedding",
"content",
"description",
"doc_id",
"category",
"type",
"tags",
"count",
]
for field_type in [
"embedding",
"text",
"id",
"category",
"subcategory",
"tags",
]:
for field in all_fields: for field in all_fields:
assert field in suggestions[field_type], f"Field '{field}' missing from {field_type} suggestions" assert field in suggestions[field_type], (
f"Field '{field}' missing from {field_type} suggestions"
)
# Check that best candidates are first # Check that best candidates are first
assert suggestions["embedding"][0] == "embedding" # vector field should be first assert (
assert suggestions["text"][0] in ["content", "description"] # text fields should be first suggestions["embedding"][0] == "embedding"
) # vector field should be first
assert suggestions["text"][0] in [
"content",
"description",
] # text fields should be first
assert suggestions["id"][0] == "doc_id" # ID-like field should be first assert suggestions["id"][0] == "doc_id" # ID-like field should be first
assert suggestions["category"][0] in ["category", "type"] # category-like field should be first assert suggestions["category"][0] in [
"category",
"type",
] # category-like field should be first
assert suggestions["tags"][0] == "tags" # tags field should be first assert suggestions["tags"][0] == "tags" # tags field should be first
def test_suggest_mappings_name_based_embedding(self): def test_suggest_mappings_name_based_embedding(self):
@@ -124,19 +159,48 @@ class TestFieldMapper:
"text_fields": ["content", "description"], "text_fields": ["content", "description"],
"keyword_fields": ["doc_id", "category", "type", "tags"], "keyword_fields": ["doc_id", "category", "type", "tags"],
"numeric_fields": ["count"], "numeric_fields": ["count"],
"all_fields": ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"], "all_fields": [
"content",
"description",
"doc_id",
"category",
"embedding",
"type",
"tags",
"count",
],
} }
suggestions = FieldMapper.suggest_mappings(field_analysis) suggestions = FieldMapper.suggest_mappings(field_analysis)
# Check that 'embedding' field is prioritized despite not being detected as vector type # Check that 'embedding' field is prioritized despite not being detected as vector type
assert suggestions["embedding"][0] == "embedding", "Field named 'embedding' should be first priority" assert suggestions["embedding"][0] == "embedding", (
"Field named 'embedding' should be first priority"
)
# Check that all fields are still available # Check that all fields are still available
all_fields = ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"] all_fields = [
for field_type in ["embedding", "text", "id", "category", "subcategory", "tags"]: "content",
"description",
"doc_id",
"category",
"embedding",
"type",
"tags",
"count",
]
for field_type in [
"embedding",
"text",
"id",
"category",
"subcategory",
"tags",
]:
for field in all_fields: for field in all_fields:
assert field in suggestions[field_type], f"Field '{field}' missing from {field_type} suggestions" assert field in suggestions[field_type], (
f"Field '{field}' missing from {field_type} suggestions"
)
def test_validate_mapping_success(self): def test_validate_mapping_success(self):
mapping = FieldMapping( mapping = FieldMapping(

2
uv.lock generated
View File

@@ -412,7 +412,7 @@ wheels = [
[[package]] [[package]]
name = "embeddingbuddy" name = "embeddingbuddy"
version = "0.2.0" version = "0.3.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "dash" }, { name = "dash" },