fix formatting and bump version to v0.3.0
All checks were successful
Security Scan / dependency-check (pull_request) Successful in 44s
Test Suite / lint (pull_request) Successful in 34s
Test Suite / build (pull_request) Successful in 38s
Security Scan / security (pull_request) Successful in 49s
Test Suite / test (3.11) (pull_request) Successful in 1m32s

This commit is contained in:
2025-08-14 19:02:17 -07:00
parent 09e3c86f0a
commit 1b6845774b
7 changed files with 537 additions and 246 deletions

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "embeddingbuddy" name = "embeddingbuddy"
version = "0.2.0" version = "0.3.0"
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques." description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"

View File

@@ -9,7 +9,7 @@ from .ui.callbacks.interactions import InteractionCallbacks
def create_app(): def create_app():
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Allow callbacks to components that are dynamically created in tabs # Allow callbacks to components that are dynamically created in tabs
app.config.suppress_callback_exceptions = True app.config.suppress_callback_exceptions = True

View File

@@ -25,7 +25,7 @@ class FieldMapper:
def suggest_mappings(field_analysis: Dict) -> Dict[str, List[str]]: def suggest_mappings(field_analysis: Dict) -> Dict[str, List[str]]:
""" """
Suggest field mappings based on field analysis. Suggest field mappings based on field analysis.
Each dropdown will show ALL available fields, but ordered by relevance Each dropdown will show ALL available fields, but ordered by relevance
with the most likely candidates first. with the most likely candidates first.
@@ -59,42 +59,70 @@ class FieldMapper:
# Embedding field suggestions (vector fields first, then name-based candidates, then all fields) # Embedding field suggestions (vector fields first, then name-based candidates, then all fields)
embedding_candidates = vector_fields.copy() embedding_candidates = vector_fields.copy()
# Add fields that likely contain embeddings based on name # Add fields that likely contain embeddings based on name
embedding_name_candidates = [f for f in all_fields if any( embedding_name_candidates = [
keyword in f.lower() for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"] f
)] for f in all_fields
if any(
keyword in f.lower()
for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"]
)
]
# Add name-based candidates that aren't already in vector_fields # Add name-based candidates that aren't already in vector_fields
for candidate in embedding_name_candidates: for candidate in embedding_name_candidates:
if candidate not in embedding_candidates: if candidate not in embedding_candidates:
embedding_candidates.append(candidate) embedding_candidates.append(candidate)
suggestions["embedding"] = create_ordered_suggestions(embedding_candidates, all_fields) suggestions["embedding"] = create_ordered_suggestions(
embedding_candidates, all_fields
)
# Text field suggestions (text fields first, then all fields) # Text field suggestions (text fields first, then all fields)
text_candidates = text_fields.copy() text_candidates = text_fields.copy()
suggestions["text"] = create_ordered_suggestions(text_candidates, all_fields) suggestions["text"] = create_ordered_suggestions(text_candidates, all_fields)
# ID field suggestions (ID-like fields first, then all fields) # ID field suggestions (ID-like fields first, then all fields)
id_candidates = [f for f in keyword_fields if any( id_candidates = [
keyword in f.lower() for keyword in ["id", "_id", "doc", "document"] f
)] for f in keyword_fields
if any(keyword in f.lower() for keyword in ["id", "_id", "doc", "document"])
]
id_candidates.append("_id") # _id is always available id_candidates.append("_id") # _id is always available
suggestions["id"] = create_ordered_suggestions(id_candidates, all_fields) suggestions["id"] = create_ordered_suggestions(id_candidates, all_fields)
# Category field suggestions (category-like fields first, then all fields) # Category field suggestions (category-like fields first, then all fields)
category_candidates = [f for f in keyword_fields if any( category_candidates = [
keyword in f.lower() for keyword in ["category", "class", "type", "label"] f
)] for f in keyword_fields
suggestions["category"] = create_ordered_suggestions(category_candidates, all_fields) if any(
keyword in f.lower()
for keyword in ["category", "class", "type", "label"]
)
]
suggestions["category"] = create_ordered_suggestions(
category_candidates, all_fields
)
# Subcategory field suggestions (subcategory-like fields first, then all fields) # Subcategory field suggestions (subcategory-like fields first, then all fields)
subcategory_candidates = [f for f in keyword_fields if any( subcategory_candidates = [
keyword in f.lower() for keyword in ["subcategory", "subclass", "subtype", "subtopic"] f
)] for f in keyword_fields
suggestions["subcategory"] = create_ordered_suggestions(subcategory_candidates, all_fields) if any(
keyword in f.lower()
for keyword in ["subcategory", "subclass", "subtype", "subtopic"]
)
]
suggestions["subcategory"] = create_ordered_suggestions(
subcategory_candidates, all_fields
)
# Tags field suggestions (tag-like fields first, then all fields) # Tags field suggestions (tag-like fields first, then all fields)
tags_candidates = [f for f in keyword_fields if any( tags_candidates = [
keyword in f.lower() for keyword in ["tag", "tags", "keyword", "keywords"] f
)] for f in keyword_fields
if any(
keyword in f.lower()
for keyword in ["tag", "tags", "keyword", "keywords"]
)
]
suggestions["tags"] = create_ordered_suggestions(tags_candidates, all_fields) suggestions["tags"] = create_ordered_suggestions(tags_candidates, all_fields)
return suggestions return suggestions

View File

@@ -97,10 +97,9 @@ class DataProcessingCallbacks:
# Register collapsible section callbacks # Register collapsible section callbacks
self._register_collapse_callbacks() self._register_collapse_callbacks()
def _register_opensearch_callbacks(self, section_type, opensearch_client): def _register_opensearch_callbacks(self, section_type, opensearch_client):
"""Register callbacks for a specific section (data or prompts).""" """Register callbacks for a specific section (data or prompts)."""
@callback( @callback(
Output(f"{section_type}-auth-collapse", "is_open"), Output(f"{section_type}-auth-collapse", "is_open"),
[Input(f"{section_type}-auth-toggle", "n_clicks")], [Input(f"{section_type}-auth-toggle", "n_clicks")],
@@ -144,9 +143,23 @@ class DataProcessingCallbacks:
], ],
prevent_initial_call=True, prevent_initial_call=True,
) )
def test_opensearch_connection(n_clicks, url, index_name, username, password, api_key): def test_opensearch_connection(
n_clicks, url, index_name, username, password, api_key
):
if not n_clicks or not url or not index_name: if not n_clicks or not url or not index_name:
return no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update return (
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
no_update,
)
# Test connection # Test connection
success, message = opensearch_client.connect( success, message = opensearch_client.connect(
@@ -173,7 +186,9 @@ class DataProcessingCallbacks:
) )
# Analyze fields # Analyze fields
success, field_analysis, analysis_message = opensearch_client.analyze_fields(index_name) success, field_analysis, analysis_message = (
opensearch_client.analyze_fields(index_name)
)
if not success: if not success:
return ( return (
@@ -194,8 +209,11 @@ class DataProcessingCallbacks:
field_suggestions = FieldMapper.suggest_mappings(field_analysis) field_suggestions = FieldMapper.suggest_mappings(field_analysis)
from ...ui.components.datasource import DataSourceComponent from ...ui.components.datasource import DataSourceComponent
datasource = DataSourceComponent() datasource = DataSourceComponent()
field_mapping_ui = datasource.create_field_mapping_interface(field_suggestions, section_type) field_mapping_ui = datasource.create_field_mapping_interface(
field_suggestions, section_type
)
return ( return (
self._create_status_alert(f"{message}", "success"), self._create_status_alert(f"{message}", "success"),
@@ -203,16 +221,36 @@ class DataProcessingCallbacks:
{"display": "block"}, {"display": "block"},
{"display": "block"}, {"display": "block"},
False, False,
[{"label": field, "value": field} for field in field_suggestions.get("embedding", [])], [
[{"label": field, "value": field} for field in field_suggestions.get("text", [])], {"label": field, "value": field}
[{"label": field, "value": field} for field in field_suggestions.get("id", [])], for field in field_suggestions.get("embedding", [])
[{"label": field, "value": field} for field in field_suggestions.get("category", [])], ],
[{"label": field, "value": field} for field in field_suggestions.get("subcategory", [])], [
[{"label": field, "value": field} for field in field_suggestions.get("tags", [])], {"label": field, "value": field}
for field in field_suggestions.get("text", [])
],
[
{"label": field, "value": field}
for field in field_suggestions.get("id", [])
],
[
{"label": field, "value": field}
for field in field_suggestions.get("category", [])
],
[
{"label": field, "value": field}
for field in field_suggestions.get("subcategory", [])
],
[
{"label": field, "value": field}
for field in field_suggestions.get("tags", [])
],
) )
# Determine output target based on section type # Determine output target based on section type
output_target = "processed-data" if section_type == "data" else "processed-prompts" output_target = (
"processed-data" if section_type == "data" else "processed-prompts"
)
@callback( @callback(
[ [
@@ -235,8 +273,17 @@ class DataProcessingCallbacks:
], ],
prevent_initial_call=True, prevent_initial_call=True,
) )
def load_opensearch_data(n_clicks, index_name, query_size, embedding_field, text_field, def load_opensearch_data(
id_field, category_field, subcategory_field, tags_field): n_clicks,
index_name,
query_size,
embedding_field,
text_field,
id_field,
category_field,
subcategory_field,
tags_field,
):
if not n_clicks or not index_name or not embedding_field or not text_field: if not n_clicks or not index_name or not embedding_field or not text_field:
return no_update, no_update, no_update, no_update, no_update return no_update, no_update, no_update, no_update, no_update
@@ -248,14 +295,16 @@ class DataProcessingCallbacks:
query_size = 1000 # Cap at reasonable maximum query_size = 1000 # Cap at reasonable maximum
# Create field mapping # Create field mapping
field_mapping = FieldMapper.create_mapping_from_dict({ field_mapping = FieldMapper.create_mapping_from_dict(
"embedding": embedding_field, {
"text": text_field, "embedding": embedding_field,
"id": id_field, "text": text_field,
"category": category_field, "id": id_field,
"subcategory": subcategory_field, "category": category_field,
"tags": tags_field "subcategory": subcategory_field,
}) "tags": tags_field,
}
)
# Fetch data from OpenSearch # Fetch data from OpenSearch
success, raw_documents, message = opensearch_client.fetch_data( success, raw_documents, message = opensearch_client.fetch_data(
@@ -268,11 +317,13 @@ class DataProcessingCallbacks:
"", "",
False, False,
f"❌ Failed to fetch {section_type}: {message}", f"❌ Failed to fetch {section_type}: {message}",
True True,
) )
# Process the data # Process the data
processed_data = self.processor.process_opensearch_data(raw_documents, field_mapping) processed_data = self.processor.process_opensearch_data(
raw_documents, field_mapping
)
if processed_data.error: if processed_data.error:
return ( return (
@@ -280,7 +331,7 @@ class DataProcessingCallbacks:
"", "",
False, False,
f"{section_type.title()} processing error: {processed_data.error}", f"{section_type.title()} processing error: {processed_data.error}",
True True,
) )
success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch" success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch"
@@ -290,27 +341,29 @@ class DataProcessingCallbacks:
return ( return (
{ {
"documents": [ "documents": [
self._document_to_dict(doc) for doc in processed_data.documents self._document_to_dict(doc)
for doc in processed_data.documents
], ],
"embeddings": processed_data.embeddings.tolist(), "embeddings": processed_data.embeddings.tolist(),
}, },
success_message, success_message,
True, True,
"", "",
False False,
) )
else: # prompts else: # prompts
return ( return (
{ {
"prompts": [ "prompts": [
self._document_to_dict(doc) for doc in processed_data.documents self._document_to_dict(doc)
for doc in processed_data.documents
], ],
"embeddings": processed_data.embeddings.tolist(), "embeddings": processed_data.embeddings.tolist(),
}, },
success_message, success_message,
True, True,
"", "",
False False,
) )
except Exception as e: except Exception as e:
@@ -367,7 +420,7 @@ class DataProcessingCallbacks:
def _register_collapse_callbacks(self): def _register_collapse_callbacks(self):
"""Register callbacks for collapsible sections.""" """Register callbacks for collapsible sections."""
# Data section collapse callback # Data section collapse callback
@callback( @callback(
[ [
@@ -381,7 +434,11 @@ class DataProcessingCallbacks:
def toggle_data_collapse(n_clicks, is_open): def toggle_data_collapse(n_clicks, is_open):
if n_clicks: if n_clicks:
new_state = not is_open new_state = not is_open
icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2" icon_class = (
"fas fa-chevron-down me-2"
if new_state
else "fas fa-chevron-right me-2"
)
return new_state, icon_class return new_state, icon_class
return is_open, "fas fa-chevron-down me-2" return is_open, "fas fa-chevron-down me-2"
@@ -398,7 +455,11 @@ class DataProcessingCallbacks:
def toggle_prompts_collapse(n_clicks, is_open): def toggle_prompts_collapse(n_clicks, is_open):
if n_clicks: if n_clicks:
new_state = not is_open new_state = not is_open
icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2" icon_class = (
"fas fa-chevron-down me-2"
if new_state
else "fas fa-chevron-right me-2"
)
return new_state, icon_class return new_state, icon_class
return is_open, "fas fa-chevron-down me-2" return is_open, "fas fa-chevron-down me-2"

View File

@@ -43,194 +43,314 @@ class DataSourceComponent:
return html.Div( return html.Div(
[ [
# Data Section # Data Section
dbc.Card([ dbc.Card(
dbc.CardHeader([ [
dbc.Button( dbc.CardHeader(
[ [
html.I(className="fas fa-chevron-down me-2", id="data-collapse-icon"), dbc.Button(
"📄 Documents/Data" [
], html.I(
id="data-collapse-toggle", className="fas fa-chevron-down me-2",
color="link", id="data-collapse-icon",
className="text-start p-0 w-100 text-decoration-none", ),
style={"border": "none", "font-size": "1.25rem", "font-weight": "500"} "📄 Documents/Data",
],
id="data-collapse-toggle",
color="link",
className="text-start p-0 w-100 text-decoration-none",
style={
"border": "none",
"font-size": "1.25rem",
"font-weight": "500",
},
),
]
), ),
]), dbc.Collapse(
dbc.Collapse([ [dbc.CardBody([self._create_opensearch_section("data")])],
dbc.CardBody([ id="data-collapse",
self._create_opensearch_section("data") is_open=True,
]) ),
], id="data-collapse", is_open=True) ],
], className="mb-4"), className="mb-4",
),
# Prompts Section # Prompts Section
dbc.Card([ dbc.Card(
dbc.CardHeader([ [
dbc.Button( dbc.CardHeader(
[ [
html.I(className="fas fa-chevron-down me-2", id="prompts-collapse-icon"), dbc.Button(
"💬 Prompts" [
], html.I(
id="prompts-collapse-toggle", className="fas fa-chevron-down me-2",
color="link", id="prompts-collapse-icon",
className="text-start p-0 w-100 text-decoration-none", ),
style={"border": "none", "font-size": "1.25rem", "font-weight": "500"} "💬 Prompts",
],
id="prompts-collapse-toggle",
color="link",
className="text-start p-0 w-100 text-decoration-none",
style={
"border": "none",
"font-size": "1.25rem",
"font-weight": "500",
},
),
]
), ),
]), dbc.Collapse(
dbc.Collapse([ [
dbc.CardBody([ dbc.CardBody(
self._create_opensearch_section("prompts") [self._create_opensearch_section("prompts")]
]) )
], id="prompts-collapse", is_open=True) ],
], className="mb-4"), id="prompts-collapse",
is_open=True,
),
],
className="mb-4",
),
# Hidden dropdowns to prevent callback errors (for both sections) # Hidden dropdowns to prevent callback errors (for both sections)
html.Div([ html.Div(
# Data dropdowns (hidden sync targets) [
dcc.Dropdown(id="data-embedding-field-dropdown", style={"display": "none"}), # Data dropdowns (hidden sync targets)
dcc.Dropdown(id="data-text-field-dropdown", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="data-id-field-dropdown", style={"display": "none"}), id="data-embedding-field-dropdown",
dcc.Dropdown(id="data-category-field-dropdown", style={"display": "none"}), style={"display": "none"},
dcc.Dropdown(id="data-subcategory-field-dropdown", style={"display": "none"}), ),
dcc.Dropdown(id="data-tags-field-dropdown", style={"display": "none"}), dcc.Dropdown(
# Data UI dropdowns (hidden placeholders) id="data-text-field-dropdown", style={"display": "none"}
dcc.Dropdown(id="data-embedding-field-dropdown-ui", style={"display": "none"}), ),
dcc.Dropdown(id="data-text-field-dropdown-ui", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="data-id-field-dropdown-ui", style={"display": "none"}), id="data-id-field-dropdown", style={"display": "none"}
dcc.Dropdown(id="data-category-field-dropdown-ui", style={"display": "none"}), ),
dcc.Dropdown(id="data-subcategory-field-dropdown-ui", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="data-tags-field-dropdown-ui", style={"display": "none"}), id="data-category-field-dropdown", style={"display": "none"}
# Prompts dropdowns (hidden sync targets) ),
dcc.Dropdown(id="prompts-embedding-field-dropdown", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="prompts-text-field-dropdown", style={"display": "none"}), id="data-subcategory-field-dropdown",
dcc.Dropdown(id="prompts-id-field-dropdown", style={"display": "none"}), style={"display": "none"},
dcc.Dropdown(id="prompts-category-field-dropdown", style={"display": "none"}), ),
dcc.Dropdown(id="prompts-subcategory-field-dropdown", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="prompts-tags-field-dropdown", style={"display": "none"}), id="data-tags-field-dropdown", style={"display": "none"}
# Prompts UI dropdowns (hidden placeholders) ),
dcc.Dropdown(id="prompts-embedding-field-dropdown-ui", style={"display": "none"}), # Data UI dropdowns (hidden placeholders)
dcc.Dropdown(id="prompts-text-field-dropdown-ui", style={"display": "none"}), dcc.Dropdown(
dcc.Dropdown(id="prompts-id-field-dropdown-ui", style={"display": "none"}), id="data-embedding-field-dropdown-ui",
dcc.Dropdown(id="prompts-category-field-dropdown-ui", style={"display": "none"}), style={"display": "none"},
dcc.Dropdown(id="prompts-subcategory-field-dropdown-ui", style={"display": "none"}), ),
dcc.Dropdown(id="prompts-tags-field-dropdown-ui", style={"display": "none"}), dcc.Dropdown(
], style={"display": "none"}), id="data-text-field-dropdown-ui", style={"display": "none"}
),
dcc.Dropdown(
id="data-id-field-dropdown-ui", style={"display": "none"}
),
dcc.Dropdown(
id="data-category-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="data-subcategory-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="data-tags-field-dropdown-ui", style={"display": "none"}
),
# Prompts dropdowns (hidden sync targets)
dcc.Dropdown(
id="prompts-embedding-field-dropdown",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-text-field-dropdown", style={"display": "none"}
),
dcc.Dropdown(
id="prompts-id-field-dropdown", style={"display": "none"}
),
dcc.Dropdown(
id="prompts-category-field-dropdown",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-subcategory-field-dropdown",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-tags-field-dropdown", style={"display": "none"}
),
# Prompts UI dropdowns (hidden placeholders)
dcc.Dropdown(
id="prompts-embedding-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-text-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-id-field-dropdown-ui", style={"display": "none"}
),
dcc.Dropdown(
id="prompts-category-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-subcategory-field-dropdown-ui",
style={"display": "none"},
),
dcc.Dropdown(
id="prompts-tags-field-dropdown-ui",
style={"display": "none"},
),
],
style={"display": "none"},
),
] ]
) )
def _create_opensearch_section(self, section_type): def _create_opensearch_section(self, section_type):
"""Create a complete OpenSearch section for either 'data' or 'prompts'.""" """Create a complete OpenSearch section for either 'data' or 'prompts'."""
section_id = section_type # 'data' or 'prompts' section_id = section_type # 'data' or 'prompts'
return html.Div([ return html.Div(
# Connection section [
html.H6("Connection", className="mb-2"), # Connection section
dbc.Row([ html.H6("Connection", className="mb-2"),
dbc.Col([ dbc.Row(
dbc.Label("OpenSearch URL:"), [
dbc.Input( dbc.Col(
id=f"{section_id}-opensearch-url", [
type="text", dbc.Label("OpenSearch URL:"),
placeholder="https://opensearch.example.com:9200", dbc.Input(
className="mb-2", id=f"{section_id}-opensearch-url",
), type="text",
], width=12), placeholder="https://opensearch.example.com:9200",
]), className="mb-2",
),
dbc.Row([ ],
dbc.Col([ width=12,
dbc.Label("Index Name:"), ),
dbc.Input( ]
id=f"{section_id}-opensearch-index", ),
type="text", dbc.Row(
placeholder="my-embeddings-index", [
className="mb-2", dbc.Col(
), [
], width=6), dbc.Label("Index Name:"),
dbc.Col([ dbc.Input(
dbc.Label("Query Size:"), id=f"{section_id}-opensearch-index",
dbc.Input( type="text",
id=f"{section_id}-opensearch-query-size", placeholder="my-embeddings-index",
type="number", className="mb-2",
value=100, ),
min=1, ],
max=1000, width=6,
placeholder="100", ),
className="mb-2", dbc.Col(
), [
], width=6), dbc.Label("Query Size:"),
]), dbc.Input(
id=f"{section_id}-opensearch-query-size",
dbc.Row([ type="number",
dbc.Col([ value=100,
dbc.Button( min=1,
"Test Connection", max=1000,
id=f"{section_id}-test-connection-btn", placeholder="100",
color="primary", className="mb-2",
className="mb-3", ),
), ],
], width=12), width=6,
]), ),
]
# Authentication section (collapsible) ),
dbc.Collapse([ dbc.Row(
html.Hr(), [
html.H6("Authentication (Optional)", className="mb-2"), dbc.Col(
dbc.Row([ [
dbc.Col([ dbc.Button(
dbc.Label("Username:"), "Test Connection",
id=f"{section_id}-test-connection-btn",
color="primary",
className="mb-3",
),
],
width=12,
),
]
),
# Authentication section (collapsible)
dbc.Collapse(
[
html.Hr(),
html.H6("Authentication (Optional)", className="mb-2"),
dbc.Row(
[
dbc.Col(
[
dbc.Label("Username:"),
dbc.Input(
id=f"{section_id}-opensearch-username",
type="text",
className="mb-2",
),
],
width=6,
),
dbc.Col(
[
dbc.Label("Password:"),
dbc.Input(
id=f"{section_id}-opensearch-password",
type="password",
className="mb-2",
),
],
width=6,
),
]
),
dbc.Label("OR"),
dbc.Input( dbc.Input(
id=f"{section_id}-opensearch-username", id=f"{section_id}-opensearch-api-key",
type="text", type="text",
placeholder="API Key",
className="mb-2", className="mb-2",
), ),
], width=6), ],
dbc.Col([ id=f"{section_id}-auth-collapse",
dbc.Label("Password:"), is_open=False,
dbc.Input(
id=f"{section_id}-opensearch-password",
type="password",
className="mb-2",
),
], width=6),
]),
dbc.Label("OR"),
dbc.Input(
id=f"{section_id}-opensearch-api-key",
type="text",
placeholder="API Key",
className="mb-2",
), ),
], id=f"{section_id}-auth-collapse", is_open=False),
dbc.Button(
"Show Authentication",
id=f"{section_id}-auth-toggle",
color="link",
size="sm",
className="p-0 mb-3",
),
# Connection status
html.Div(id=f"{section_id}-connection-status", className="mb-3"),
# Field mapping section (hidden initially)
html.Div(id=f"{section_id}-field-mapping-section", style={"display": "none"}),
# Load data button (hidden initially)
html.Div([
dbc.Button( dbc.Button(
f"Load {section_type.title()}", "Show Authentication",
id=f"{section_id}-load-opensearch-data-btn", id=f"{section_id}-auth-toggle",
color="success", color="link",
className="mb-2", size="sm",
disabled=True, className="p-0 mb-3",
), ),
], id=f"{section_id}-load-data-section", style={"display": "none"}), # Connection status
html.Div(id=f"{section_id}-connection-status", className="mb-3"),
# OpenSearch status/results # Field mapping section (hidden initially)
html.Div(id=f"{section_id}-opensearch-status", className="mb-3"), html.Div(
]) id=f"{section_id}-field-mapping-section", style={"display": "none"}
),
# Load data button (hidden initially)
html.Div(
[
dbc.Button(
f"Load {section_type.title()}",
id=f"{section_id}-load-opensearch-data-btn",
color="success",
className="mb-2",
disabled=True,
),
],
id=f"{section_id}-load-data-section",
style={"display": "none"},
),
# OpenSearch status/results
html.Div(id=f"{section_id}-opensearch-status", className="mb-3"),
]
)
def create_field_mapping_interface(self, field_suggestions, section_type="data"): def create_field_mapping_interface(self, field_suggestions, section_type="data"):
"""Create field mapping interface based on detected fields.""" """Create field mapping interface based on detected fields."""
@@ -254,9 +374,13 @@ class DataSourceComponent:
id=f"{section_type}-embedding-field-dropdown-ui", id=f"{section_type}-embedding-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("embedding", []) for field in field_suggestions.get(
"embedding", []
)
], ],
value=field_suggestions.get("embedding", [None])[0], # Default to first suggestion value=field_suggestions.get("embedding", [None])[
0
], # Default to first suggestion
placeholder="Select embedding field...", placeholder="Select embedding field...",
className="mb-2", className="mb-2",
), ),
@@ -274,7 +398,9 @@ class DataSourceComponent:
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("text", []) for field in field_suggestions.get("text", [])
], ],
value=field_suggestions.get("text", [None])[0], # Default to first suggestion value=field_suggestions.get("text", [None])[
0
], # Default to first suggestion
placeholder="Select text field...", placeholder="Select text field...",
className="mb-2", className="mb-2",
), ),
@@ -296,7 +422,9 @@ class DataSourceComponent:
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("id", []) for field in field_suggestions.get("id", [])
], ],
value=field_suggestions.get("id", [None])[0], # Default to first suggestion value=field_suggestions.get("id", [None])[
0
], # Default to first suggestion
placeholder="Select ID field...", placeholder="Select ID field...",
className="mb-2", className="mb-2",
), ),
@@ -310,9 +438,13 @@ class DataSourceComponent:
id=f"{section_type}-category-field-dropdown-ui", id=f"{section_type}-category-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("category", []) for field in field_suggestions.get(
"category", []
)
], ],
value=field_suggestions.get("category", [None])[0], # Default to first suggestion value=field_suggestions.get("category", [None])[
0
], # Default to first suggestion
placeholder="Select category field...", placeholder="Select category field...",
className="mb-2", className="mb-2",
), ),
@@ -330,9 +462,13 @@ class DataSourceComponent:
id=f"{section_type}-subcategory-field-dropdown-ui", id=f"{section_type}-subcategory-field-dropdown-ui",
options=[ options=[
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("subcategory", []) for field in field_suggestions.get(
"subcategory", []
)
], ],
value=field_suggestions.get("subcategory", [None])[0], # Default to first suggestion value=field_suggestions.get("subcategory", [None])[
0
], # Default to first suggestion
placeholder="Select subcategory field...", placeholder="Select subcategory field...",
className="mb-2", className="mb-2",
), ),
@@ -348,7 +484,9 @@ class DataSourceComponent:
{"label": field, "value": field} {"label": field, "value": field}
for field in field_suggestions.get("tags", []) for field in field_suggestions.get("tags", [])
], ],
value=field_suggestions.get("tags", [None])[0], # Default to first suggestion value=field_suggestions.get("tags", [None])[
0
], # Default to first suggestion
placeholder="Select tags field...", placeholder="Select tags field...",
className="mb-2", className="mb-2",
), ),

View File

@@ -99,22 +99,57 @@ class TestFieldMapper:
"text_fields": ["content", "description"], "text_fields": ["content", "description"],
"keyword_fields": ["doc_id", "category", "type", "tags"], "keyword_fields": ["doc_id", "category", "type", "tags"],
"numeric_fields": ["count"], "numeric_fields": ["count"],
"all_fields": ["embedding", "content", "description", "doc_id", "category", "type", "tags", "count"], "all_fields": [
"embedding",
"content",
"description",
"doc_id",
"category",
"type",
"tags",
"count",
],
} }
suggestions = FieldMapper.suggest_mappings(field_analysis) suggestions = FieldMapper.suggest_mappings(field_analysis)
# Check that all dropdowns contain all fields # Check that all dropdowns contain all fields
all_fields = ["embedding", "content", "description", "doc_id", "category", "type", "tags", "count"] all_fields = [
for field_type in ["embedding", "text", "id", "category", "subcategory", "tags"]: "embedding",
"content",
"description",
"doc_id",
"category",
"type",
"tags",
"count",
]
for field_type in [
"embedding",
"text",
"id",
"category",
"subcategory",
"tags",
]:
for field in all_fields: for field in all_fields:
assert field in suggestions[field_type], f"Field '{field}' missing from {field_type} suggestions" assert field in suggestions[field_type], (
f"Field '{field}' missing from {field_type} suggestions"
)
# Check that best candidates are first # Check that best candidates are first
assert suggestions["embedding"][0] == "embedding" # vector field should be first assert (
assert suggestions["text"][0] in ["content", "description"] # text fields should be first suggestions["embedding"][0] == "embedding"
) # vector field should be first
assert suggestions["text"][0] in [
"content",
"description",
] # text fields should be first
assert suggestions["id"][0] == "doc_id" # ID-like field should be first assert suggestions["id"][0] == "doc_id" # ID-like field should be first
assert suggestions["category"][0] in ["category", "type"] # category-like field should be first assert suggestions["category"][0] in [
"category",
"type",
] # category-like field should be first
assert suggestions["tags"][0] == "tags" # tags field should be first assert suggestions["tags"][0] == "tags" # tags field should be first
def test_suggest_mappings_name_based_embedding(self): def test_suggest_mappings_name_based_embedding(self):
@@ -124,19 +159,48 @@ class TestFieldMapper:
"text_fields": ["content", "description"], "text_fields": ["content", "description"],
"keyword_fields": ["doc_id", "category", "type", "tags"], "keyword_fields": ["doc_id", "category", "type", "tags"],
"numeric_fields": ["count"], "numeric_fields": ["count"],
"all_fields": ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"], "all_fields": [
"content",
"description",
"doc_id",
"category",
"embedding",
"type",
"tags",
"count",
],
} }
suggestions = FieldMapper.suggest_mappings(field_analysis) suggestions = FieldMapper.suggest_mappings(field_analysis)
# Check that 'embedding' field is prioritized despite not being detected as vector type # Check that 'embedding' field is prioritized despite not being detected as vector type
assert suggestions["embedding"][0] == "embedding", "Field named 'embedding' should be first priority" assert suggestions["embedding"][0] == "embedding", (
"Field named 'embedding' should be first priority"
)
# Check that all fields are still available # Check that all fields are still available
all_fields = ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"] all_fields = [
for field_type in ["embedding", "text", "id", "category", "subcategory", "tags"]: "content",
"description",
"doc_id",
"category",
"embedding",
"type",
"tags",
"count",
]
for field_type in [
"embedding",
"text",
"id",
"category",
"subcategory",
"tags",
]:
for field in all_fields: for field in all_fields:
assert field in suggestions[field_type], f"Field '{field}' missing from {field_type} suggestions" assert field in suggestions[field_type], (
f"Field '{field}' missing from {field_type} suggestions"
)
def test_validate_mapping_success(self): def test_validate_mapping_success(self):
mapping = FieldMapping( mapping = FieldMapping(

2
uv.lock generated
View File

@@ -412,7 +412,7 @@ wheels = [
[[package]] [[package]]
name = "embeddingbuddy" name = "embeddingbuddy"
version = "0.2.0" version = "0.3.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "dash" }, { name = "dash" },