fix formatting and bump version to v0.3.0
All checks were successful
Security Scan / dependency-check (pull_request) Successful in 44s
Test Suite / lint (pull_request) Successful in 34s
Test Suite / build (pull_request) Successful in 38s
Security Scan / security (pull_request) Successful in 49s
Test Suite / test (3.11) (pull_request) Successful in 1m32s
All checks were successful
Security Scan / dependency-check (pull_request) Successful in 44s
Test Suite / lint (pull_request) Successful in 34s
Test Suite / build (pull_request) Successful in 38s
Security Scan / security (pull_request) Successful in 49s
Test Suite / test (3.11) (pull_request) Successful in 1m32s
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "embeddingbuddy"
|
name = "embeddingbuddy"
|
||||||
version = "0.2.0"
|
version = "0.3.0"
|
||||||
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
|
description = "A Python Dash application for interactive exploration and visualization of embedding vectors through dimensionality reduction techniques."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
@@ -59,42 +59,70 @@ class FieldMapper:
|
|||||||
# Embedding field suggestions (vector fields first, then name-based candidates, then all fields)
|
# Embedding field suggestions (vector fields first, then name-based candidates, then all fields)
|
||||||
embedding_candidates = vector_fields.copy()
|
embedding_candidates = vector_fields.copy()
|
||||||
# Add fields that likely contain embeddings based on name
|
# Add fields that likely contain embeddings based on name
|
||||||
embedding_name_candidates = [f for f in all_fields if any(
|
embedding_name_candidates = [
|
||||||
keyword in f.lower() for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"]
|
f
|
||||||
)]
|
for f in all_fields
|
||||||
|
if any(
|
||||||
|
keyword in f.lower()
|
||||||
|
for keyword in ["embedding", "embeddings", "vector", "vectors", "embed"]
|
||||||
|
)
|
||||||
|
]
|
||||||
# Add name-based candidates that aren't already in vector_fields
|
# Add name-based candidates that aren't already in vector_fields
|
||||||
for candidate in embedding_name_candidates:
|
for candidate in embedding_name_candidates:
|
||||||
if candidate not in embedding_candidates:
|
if candidate not in embedding_candidates:
|
||||||
embedding_candidates.append(candidate)
|
embedding_candidates.append(candidate)
|
||||||
suggestions["embedding"] = create_ordered_suggestions(embedding_candidates, all_fields)
|
suggestions["embedding"] = create_ordered_suggestions(
|
||||||
|
embedding_candidates, all_fields
|
||||||
|
)
|
||||||
|
|
||||||
# Text field suggestions (text fields first, then all fields)
|
# Text field suggestions (text fields first, then all fields)
|
||||||
text_candidates = text_fields.copy()
|
text_candidates = text_fields.copy()
|
||||||
suggestions["text"] = create_ordered_suggestions(text_candidates, all_fields)
|
suggestions["text"] = create_ordered_suggestions(text_candidates, all_fields)
|
||||||
|
|
||||||
# ID field suggestions (ID-like fields first, then all fields)
|
# ID field suggestions (ID-like fields first, then all fields)
|
||||||
id_candidates = [f for f in keyword_fields if any(
|
id_candidates = [
|
||||||
keyword in f.lower() for keyword in ["id", "_id", "doc", "document"]
|
f
|
||||||
)]
|
for f in keyword_fields
|
||||||
|
if any(keyword in f.lower() for keyword in ["id", "_id", "doc", "document"])
|
||||||
|
]
|
||||||
id_candidates.append("_id") # _id is always available
|
id_candidates.append("_id") # _id is always available
|
||||||
suggestions["id"] = create_ordered_suggestions(id_candidates, all_fields)
|
suggestions["id"] = create_ordered_suggestions(id_candidates, all_fields)
|
||||||
|
|
||||||
# Category field suggestions (category-like fields first, then all fields)
|
# Category field suggestions (category-like fields first, then all fields)
|
||||||
category_candidates = [f for f in keyword_fields if any(
|
category_candidates = [
|
||||||
keyword in f.lower() for keyword in ["category", "class", "type", "label"]
|
f
|
||||||
)]
|
for f in keyword_fields
|
||||||
suggestions["category"] = create_ordered_suggestions(category_candidates, all_fields)
|
if any(
|
||||||
|
keyword in f.lower()
|
||||||
|
for keyword in ["category", "class", "type", "label"]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
suggestions["category"] = create_ordered_suggestions(
|
||||||
|
category_candidates, all_fields
|
||||||
|
)
|
||||||
|
|
||||||
# Subcategory field suggestions (subcategory-like fields first, then all fields)
|
# Subcategory field suggestions (subcategory-like fields first, then all fields)
|
||||||
subcategory_candidates = [f for f in keyword_fields if any(
|
subcategory_candidates = [
|
||||||
keyword in f.lower() for keyword in ["subcategory", "subclass", "subtype", "subtopic"]
|
f
|
||||||
)]
|
for f in keyword_fields
|
||||||
suggestions["subcategory"] = create_ordered_suggestions(subcategory_candidates, all_fields)
|
if any(
|
||||||
|
keyword in f.lower()
|
||||||
|
for keyword in ["subcategory", "subclass", "subtype", "subtopic"]
|
||||||
|
)
|
||||||
|
]
|
||||||
|
suggestions["subcategory"] = create_ordered_suggestions(
|
||||||
|
subcategory_candidates, all_fields
|
||||||
|
)
|
||||||
|
|
||||||
# Tags field suggestions (tag-like fields first, then all fields)
|
# Tags field suggestions (tag-like fields first, then all fields)
|
||||||
tags_candidates = [f for f in keyword_fields if any(
|
tags_candidates = [
|
||||||
keyword in f.lower() for keyword in ["tag", "tags", "keyword", "keywords"]
|
f
|
||||||
)]
|
for f in keyword_fields
|
||||||
|
if any(
|
||||||
|
keyword in f.lower()
|
||||||
|
for keyword in ["tag", "tags", "keyword", "keywords"]
|
||||||
|
)
|
||||||
|
]
|
||||||
suggestions["tags"] = create_ordered_suggestions(tags_candidates, all_fields)
|
suggestions["tags"] = create_ordered_suggestions(tags_candidates, all_fields)
|
||||||
|
|
||||||
return suggestions
|
return suggestions
|
||||||
|
@@ -97,7 +97,6 @@ class DataProcessingCallbacks:
|
|||||||
# Register collapsible section callbacks
|
# Register collapsible section callbacks
|
||||||
self._register_collapse_callbacks()
|
self._register_collapse_callbacks()
|
||||||
|
|
||||||
|
|
||||||
def _register_opensearch_callbacks(self, section_type, opensearch_client):
|
def _register_opensearch_callbacks(self, section_type, opensearch_client):
|
||||||
"""Register callbacks for a specific section (data or prompts)."""
|
"""Register callbacks for a specific section (data or prompts)."""
|
||||||
|
|
||||||
@@ -144,9 +143,23 @@ class DataProcessingCallbacks:
|
|||||||
],
|
],
|
||||||
prevent_initial_call=True,
|
prevent_initial_call=True,
|
||||||
)
|
)
|
||||||
def test_opensearch_connection(n_clicks, url, index_name, username, password, api_key):
|
def test_opensearch_connection(
|
||||||
|
n_clicks, url, index_name, username, password, api_key
|
||||||
|
):
|
||||||
if not n_clicks or not url or not index_name:
|
if not n_clicks or not url or not index_name:
|
||||||
return no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update, no_update
|
return (
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
no_update,
|
||||||
|
)
|
||||||
|
|
||||||
# Test connection
|
# Test connection
|
||||||
success, message = opensearch_client.connect(
|
success, message = opensearch_client.connect(
|
||||||
@@ -173,7 +186,9 @@ class DataProcessingCallbacks:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Analyze fields
|
# Analyze fields
|
||||||
success, field_analysis, analysis_message = opensearch_client.analyze_fields(index_name)
|
success, field_analysis, analysis_message = (
|
||||||
|
opensearch_client.analyze_fields(index_name)
|
||||||
|
)
|
||||||
|
|
||||||
if not success:
|
if not success:
|
||||||
return (
|
return (
|
||||||
@@ -194,8 +209,11 @@ class DataProcessingCallbacks:
|
|||||||
field_suggestions = FieldMapper.suggest_mappings(field_analysis)
|
field_suggestions = FieldMapper.suggest_mappings(field_analysis)
|
||||||
|
|
||||||
from ...ui.components.datasource import DataSourceComponent
|
from ...ui.components.datasource import DataSourceComponent
|
||||||
|
|
||||||
datasource = DataSourceComponent()
|
datasource = DataSourceComponent()
|
||||||
field_mapping_ui = datasource.create_field_mapping_interface(field_suggestions, section_type)
|
field_mapping_ui = datasource.create_field_mapping_interface(
|
||||||
|
field_suggestions, section_type
|
||||||
|
)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
self._create_status_alert(f"✅ {message}", "success"),
|
self._create_status_alert(f"✅ {message}", "success"),
|
||||||
@@ -203,16 +221,36 @@ class DataProcessingCallbacks:
|
|||||||
{"display": "block"},
|
{"display": "block"},
|
||||||
{"display": "block"},
|
{"display": "block"},
|
||||||
False,
|
False,
|
||||||
[{"label": field, "value": field} for field in field_suggestions.get("embedding", [])],
|
[
|
||||||
[{"label": field, "value": field} for field in field_suggestions.get("text", [])],
|
{"label": field, "value": field}
|
||||||
[{"label": field, "value": field} for field in field_suggestions.get("id", [])],
|
for field in field_suggestions.get("embedding", [])
|
||||||
[{"label": field, "value": field} for field in field_suggestions.get("category", [])],
|
],
|
||||||
[{"label": field, "value": field} for field in field_suggestions.get("subcategory", [])],
|
[
|
||||||
[{"label": field, "value": field} for field in field_suggestions.get("tags", [])],
|
{"label": field, "value": field}
|
||||||
|
for field in field_suggestions.get("text", [])
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{"label": field, "value": field}
|
||||||
|
for field in field_suggestions.get("id", [])
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{"label": field, "value": field}
|
||||||
|
for field in field_suggestions.get("category", [])
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{"label": field, "value": field}
|
||||||
|
for field in field_suggestions.get("subcategory", [])
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{"label": field, "value": field}
|
||||||
|
for field in field_suggestions.get("tags", [])
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Determine output target based on section type
|
# Determine output target based on section type
|
||||||
output_target = "processed-data" if section_type == "data" else "processed-prompts"
|
output_target = (
|
||||||
|
"processed-data" if section_type == "data" else "processed-prompts"
|
||||||
|
)
|
||||||
|
|
||||||
@callback(
|
@callback(
|
||||||
[
|
[
|
||||||
@@ -235,8 +273,17 @@ class DataProcessingCallbacks:
|
|||||||
],
|
],
|
||||||
prevent_initial_call=True,
|
prevent_initial_call=True,
|
||||||
)
|
)
|
||||||
def load_opensearch_data(n_clicks, index_name, query_size, embedding_field, text_field,
|
def load_opensearch_data(
|
||||||
id_field, category_field, subcategory_field, tags_field):
|
n_clicks,
|
||||||
|
index_name,
|
||||||
|
query_size,
|
||||||
|
embedding_field,
|
||||||
|
text_field,
|
||||||
|
id_field,
|
||||||
|
category_field,
|
||||||
|
subcategory_field,
|
||||||
|
tags_field,
|
||||||
|
):
|
||||||
if not n_clicks or not index_name or not embedding_field or not text_field:
|
if not n_clicks or not index_name or not embedding_field or not text_field:
|
||||||
return no_update, no_update, no_update, no_update, no_update
|
return no_update, no_update, no_update, no_update, no_update
|
||||||
|
|
||||||
@@ -248,14 +295,16 @@ class DataProcessingCallbacks:
|
|||||||
query_size = 1000 # Cap at reasonable maximum
|
query_size = 1000 # Cap at reasonable maximum
|
||||||
|
|
||||||
# Create field mapping
|
# Create field mapping
|
||||||
field_mapping = FieldMapper.create_mapping_from_dict({
|
field_mapping = FieldMapper.create_mapping_from_dict(
|
||||||
|
{
|
||||||
"embedding": embedding_field,
|
"embedding": embedding_field,
|
||||||
"text": text_field,
|
"text": text_field,
|
||||||
"id": id_field,
|
"id": id_field,
|
||||||
"category": category_field,
|
"category": category_field,
|
||||||
"subcategory": subcategory_field,
|
"subcategory": subcategory_field,
|
||||||
"tags": tags_field
|
"tags": tags_field,
|
||||||
})
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Fetch data from OpenSearch
|
# Fetch data from OpenSearch
|
||||||
success, raw_documents, message = opensearch_client.fetch_data(
|
success, raw_documents, message = opensearch_client.fetch_data(
|
||||||
@@ -268,11 +317,13 @@ class DataProcessingCallbacks:
|
|||||||
"",
|
"",
|
||||||
False,
|
False,
|
||||||
f"❌ Failed to fetch {section_type}: {message}",
|
f"❌ Failed to fetch {section_type}: {message}",
|
||||||
True
|
True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Process the data
|
# Process the data
|
||||||
processed_data = self.processor.process_opensearch_data(raw_documents, field_mapping)
|
processed_data = self.processor.process_opensearch_data(
|
||||||
|
raw_documents, field_mapping
|
||||||
|
)
|
||||||
|
|
||||||
if processed_data.error:
|
if processed_data.error:
|
||||||
return (
|
return (
|
||||||
@@ -280,7 +331,7 @@ class DataProcessingCallbacks:
|
|||||||
"",
|
"",
|
||||||
False,
|
False,
|
||||||
f"❌ {section_type.title()} processing error: {processed_data.error}",
|
f"❌ {section_type.title()} processing error: {processed_data.error}",
|
||||||
True
|
True,
|
||||||
)
|
)
|
||||||
|
|
||||||
success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch"
|
success_message = f"✅ Successfully loaded {len(processed_data.documents)} {section_type} from OpenSearch"
|
||||||
@@ -290,27 +341,29 @@ class DataProcessingCallbacks:
|
|||||||
return (
|
return (
|
||||||
{
|
{
|
||||||
"documents": [
|
"documents": [
|
||||||
self._document_to_dict(doc) for doc in processed_data.documents
|
self._document_to_dict(doc)
|
||||||
|
for doc in processed_data.documents
|
||||||
],
|
],
|
||||||
"embeddings": processed_data.embeddings.tolist(),
|
"embeddings": processed_data.embeddings.tolist(),
|
||||||
},
|
},
|
||||||
success_message,
|
success_message,
|
||||||
True,
|
True,
|
||||||
"",
|
"",
|
||||||
False
|
False,
|
||||||
)
|
)
|
||||||
else: # prompts
|
else: # prompts
|
||||||
return (
|
return (
|
||||||
{
|
{
|
||||||
"prompts": [
|
"prompts": [
|
||||||
self._document_to_dict(doc) for doc in processed_data.documents
|
self._document_to_dict(doc)
|
||||||
|
for doc in processed_data.documents
|
||||||
],
|
],
|
||||||
"embeddings": processed_data.embeddings.tolist(),
|
"embeddings": processed_data.embeddings.tolist(),
|
||||||
},
|
},
|
||||||
success_message,
|
success_message,
|
||||||
True,
|
True,
|
||||||
"",
|
"",
|
||||||
False
|
False,
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -381,7 +434,11 @@ class DataProcessingCallbacks:
|
|||||||
def toggle_data_collapse(n_clicks, is_open):
|
def toggle_data_collapse(n_clicks, is_open):
|
||||||
if n_clicks:
|
if n_clicks:
|
||||||
new_state = not is_open
|
new_state = not is_open
|
||||||
icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2"
|
icon_class = (
|
||||||
|
"fas fa-chevron-down me-2"
|
||||||
|
if new_state
|
||||||
|
else "fas fa-chevron-right me-2"
|
||||||
|
)
|
||||||
return new_state, icon_class
|
return new_state, icon_class
|
||||||
return is_open, "fas fa-chevron-down me-2"
|
return is_open, "fas fa-chevron-down me-2"
|
||||||
|
|
||||||
@@ -398,7 +455,11 @@ class DataProcessingCallbacks:
|
|||||||
def toggle_prompts_collapse(n_clicks, is_open):
|
def toggle_prompts_collapse(n_clicks, is_open):
|
||||||
if n_clicks:
|
if n_clicks:
|
||||||
new_state = not is_open
|
new_state = not is_open
|
||||||
icon_class = "fas fa-chevron-down me-2" if new_state else "fas fa-chevron-right me-2"
|
icon_class = (
|
||||||
|
"fas fa-chevron-down me-2"
|
||||||
|
if new_state
|
||||||
|
else "fas fa-chevron-right me-2"
|
||||||
|
)
|
||||||
return new_state, icon_class
|
return new_state, icon_class
|
||||||
return is_open, "fas fa-chevron-down me-2"
|
return is_open, "fas fa-chevron-down me-2"
|
||||||
|
|
||||||
|
@@ -43,78 +43,168 @@ class DataSourceComponent:
|
|||||||
return html.Div(
|
return html.Div(
|
||||||
[
|
[
|
||||||
# Data Section
|
# Data Section
|
||||||
dbc.Card([
|
dbc.Card(
|
||||||
dbc.CardHeader([
|
[
|
||||||
|
dbc.CardHeader(
|
||||||
|
[
|
||||||
dbc.Button(
|
dbc.Button(
|
||||||
[
|
[
|
||||||
html.I(className="fas fa-chevron-down me-2", id="data-collapse-icon"),
|
html.I(
|
||||||
"📄 Documents/Data"
|
className="fas fa-chevron-down me-2",
|
||||||
|
id="data-collapse-icon",
|
||||||
|
),
|
||||||
|
"📄 Documents/Data",
|
||||||
],
|
],
|
||||||
id="data-collapse-toggle",
|
id="data-collapse-toggle",
|
||||||
color="link",
|
color="link",
|
||||||
className="text-start p-0 w-100 text-decoration-none",
|
className="text-start p-0 w-100 text-decoration-none",
|
||||||
style={"border": "none", "font-size": "1.25rem", "font-weight": "500"}
|
style={
|
||||||
|
"border": "none",
|
||||||
|
"font-size": "1.25rem",
|
||||||
|
"font-weight": "500",
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
dbc.Collapse(
|
||||||
|
[dbc.CardBody([self._create_opensearch_section("data")])],
|
||||||
|
id="data-collapse",
|
||||||
|
is_open=True,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
className="mb-4",
|
||||||
),
|
),
|
||||||
]),
|
|
||||||
dbc.Collapse([
|
|
||||||
dbc.CardBody([
|
|
||||||
self._create_opensearch_section("data")
|
|
||||||
])
|
|
||||||
], id="data-collapse", is_open=True)
|
|
||||||
], className="mb-4"),
|
|
||||||
|
|
||||||
# Prompts Section
|
# Prompts Section
|
||||||
dbc.Card([
|
dbc.Card(
|
||||||
dbc.CardHeader([
|
[
|
||||||
|
dbc.CardHeader(
|
||||||
|
[
|
||||||
dbc.Button(
|
dbc.Button(
|
||||||
[
|
[
|
||||||
html.I(className="fas fa-chevron-down me-2", id="prompts-collapse-icon"),
|
html.I(
|
||||||
"💬 Prompts"
|
className="fas fa-chevron-down me-2",
|
||||||
|
id="prompts-collapse-icon",
|
||||||
|
),
|
||||||
|
"💬 Prompts",
|
||||||
],
|
],
|
||||||
id="prompts-collapse-toggle",
|
id="prompts-collapse-toggle",
|
||||||
color="link",
|
color="link",
|
||||||
className="text-start p-0 w-100 text-decoration-none",
|
className="text-start p-0 w-100 text-decoration-none",
|
||||||
style={"border": "none", "font-size": "1.25rem", "font-weight": "500"}
|
style={
|
||||||
|
"border": "none",
|
||||||
|
"font-size": "1.25rem",
|
||||||
|
"font-weight": "500",
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
dbc.Collapse(
|
||||||
|
[
|
||||||
|
dbc.CardBody(
|
||||||
|
[self._create_opensearch_section("prompts")]
|
||||||
|
)
|
||||||
|
],
|
||||||
|
id="prompts-collapse",
|
||||||
|
is_open=True,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
className="mb-4",
|
||||||
),
|
),
|
||||||
]),
|
|
||||||
dbc.Collapse([
|
|
||||||
dbc.CardBody([
|
|
||||||
self._create_opensearch_section("prompts")
|
|
||||||
])
|
|
||||||
], id="prompts-collapse", is_open=True)
|
|
||||||
], className="mb-4"),
|
|
||||||
|
|
||||||
# Hidden dropdowns to prevent callback errors (for both sections)
|
# Hidden dropdowns to prevent callback errors (for both sections)
|
||||||
html.Div([
|
html.Div(
|
||||||
|
[
|
||||||
# Data dropdowns (hidden sync targets)
|
# Data dropdowns (hidden sync targets)
|
||||||
dcc.Dropdown(id="data-embedding-field-dropdown", style={"display": "none"}),
|
dcc.Dropdown(
|
||||||
dcc.Dropdown(id="data-text-field-dropdown", style={"display": "none"}),
|
id="data-embedding-field-dropdown",
|
||||||
dcc.Dropdown(id="data-id-field-dropdown", style={"display": "none"}),
|
style={"display": "none"},
|
||||||
dcc.Dropdown(id="data-category-field-dropdown", style={"display": "none"}),
|
),
|
||||||
dcc.Dropdown(id="data-subcategory-field-dropdown", style={"display": "none"}),
|
dcc.Dropdown(
|
||||||
dcc.Dropdown(id="data-tags-field-dropdown", style={"display": "none"}),
|
id="data-text-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-id-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-category-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-subcategory-field-dropdown",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-tags-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
# Data UI dropdowns (hidden placeholders)
|
# Data UI dropdowns (hidden placeholders)
|
||||||
dcc.Dropdown(id="data-embedding-field-dropdown-ui", style={"display": "none"}),
|
dcc.Dropdown(
|
||||||
dcc.Dropdown(id="data-text-field-dropdown-ui", style={"display": "none"}),
|
id="data-embedding-field-dropdown-ui",
|
||||||
dcc.Dropdown(id="data-id-field-dropdown-ui", style={"display": "none"}),
|
style={"display": "none"},
|
||||||
dcc.Dropdown(id="data-category-field-dropdown-ui", style={"display": "none"}),
|
),
|
||||||
dcc.Dropdown(id="data-subcategory-field-dropdown-ui", style={"display": "none"}),
|
dcc.Dropdown(
|
||||||
dcc.Dropdown(id="data-tags-field-dropdown-ui", style={"display": "none"}),
|
id="data-text-field-dropdown-ui", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-id-field-dropdown-ui", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-category-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-subcategory-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="data-tags-field-dropdown-ui", style={"display": "none"}
|
||||||
|
),
|
||||||
# Prompts dropdowns (hidden sync targets)
|
# Prompts dropdowns (hidden sync targets)
|
||||||
dcc.Dropdown(id="prompts-embedding-field-dropdown", style={"display": "none"}),
|
dcc.Dropdown(
|
||||||
dcc.Dropdown(id="prompts-text-field-dropdown", style={"display": "none"}),
|
id="prompts-embedding-field-dropdown",
|
||||||
dcc.Dropdown(id="prompts-id-field-dropdown", style={"display": "none"}),
|
style={"display": "none"},
|
||||||
dcc.Dropdown(id="prompts-category-field-dropdown", style={"display": "none"}),
|
),
|
||||||
dcc.Dropdown(id="prompts-subcategory-field-dropdown", style={"display": "none"}),
|
dcc.Dropdown(
|
||||||
dcc.Dropdown(id="prompts-tags-field-dropdown", style={"display": "none"}),
|
id="prompts-text-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-id-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-category-field-dropdown",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-subcategory-field-dropdown",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-tags-field-dropdown", style={"display": "none"}
|
||||||
|
),
|
||||||
# Prompts UI dropdowns (hidden placeholders)
|
# Prompts UI dropdowns (hidden placeholders)
|
||||||
dcc.Dropdown(id="prompts-embedding-field-dropdown-ui", style={"display": "none"}),
|
dcc.Dropdown(
|
||||||
dcc.Dropdown(id="prompts-text-field-dropdown-ui", style={"display": "none"}),
|
id="prompts-embedding-field-dropdown-ui",
|
||||||
dcc.Dropdown(id="prompts-id-field-dropdown-ui", style={"display": "none"}),
|
style={"display": "none"},
|
||||||
dcc.Dropdown(id="prompts-category-field-dropdown-ui", style={"display": "none"}),
|
),
|
||||||
dcc.Dropdown(id="prompts-subcategory-field-dropdown-ui", style={"display": "none"}),
|
dcc.Dropdown(
|
||||||
dcc.Dropdown(id="prompts-tags-field-dropdown-ui", style={"display": "none"}),
|
id="prompts-text-field-dropdown-ui",
|
||||||
], style={"display": "none"}),
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-id-field-dropdown-ui", style={"display": "none"}
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-category-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-subcategory-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
dcc.Dropdown(
|
||||||
|
id="prompts-tags-field-dropdown-ui",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -122,11 +212,14 @@ class DataSourceComponent:
|
|||||||
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""
|
"""Create a complete OpenSearch section for either 'data' or 'prompts'."""
|
||||||
section_id = section_type # 'data' or 'prompts'
|
section_id = section_type # 'data' or 'prompts'
|
||||||
|
|
||||||
return html.Div([
|
return html.Div(
|
||||||
|
[
|
||||||
# Connection section
|
# Connection section
|
||||||
html.H6("Connection", className="mb-2"),
|
html.H6("Connection", className="mb-2"),
|
||||||
dbc.Row([
|
dbc.Row(
|
||||||
dbc.Col([
|
[
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
dbc.Label("OpenSearch URL:"),
|
dbc.Label("OpenSearch URL:"),
|
||||||
dbc.Input(
|
dbc.Input(
|
||||||
id=f"{section_id}-opensearch-url",
|
id=f"{section_id}-opensearch-url",
|
||||||
@@ -134,11 +227,15 @@ class DataSourceComponent:
|
|||||||
placeholder="https://opensearch.example.com:9200",
|
placeholder="https://opensearch.example.com:9200",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
], width=12),
|
],
|
||||||
]),
|
width=12,
|
||||||
|
),
|
||||||
dbc.Row([
|
]
|
||||||
dbc.Col([
|
),
|
||||||
|
dbc.Row(
|
||||||
|
[
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
dbc.Label("Index Name:"),
|
dbc.Label("Index Name:"),
|
||||||
dbc.Input(
|
dbc.Input(
|
||||||
id=f"{section_id}-opensearch-index",
|
id=f"{section_id}-opensearch-index",
|
||||||
@@ -146,8 +243,11 @@ class DataSourceComponent:
|
|||||||
placeholder="my-embeddings-index",
|
placeholder="my-embeddings-index",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
], width=6),
|
],
|
||||||
dbc.Col([
|
width=6,
|
||||||
|
),
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
dbc.Label("Query Size:"),
|
dbc.Label("Query Size:"),
|
||||||
dbc.Input(
|
dbc.Input(
|
||||||
id=f"{section_id}-opensearch-query-size",
|
id=f"{section_id}-opensearch-query-size",
|
||||||
@@ -158,42 +258,57 @@ class DataSourceComponent:
|
|||||||
placeholder="100",
|
placeholder="100",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
], width=6),
|
],
|
||||||
]),
|
width=6,
|
||||||
|
),
|
||||||
dbc.Row([
|
]
|
||||||
dbc.Col([
|
),
|
||||||
|
dbc.Row(
|
||||||
|
[
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
dbc.Button(
|
dbc.Button(
|
||||||
"Test Connection",
|
"Test Connection",
|
||||||
id=f"{section_id}-test-connection-btn",
|
id=f"{section_id}-test-connection-btn",
|
||||||
color="primary",
|
color="primary",
|
||||||
className="mb-3",
|
className="mb-3",
|
||||||
),
|
),
|
||||||
], width=12),
|
],
|
||||||
]),
|
width=12,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
# Authentication section (collapsible)
|
# Authentication section (collapsible)
|
||||||
dbc.Collapse([
|
dbc.Collapse(
|
||||||
|
[
|
||||||
html.Hr(),
|
html.Hr(),
|
||||||
html.H6("Authentication (Optional)", className="mb-2"),
|
html.H6("Authentication (Optional)", className="mb-2"),
|
||||||
dbc.Row([
|
dbc.Row(
|
||||||
dbc.Col([
|
[
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
dbc.Label("Username:"),
|
dbc.Label("Username:"),
|
||||||
dbc.Input(
|
dbc.Input(
|
||||||
id=f"{section_id}-opensearch-username",
|
id=f"{section_id}-opensearch-username",
|
||||||
type="text",
|
type="text",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
], width=6),
|
],
|
||||||
dbc.Col([
|
width=6,
|
||||||
|
),
|
||||||
|
dbc.Col(
|
||||||
|
[
|
||||||
dbc.Label("Password:"),
|
dbc.Label("Password:"),
|
||||||
dbc.Input(
|
dbc.Input(
|
||||||
id=f"{section_id}-opensearch-password",
|
id=f"{section_id}-opensearch-password",
|
||||||
type="password",
|
type="password",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
], width=6),
|
],
|
||||||
]),
|
width=6,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
),
|
||||||
dbc.Label("OR"),
|
dbc.Label("OR"),
|
||||||
dbc.Input(
|
dbc.Input(
|
||||||
id=f"{section_id}-opensearch-api-key",
|
id=f"{section_id}-opensearch-api-key",
|
||||||
@@ -201,8 +316,10 @@ class DataSourceComponent:
|
|||||||
placeholder="API Key",
|
placeholder="API Key",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
], id=f"{section_id}-auth-collapse", is_open=False),
|
],
|
||||||
|
id=f"{section_id}-auth-collapse",
|
||||||
|
is_open=False,
|
||||||
|
),
|
||||||
dbc.Button(
|
dbc.Button(
|
||||||
"Show Authentication",
|
"Show Authentication",
|
||||||
id=f"{section_id}-auth-toggle",
|
id=f"{section_id}-auth-toggle",
|
||||||
@@ -210,15 +327,15 @@ class DataSourceComponent:
|
|||||||
size="sm",
|
size="sm",
|
||||||
className="p-0 mb-3",
|
className="p-0 mb-3",
|
||||||
),
|
),
|
||||||
|
|
||||||
# Connection status
|
# Connection status
|
||||||
html.Div(id=f"{section_id}-connection-status", className="mb-3"),
|
html.Div(id=f"{section_id}-connection-status", className="mb-3"),
|
||||||
|
|
||||||
# Field mapping section (hidden initially)
|
# Field mapping section (hidden initially)
|
||||||
html.Div(id=f"{section_id}-field-mapping-section", style={"display": "none"}),
|
html.Div(
|
||||||
|
id=f"{section_id}-field-mapping-section", style={"display": "none"}
|
||||||
|
),
|
||||||
# Load data button (hidden initially)
|
# Load data button (hidden initially)
|
||||||
html.Div([
|
html.Div(
|
||||||
|
[
|
||||||
dbc.Button(
|
dbc.Button(
|
||||||
f"Load {section_type.title()}",
|
f"Load {section_type.title()}",
|
||||||
id=f"{section_id}-load-opensearch-data-btn",
|
id=f"{section_id}-load-opensearch-data-btn",
|
||||||
@@ -226,11 +343,14 @@ class DataSourceComponent:
|
|||||||
className="mb-2",
|
className="mb-2",
|
||||||
disabled=True,
|
disabled=True,
|
||||||
),
|
),
|
||||||
], id=f"{section_id}-load-data-section", style={"display": "none"}),
|
],
|
||||||
|
id=f"{section_id}-load-data-section",
|
||||||
|
style={"display": "none"},
|
||||||
|
),
|
||||||
# OpenSearch status/results
|
# OpenSearch status/results
|
||||||
html.Div(id=f"{section_id}-opensearch-status", className="mb-3"),
|
html.Div(id=f"{section_id}-opensearch-status", className="mb-3"),
|
||||||
])
|
]
|
||||||
|
)
|
||||||
|
|
||||||
def create_field_mapping_interface(self, field_suggestions, section_type="data"):
|
def create_field_mapping_interface(self, field_suggestions, section_type="data"):
|
||||||
"""Create field mapping interface based on detected fields."""
|
"""Create field mapping interface based on detected fields."""
|
||||||
@@ -254,9 +374,13 @@ class DataSourceComponent:
|
|||||||
id=f"{section_type}-embedding-field-dropdown-ui",
|
id=f"{section_type}-embedding-field-dropdown-ui",
|
||||||
options=[
|
options=[
|
||||||
{"label": field, "value": field}
|
{"label": field, "value": field}
|
||||||
for field in field_suggestions.get("embedding", [])
|
for field in field_suggestions.get(
|
||||||
|
"embedding", []
|
||||||
|
)
|
||||||
],
|
],
|
||||||
value=field_suggestions.get("embedding", [None])[0], # Default to first suggestion
|
value=field_suggestions.get("embedding", [None])[
|
||||||
|
0
|
||||||
|
], # Default to first suggestion
|
||||||
placeholder="Select embedding field...",
|
placeholder="Select embedding field...",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
@@ -274,7 +398,9 @@ class DataSourceComponent:
|
|||||||
{"label": field, "value": field}
|
{"label": field, "value": field}
|
||||||
for field in field_suggestions.get("text", [])
|
for field in field_suggestions.get("text", [])
|
||||||
],
|
],
|
||||||
value=field_suggestions.get("text", [None])[0], # Default to first suggestion
|
value=field_suggestions.get("text", [None])[
|
||||||
|
0
|
||||||
|
], # Default to first suggestion
|
||||||
placeholder="Select text field...",
|
placeholder="Select text field...",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
@@ -296,7 +422,9 @@ class DataSourceComponent:
|
|||||||
{"label": field, "value": field}
|
{"label": field, "value": field}
|
||||||
for field in field_suggestions.get("id", [])
|
for field in field_suggestions.get("id", [])
|
||||||
],
|
],
|
||||||
value=field_suggestions.get("id", [None])[0], # Default to first suggestion
|
value=field_suggestions.get("id", [None])[
|
||||||
|
0
|
||||||
|
], # Default to first suggestion
|
||||||
placeholder="Select ID field...",
|
placeholder="Select ID field...",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
@@ -310,9 +438,13 @@ class DataSourceComponent:
|
|||||||
id=f"{section_type}-category-field-dropdown-ui",
|
id=f"{section_type}-category-field-dropdown-ui",
|
||||||
options=[
|
options=[
|
||||||
{"label": field, "value": field}
|
{"label": field, "value": field}
|
||||||
for field in field_suggestions.get("category", [])
|
for field in field_suggestions.get(
|
||||||
|
"category", []
|
||||||
|
)
|
||||||
],
|
],
|
||||||
value=field_suggestions.get("category", [None])[0], # Default to first suggestion
|
value=field_suggestions.get("category", [None])[
|
||||||
|
0
|
||||||
|
], # Default to first suggestion
|
||||||
placeholder="Select category field...",
|
placeholder="Select category field...",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
@@ -330,9 +462,13 @@ class DataSourceComponent:
|
|||||||
id=f"{section_type}-subcategory-field-dropdown-ui",
|
id=f"{section_type}-subcategory-field-dropdown-ui",
|
||||||
options=[
|
options=[
|
||||||
{"label": field, "value": field}
|
{"label": field, "value": field}
|
||||||
for field in field_suggestions.get("subcategory", [])
|
for field in field_suggestions.get(
|
||||||
|
"subcategory", []
|
||||||
|
)
|
||||||
],
|
],
|
||||||
value=field_suggestions.get("subcategory", [None])[0], # Default to first suggestion
|
value=field_suggestions.get("subcategory", [None])[
|
||||||
|
0
|
||||||
|
], # Default to first suggestion
|
||||||
placeholder="Select subcategory field...",
|
placeholder="Select subcategory field...",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
@@ -348,7 +484,9 @@ class DataSourceComponent:
|
|||||||
{"label": field, "value": field}
|
{"label": field, "value": field}
|
||||||
for field in field_suggestions.get("tags", [])
|
for field in field_suggestions.get("tags", [])
|
||||||
],
|
],
|
||||||
value=field_suggestions.get("tags", [None])[0], # Default to first suggestion
|
value=field_suggestions.get("tags", [None])[
|
||||||
|
0
|
||||||
|
], # Default to first suggestion
|
||||||
placeholder="Select tags field...",
|
placeholder="Select tags field...",
|
||||||
className="mb-2",
|
className="mb-2",
|
||||||
),
|
),
|
||||||
|
@@ -99,22 +99,57 @@ class TestFieldMapper:
|
|||||||
"text_fields": ["content", "description"],
|
"text_fields": ["content", "description"],
|
||||||
"keyword_fields": ["doc_id", "category", "type", "tags"],
|
"keyword_fields": ["doc_id", "category", "type", "tags"],
|
||||||
"numeric_fields": ["count"],
|
"numeric_fields": ["count"],
|
||||||
"all_fields": ["embedding", "content", "description", "doc_id", "category", "type", "tags", "count"],
|
"all_fields": [
|
||||||
|
"embedding",
|
||||||
|
"content",
|
||||||
|
"description",
|
||||||
|
"doc_id",
|
||||||
|
"category",
|
||||||
|
"type",
|
||||||
|
"tags",
|
||||||
|
"count",
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
suggestions = FieldMapper.suggest_mappings(field_analysis)
|
suggestions = FieldMapper.suggest_mappings(field_analysis)
|
||||||
|
|
||||||
# Check that all dropdowns contain all fields
|
# Check that all dropdowns contain all fields
|
||||||
all_fields = ["embedding", "content", "description", "doc_id", "category", "type", "tags", "count"]
|
all_fields = [
|
||||||
for field_type in ["embedding", "text", "id", "category", "subcategory", "tags"]:
|
"embedding",
|
||||||
|
"content",
|
||||||
|
"description",
|
||||||
|
"doc_id",
|
||||||
|
"category",
|
||||||
|
"type",
|
||||||
|
"tags",
|
||||||
|
"count",
|
||||||
|
]
|
||||||
|
for field_type in [
|
||||||
|
"embedding",
|
||||||
|
"text",
|
||||||
|
"id",
|
||||||
|
"category",
|
||||||
|
"subcategory",
|
||||||
|
"tags",
|
||||||
|
]:
|
||||||
for field in all_fields:
|
for field in all_fields:
|
||||||
assert field in suggestions[field_type], f"Field '{field}' missing from {field_type} suggestions"
|
assert field in suggestions[field_type], (
|
||||||
|
f"Field '{field}' missing from {field_type} suggestions"
|
||||||
|
)
|
||||||
|
|
||||||
# Check that best candidates are first
|
# Check that best candidates are first
|
||||||
assert suggestions["embedding"][0] == "embedding" # vector field should be first
|
assert (
|
||||||
assert suggestions["text"][0] in ["content", "description"] # text fields should be first
|
suggestions["embedding"][0] == "embedding"
|
||||||
|
) # vector field should be first
|
||||||
|
assert suggestions["text"][0] in [
|
||||||
|
"content",
|
||||||
|
"description",
|
||||||
|
] # text fields should be first
|
||||||
assert suggestions["id"][0] == "doc_id" # ID-like field should be first
|
assert suggestions["id"][0] == "doc_id" # ID-like field should be first
|
||||||
assert suggestions["category"][0] in ["category", "type"] # category-like field should be first
|
assert suggestions["category"][0] in [
|
||||||
|
"category",
|
||||||
|
"type",
|
||||||
|
] # category-like field should be first
|
||||||
assert suggestions["tags"][0] == "tags" # tags field should be first
|
assert suggestions["tags"][0] == "tags" # tags field should be first
|
||||||
|
|
||||||
def test_suggest_mappings_name_based_embedding(self):
|
def test_suggest_mappings_name_based_embedding(self):
|
||||||
@@ -124,19 +159,48 @@ class TestFieldMapper:
|
|||||||
"text_fields": ["content", "description"],
|
"text_fields": ["content", "description"],
|
||||||
"keyword_fields": ["doc_id", "category", "type", "tags"],
|
"keyword_fields": ["doc_id", "category", "type", "tags"],
|
||||||
"numeric_fields": ["count"],
|
"numeric_fields": ["count"],
|
||||||
"all_fields": ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"],
|
"all_fields": [
|
||||||
|
"content",
|
||||||
|
"description",
|
||||||
|
"doc_id",
|
||||||
|
"category",
|
||||||
|
"embedding",
|
||||||
|
"type",
|
||||||
|
"tags",
|
||||||
|
"count",
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
suggestions = FieldMapper.suggest_mappings(field_analysis)
|
suggestions = FieldMapper.suggest_mappings(field_analysis)
|
||||||
|
|
||||||
# Check that 'embedding' field is prioritized despite not being detected as vector type
|
# Check that 'embedding' field is prioritized despite not being detected as vector type
|
||||||
assert suggestions["embedding"][0] == "embedding", "Field named 'embedding' should be first priority"
|
assert suggestions["embedding"][0] == "embedding", (
|
||||||
|
"Field named 'embedding' should be first priority"
|
||||||
|
)
|
||||||
|
|
||||||
# Check that all fields are still available
|
# Check that all fields are still available
|
||||||
all_fields = ["content", "description", "doc_id", "category", "embedding", "type", "tags", "count"]
|
all_fields = [
|
||||||
for field_type in ["embedding", "text", "id", "category", "subcategory", "tags"]:
|
"content",
|
||||||
|
"description",
|
||||||
|
"doc_id",
|
||||||
|
"category",
|
||||||
|
"embedding",
|
||||||
|
"type",
|
||||||
|
"tags",
|
||||||
|
"count",
|
||||||
|
]
|
||||||
|
for field_type in [
|
||||||
|
"embedding",
|
||||||
|
"text",
|
||||||
|
"id",
|
||||||
|
"category",
|
||||||
|
"subcategory",
|
||||||
|
"tags",
|
||||||
|
]:
|
||||||
for field in all_fields:
|
for field in all_fields:
|
||||||
assert field in suggestions[field_type], f"Field '{field}' missing from {field_type} suggestions"
|
assert field in suggestions[field_type], (
|
||||||
|
f"Field '{field}' missing from {field_type} suggestions"
|
||||||
|
)
|
||||||
|
|
||||||
def test_validate_mapping_success(self):
|
def test_validate_mapping_success(self):
|
||||||
mapping = FieldMapping(
|
mapping = FieldMapping(
|
||||||
|
Reference in New Issue
Block a user