fixed refactored code
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 38s
Security Scan / security (pull_request) Successful in 41s
Test Suite / lint (pull_request) Failing after 28s
Test Suite / test (3.11) (pull_request) Successful in 1m27s
Test Suite / build (pull_request) Has been skipped

This commit is contained in:
2025-08-14 07:55:40 -07:00
parent 1ec7e2c38c
commit 7b81c20a26
18 changed files with 386 additions and 537 deletions

View File

@@ -16,11 +16,26 @@ class NDJSONParser:
@staticmethod
def parse_text(text_content: str) -> List[Document]:
    """Parse newline-delimited JSON text into a list of documents.

    Each non-blank line is decoded with ``json.loads`` and converted with
    ``NDJSONParser._dict_to_document``. Blank lines are skipped, but still
    count towards the 1-based line numbers reported in errors.

    Args:
        text_content: The full NDJSON payload as a single string.

    Returns:
        One document per non-blank line, in input order.

    Raises:
        json.JSONDecodeError: A line is not valid JSON.
        KeyError: A line is missing a required field.
        ValueError: A line has a field with an invalid type or value.
    """
    documents = []
    for line_num, line in enumerate(text_content.strip().split("\n"), 1):
        if not line.strip():
            continue
        try:
            doc_dict = json.loads(line)
            documents.append(NDJSONParser._dict_to_document(doc_dict))
        # JSONDecodeError subclasses ValueError, so it must be handled
        # before the generic (TypeError, ValueError) clause below.
        except json.JSONDecodeError as e:
            raise json.JSONDecodeError(
                f"Invalid JSON on line {line_num}: {e.msg}",
                e.doc,
                e.pos,
            ) from e
        except KeyError as e:
            raise KeyError(
                f"Missing required field {e} on line {line_num}"
            ) from e
        except (TypeError, ValueError) as e:
            raise ValueError(
                f"Invalid data format on line {line_num}: {str(e)}"
            ) from e
    return documents
@staticmethod
@@ -28,10 +43,29 @@ class NDJSONParser:
if "id" not in doc_dict:
doc_dict["id"] = str(uuid.uuid4())
# Validate required fields
if "text" not in doc_dict:
raise KeyError("'text'")
if "embedding" not in doc_dict:
raise KeyError("'embedding'")
# Validate embedding format
embedding = doc_dict["embedding"]
if not isinstance(embedding, list):
raise ValueError(f"Embedding must be a list, got {type(embedding).__name__}")
if not embedding:
raise ValueError("Embedding cannot be empty")
# Check that all embedding values are numbers
for i, val in enumerate(embedding):
if not isinstance(val, (int, float)) or val != val: # NaN check
raise ValueError(f"Embedding contains invalid value at index {i}: {val}")
return Document(
id=doc_dict["id"],
text=doc_dict["text"],
embedding=doc_dict["embedding"],
embedding=embedding,
category=doc_dict.get("category"),
subcategory=doc_dict.get("subcategory"),
tags=doc_dict.get("tags"),

View File

@@ -9,30 +9,52 @@ class DataProcessingCallbacks:
def _register_callbacks(self):
@callback(
    [
        Output("processed-data", "data", allow_duplicate=True),
        Output("upload-error-alert", "children", allow_duplicate=True),
        Output("upload-error-alert", "is_open", allow_duplicate=True),
        Output("upload-success-alert", "children", allow_duplicate=True),
        Output("upload-success-alert", "is_open", allow_duplicate=True),
    ],
    Input("upload-data", "contents"),
    State("upload-data", "filename"),
    prevent_initial_call=True,
)
def process_uploaded_file(contents, filename):
    """Process an uploaded data file and update the store and both alerts.

    Every return value is a 5-tuple matching the declared outputs, in order:
    (processed data, error alert text, error alert open flag,
    success alert text, success alert open flag).

    Args:
        contents: Value of the upload component's ``contents`` property;
            ``None`` means there is nothing to process.
        filename: Value of the upload component's ``filename`` property;
            used only for the user-facing messages.
    """
    if contents is None:
        # Nothing uploaded: clear the store and keep both alerts closed.
        return None, "", False, "", False

    processed_data = self.processor.process_upload(contents, filename)

    if processed_data.error:
        error_message = self._format_error_message(processed_data.error, filename)
        return (
            {"error": processed_data.error},
            error_message,
            True,  # Show error alert
            "",
            False,  # Hide success alert
        )

    success_message = f"Successfully loaded {len(processed_data.documents)} documents from {filename or 'file'}"
    return (
        {
            "documents": [
                self._document_to_dict(doc) for doc in processed_data.documents
            ],
            "embeddings": processed_data.embeddings.tolist(),
        },
        "",
        False,  # Hide error alert
        success_message,
        True,  # Show success alert
    )
@callback(
Output("processed-prompts", "data"),
Output("processed-prompts", "data", allow_duplicate=True),
Input("upload-prompts", "contents"),
State("upload-prompts", "filename"),
prevent_initial_call=True,
)
def process_uploaded_prompts(contents, filename):
if contents is None:
@@ -60,3 +82,40 @@ class DataProcessingCallbacks:
"subcategory": doc.subcategory,
"tags": doc.tags,
}
@staticmethod
def _format_error_message(error: str, filename: str | None = None) -> str:
"""Format error message with helpful guidance for users."""
file_part = f" in file '{filename}'" if filename else ""
# Check for common error patterns and provide helpful messages
if "embedding" in error.lower() and ("key" in error.lower() or "required field" in error.lower()):
return (
f"❌ Missing 'embedding' field{file_part}. "
"Each line must contain an 'embedding' field with a list of numbers."
)
elif "text" in error.lower() and ("key" in error.lower() or "required field" in error.lower()):
return (
f"❌ Missing 'text' field{file_part}. "
"Each line must contain a 'text' field with the document content."
)
elif "json" in error.lower() and "decode" in error.lower():
return (
f"❌ Invalid JSON format{file_part}. "
"Please check that each line is valid JSON with proper syntax (quotes, braces, etc.)."
)
elif "unicode" in error.lower() or "decode" in error.lower():
return (
f"❌ File encoding issue{file_part}. "
"Please ensure the file is saved in UTF-8 format and contains no binary data."
)
elif "array" in error.lower() or "list" in error.lower():
return (
f"❌ Invalid embedding format{file_part}. "
"Embeddings must be arrays/lists of numbers, not strings or other types."
)
else:
return (
f"❌ Error processing file{file_part}: {error}. "
"Please check that your file is valid NDJSON with required 'text' and 'embedding' fields."
)

View File

@@ -11,14 +11,18 @@ class SidebarComponent:
return dbc.Col(
[
html.H5("Upload Data", className="mb-3"),
self.upload_component.create_error_alert(),
self.upload_component.create_success_alert(),
self.upload_component.create_data_upload(),
self.upload_component.create_prompts_upload(),
self.upload_component.create_reset_button(),
html.H5("Visualization Controls", className="mb-3"),
self._create_method_dropdown(),
self._create_color_dropdown(),
self._create_dimension_toggle(),
self._create_prompts_toggle(),
]
+ self._create_method_dropdown()
+ self._create_color_dropdown()
+ self._create_dimension_toggle()
+ self._create_prompts_toggle()
+ [
html.H5("Point Details", className="mb-3"),
html.Div(
id="point-details", children="Click on a point to see details"

View File

@@ -51,3 +51,25 @@ class UploadComponent:
className="mb-3",
style={"width": "100%"},
)
@staticmethod
def create_error_alert():
    """Build the alert used to report data upload problems.

    Returns:
        A ``dbc.Alert`` with id ``"upload-error-alert"``, styled as
        "danger", dismissable, and created closed with no children.
    """
    alert_props = {
        "id": "upload-error-alert",
        "dismissable": True,
        "is_open": False,  # created closed
        "color": "danger",
        "className": "mb-3",
    }
    return dbc.Alert(**alert_props)
@staticmethod
def create_success_alert():
    """Build the alert used to confirm a successful upload.

    Returns:
        A ``dbc.Alert`` with id ``"upload-success-alert"``, styled as
        "success", dismissable, and created closed with no children.
    """
    alert_props = {
        "id": "upload-success-alert",
        "dismissable": True,
        "is_open": False,  # created closed
        "color": "success",
        "className": "mb-3",
    }
    return dbc.Alert(**alert_props)

View File

@@ -9,7 +9,7 @@ class AppLayout:
def create_layout(self):
    """Assemble the top-level page layout.

    The children are the header, the main content, and then every item
    returned by ``self._create_stores()`` spliced in as direct children
    of the container rather than nested as a single list.

    Returns:
        A fluid ``dbc.Container`` holding those children.
    """
    children = [
        self._create_header(),
        self._create_main_content(),
        *self._create_stores(),
    ]
    return dbc.Container(children, fluid=True)