fixed refactored code
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 38s
Security Scan / security (pull_request) Successful in 41s
Test Suite / lint (pull_request) Failing after 28s
Test Suite / test (3.11) (pull_request) Successful in 1m27s
Test Suite / build (pull_request) Has been skipped
Some checks failed
Security Scan / dependency-check (pull_request) Successful in 38s
Security Scan / security (pull_request) Successful in 41s
Test Suite / lint (pull_request) Failing after 28s
Test Suite / test (3.11) (pull_request) Successful in 1m27s
Test Suite / build (pull_request) Has been skipped
This commit is contained in:
@@ -16,11 +16,26 @@ class NDJSONParser:
|
||||
@staticmethod
def parse_text(text_content: str) -> List[Document]:
    """Parse NDJSON text into a list of documents.

    Every non-blank line is decoded with ``json.loads`` and converted with
    ``NDJSONParser._dict_to_document``. Any failure is re-raised with the
    1-based number of the offending line so the caller can report where the
    input is broken.

    Args:
        text_content: NDJSON payload, one JSON object per line.

    Returns:
        The parsed documents, in input order.

    Raises:
        json.JSONDecodeError: A line is not valid JSON.
        KeyError: A line is missing a required field.
        ValueError: A line holds data of the wrong type or value.
    """
    documents = []
    # Split the raw text rather than a stripped copy: stripping first would
    # drop leading blank lines and shift every reported line number.
    for line_num, line in enumerate(text_content.split("\n"), 1):
        if not line.strip():
            continue
        try:
            doc_dict = json.loads(line)
            documents.append(NDJSONParser._dict_to_document(doc_dict))
        except json.JSONDecodeError as e:
            raise json.JSONDecodeError(
                f"Invalid JSON on line {line_num}: {e.msg}",
                e.doc,
                e.pos,
            ) from e
        except KeyError as e:
            # str(KeyError) is the repr of the key, and the key itself may
            # already carry quotes; normalise to a single pair of quotes.
            field = str(e.args[0]).strip("'\"") if e.args else "<unknown>"
            raise KeyError(
                f"Missing required field '{field}' on line {line_num}"
            ) from e
        except (TypeError, ValueError) as e:
            raise ValueError(
                f"Invalid data format on line {line_num}: {e}"
            ) from e
    return documents
|
||||
|
||||
@staticmethod
|
||||
@@ -28,10 +43,29 @@ class NDJSONParser:
|
||||
if "id" not in doc_dict:
|
||||
doc_dict["id"] = str(uuid.uuid4())
|
||||
|
||||
# Validate required fields
|
||||
if "text" not in doc_dict:
|
||||
raise KeyError("'text'")
|
||||
if "embedding" not in doc_dict:
|
||||
raise KeyError("'embedding'")
|
||||
|
||||
# Validate embedding format
|
||||
embedding = doc_dict["embedding"]
|
||||
if not isinstance(embedding, list):
|
||||
raise ValueError(f"Embedding must be a list, got {type(embedding).__name__}")
|
||||
|
||||
if not embedding:
|
||||
raise ValueError("Embedding cannot be empty")
|
||||
|
||||
# Check that all embedding values are numbers
|
||||
for i, val in enumerate(embedding):
|
||||
if not isinstance(val, (int, float)) or val != val: # NaN check
|
||||
raise ValueError(f"Embedding contains invalid value at index {i}: {val}")
|
||||
|
||||
return Document(
|
||||
id=doc_dict["id"],
|
||||
text=doc_dict["text"],
|
||||
embedding=doc_dict["embedding"],
|
||||
embedding=embedding,
|
||||
category=doc_dict.get("category"),
|
||||
subcategory=doc_dict.get("subcategory"),
|
||||
tags=doc_dict.get("tags"),
|
||||
|
@@ -9,30 +9,52 @@ class DataProcessingCallbacks:
|
||||
|
||||
def _register_callbacks(self):
|
||||
@callback(
|
||||
Output("processed-data", "data"),
|
||||
[
|
||||
Output("processed-data", "data", allow_duplicate=True),
|
||||
Output("upload-error-alert", "children", allow_duplicate=True),
|
||||
Output("upload-error-alert", "is_open", allow_duplicate=True),
|
||||
Output("upload-success-alert", "children", allow_duplicate=True),
|
||||
Output("upload-success-alert", "is_open", allow_duplicate=True),
|
||||
],
|
||||
Input("upload-data", "contents"),
|
||||
State("upload-data", "filename"),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def process_uploaded_file(contents, filename):
|
||||
if contents is None:
|
||||
return None
|
||||
return None, "", False, "", False
|
||||
|
||||
processed_data = self.processor.process_upload(contents, filename)
|
||||
|
||||
if processed_data.error:
|
||||
return {"error": processed_data.error}
|
||||
error_message = self._format_error_message(processed_data.error, filename)
|
||||
return (
|
||||
{"error": processed_data.error},
|
||||
error_message,
|
||||
True, # Show error alert
|
||||
"",
|
||||
False, # Hide success alert
|
||||
)
|
||||
|
||||
return {
|
||||
"documents": [
|
||||
self._document_to_dict(doc) for doc in processed_data.documents
|
||||
],
|
||||
"embeddings": processed_data.embeddings.tolist(),
|
||||
}
|
||||
success_message = f"Successfully loaded {len(processed_data.documents)} documents from {filename or 'file'}"
|
||||
return (
|
||||
{
|
||||
"documents": [
|
||||
self._document_to_dict(doc) for doc in processed_data.documents
|
||||
],
|
||||
"embeddings": processed_data.embeddings.tolist(),
|
||||
},
|
||||
"",
|
||||
False, # Hide error alert
|
||||
success_message,
|
||||
True, # Show success alert
|
||||
)
|
||||
|
||||
@callback(
|
||||
Output("processed-prompts", "data"),
|
||||
Output("processed-prompts", "data", allow_duplicate=True),
|
||||
Input("upload-prompts", "contents"),
|
||||
State("upload-prompts", "filename"),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def process_uploaded_prompts(contents, filename):
|
||||
if contents is None:
|
||||
@@ -60,3 +82,40 @@ class DataProcessingCallbacks:
|
||||
"subcategory": doc.subcategory,
|
||||
"tags": doc.tags,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _format_error_message(error: str, filename: str | None = None) -> str:
|
||||
"""Format error message with helpful guidance for users."""
|
||||
file_part = f" in file '{filename}'" if filename else ""
|
||||
|
||||
# Check for common error patterns and provide helpful messages
|
||||
if "embedding" in error.lower() and ("key" in error.lower() or "required field" in error.lower()):
|
||||
return (
|
||||
f"❌ Missing 'embedding' field{file_part}. "
|
||||
"Each line must contain an 'embedding' field with a list of numbers."
|
||||
)
|
||||
elif "text" in error.lower() and ("key" in error.lower() or "required field" in error.lower()):
|
||||
return (
|
||||
f"❌ Missing 'text' field{file_part}. "
|
||||
"Each line must contain a 'text' field with the document content."
|
||||
)
|
||||
elif "json" in error.lower() and "decode" in error.lower():
|
||||
return (
|
||||
f"❌ Invalid JSON format{file_part}. "
|
||||
"Please check that each line is valid JSON with proper syntax (quotes, braces, etc.)."
|
||||
)
|
||||
elif "unicode" in error.lower() or "decode" in error.lower():
|
||||
return (
|
||||
f"❌ File encoding issue{file_part}. "
|
||||
"Please ensure the file is saved in UTF-8 format and contains no binary data."
|
||||
)
|
||||
elif "array" in error.lower() or "list" in error.lower():
|
||||
return (
|
||||
f"❌ Invalid embedding format{file_part}. "
|
||||
"Embeddings must be arrays/lists of numbers, not strings or other types."
|
||||
)
|
||||
else:
|
||||
return (
|
||||
f"❌ Error processing file{file_part}: {error}. "
|
||||
"Please check that your file is valid NDJSON with required 'text' and 'embedding' fields."
|
||||
)
|
||||
|
@@ -11,14 +11,18 @@ class SidebarComponent:
|
||||
return dbc.Col(
|
||||
[
|
||||
html.H5("Upload Data", className="mb-3"),
|
||||
self.upload_component.create_error_alert(),
|
||||
self.upload_component.create_success_alert(),
|
||||
self.upload_component.create_data_upload(),
|
||||
self.upload_component.create_prompts_upload(),
|
||||
self.upload_component.create_reset_button(),
|
||||
html.H5("Visualization Controls", className="mb-3"),
|
||||
self._create_method_dropdown(),
|
||||
self._create_color_dropdown(),
|
||||
self._create_dimension_toggle(),
|
||||
self._create_prompts_toggle(),
|
||||
]
|
||||
+ self._create_method_dropdown()
|
||||
+ self._create_color_dropdown()
|
||||
+ self._create_dimension_toggle()
|
||||
+ self._create_prompts_toggle()
|
||||
+ [
|
||||
html.H5("Point Details", className="mb-3"),
|
||||
html.Div(
|
||||
id="point-details", children="Click on a point to see details"
|
||||
|
@@ -51,3 +51,25 @@ class UploadComponent:
|
||||
className="mb-3",
|
||||
style={"width": "100%"},
|
||||
)
|
||||
|
||||
@staticmethod
def create_error_alert():
    """Build the dismissable danger alert used for data upload errors.

    The alert is created closed (``is_open=False``) under the id
    ``upload-error-alert``.
    """
    alert_props = {
        "id": "upload-error-alert",
        "dismissable": True,
        "is_open": False,
        "color": "danger",
        "className": "mb-3",
    }
    return dbc.Alert(**alert_props)
|
||||
|
||||
@staticmethod
def create_success_alert():
    """Build the dismissable success alert used for successful uploads.

    The alert is created closed (``is_open=False``) under the id
    ``upload-success-alert``.
    """
    alert_props = {
        "id": "upload-success-alert",
        "dismissable": True,
        "is_open": False,
        "color": "success",
        "className": "mb-3",
    }
    return dbc.Alert(**alert_props)
|
||||
|
@@ -9,7 +9,7 @@ class AppLayout:
|
||||
|
||||
def create_layout(self):
    """Assemble the top-level fluid container for the application.

    The children are the header, the main content, and every component
    returned by ``self._create_stores()``, in that order.
    """
    return dbc.Container(
        [
            self._create_header(),
            self._create_main_content(),
            # _create_stores() yields several components; splice them in as
            # siblings instead of nesting a list inside the children.
            *self._create_stores(),
        ],
        fluid=True,
    )
|
||||
|
||||
|
Reference in New Issue
Block a user