All checks were successful
Security Scan / dependency-check (pull_request) Successful in 35s
Security Scan / security (pull_request) Successful in 39s
Test Suite / lint (pull_request) Successful in 30s
Test Suite / test (3.11) (pull_request) Successful in 1m26s
Test Suite / build (pull_request) Successful in 37s
121 lines
4.4 KiB
Python
121 lines
4.4 KiB
Python
from dash import callback, Input, Output, State
|
|
from ...data.processor import DataProcessor
|
|
|
|
|
|
class DataProcessingCallbacks:
|
|
def __init__(self):
|
|
self.processor = DataProcessor()
|
|
self._register_callbacks()
|
|
|
|
def _register_callbacks(self):
|
|
@callback(
|
|
[
|
|
Output("processed-data", "data", allow_duplicate=True),
|
|
Output("upload-error-alert", "children", allow_duplicate=True),
|
|
Output("upload-error-alert", "is_open", allow_duplicate=True),
|
|
],
|
|
Input("upload-data", "contents"),
|
|
State("upload-data", "filename"),
|
|
prevent_initial_call=True,
|
|
)
|
|
def process_uploaded_file(contents, filename):
|
|
if contents is None:
|
|
return None, "", False
|
|
|
|
processed_data = self.processor.process_upload(contents, filename)
|
|
|
|
if processed_data.error:
|
|
error_message = self._format_error_message(
|
|
processed_data.error, filename
|
|
)
|
|
return (
|
|
{"error": processed_data.error},
|
|
error_message,
|
|
True, # Show error alert
|
|
)
|
|
|
|
return (
|
|
{
|
|
"documents": [
|
|
self._document_to_dict(doc) for doc in processed_data.documents
|
|
],
|
|
"embeddings": processed_data.embeddings.tolist(),
|
|
},
|
|
"",
|
|
False, # Hide error alert
|
|
)
|
|
|
|
@callback(
|
|
Output("processed-prompts", "data", allow_duplicate=True),
|
|
Input("upload-prompts", "contents"),
|
|
State("upload-prompts", "filename"),
|
|
prevent_initial_call=True,
|
|
)
|
|
def process_uploaded_prompts(contents, filename):
|
|
if contents is None:
|
|
return None
|
|
|
|
processed_data = self.processor.process_upload(contents, filename)
|
|
|
|
if processed_data.error:
|
|
return {"error": processed_data.error}
|
|
|
|
return {
|
|
"prompts": [
|
|
self._document_to_dict(doc) for doc in processed_data.documents
|
|
],
|
|
"embeddings": processed_data.embeddings.tolist(),
|
|
}
|
|
|
|
@staticmethod
|
|
def _document_to_dict(doc):
|
|
return {
|
|
"id": doc.id,
|
|
"text": doc.text,
|
|
"embedding": doc.embedding,
|
|
"category": doc.category,
|
|
"subcategory": doc.subcategory,
|
|
"tags": doc.tags,
|
|
}
|
|
|
|
@staticmethod
|
|
def _format_error_message(error: str, filename: str | None = None) -> str:
|
|
"""Format error message with helpful guidance for users."""
|
|
file_part = f" in file '{filename}'" if filename else ""
|
|
|
|
# Check for common error patterns and provide helpful messages
|
|
if "embedding" in error.lower() and (
|
|
"key" in error.lower() or "required field" in error.lower()
|
|
):
|
|
return (
|
|
f"❌ Missing 'embedding' field{file_part}. "
|
|
"Each line must contain an 'embedding' field with a list of numbers."
|
|
)
|
|
elif "text" in error.lower() and (
|
|
"key" in error.lower() or "required field" in error.lower()
|
|
):
|
|
return (
|
|
f"❌ Missing 'text' field{file_part}. "
|
|
"Each line must contain a 'text' field with the document content."
|
|
)
|
|
elif "json" in error.lower() and "decode" in error.lower():
|
|
return (
|
|
f"❌ Invalid JSON format{file_part}. "
|
|
"Please check that each line is valid JSON with proper syntax (quotes, braces, etc.)."
|
|
)
|
|
elif "unicode" in error.lower() or "decode" in error.lower():
|
|
return (
|
|
f"❌ File encoding issue{file_part}. "
|
|
"Please ensure the file is saved in UTF-8 format and contains no binary data."
|
|
)
|
|
elif "array" in error.lower() or "list" in error.lower():
|
|
return (
|
|
f"❌ Invalid embedding format{file_part}. "
|
|
"Embeddings must be arrays/lists of numbers, not strings or other types."
|
|
)
|
|
else:
|
|
return (
|
|
f"❌ Error processing file{file_part}: {error}. "
|
|
"Please check that your file is valid NDJSON with required 'text' and 'embedding' fields."
|
|
)
|