This commit is contained in:
2025-08-12 15:58:13 -07:00
parent 89df126b1e
commit 6ddd2a4771
2 changed files with 39 additions and 34 deletions

View File

@@ -38,13 +38,14 @@ EmbeddingBuddy accepts newline-delimited JSON (NDJSON) files where each line con
- `subcategory`: Secondary classification - `subcategory`: Secondary classification
- `tags`: Array of string tags for flexible labeling - `tags`: Array of string tags for flexible labeling
## Features (Initial Version) ## Features
- **Drag-and-drop file upload** for NDJSON embedding datasets - **Drag-and-drop file upload** for NDJSON embedding datasets
- **PCA dimensionality reduction** (automatically applied) - **Multiple dimensionality reduction methods**: PCA, t-SNE, and UMAP
- **Interactive 2D/3D visualizations** with toggle between views - **Interactive 2D/3D visualizations** with toggle between views
- **Color coding options** by category, subcategory, or tags - **Color coding options** by category, subcategory, or tags
- **Point inspection** - click points to view full document content - **Point inspection** - click points to view full document content
- **Sidebar layout** with controls on left, large visualization area on right
- **Real-time visualization** optimized for small to medium datasets - **Real-time visualization** optimized for small to medium datasets
## Installation & Usage ## Installation & Usage
@@ -72,5 +73,7 @@ uv run python app.py
- **Python Dash**: Web application framework - **Python Dash**: Web application framework
- **Plotly**: Interactive plotting and visualization - **Plotly**: Interactive plotting and visualization
- **scikit-learn**: PCA implementation - **scikit-learn**: PCA implementation
- **UMAP-learn**: UMAP dimensionality reduction
- **openTSNE**: Fast t-SNE implementation
- **NumPy/Pandas**: Data manipulation and analysis - **NumPy/Pandas**: Data manipulation and analysis
- **uv**: Modern Python package and project manager - **uv**: Modern Python package and project manager

66
app.py
View File

@@ -94,7 +94,11 @@ def create_plot(df, dimensions='3d', color_by='category', method='PCA'):
) )
fig.update_traces(marker=dict(size=8)) fig.update_traces(marker=dict(size=8))
fig.update_layout(height=600) fig.update_layout(
height=None, # Let CSS height control this
autosize=True,
margin=dict(l=0, r=0, t=50, b=0)
)
return fig return fig
# Layout # Layout
@@ -102,12 +106,13 @@ app.layout = dbc.Container([
dbc.Row([ dbc.Row([
dbc.Col([ dbc.Col([
html.H1("EmbeddingBuddy", className="text-center mb-4"), html.H1("EmbeddingBuddy", className="text-center mb-4"),
html.P("Upload NDJSON file with embeddings to visualize", className="text-center text-muted") ], width=12)
])
]), ]),
dbc.Row([ dbc.Row([
# Left sidebar with controls
dbc.Col([ dbc.Col([
html.H5("Upload Data", className="mb-3"),
dcc.Upload( dcc.Upload(
id='upload-data', id='upload-data',
children=html.Div([ children=html.Div([
@@ -122,15 +127,13 @@ app.layout = dbc.Container([
'borderStyle': 'dashed', 'borderStyle': 'dashed',
'borderRadius': '5px', 'borderRadius': '5px',
'textAlign': 'center', 'textAlign': 'center',
'margin': '10px' 'margin-bottom': '20px'
}, },
multiple=False multiple=False
) ),
])
]), html.H5("Visualization Controls", className="mb-3"),
dbc.Row([
dbc.Col([
dbc.Label("Method:"), dbc.Label("Method:"),
dcc.Dropdown( dcc.Dropdown(
id='method-dropdown', id='method-dropdown',
@@ -140,10 +143,9 @@ app.layout = dbc.Container([
{'label': 'UMAP', 'value': 'umap'} {'label': 'UMAP', 'value': 'umap'}
], ],
value='pca', value='pca',
style={'margin-bottom': '10px'} style={'margin-bottom': '15px'}
) ),
], width=4),
dbc.Col([
dbc.Label("Color by:"), dbc.Label("Color by:"),
dcc.Dropdown( dcc.Dropdown(
id='color-dropdown', id='color-dropdown',
@@ -153,10 +155,9 @@ app.layout = dbc.Container([
{'label': 'Tags', 'value': 'tags'} {'label': 'Tags', 'value': 'tags'}
], ],
value='category', value='category',
style={'margin-bottom': '10px'} style={'margin-bottom': '15px'}
) ),
], width=4),
dbc.Col([
dbc.Label("Dimensions:"), dbc.Label("Dimensions:"),
dcc.RadioItems( dcc.RadioItems(
id='dimension-toggle', id='dimension-toggle',
@@ -165,21 +166,22 @@ app.layout = dbc.Container([
{'label': '3D', 'value': '3d'} {'label': '3D', 'value': '3d'}
], ],
value='3d', value='3d',
inline=True style={'margin-bottom': '20px'}
),
html.H5("Point Details", className="mb-3"),
html.Div(id='point-details', children="Click on a point to see details")
], width=3, style={'padding-right': '20px'}),
# Main visualization area
dbc.Col([
dcc.Graph(
id='embedding-plot',
style={'height': '85vh', 'width': '100%'},
config={'responsive': True, 'displayModeBar': True}
) )
], width=4) ], width=9)
], className="mb-3"),
dbc.Row([
dbc.Col([
dcc.Graph(id='embedding-plot')
])
]),
dbc.Row([
dbc.Col([
html.Div(id='point-details', style={'margin-top': '20px'})
])
]), ]),
dcc.Store(id='processed-data') dcc.Store(id='processed-data')