refactor and add tests, v0.2.0
This commit is contained in:
95
src/embeddingbuddy/models/reducers.py
Normal file
95
src/embeddingbuddy/models/reducers.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import numpy as np
|
||||
from typing import Optional, Tuple
|
||||
from sklearn.decomposition import PCA
|
||||
import umap
|
||||
from openTSNE import TSNE
|
||||
from .schemas import ReducedData
|
||||
|
||||
|
||||
class DimensionalityReducer(ABC):
    """Abstract interface shared by every dimensionality-reduction backend.

    A concrete subclass supplies ``fit_transform`` (run the reduction and
    wrap the result in a ``ReducedData``) and ``get_method_name`` (a label
    for the method).
    """

    def __init__(self, n_components: int = 3, random_state: int = 42):
        """Store the configuration common to all reducers.

        Args:
            n_components: Number of output dimensions requested.
            random_state: Seed value kept for subclasses to hand to their
                underlying estimator.
        """
        # Populated by subclasses once they build their underlying estimator.
        self._reducer = None
        self.random_state = random_state
        self.n_components = n_components

    @abstractmethod
    def fit_transform(self, embeddings: np.ndarray) -> ReducedData:
        """Reduce ``embeddings`` and return the result as ``ReducedData``."""

    @abstractmethod
    def get_method_name(self) -> str:
        """Return the label identifying this reduction method."""
|
||||
|
||||
|
||||
class PCAReducer(DimensionalityReducer):
    """Reducer backed by scikit-learn's ``PCA``."""

    def fit_transform(self, embeddings: np.ndarray) -> ReducedData:
        """Fit PCA on ``embeddings`` and return the projected data.

        Args:
            embeddings: Array of input vectors handed to ``PCA.fit_transform``.

        Returns:
            A ``ReducedData`` holding the projected embeddings, the fitted
            model's ``explained_variance_ratio_``, the method name and the
            requested number of components.
        """
        # Forward the seed like the other reducers do: scikit-learn's
        # randomized/arpack solvers (which the default "auto" solver may
        # choose) are stochastic, so leaving it unset makes results
        # non-reproducible between runs.
        self._reducer = PCA(
            n_components=self.n_components,
            random_state=self.random_state,
        )
        reduced = self._reducer.fit_transform(embeddings)
        variance_explained = self._reducer.explained_variance_ratio_

        return ReducedData(
            reduced_embeddings=reduced,
            variance_explained=variance_explained,
            method=self.get_method_name(),
            n_components=self.n_components,
        )

    def get_method_name(self) -> str:
        """Return the label for this method, ``"PCA"``."""
        return "PCA"
|
||||
|
||||
|
||||
class TSNEReducer(DimensionalityReducer):
    """Reducer backed by openTSNE's ``TSNE``."""

    def fit_transform(self, embeddings: np.ndarray) -> ReducedData:
        """Fit t-SNE on ``embeddings`` and return the resulting embedding.

        Args:
            embeddings: Array of input vectors handed to ``TSNE.fit``.

        Returns:
            A ``ReducedData`` holding the embedding as a plain ``ndarray``,
            ``variance_explained=None``, the method name and the requested
            number of components.
        """
        tsne_options = {
            "n_components": self.n_components,
            "random_state": self.random_state,
        }
        if self.n_components > 2:
            # openTSNE's FFT/interpolation gradient only handles 1-D and 2-D
            # embeddings, and the default "auto" setting may select it for
            # large inputs. Pin Barnes-Hut so 3-D (the default here) and
            # higher keep working regardless of input size.
            tsne_options["negative_gradient_method"] = "bh"

        self._reducer = TSNE(**tsne_options)
        embedding = self._reducer.fit(embeddings)

        return ReducedData(
            # TSNE.fit returns a TSNEEmbedding (an ndarray subclass); hand
            # back a plain ndarray like the other reducers do.
            reduced_embeddings=np.asarray(embedding),
            variance_explained=None,
            method=self.get_method_name(),
            n_components=self.n_components,
        )

    def get_method_name(self) -> str:
        """Return the label for this method, ``"t-SNE"``."""
        return "t-SNE"
|
||||
|
||||
|
||||
class UMAPReducer(DimensionalityReducer):
    """Reducer backed by ``umap.UMAP``."""

    def get_method_name(self) -> str:
        """Return the label for this method, ``"UMAP"``."""
        return "UMAP"

    def fit_transform(self, embeddings: np.ndarray) -> ReducedData:
        """Fit UMAP on ``embeddings`` and return the projected data.

        Args:
            embeddings: Array of input vectors handed to
                ``UMAP.fit_transform``.

        Returns:
            A ``ReducedData`` holding the projected embeddings,
            ``variance_explained=None``, the method name and the requested
            number of components.
        """
        model = umap.UMAP(
            n_components=self.n_components,
            random_state=self.random_state,
        )
        # Keep a handle on the estimator before fitting, as callers may
        # inspect it afterwards.
        self._reducer = model
        projected = model.fit_transform(embeddings)

        result_fields = {
            "reduced_embeddings": projected,
            "variance_explained": None,
            "method": self.get_method_name(),
            "n_components": self.n_components,
        }
        return ReducedData(**result_fields)
|
||||
|
||||
|
||||
class ReducerFactory:
    """Build ``DimensionalityReducer`` instances from a method name."""

    # Canonical method keys, in the order reported by get_available_methods().
    # Single source of truth so the factory and the listing cannot drift.
    _METHOD_KEYS = ("pca", "tsne", "umap")

    @staticmethod
    def create_reducer(method: str, n_components: int = 3, random_state: int = 42) -> DimensionalityReducer:
        """Create the reducer registered under ``method``.

        The name is matched case-insensitively, ignoring surrounding
        whitespace and any ``-`` / ``_`` separators, so the display label
        ``"t-SNE"`` resolves just like the canonical key ``"tsne"``.

        Args:
            method: Name of the reduction method (``"pca"``, ``"tsne"`` or
                ``"umap"``, or a spelling that normalises to one of them).
            n_components: Number of output dimensions for the reducer.
            random_state: Seed value passed to the reducer.

        Returns:
            A new reducer instance for the requested method.

        Raises:
            ValueError: If ``method`` does not match a supported method.
        """
        method_key = method.lower().strip().replace("-", "").replace("_", "")

        # Validate first so an unknown name fails before anything is built.
        if method_key not in ReducerFactory._METHOD_KEYS:
            raise ValueError(f"Unknown reduction method: {method}")

        reducer_classes = {
            "pca": PCAReducer,
            "tsne": TSNEReducer,
            "umap": UMAPReducer,
        }
        return reducer_classes[method_key](
            n_components=n_components,
            random_state=random_state,
        )

    @staticmethod
    def get_available_methods() -> list:
        """Return the canonical method keys as a new list."""
        return list(ReducerFactory._METHOD_KEYS)
|
Reference in New Issue
Block a user