Some checks failed
Security Scan / dependency-check (pull_request) Successful in 35s
Security Scan / security (pull_request) Successful in 40s
Test Suite / lint (pull_request) Failing after 40s
Test Suite / test (3.11) (pull_request) Successful in 1m26s
Test Suite / build (pull_request) Has been skipped
96 lines
2.9 KiB
Python
96 lines
2.9 KiB
Python
from abc import ABC, abstractmethod
|
|
import numpy as np
|
|
from sklearn.decomposition import PCA
|
|
import umap
|
|
from openTSNE import TSNE
|
|
from .schemas import ReducedData
|
|
|
|
|
|
class DimensionalityReducer(ABC):
    """Abstract base for strategies that project embeddings to fewer dimensions.

    Concrete reducers must implement ``fit_transform`` and
    ``get_method_name``; the constructor only records shared settings.
    """

    def __init__(self, n_components: int = 3, random_state: int = 42):
        """Record the settings shared by every reducer.

        Args:
            n_components: Number of output dimensions requested.
            random_state: Seed value kept for subclasses to hand to their
                backend.
        """
        # Backend estimator slot; concrete reducers fill it in fit_transform.
        self._reducer = None
        self.n_components, self.random_state = n_components, random_state

    @abstractmethod
    def fit_transform(self, embeddings: np.ndarray) -> ReducedData:
        """Fit on ``embeddings`` and return the reduced result.

        Args:
            embeddings: Input array; presumably (n_samples, n_features) --
                confirm against callers.
        """

    @abstractmethod
    def get_method_name(self) -> str:
        """Return the name identifying the reduction method."""
|
|
|
|
|
|
class PCAReducer(DimensionalityReducer):
    """Reducer backed by scikit-learn's ``PCA``."""

    def fit_transform(self, embeddings: np.ndarray) -> ReducedData:
        """Project ``embeddings`` onto their leading principal components.

        Args:
            embeddings: Array handed to ``PCA.fit_transform``; presumably
                shaped (n_samples, n_features) -- confirm against callers.

        Returns:
            ReducedData holding the projected array, the per-component
            explained-variance ratios, the method name and ``n_components``.
        """
        # Forward the stored seed, as the t-SNE and UMAP reducers do. PCA
        # consults random_state when it uses the randomized or arpack solver,
        # so omitting it made repeated runs potentially non-reproducible.
        self._reducer = PCA(
            n_components=self.n_components, random_state=self.random_state
        )
        reduced = self._reducer.fit_transform(embeddings)
        variance_explained = self._reducer.explained_variance_ratio_

        return ReducedData(
            reduced_embeddings=reduced,
            variance_explained=variance_explained,
            method=self.get_method_name(),
            n_components=self.n_components,
        )

    def get_method_name(self) -> str:
        """Return the name identifying this method: ``"PCA"``."""
        return "PCA"
|
|
|
|
|
|
class TSNEReducer(DimensionalityReducer):
    """Reducer backed by openTSNE's ``TSNE``."""

    def fit_transform(self, embeddings: np.ndarray) -> ReducedData:
        """Compute a t-SNE embedding of ``embeddings``.

        Args:
            embeddings: Array handed to ``TSNE.fit``; presumably shaped
                (n_samples, n_features) -- confirm against callers.

        Returns:
            ReducedData holding the embedding as a plain ``numpy.ndarray``,
            ``variance_explained=None`` (t-SNE has no such measure), the
            method name and ``n_components``.
        """
        self._reducer = TSNE(
            n_components=self.n_components, random_state=self.random_state
        )
        embedding = self._reducer.fit(embeddings)

        # openTSNE returns a TSNEEmbedding (an ndarray subclass that carries
        # optimizer state). Hand out a base-class ndarray so the payload type
        # matches what the PCA and UMAP reducers produce.
        reduced = np.asarray(embedding)

        return ReducedData(
            reduced_embeddings=reduced,
            variance_explained=None,
            method=self.get_method_name(),
            n_components=self.n_components,
        )

    def get_method_name(self) -> str:
        """Return the name identifying this method: ``"t-SNE"``."""
        return "t-SNE"
|
|
|
|
|
|
class UMAPReducer(DimensionalityReducer):
    """Reducer backed by ``umap.UMAP``."""

    def fit_transform(self, embeddings: np.ndarray) -> ReducedData:
        """Compute a UMAP projection of ``embeddings``.

        Args:
            embeddings: Array handed to ``UMAP.fit_transform``; presumably
                shaped (n_samples, n_features) -- confirm against callers.

        Returns:
            ReducedData holding the projected array, ``variance_explained``
            set to ``None``, the method name and ``n_components``.
        """
        projector = umap.UMAP(
            n_components=self.n_components,
            random_state=self.random_state,
        )
        # Keep the fitted backend reachable on the instance.
        self._reducer = projector
        coordinates = projector.fit_transform(embeddings)

        result_fields = {
            "reduced_embeddings": coordinates,
            "variance_explained": None,
            "method": self.get_method_name(),
            "n_components": self.n_components,
        }
        return ReducedData(**result_fields)

    def get_method_name(self) -> str:
        """Return the name identifying this method: ``"UMAP"``."""
        return "UMAP"
|
|
|
|
|
|
class ReducerFactory:
    """Build ``DimensionalityReducer`` instances from a method name."""

    # Canonical method keys, in the order reported by get_available_methods().
    _METHODS = ("pca", "tsne", "umap")

    @staticmethod
    def create_reducer(
        method: str, n_components: int = 3, random_state: int = 42
    ) -> DimensionalityReducer:
        """Instantiate the reducer registered under ``method``.

        Matching ignores case, surrounding whitespace, hyphens and
        underscores, so ``"tsne"``, ``"t-SNE"`` and ``" T_SNE "`` all select
        the t-SNE reducer.

        Args:
            method: Name of the reduction method.
            n_components: Number of output dimensions passed to the reducer.
            random_state: Seed passed to the reducer.

        Returns:
            A new reducer instance of the requested kind.

        Raises:
            ValueError: If ``method`` is not a string or names no known
                reduction method.
        """
        if isinstance(method, str):
            # Drop separators so the display name "t-SNE" resolves as well.
            key = method.strip().lower().replace("-", "").replace("_", "")
        else:
            key = None

        if key not in ReducerFactory._METHODS:
            available = ", ".join(ReducerFactory._METHODS)
            raise ValueError(
                f"Unknown reduction method: {method} (available: {available})"
            )

        # Resolved lazily, after validation, so a bad name fails fast.
        reducer_classes = {
            "pca": PCAReducer,
            "tsne": TSNEReducer,
            "umap": UMAPReducer,
        }
        return reducer_classes[key](
            n_components=n_components, random_state=random_state
        )

    @staticmethod
    def get_available_methods() -> list:
        """Return the canonical method names as a fresh list of strings."""
        return list(ReducerFactory._METHODS)
|