# Source code for pysad.transform.projection.random_projector

from abc import abstractmethod
from sklearn.random_projection import SparseRandomProjection, GaussianRandomProjection
from pysad.core.base_transformer import BaseTransformer


class BaseSKLearnProjector(BaseTransformer):

    def __init__(self, num_components):
        """Abstract base projector class to wrap the random sklearn projectors.

        Subclasses provide the concrete sklearn estimator by implementing
        ``_projector``.

        Args:
            num_components (int): The number of dimensions that the target will be projected into.
        """
        super().__init__(num_components)
        # Created lazily on the first transform so that every sample of the
        # stream is projected with the same random matrix.
        self._fitted_projector = None

    @abstractmethod
    def _projector(self):
        """Helper method to wrap sklearn projectors.

        Declared as a plain method (not a property) because it is invoked as
        ``self._projector()`` and the subclasses override it as a method.

        Returns:
            object: A new, unfitted sklearn random projection estimator.
        """

    def _get_fitted_projector(self, x):
        """Returns the shared projector, fitting it on first use.

        Args:
            x (np.float64 array of shape (1, num_features)): Sample used to
                fit the projector when it does not exist yet.

        Returns:
            object: The fitted projector reused for all subsequent samples.
        """
        # getattr guards against subclasses that do not call ``__init__``.
        projector = getattr(self, "_fitted_projector", None)
        if projector is None:
            projector = self._projector()
            projector.fit(x)
            self._fitted_projector = projector
        return projector

    def fit_partial(self, X):
        """Fits particular (next) timestep's features to train the projector.

        This is a no-op: nothing is learned from ``X`` here. The projector is
        created and fitted on the first call to ``transform_partial``.

        Args:
            X (np.float64 array of shape (num_features,)): Input feature vector.

        Returns:
            object: self.
        """
        return self

    def transform_partial(self, X):
        """Projects particular (next) timestep's vector to (possibly) lower dimensional space.

        The projector is fitted once, on the first sample, and then reused.
        Fitting a fresh projector per call would draw a different random
        matrix for every sample, making the projected vectors incomparable
        across timesteps.

        Args:
            X (np.float64 array of shape (num_features,)): Input feature vector.

        Returns:
            projected_X: np.float64 array of shape (num_components,)
                Projected feature vector.
        """
        # sklearn estimators expect a 2D (num_samples, num_features) input.
        x = X.reshape(1, -1)

        return self._get_fitted_projector(x).transform(x).reshape(-1)


class GaussianRandomProjector(BaseSKLearnProjector):
    """Reduces dimensionality through Gaussian random projection.

    The components of the random matrix are drawn from N(0, 1 / n_components).
    This text is taken from the `Sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.random_projection.GaussianRandomProjection.html#sklearn.random_projection.GaussianRandomProjection>`_.

    Args:
        num_components (int or 'auto'): Dimensionality of the target projection
            space, optional (default = 'auto'). Passed to sklearn as
            ``n_components``. It can be automatically adjusted according to
            the number of samples in the dataset and the bound given by the
            Johnson-Lindenstrauss lemma. In that case the quality of the
            embedding is controlled by the ``eps`` parameter. It should be
            noted that the Johnson-Lindenstrauss lemma can yield very
            conservative estimates of the required number of components as it
            makes no assumption on the structure of the dataset.
        eps (strictly positive float, optional): (default=0.1) Keyword-only.
            Parameter to control the quality of the embedding according to
            the Johnson-Lindenstrauss lemma when ``num_components`` is set to
            'auto'. Smaller values lead to better embedding and higher number
            of dimensions in the target projection space.
    """

    def __init__(self, num_components='auto', *, eps=0.1):
        super().__init__(num_components)

        self.eps = eps
        self.num_components = num_components

    def _projector(self):
        """Builds a new sklearn projector from this instance's parameters.

        Returns:
            GaussianRandomProjection: An unfitted projector configured with
                ``num_components`` and ``eps``.
        """
        return GaussianRandomProjection(
            n_components=self.num_components, eps=self.eps)


class SparseRandomProjector(BaseSKLearnProjector):
    """The wrapper class for Sklearn's SparseRandomProjection.

    Reduces dimensionality through sparse random projection. A sparse random
    matrix is an alternative to a dense random projection matrix that
    guarantees similar embedding quality while being more memory efficient
    and allowing faster computation of the projected data.
    This text is taken from the `Sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.random_projection.SparseRandomProjection.html#sklearn.random_projection.SparseRandomProjection>`_.

    Args:
        num_components (int or 'auto'): Optional (default = 'auto')
            Dimensionality of the target projection space. Passed to sklearn
            as ``n_components``. It can be automatically adjusted according
            to the number of samples in the dataset and the bound given by
            the Johnson-Lindenstrauss lemma. In that case the quality of the
            embedding is controlled by the ``eps`` parameter. It should be
            noted that the Johnson-Lindenstrauss lemma can yield very
            conservative estimates of the required number of components as it
            makes no assumption on the structure of the dataset.
        density (float in range (0, 1] or 'auto'): Optional (default = 'auto')
            Ratio of non-zero components in the random projection matrix.
            With 'auto', sklearn uses the minimum density recommended by
            Ping Li et al.: 1 / sqrt(n_features).
        eps (strictly positive float): Optional (default=0.1) Parameter to
            control the quality of the embedding according to the
            Johnson-Lindenstrauss lemma when ``num_components`` is set to
            'auto'. Smaller values lead to better embedding and higher number
            of dimensions in the target projection space.
    """

    def __init__(self, num_components='auto', density="auto", eps=0.1):
        super().__init__(num_components)

        self.eps = eps
        self.density = density
        self.num_components = num_components

    def _projector(self):
        """Builds a new sklearn projector from this instance's parameters.

        Returns:
            SparseRandomProjection: An unfitted projector configured with
                ``num_components``, ``density`` and ``eps``.
        """
        # dense_output=True asks sklearn for a dense array result even when
        # the projection matrix is sparse.
        return SparseRandomProjection(
            n_components=self.num_components,
            density=self.density,
            eps=self.eps,
            dense_output=True)