# Source code for pysad.models.loda

from pysad.core.base_model import BaseModel
import numpy as np


class LODA(BaseModel):
    """The LODA model :cite:`pevny2016loda`.

    The implementation is adapted to the streaming framework from the
    `PyOD framework <https://pyod.readthedocs.io/en/latest/_modules/pyod/models/loda.html#LODA>`_.

    Args:
        num_bins (int): The number of bins of the histogram.
        num_random_cuts (int): The number of random cuts.
    """

    def __init__(self, num_bins=10, num_random_cuts=100):
        # Projections, histograms and bin limits depend on the number of
        # features, so they are allocated lazily on the first call to
        # ``fit_partial``; ``to_init`` tracks whether that has happened yet.
        self.to_init = True
        self.n_bins = num_bins
        self.n_random_cuts = num_random_cuts

    def fit_partial(self, X, y=None):
        """Fits the model to next instance.

        On the first call the random projection vectors are drawn and
        sparsified, and the histogram/limit buffers are allocated. On every
        call the per-projection histogram and bin limits are recomputed from
        the given instance.

        Args:
            X (np.float64 array of shape (num_features,)): The instance to fit.
            y (int): Ignored since the model is unsupervised (Default=None).

        Returns:
            object: Returns the self.
        """
        if self.to_init:
            self.num_features = X.shape[0]
            # Uniform weight for every random cut.
            self.weights = np.ones(
                self.n_random_cuts, dtype=np.float64) / self.n_random_cuts
            self.projections_ = np.random.randn(
                self.n_random_cuts, self.num_features)
            self.histograms_ = np.zeros((self.n_random_cuts, self.n_bins))
            self.limits_ = np.zeros((self.n_random_cuts, self.n_bins + 1))

            # Each projection keeps about sqrt(num_features) non-zero
            # components; the remaining ones are zeroed.
            n_nonzero_components = int(np.sqrt(self.num_features))
            self.n_zero_components = self.num_features - n_nonzero_components

            # Sparsify each projection exactly once. Doing this on every
            # call (as before) zeroes additional random components each
            # time, so all projections collapse to the zero vector after a
            # few instances.
            for i in range(self.n_random_cuts):
                zeroed = np.random.permutation(self.num_features)[
                    :self.n_zero_components]
                self.projections_[i, zeroed] = 0.

            self.to_init = False

        X = X.reshape(1, -1)

        # NOTE(review): the histogram of each projection is rebuilt from the
        # current instance alone, replacing whatever was stored by earlier
        # calls; confirm whether accumulation across instances is intended.
        for i in range(self.n_random_cuts):
            projected_data = self.projections_[i, :].dot(X.T)
            self.histograms_[i, :], self.limits_[i, :] = np.histogram(
                projected_data, bins=self.n_bins, density=False)
            # Small offset avoids log(0) in score_partial for empty bins.
            self.histograms_[i, :] += 1e-12
            self.histograms_[i, :] /= np.sum(self.histograms_[i, :])

        return self

    def score_partial(self, X):
        """Scores the anomalousness of the next instance.

        Must be called after at least one ``fit_partial`` call, since it
        reads the projections, histograms and limits created there.

        Args:
            X (np.float64 array of shape (num_features,)): The instance to
                score. Higher scores represent more anomalous instances
                whereas lower scores correspond to more normal instances.

        Returns:
            np.float64 array of shape (1,): The anomalousness score of the
                input instance.
        """
        X = X.reshape(1, -1)
        pred_scores = np.zeros([X.shape[0], 1])

        for i in range(self.n_random_cuts):
            projected_data = self.projections_[i, :].dot(X.T)
            # Locate the histogram bin of the projected value; only the
            # first n_bins - 1 limits are searched so the resulting index
            # always stays within the n_bins histogram entries.
            inds = np.searchsorted(self.limits_[i, :self.n_bins - 1],
                                   projected_data, side='left')
            # Weighted negative log-probability of the bin.
            pred_scores[:, 0] += -self.weights[i] * np.log(
                self.histograms_[i, inds])

        pred_scores /= self.n_random_cuts

        return pred_scores.ravel()