import numpy as np
from pysad.core.base_transformer import BaseTransformer
class StreamhashProjector(BaseTransformer):
    """Streamhash projection method from Manzoor et al. that is similar (or equivalent) to SparseRandomProjection. :cite:`xstream` The implementation is taken from the `cmuxstream-core repository <https://github.com/cmuxstream/cmuxstream-core>`_.

    Args:
        num_components (int): The number of dimensions that the target will be projected into.
        density (float): Density parameter of the streamhash projection.
    """

    def __init__(self, num_components, density=1 / 3.0):
        super().__init__(num_components)

        # Integers 0 .. num_components - 1. Not read anywhere in this class as
        # shown; presumably used as the per-component seeds `k` handed to
        # `_hash_string` -- TODO confirm against the caller.
        self.keys = np.arange(0, num_components, 1)

        # Magnitude of every non-zero value returned by `_hash_string`:
        # sqrt(1 / density) / sqrt(num_components).
        self.constant = np.sqrt(1. / density) / np.sqrt(num_components)
        self.density = density
        self.n_components = num_components

    def fit_partial(self, X):
        """Fits particular (next) timestep's features to train the projector.

        This is a no-op: the argument is ignored and no state is updated.

        Args:
            X (np.float64 array): Input feature vector (unused).

        Returns:
            object: self.
        """
        return self

    def _hash_string(self, k, s):
        """Maps the pair (seed, key) to one of ``-constant``, ``+constant`` or ``0``.

        The key is hashed with 32-bit MurmurHash3 and the unsigned result is
        scaled into ``[0, 1]``. The scaled value is then compared against the
        density thresholds.

        Args:
            k (int): Seed passed to ``mmh3.hash``.
            s (str): Key to be hashed.

        Returns:
            float or int: ``-self.constant`` if the scaled hash is at most
            ``density / 2``, ``self.constant`` if it is at most ``density``,
            and ``0`` otherwise.
        """
        # Kept as a function-scope import, as in the original, so importing
        # this module does not itself require mmh3.
        import mmh3

        # Unsigned 32-bit hash divided by its largest possible value.
        hash_value = int(mmh3.hash(s, signed=False, seed=k)) / (2.0 ** 32 - 1)

        # Use a dedicated local instead of overwriting the `s` parameter.
        density = self.density
        if hash_value <= density / 2.0:
            return -1 * self.constant
        elif hash_value <= density:
            return self.constant
        else:
            return 0