Python sklearn.decomposition Examples
The following are 13 code examples of the sklearn.decomposition module.
The source project, file, and license are noted above each example.
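Before the project snippets, here is a minimal, self-contained use of the module on toy data (everything below is illustrative, not taken from any of the projects):

import numpy as np
from sklearn import decomposition

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 10))  # toy data: 100 samples, 10 features

pca = decomposition.PCA(n_components=3)
X_reduced = pca.fit_transform(X)             # project onto the top 3 components
print(X_reduced.shape)                       # (100, 3)
print(pca.explained_variance_ratio_.sum())   # fraction of variance retained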
Example #1
Source File: decomposition.py From tridesclous with MIT License
def __init__(self, catalogueconstructor=None, selection=None, n_components=5, **params):
    cc = catalogueconstructor
    self.n_components = n_components
    self.waveforms = cc.get_some_waveforms()

    if selection is None:
        waveforms = self.waveforms
    else:
        peaks_index, = np.nonzero(selection)
        waveforms = cc.get_some_waveforms(peaks_index=peaks_index)

    flatten_waveforms = waveforms.reshape(waveforms.shape[0], -1)

    #~ self.pca = sklearn.decomposition.IncrementalPCA(n_components=n_components, **params)
    self.pca = sklearn.decomposition.TruncatedSVD(n_components=n_components, **params)
    self.pca.fit(flatten_waveforms)

    # In GlobalPCA, every feature represents all channels
    self.channel_to_features = np.ones((cc.nb_channel, self.n_components), dtype='bool')
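For readers without the tridesclous catalogue objects, here is a standalone sketch of the same flatten-then-SVD pattern; the waveform shapes are invented:

import numpy as np
import sklearn.decomposition

rng = np.random.default_rng(0)
waveforms = rng.standard_normal((500, 40, 4))     # (spikes, samples, channels), made up

flat = waveforms.reshape(waveforms.shape[0], -1)  # one row per spike
svd = sklearn.decomposition.TruncatedSVD(n_components=5)
features = svd.fit_transform(flat)                # (500, 5) global features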
Example #2
Source File: decomposition.py From tridesclous with MIT License
def transform(self, waveforms):
    features = waveforms[:, self.ind_peak, :].copy()
    return features
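This method simply keeps the waveform value at the peak sample of each channel as the feature vector. A standalone equivalent, with the peak index and shapes assumed for illustration:

import numpy as np

rng = np.random.default_rng(0)
waveforms = rng.standard_normal((300, 40, 4))  # (spikes, samples, channels), made up
ind_peak = 15                                  # assumed index of the peak sample

features = waveforms[:, ind_peak, :].copy()    # one feature per channel, shape (300, 4)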
Example #3
Source File: test_monkeypatch.py From daal4py with Apache License 2.0
def test_monkey_patching(self):
    _tokens = daal4py.sklearn.sklearn_patch_names()
    self.assertTrue(isinstance(_tokens, list) and len(_tokens) > 0)
    for t in _tokens:
        daal4py.sklearn.unpatch_sklearn(t)
    for t in _tokens:
        daal4py.sklearn.patch_sklearn(t)

    import sklearn
    for a in [(sklearn.decomposition, 'PCA'),
              (sklearn.linear_model, 'Ridge'),
              (sklearn.linear_model, 'LinearRegression'),
              (sklearn.cluster, 'KMeans'),
              (sklearn.svm, 'SVC')]:
        class_module = getattr(a[0], a[1]).__module__
        self.assertTrue(class_module.startswith('daal4py'))
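Assuming daal4py's documented patching entry points (a no-argument patch_sklearn patches every supported estimator), typical usage outside the test looks like this; the printed module name is what the test asserts on:

import daal4py.sklearn

daal4py.sklearn.patch_sklearn()        # route supported estimators through daal4py

import sklearn.decomposition
# After patching, PCA should resolve to the daal4py-backed implementation
print(sklearn.decomposition.PCA.__module__)  # expected to start with 'daal4py'

daal4py.sklearn.unpatch_sklearn()      # restore stock scikit-learn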
Example #4
Source File: feature_extraction.py From retentioneering-tools with Mozilla Public License 2.0
def get_manifold(data, manifold_type, **kwargs):
    """
    Reduces number of dimensions.

    Parameters
    ----------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    manifold_type: str
        Name of the dimensionality reduction method from ``sklearn.decomposition``
        or ``sklearn.manifold``
    kwargs: optional
        Parameters for ``sklearn.decomposition`` and ``sklearn.manifold`` methods.

    Returns
    -------
    pd.DataFrame
        Dataframe with reduced dimensions.
    """
    if hasattr(decomposition, manifold_type):
        man = getattr(decomposition, manifold_type)
    elif hasattr(manifold, manifold_type):
        man = getattr(manifold, manifold_type)
    else:
        raise ValueError(f'There is no such manifold: {manifold_type}')

    tsvd = man(**{i: j for i, j in kwargs.items() if i in man.get_params(man)})
    res = tsvd.fit_transform(data)
    return pd.DataFrame(res, index=data.index)
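A hypothetical call on made-up features; note the helper calls ``get_params`` on the estimator class itself, which relies on the older scikit-learn versions the project targets:

import numpy as np
import pandas as pd

features = pd.DataFrame(np.random.rand(50, 20),
                        index=[f'user_{i}' for i in range(50)])  # invented features

reduced = get_manifold(features, 'PCA', n_components=2)  # (50, 2), same index as input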
Example #5
Source File: feature_extraction.py From retentioneering-tools with Mozilla Public License 2.0
def merge_features(features, metadata, meta_index_col=None, manifold_type=None,
                   fillna=None, drop=False, **kwargs):
    """
    Adds metadata to the TF-IDF features of trajectories. The features are reduced
    first if ``manifold_type`` is not ``None``.

    Parameters
    ----------
    features: pd.DataFrame
        Dataframe with trajectory features.
    metadata: pd.DataFrame
        Dataframe with user or session properties or any other information you would
        like to extract as features (e.g. user properties, LTV values, etc.).
        Default: ``None``
    meta_index_col: str, optional
        Used when metadata is not ``None``. Name of the column in the ``metadata``
        dataframe that contains the same ID as in ``index_col``, or, if not defined,
        the same as in retention_config (e.g. ID of users or sessions). If ``None``,
        the index of the metadata dataframe is used instead. Default: ``None``
    manifold_type: str, optional
        Name of the dimensionality reduction method from ``sklearn.decomposition``
        or ``sklearn.manifold``. Default: ``None``
    fillna: optional
        Value for filling missing metadata for any ``index_col`` value. Default: ``None``
    drop: bool, optional
        If ``True``, drops users that do not exist in the ``metadata`` dataframe.
        Default: ``False``
    kwargs: optional
        Keyword arguments for ``sklearn.decomposition`` and ``sklearn.manifold`` methods.

    Returns
    -------
    pd.DataFrame
        Dataframe with trajectory features (possibly reduced) and user metadata.
    """
    if manifold_type is not None:
        features = get_manifold(features, manifold_type, **kwargs)

    if meta_index_col is not None:
        metadata.index = metadata[meta_index_col].values
        metadata = metadata.drop(meta_index_col, axis=1)

    res = features.join(metadata, rsuffix='_meta')

    if drop and (fillna is None):
        res = res[res.isnull().sum(1) == 0].copy()
    if fillna is not None:
        res = res.fillna(fillna)
    return res
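A hypothetical end-to-end call with toy frames (IDs and column names are invented):

import numpy as np
import pandas as pd

features = pd.DataFrame(np.eye(3), index=['u1', 'u2', 'u3'])             # toy features
metadata = pd.DataFrame({'user_id': ['u1', 'u2'], 'ltv': [10.0, 20.0]})  # toy metadata

merged = merge_features(features, metadata, meta_index_col='user_id', fillna=0)
# 'u3' has no metadata, so its 'ltv' column is filled with 0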
Example #6
Source File: decomposition.py From tridesclous with MIT License
def transform(self, waveforms):
    flatten_waveforms = waveforms.reshape(waveforms.shape[0], -1)
    return self.lda.transform(flatten_waveforms)
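The fitted ``self.lda`` is created elsewhere in the class; as a standalone sketch under assumed shapes and labels, the same projection can be reproduced with scikit-learn's LDA on flattened waveforms:

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.default_rng(0)
waveforms = rng.standard_normal((300, 40, 4))  # (spikes, samples, channels), made up
labels = rng.integers(0, 3, size=300)          # hypothetical cluster labels

flat = waveforms.reshape(waveforms.shape[0], -1)
lda = LinearDiscriminantAnalysis(n_components=2)
features = lda.fit(flat, labels).transform(flat)  # (300, 2) supervised projection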
Example #7
Source File: cleancluster.py From tridesclous with MIT License
def _compute_one_dip_test(cc, dirname, chan_grp, label, n_components_local_pca, adjacency_radius_um):
    # compute dip test to try to over split
    from .dataio import DataIO
    from .catalogueconstructor import CatalogueConstructor

    if cc is None:
        dataio = DataIO(dirname)
        cc = CatalogueConstructor(dataio=dataio, chan_grp=chan_grp)

    peak_sign = cc.info['peak_detector_params']['peak_sign']
    dense_mode = cc.info['mode'] == 'dense'
    n_left = cc.info['waveform_extractor_params']['n_left']
    n_right = cc.info['waveform_extractor_params']['n_right']
    peak_width = n_right - n_left
    nb_channel = cc.nb_channel

    if dense_mode:
        channel_adjacency = {c: np.arange(nb_channel) for c in range(nb_channel)}
    else:
        channel_adjacency = {}
        for c in range(nb_channel):
            nearest, = np.nonzero(cc.channel_distances[c, :] < adjacency_radius_um)
            channel_adjacency[c] = nearest

    waveforms, wf_flat, peak_index = _get_sparse_waveforms_flatten(
        cc, dense_mode, label, channel_adjacency,
        n_spike_for_centroid=cc.n_spike_for_centroid)

    #~ pca = sklearn.decomposition.IncrementalPCA(n_components=n_components_local_pca, whiten=True)
    n_components = min(wf_flat.shape[1] - 1, n_components_local_pca)
    pca = sklearn.decomposition.TruncatedSVD(n_components=n_components)
    feats = pca.fit_transform(wf_flat)
    pval = diptest(np.sort(feats[:, 0]), numt=200)

    return pval
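The ``diptest`` call appears to come from tridesclous' own dip-test helper rather than a PyPI package, so only the projection step is sketched here, on random data:

import numpy as np
import sklearn.decomposition

rng = np.random.default_rng(0)
wf_flat = rng.standard_normal((500, 160))  # flattened sparse waveforms, made up

n_components = min(wf_flat.shape[1] - 1, 5)
svd = sklearn.decomposition.TruncatedSVD(n_components=n_components)
feats = svd.fit_transform(wf_flat)

first_axis = np.sort(feats[:, 0])  # the sorted first component is what feeds the dip test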
Example #8
Source File: common.py From deep-image-retrieval with BSD 3-Clause "New" or "Revised" License
def transform(pca, X, whitenp=0.5, whitenv=None, whitenm=1.0, use_sklearn=True):
    if use_sklearn:
        # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/decomposition/base.py#L99
        if pca.mean_ is not None:
            X = X - pca.mean_
        X_transformed = np.dot(X, pca.components_[:whitenv].T)
        if pca.whiten:
            X_transformed /= whitenm * np.power(pca.explained_variance_[:whitenv], whitenp)
    else:
        X = X - pca['means']
        X_transformed = np.dot(X, pca['W'])
    return X_transformed
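With the default arguments (whitenp=0.5, whitenv=None, whitenm=1.0), this reproduces scikit-learn's own whitened projection, which can be checked directly on toy data:

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).randn(200, 16)
pca = PCA(n_components=4, whiten=True).fit(X)

# the manual path divides by explained_variance_ ** 0.5, exactly sklearn's whitening
assert np.allclose(transform(pca, X), pca.transform(X))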
Example #9
Source File: test_custom_schemas.py From lale with Apache License 2.0
def setUp(self):
    import sklearn.decomposition
    import lale.lib.sklearn
    from lale.operators import make_operator
    self.sk_pca = make_operator(sklearn.decomposition.PCA, schemas={})
    self.ll_pca = lale.lib.sklearn.PCA
    self.maxDiff = None
Example #10
Source File: pca.py From lale with Apache License 2.0
def __init__(self, **hyperparams):
    self._hyperparams = hyperparams
    self._wrapped_model = sklearn.decomposition.PCA(**self._hyperparams)
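This is only the constructor of lale's PCA wrapper; the rest of such a wrapper presumably delegates to the wrapped estimator. A hypothetical sketch of that delegation, not the actual lale source:

# Hypothetical companion methods for the same wrapper class (assumed, not from lale)
def fit(self, X, y=None):
    self._wrapped_model.fit(X)   # delegate training to the wrapped sklearn PCA
    return self

def transform(self, X):
    return self._wrapped_model.transform(X)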
Example #11
Source File: signal.py From gumpy with MIT License
def dwt(raw_eeg_data, level, **kwargs):
    """Multilevel Discrete Wavelet Transform (DWT).

    Compute the DWT for a raw EEG signal on multiple levels.

    Args:
        raw_eeg_data (array_like): input data
        level (int >= 0): decomposition levels
        **kwargs: Additional arguments that will be forwarded to ``pywt.wavedec``

    Returns:
        A 2-element tuple containing

        - **float**: mean value of the first decomposition coefficients
        - **list**: list of mean values for the individual (detail) decomposition coefficients
    """
    wt_coeffs = pywt.wavedec(data=raw_eeg_data, level=level, **kwargs)

    # A7: 0 Hz - 1 Hz
    cAL_mean = np.nanmean(wt_coeffs[0], axis=0)
    details = []

    # For Fs = 128 Hz
    for i in range(1, level + 1):
        # D7: 1 Hz - 2 Hz
        cDL_mean = np.nanmean(wt_coeffs[i], axis=0)
        details.append(cDL_mean)

    return cAL_mean, details
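A hypothetical call; the wavelet is forwarded through ``**kwargs`` to ``pywt.wavedec``, so it must be supplied ('db4' here is just an example choice):

import numpy as np

rng = np.random.default_rng(0)
eeg = rng.standard_normal(1024)  # made-up single-channel EEG trace

approx_mean, detail_means = dwt(eeg, level=6, wavelet='db4')
print(len(detail_means))  # 6 detail levels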
Example #12
Source File: signal.py From gumpy with MIT License
def artifact_removal(X, n_components=None, check_result=True):
    """Remove artifacts from data.

    The artifacts are detected via Independent Component Analysis (ICA) and
    subsequently removed. To plot the results, use
    :func:`gumpy.plot.artifact_removal`

    Args:
        X (array_like): Data to remove artifacts from
        n_components (int): Number of components for ICA. If None is passed, all will be used
        check_result (bool): Examine/test the ICA model by reverting the mixing.

    Returns:
        A 2-tuple containing

        - **ndarray**: The reconstructed signal without artifacts.
        - **ndarray**: The mixing matrix that was used by ICA.
    """
    ica = sklearn.decomposition.FastICA(n_components)
    S_reconst = ica.fit_transform(X)
    A_mixing = ica.mixing_
    if check_result:
        assert np.allclose(X, np.dot(S_reconst, A_mixing.T) + ica.mean_)
    return S_reconst, A_mixing
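A hypothetical call on toy data; with n_components equal to the number of channels, the reconstruction check inside the function should pass:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((500, 4))  # made-up (samples, channels) data

clean, mixing = artifact_removal(X, n_components=4)
print(clean.shape, mixing.shape)   # (500, 4) (4, 4)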
Example #13
Source File: decomposition.py From tridesclous with MIT License
def __init__(self, catalogueconstructor=None, selection=None, n_components_by_channel=3,
             adjacency_radius_um=200, **params):
    cc = catalogueconstructor
    thresh = cc.info['peak_detector_params']['relative_threshold']
    n_left = cc.info['waveform_extractor_params']['n_left']
    self.dtype = cc.info['internal_dtype']

    self.n_components_by_channel = n_components_by_channel
    self.adjacency_radius_um = adjacency_radius_um

    if selection is None:
        peaks_index = cc.some_peaks_index
    else:
        peaks_index, = np.nonzero(selection)
    some_peaks = cc.all_peaks[peaks_index]

    self.pcas = []
    for chan in range(cc.nb_channel):
        sel = some_peaks['channel'] == chan
        wf_chan = cc.get_some_waveforms(peaks_index=peaks_index[sel], channel_indexes=[chan])
        wf_chan = wf_chan[:, :, 0]
        if wf_chan.shape[0] - 1 > n_components_by_channel:
            #~ pca = sklearn.decomposition.IncrementalPCA(n_components=n_components_by_channel, **params)
            pca = sklearn.decomposition.TruncatedSVD(n_components=n_components_by_channel, **params)
            pca.fit(wf_chan)
        else:
            pca = None
        self.pcas.append(pca)

    # In full PcaByChannel, each group of n_components_by_channel features corresponds to one channel
    self.channel_to_features = np.zeros((cc.nb_channel, cc.nb_channel*n_components_by_channel), dtype='bool')
    for c in range(cc.nb_channel):
        self.channel_to_features[c, c*n_components_by_channel:(c+1)*n_components_by_channel] = True
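The core per-channel loop can be illustrated standalone; all shapes and counts below are invented:

import numpy as np
import sklearn.decomposition

rng = np.random.default_rng(0)
n_spikes, n_samples, n_channels = 200, 40, 4
n_components_by_channel = 3
waveforms = rng.standard_normal((n_spikes, n_samples, n_channels))  # made-up waveforms

pcas = []
for chan in range(n_channels):
    wf_chan = waveforms[:, :, chan]  # (spikes, samples) for this channel
    if wf_chan.shape[0] - 1 > n_components_by_channel:
        svd = sklearn.decomposition.TruncatedSVD(n_components=n_components_by_channel)
        svd.fit(wf_chan)
    else:
        svd = None                   # too few spikes on this channel
    pcas.append(svd)

# boolean map: each channel owns its own block of components
channel_to_features = np.zeros((n_channels, n_channels * n_components_by_channel), dtype='bool')
for c in range(n_channels):
    channel_to_features[c, c * n_components_by_channel:(c + 1) * n_components_by_channel] = True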