Python sklearn.decomposition Examples
The following are 13 code examples of the sklearn.decomposition module.
The source project, file, and license are noted above each example.
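Before the project snippets, here is a minimal, self-contained use of the module on toy data (everything below is illustrative, not taken from any of the projects):

import numpy as np
from sklearn import decomposition

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 10))  # toy data: 100 samples, 10 features

pca = decomposition.PCA(n_components=3)
X_reduced = pca.fit_transform(X)             # project onto the top 3 components
print(X_reduced.shape)                       # (100, 3)
print(pca.explained_variance_ratio_.sum())   # fraction of variance retained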
Example #1
Source File: decomposition.py From tridesclous with MIT License
def __init__(self, catalogueconstructor=None, selection=None, n_components=5, **params):
    cc = catalogueconstructor
    self.n_components = n_components
    self.waveforms = cc.get_some_waveforms()

    if selection is None:
        waveforms = self.waveforms
    else:
        peaks_index, = np.nonzero(selection)
        waveforms = cc.get_some_waveforms(peaks_index=peaks_index)

    flatten_waveforms = waveforms.reshape(waveforms.shape[0], -1)

    #~ self.pca = sklearn.decomposition.IncrementalPCA(n_components=n_components, **params)
    self.pca = sklearn.decomposition.TruncatedSVD(n_components=n_components, **params)
    self.pca.fit(flatten_waveforms)

    # In GlobalPCA, every feature represents all channels
    self.channel_to_features = np.ones((cc.nb_channel, self.n_components), dtype='bool')
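For readers without the tridesclous catalogue objects, here is a standalone sketch of the same flatten-then-SVD pattern; the waveform shapes are invented:

import numpy as np
import sklearn.decomposition

rng = np.random.default_rng(0)
waveforms = rng.standard_normal((500, 40, 4))     # (spikes, samples, channels), made up

flat = waveforms.reshape(waveforms.shape[0], -1)  # one row per spike
svd = sklearn.decomposition.TruncatedSVD(n_components=5)
features = svd.fit_transform(flat)                # (500, 5) global features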
Example #2
Source File: decomposition.py From tridesclous with MIT License
def transform(self, waveforms):
    features = waveforms[:, self.ind_peak, :].copy()
    return features
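This method simply keeps the waveform value at the peak sample of each channel as the feature vector. A standalone equivalent, with the peak index and shapes assumed for illustration:

import numpy as np

rng = np.random.default_rng(0)
waveforms = rng.standard_normal((300, 40, 4))  # (spikes, samples, channels), made up
ind_peak = 15                                  # assumed index of the peak sample

features = waveforms[:, ind_peak, :].copy()    # one feature per channel, shape (300, 4)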
Example #3
Source File: test_monkeypatch.py From daal4py with Apache License 2.0
def test_monkey_patching(self):
    _tokens = daal4py.sklearn.sklearn_patch_names()
    self.assertTrue(isinstance(_tokens, list) and len(_tokens) > 0)
    for t in _tokens:
        daal4py.sklearn.unpatch_sklearn(t)
    for t in _tokens:
        daal4py.sklearn.patch_sklearn(t)

    import sklearn
    for a in [(sklearn.decomposition, 'PCA'),
              (sklearn.linear_model, 'Ridge'),
              (sklearn.linear_model, 'LinearRegression'),
              (sklearn.cluster, 'KMeans'),
              (sklearn.svm, 'SVC')]:
        class_module = getattr(a[0], a[1]).__module__
        self.assertTrue(class_module.startswith('daal4py'))
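Assuming daal4py's documented patching entry points (a no-argument patch_sklearn patches every supported estimator), typical usage outside the test looks like this; the printed module name is what the test asserts on:

import daal4py.sklearn

daal4py.sklearn.patch_sklearn()        # route supported estimators through daal4py

import sklearn.decomposition
# After patching, PCA should resolve to the daal4py-backed implementation
print(sklearn.decomposition.PCA.__module__)  # expected to start with 'daal4py'

daal4py.sklearn.unpatch_sklearn()      # restore stock scikit-learn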
Example #4
Source File: feature_extraction.py From retentioneering-tools with Mozilla Public License 2.0
def get_manifold(data, manifold_type, **kwargs):
    """
    Reduces number of dimensions.

    Parameters
    ----------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    manifold_type: str
        Name of the dimensionality reduction method from ``sklearn.decomposition``
        or ``sklearn.manifold``
    kwargs: optional
        Parameters for ``sklearn.decomposition`` and ``sklearn.manifold`` methods.

    Returns
    -------
    pd.DataFrame
        Dataframe with reduced dimensions.
    """
    if hasattr(decomposition, manifold_type):
        man = getattr(decomposition, manifold_type)
    elif hasattr(manifold, manifold_type):
        man = getattr(manifold, manifold_type)
    else:
        raise ValueError(f'There is no such manifold: {manifold_type}')

    tsvd = man(**{i: j for i, j in kwargs.items() if i in man.get_params(man)})
    res = tsvd.fit_transform(data)
    return pd.DataFrame(res, index=data.index)
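A hypothetical call on made-up features; note the helper calls ``get_params`` on the estimator class itself, which relies on the older scikit-learn versions the project targets:

import numpy as np
import pandas as pd

features = pd.DataFrame(np.random.rand(50, 20),
                        index=[f'user_{i}' for i in range(50)])  # invented features

reduced = get_manifold(features, 'PCA', n_components=2)  # (50, 2), same index as input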
Example #5
Source File: feature_extraction.py From retentioneering-tools with Mozilla Public License 2.0
def merge_features(features, metadata, meta_index_col=None, manifold_type=None,
                   fillna=None, drop=False, **kwargs):
    """
    Adds metadata to the TF-IDF features of trajectories. The features are reduced
    first if ``manifold_type`` is not ``None``.

    Parameters
    ----------
    features: pd.DataFrame
        Dataframe with trajectory features.
    metadata: pd.DataFrame
        Dataframe with user or session properties or any other information you would
        like to extract as features (e.g. user properties, LTV values, etc.).
        Default: ``None``
    meta_index_col: str, optional
        Used when metadata is not ``None``. Name of the column in the ``metadata``
        dataframe that contains the same ID as in ``index_col``, or, if not defined,
        the same as in retention_config (e.g. ID of users or sessions). If ``None``,
        the index of the metadata dataframe is used instead. Default: ``None``
    manifold_type: str, optional
        Name of the dimensionality reduction method from ``sklearn.decomposition``
        or ``sklearn.manifold``. Default: ``None``
    fillna: optional
        Value for filling missing metadata for any ``index_col`` value. Default: ``None``
    drop: bool, optional
        If ``True``, drops users that do not exist in the ``metadata`` dataframe.
        Default: ``False``
    kwargs: optional
        Keyword arguments for ``sklearn.decomposition`` and ``sklearn.manifold`` methods.

    Returns
    -------
    pd.DataFrame
        Dataframe with trajectory features (possibly reduced) and user metadata.
    """
    if manifold_type is not None:
        features = get_manifold(features, manifold_type, **kwargs)

    if meta_index_col is not None:
        metadata.index = metadata[meta_index_col].values
        metadata = metadata.drop(meta_index_col, axis=1)

    res = features.join(metadata, rsuffix='_meta')

    if drop and (fillna is None):
        res = res[res.isnull().sum(1) == 0].copy()
    if fillna is not None:
        res = res.fillna(fillna)
    return res
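A hypothetical end-to-end call with toy frames (IDs and column names are invented):

import numpy as np
import pandas as pd

features = pd.DataFrame(np.eye(3), index=['u1', 'u2', 'u3'])             # toy features
metadata = pd.DataFrame({'user_id': ['u1', 'u2'], 'ltv': [10.0, 20.0]})  # toy metadata

merged = merge_features(features, metadata, meta_index_col='user_id', fillna=0)
# 'u3' has no metadata, so its 'ltv' column is filled with 0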
Example #6
Source File: decomposition.py From tridesclous with MIT License
def transform(self, waveforms):
    flatten_waveforms = waveforms.reshape(waveforms.shape[0], -1)
    return self.lda.transform(flatten_waveforms)
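The fitted ``self.lda`` is created elsewhere in the class; as a standalone sketch under assumed shapes and labels, the same projection can be reproduced with scikit-learn's LDA on flattened waveforms:

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.default_rng(0)
waveforms = rng.standard_normal((300, 40, 4))  # (spikes, samples, channels), made up
labels = rng.integers(0, 3, size=300)          # hypothetical cluster labels

flat = waveforms.reshape(waveforms.shape[0], -1)
lda = LinearDiscriminantAnalysis(n_components=2)
features = lda.fit(flat, labels).transform(flat)  # (300, 2) supervised projection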
Example #7
Source File: cleancluster.py From tridesclous with MIT License
def _compute_one_dip_test(cc, dirname, chan_grp, label, n_components_local_pca, adjacency_radius_um):
    # compute dip test to try to over split
    from .dataio import DataIO
    from .catalogueconstructor import CatalogueConstructor

    if cc is None:
        dataio = DataIO(dirname)
        cc = CatalogueConstructor(dataio=dataio, chan_grp=chan_grp)

    peak_sign = cc.info['peak_detector_params']['peak_sign']
    dense_mode = cc.info['mode'] == 'dense'
    n_left = cc.info['waveform_extractor_params']['n_left']
    n_right = cc.info['waveform_extractor_params']['n_right']
    peak_width = n_right - n_left
    nb_channel = cc.nb_channel

    if dense_mode:
        channel_adjacency = {c: np.arange(nb_channel) for c in range(nb_channel)}
    else:
        channel_adjacency = {}
        for c in range(nb_channel):
            nearest, = np.nonzero(cc.channel_distances[c, :] < adjacency_radius_um)
            channel_adjacency[c] = nearest

    waveforms, wf_flat, peak_index = _get_sparse_waveforms_flatten(
        cc, dense_mode, label, channel_adjacency,
        n_spike_for_centroid=cc.n_spike_for_centroid)

    #~ pca = sklearn.decomposition.IncrementalPCA(n_components=n_components_local_pca, whiten=True)
    n_components = min(wf_flat.shape[1] - 1, n_components_local_pca)
    pca = sklearn.decomposition.TruncatedSVD(n_components=n_components)
    feats = pca.fit_transform(wf_flat)
    pval = diptest(np.sort(feats[:, 0]), numt=200)

    return pval
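The ``diptest`` call appears to come from tridesclous' own dip-test helper rather than a PyPI package, so only the projection step is sketched here, on random data:

import numpy as np
import sklearn.decomposition

rng = np.random.default_rng(0)
wf_flat = rng.standard_normal((500, 160))  # flattened sparse waveforms, made up

n_components = min(wf_flat.shape[1] - 1, 5)
svd = sklearn.decomposition.TruncatedSVD(n_components=n_components)
feats = svd.fit_transform(wf_flat)

first_axis = np.sort(feats[:, 0])  # the sorted first component is what feeds the dip test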
Example #8
Source File: common.py From deep-image-retrieval with BSD 3-Clause "New" or "Revised" License
def transform(pca, X, whitenp=0.5, whitenv=None, whitenm=1.0, use_sklearn=True):
    if use_sklearn:
        # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/decomposition/base.py#L99
        if pca.mean_ is not None:
            X = X - pca.mean_
        X_transformed = np.dot(X, pca.components_[:whitenv].T)
        if pca.whiten:
            X_transformed /= whitenm * np.power(pca.explained_variance_[:whitenv], whitenp)
    else:
        X = X - pca['means']
        X_transformed = np.dot(X, pca['W'])
    return X_transformed
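With the default arguments (whitenp=0.5, whitenv=None, whitenm=1.0), this reproduces scikit-learn's own whitened projection, which can be checked directly on toy data:

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).randn(200, 16)
pca = PCA(n_components=4, whiten=True).fit(X)

# the manual path divides by explained_variance_ ** 0.5, exactly sklearn's whitening
assert np.allclose(transform(pca, X), pca.transform(X))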
Example #9
Source File: test_custom_schemas.py From lale with Apache License 2.0
def setUp(self):
    import sklearn.decomposition
    import lale.lib.sklearn
    from lale.operators import make_operator
    self.sk_pca = make_operator(sklearn.decomposition.PCA, schemas={})
    self.ll_pca = lale.lib.sklearn.PCA
    self.maxDiff = None
Example #10
Source File: pca.py From lale with Apache License 2.0
def __init__(self, **hyperparams):
    self._hyperparams = hyperparams
    self._wrapped_model = sklearn.decomposition.PCA(**self._hyperparams)
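This is only the constructor of lale's PCA wrapper; the rest of such a wrapper presumably delegates to the wrapped estimator. A hypothetical sketch of that delegation, not the actual lale source:

# Hypothetical companion methods for the same wrapper class (assumed, not from lale)
def fit(self, X, y=None):
    self._wrapped_model.fit(X)   # delegate training to the wrapped sklearn PCA
    return self

def transform(self, X):
    return self._wrapped_model.transform(X)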
Example #11
Source File: signal.py From gumpy with MIT License
def dwt(raw_eeg_data, level, **kwargs):
    """Multilevel Discrete Wavelet Transform (DWT).

    Compute the DWT for a raw EEG signal on multiple levels.

    Args:
        raw_eeg_data (array_like): input data
        level (int >= 0): decomposition levels
        **kwargs: Additional arguments that will be forwarded to ``pywt.wavedec``

    Returns:
        A 2-element tuple containing

        - **float**: mean value of the first decomposition coefficients
        - **list**: list of mean values for the individual (detail) decomposition coefficients
    """
    wt_coeffs = pywt.wavedec(data=raw_eeg_data, level=level, **kwargs)

    # A7: 0 Hz - 1 Hz
    cAL_mean = np.nanmean(wt_coeffs[0], axis=0)
    details = []

    # For Fs = 128 Hz
    for i in range(1, level + 1):
        # D7: 1 Hz - 2 Hz
        cDL_mean = np.nanmean(wt_coeffs[i], axis=0)
        details.append(cDL_mean)

    return cAL_mean, details
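A hypothetical call; the wavelet is forwarded through ``**kwargs`` to ``pywt.wavedec``, so it must be supplied ('db4' here is just an example choice):

import numpy as np

rng = np.random.default_rng(0)
eeg = rng.standard_normal(1024)  # made-up single-channel EEG trace

approx_mean, detail_means = dwt(eeg, level=6, wavelet='db4')
print(len(detail_means))  # 6 detail levels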
Example #12
Source File: signal.py From gumpy with MIT License
def artifact_removal(X, n_components=None, check_result=True):
    """Remove artifacts from data.

    The artifacts are detected via Independent Component Analysis (ICA) and
    subsequently removed. To plot the results, use
    :func:`gumpy.plot.artifact_removal`

    Args:
        X (array_like): Data to remove artifacts from
        n_components (int): Number of components for ICA. If None is passed, all will be used
        check_result (bool): Examine/test the ICA model by reverting the mixing.

    Returns:
        A 2-tuple containing

        - **ndarray**: The reconstructed signal without artifacts.
        - **ndarray**: The mixing matrix that was used by ICA.
    """
    ica = sklearn.decomposition.FastICA(n_components)
    S_reconst = ica.fit_transform(X)
    A_mixing = ica.mixing_
    if check_result:
        assert np.allclose(X, np.dot(S_reconst, A_mixing.T) + ica.mean_)
    return S_reconst, A_mixing
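A hypothetical call on toy data; with n_components equal to the number of channels, the reconstruction check inside the function should pass:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((500, 4))  # made-up (samples, channels) data

clean, mixing = artifact_removal(X, n_components=4)
print(clean.shape, mixing.shape)   # (500, 4) (4, 4)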
Example #13
Source File: decomposition.py From tridesclous with MIT License
def __init__(self, catalogueconstructor=None, selection=None, n_components_by_channel=3,
             adjacency_radius_um=200, **params):
    cc = catalogueconstructor
    thresh = cc.info['peak_detector_params']['relative_threshold']
    n_left = cc.info['waveform_extractor_params']['n_left']
    self.dtype = cc.info['internal_dtype']

    self.n_components_by_channel = n_components_by_channel
    self.adjacency_radius_um = adjacency_radius_um

    if selection is None:
        peaks_index = cc.some_peaks_index
    else:
        peaks_index, = np.nonzero(selection)
    some_peaks = cc.all_peaks[peaks_index]

    self.pcas = []
    for chan in range(cc.nb_channel):
        sel = some_peaks['channel'] == chan
        wf_chan = cc.get_some_waveforms(peaks_index=peaks_index[sel], channel_indexes=[chan])
        wf_chan = wf_chan[:, :, 0]
        if wf_chan.shape[0] - 1 > n_components_by_channel:
            #~ pca = sklearn.decomposition.IncrementalPCA(n_components=n_components_by_channel, **params)
            pca = sklearn.decomposition.TruncatedSVD(n_components=n_components_by_channel, **params)
            pca.fit(wf_chan)
        else:
            pca = None
        self.pcas.append(pca)

    # In full PcaByChannel, each group of n_components_by_channel features corresponds to one channel
    self.channel_to_features = np.zeros((cc.nb_channel, cc.nb_channel*n_components_by_channel), dtype='bool')
    for c in range(cc.nb_channel):
        self.channel_to_features[c, c*n_components_by_channel:(c+1)*n_components_by_channel] = True
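The core per-channel loop can be illustrated standalone; all shapes and counts below are invented:

import numpy as np
import sklearn.decomposition

rng = np.random.default_rng(0)
n_spikes, n_samples, n_channels = 200, 40, 4
n_components_by_channel = 3
waveforms = rng.standard_normal((n_spikes, n_samples, n_channels))  # made-up waveforms

pcas = []
for chan in range(n_channels):
    wf_chan = waveforms[:, :, chan]  # (spikes, samples) for this channel
    if wf_chan.shape[0] - 1 > n_components_by_channel:
        svd = sklearn.decomposition.TruncatedSVD(n_components=n_components_by_channel)
        svd.fit(wf_chan)
    else:
        svd = None                   # too few spikes on this channel
    pcas.append(svd)

# boolean map: each channel owns its own block of components
channel_to_features = np.zeros((n_channels, n_channels * n_components_by_channel), dtype='bool')
for c in range(n_channels):
    channel_to_features[c, c * n_components_by_channel:(c + 1) * n_components_by_channel] = True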