Python sklearn.feature_selection.VarianceThreshold() Examples
The following are 14 code examples of sklearn.feature_selection.VarianceThreshold().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module sklearn.feature_selection, or try the search function.
Example #1
Source File: low_variance.py From scikit-feature with GNU General Public License v2.0 | 6 votes |
def low_variance_feature_selection(X, threshold):
    """
    This function implements the low_variance feature selection
    (existing method in scikit-learn).

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    threshold: {float}
        variance cutoff; features with a training-set variance lower than
        this value are removed (e.g. threshold = p*(1-p) for boolean
        features that are expected to take value 1 with probability p)

    Output
    ------
    X_new: {numpy array}, shape (n_samples, n_selected_features)
        data with selected features
    """
    # Delegate to scikit-learn's VarianceThreshold selector; fit_transform
    # both learns the per-feature variances and drops the low-variance ones.
    sel = VarianceThreshold(threshold)
    return sel.fit_transform(X)
Example #2
Source File: test_feature_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_objectmapper(self):
    """The ModelFrame.feature_selection accessor must expose each
    scikit-learn feature-selection class under its original name."""
    df = pdml.ModelFrame([])
    accessor = df.feature_selection
    for name in ('GenericUnivariateSelect', 'SelectPercentile',
                 'SelectKBest', 'SelectFpr', 'SelectFromModel',
                 'SelectFdr', 'SelectFwe', 'RFE', 'RFECV',
                 'VarianceThreshold'):
        # Identity (not mere equality): the accessor must hand back the
        # very same class object that sklearn.feature_selection defines.
        self.assertIs(getattr(accessor, name), getattr(fs, name))
Example #3
Source File: test_variance_threshold.py From sparkit-learn with Apache License 2.0 | 6 votes |
def test_same_variances(self):
    """Check that the distributed SparkVarianceThreshold learns the same
    per-feature variances as scikit-learn's local VarianceThreshold for
    dense, sparse, and DictRDD inputs."""
    local = VarianceThreshold()
    dist = SparkVarianceThreshold()
    # NOTE(review): `shape` and `block_size` are never passed to the
    # make_*_rdd helpers below, so every iteration exercises identically
    # sized data — confirm whether the helpers were meant to receive
    # these parameters.
    shapes = [((10, 5), None), ((1e3, 20), None), ((1e3, 20), 100),
              ((1e4, 100), None), ((1e4, 100), 600)]
    for shape, block_size in shapes:
        X_dense, X_dense_rdd = self.make_dense_rdd()
        X_sparse, X_sparse_rdd = self.make_sparse_rdd()
        Z = DictRDD([X_sparse_rdd, X_dense_rdd], columns=('X', 'Y'))
        # Dense input: fitted variances must match elementwise.
        local.fit(X_dense)
        dist.fit(X_dense_rdd)
        assert_array_almost_equal(local.variances_, dist.variances_)
        # Sparse input.
        local.fit(X_sparse)
        dist.fit(X_sparse_rdd)
        assert_array_almost_equal(local.variances_, dist.variances_)
        # DictRDD input: compared against the most recent local fit
        # (the sparse one above), since Z's 'X' column is the sparse RDD.
        dist.fit(Z)
        assert_array_almost_equal(local.variances_, dist.variances_)
Example #4
Source File: test_variance_threshold.py From sparkit-learn with Apache License 2.0 | 6 votes |
def test_same_transform_result(self):
    """Check that fit_transform of SparkVarianceThreshold (default
    threshold) matches the local VarianceThreshold result for dense,
    sparse, and DictRDD inputs, including the expected output container
    types inside the RDD blocks."""
    local = VarianceThreshold()
    dist = SparkVarianceThreshold()
    X_dense, X_dense_rdd = self.make_dense_rdd()
    X_sparse, X_sparse_rdd = self.make_sparse_rdd()
    Z_rdd = DictRDD([X_sparse_rdd, X_dense_rdd], columns=('X', 'Y'))
    # Dense input: distributed blocks must be ndarrays and concatenate to
    # the same matrix the local transformer produces.
    result_local = local.fit_transform(X_dense)
    result_dist = dist.fit_transform(X_dense_rdd)
    assert_true(check_rdd_dtype(result_dist, (np.ndarray,)))
    assert_array_almost_equal(result_local, result_dist.toarray())
    # Sparse input: blocks must stay scipy sparse matrices.
    result_local = local.fit_transform(X_sparse)
    result_dist = dist.fit_transform(X_sparse_rdd)
    assert_true(check_rdd_dtype(result_dist, (sp.spmatrix,)))
    assert_array_almost_equal(result_local.toarray(), result_dist.toarray())
    # DictRDD input: the 'X' column holds the sparse RDD, so it is
    # compared against the sparse result_local computed just above.
    result_dist = dist.fit_transform(Z_rdd)[:, 'X']
    assert_true(check_rdd_dtype(result_dist, (sp.spmatrix,)))
    assert_array_almost_equal(result_local.toarray(), result_dist.toarray())
Example #5
Source File: test_variance_threshold.py From sparkit-learn with Apache License 2.0 | 6 votes |
def test_same_transform_with_treshold(self):
    """Same checks as test_same_transform_result, but with a non-default
    variance threshold (0.03) so some features are actually dropped.

    NOTE(review): "treshold" in the test name is a typo for "threshold";
    left as-is because the name is the test's public identifier.
    """
    local = VarianceThreshold(.03)
    dist = SparkVarianceThreshold(.03)
    X_dense, X_dense_rdd = self.make_dense_rdd()
    X_sparse, X_sparse_rdd = self.make_sparse_rdd()
    Z_rdd = DictRDD([X_sparse_rdd, X_dense_rdd], columns=('X', 'Y'))
    # Dense input: distributed blocks must be ndarrays and agree with the
    # locally transformed matrix.
    result_local = local.fit_transform(X_dense)
    result_dist = dist.fit_transform(X_dense_rdd)
    assert_true(check_rdd_dtype(result_dist, (np.ndarray,)))
    assert_array_almost_equal(result_local, result_dist.toarray())
    # Sparse input: blocks must stay scipy sparse matrices.
    result_local = local.fit_transform(X_sparse)
    result_dist = dist.fit_transform(X_sparse_rdd)
    assert_true(check_rdd_dtype(result_dist, (sp.spmatrix,)))
    assert_array_almost_equal(result_local.toarray(), result_dist.toarray())
    # DictRDD input: 'X' column is the sparse RDD, compared against the
    # sparse result_local from just above.
    result_dist = dist.fit_transform(Z_rdd)[:, 'X']
    assert_true(check_rdd_dtype(result_dist, (sp.spmatrix,)))
    assert_array_almost_equal(result_local.toarray(), result_dist.toarray())
Example #6
Source File: test_pipeline.py From sparkit-learn with Apache License 2.0 | 6 votes |
def test_pipeline_same_results(self):
    """Fit a local (VarianceThreshold -> LogisticRegression) pipeline and
    its Spark counterpart on the same data and check that their
    predictions mostly agree."""
    X, y, Z = self.make_classification(2, 10000, 2000)
    loc_clf = LogisticRegression()
    loc_filter = VarianceThreshold()
    loc_pipe = Pipeline([
        ('threshold', loc_filter),
        ('logistic', loc_clf)
    ])
    dist_clf = SparkLogisticRegression()
    dist_filter = SparkVarianceThreshold()
    dist_pipe = SparkPipeline([
        ('threshold', dist_filter),
        ('logistic', dist_clf)
    ])
    # The distributed filter is fitted once on its own before the full
    # pipeline fit below refits it as the first pipeline step.
    dist_filter.fit(Z)
    loc_pipe.fit(X, y)
    # classes must be supplied explicitly to the distributed classifier.
    dist_pipe.fit(Z, logistic__classes=np.unique(y))
    # Mean absolute difference of predicted labels below 0.1 — with two
    # classes (presumably 0/1 labels from make_classification; verify)
    # this means fewer than ~10% of samples may disagree.
    assert_true(np.mean(np.abs(
        loc_pipe.predict(X) -
        np.concatenate(dist_pipe.predict(Z[:, 'X']).collect())
    )) < 0.1)
Example #7
Source File: test_variance_threshold.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_zero_variance():
    """With the default (zero) threshold, VarianceThreshold must keep
    exactly the non-constant columns of the toy dataset, and must refuse
    to fit data in which every feature is constant."""
    for X in (data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)):
        selector = VarianceThreshold().fit(X)
        assert_array_equal([0, 1, 3, 4], selector.get_support(indices=True))
    # A single sample has zero variance in every feature, as does data
    # whose rows are identical — both must raise ValueError at fit time.
    assert_raises(ValueError, VarianceThreshold().fit, [[0, 1, 2, 3]])
    assert_raises(ValueError, VarianceThreshold().fit, [[0, 1], [0, 1]])
Example #8
Source File: test_variance_threshold.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_variance_threshold():
    """A custom threshold of 0.4 must leave exactly one surviving feature
    in the toy dataset, for both dense and CSR-sparse inputs."""
    for matrix in (data, csr_matrix(data)):
        reduced = VarianceThreshold(threshold=.4).fit_transform(matrix)
        assert_equal((len(data), 1), reduced.shape)
Example #9
Source File: data_cleaning.py From open-solution-value-prediction with MIT License | 5 votes |
def __init__(self, threshold):
    """Wrap scikit-learn's VarianceThreshold selector.

    threshold : variance cutoff forwarded to fs.VarianceThreshold;
        features with a lower training-set variance are removed.
    """
    self.selector = fs.VarianceThreshold(threshold=threshold)
Example #10
Source File: classifiers.py From oddt with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, *args, **kwargs):
    """ Assemble Neural network or SVM using sklearn pipeline """
    # Split kwargs: anything that is not the Pipeline's 'steps' argument
    # and carries no '__'-style step prefix belongs to the model itself.
    # Popped keys are removed from kwargs, so only pipeline parameters
    # remain for set_params below.
    local_kwargs = {}
    for key in list(kwargs.keys()):
        if key != 'steps' and '__' not in key:
            local_kwargs[key] = kwargs.pop(key)
    if self._model is None:
        raise ValueError('Model not specified!')
    model = self._model(*args, **local_kwargs)
    # Pipeline: drop constant features, standardize, then the model.
    self.pipeline = Pipeline(
        [('empty_dims_remover', VarianceThreshold()),
         ('scaler', StandardScaler()),
         ('model', model)]
    ).set_params(**kwargs)
Example #11
Source File: regressors.py From oddt with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, *args, **kwargs):
    """ Assemble Neural network or SVM using sklearn pipeline """
    # Split kwargs: anything that is not the Pipeline's 'steps' argument
    # and carries no '__'-style step prefix belongs to the model itself.
    # Popped keys are removed from kwargs, so only pipeline parameters
    # remain for set_params below.
    local_kwargs = {}
    for key in list(kwargs.keys()):
        if key != 'steps' and '__' not in key:
            local_kwargs[key] = kwargs.pop(key)
    if self._model is None:
        raise ValueError('Model not specified!')
    model = self._model(*args, **local_kwargs)
    # Pipeline: drop constant features, standardize, then the model.
    self.pipeline = Pipeline(
        [('empty_dims_remover', VarianceThreshold()),
         ('scaler', StandardScaler()),
         ('model', model)]
    ).set_params(**kwargs)
Example #12
Source File: test_variance_threshold.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_zero_variance(): # Test VarianceThreshold with default setting, zero variance. for X in [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]: sel = VarianceThreshold().fit(X) assert_array_equal([0, 1, 3, 4], sel.get_support(indices=True)) assert_raises(ValueError, VarianceThreshold().fit, [[0, 1, 2, 3]]) assert_raises(ValueError, VarianceThreshold().fit, [[0, 1], [0, 1]])
Example #13
Source File: test_variance_threshold.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_variance_threshold(): # Test VarianceThreshold with custom variance. for X in [data, csr_matrix(data)]: X = VarianceThreshold(threshold=.4).fit_transform(X) assert_equal((len(data), 1), X.shape)
Example #14
Source File: features.py From AlphaPy with Apache License 2.0 | 4 votes |
def remove_lv_features(model, X):
    r"""Remove low-variance features.

    Parameters
    ----------
    model : alphapy.Model
        Model specifications for removing features.
    X : numpy array
        The feature matrix.

    Returns
    -------
    X_reduced : numpy array
        The reduced feature matrix.

    References
    ----------
    You can find more information on low-variance feature selection
    here [LV]_.

    .. [LV] http://scikit-learn.org/stable/modules/feature_selection.html#variance-threshold

    """
    logger.info("Removing Low-Variance Features")

    # Extract model parameters
    lv_remove = model.specs['lv_remove']
    lv_threshold = model.specs['lv_threshold']
    predict_mode = model.specs['predict_mode']

    # Remove low-variance features
    if lv_remove:
        logger.info("Low-Variance Threshold : %.2f", lv_threshold)
        logger.info("Original Feature Count : %d", X.shape[1])
        if not predict_mode:
            # Training: fit the selector and cache the boolean support
            # mask on the model so prediction runs can reuse it.
            selector = VarianceThreshold(threshold=lv_threshold)
            selector.fit(X)
            support = selector.get_support()
            model.feature_map['lv_support'] = support
        else:
            # Prediction: reuse the mask saved at training time so the
            # same columns are dropped.
            support = model.feature_map['lv_support']
        X_reduced = X[:, support]
        # Keep the model's feature-name list aligned with the surviving
        # columns.
        model.feature_names = list(itertools.compress(model.feature_names, support))
        logger.info("Reduced Feature Count  : %d", X_reduced.shape[1])
    else:
        X_reduced = X
        logger.info("Skipping Low-Variance Features")

    # NOTE(review): `assert` is stripped when Python runs with -O; if this
    # invariant must hold in production, raise an exception instead.
    assert X_reduced.shape[1] == len(model.feature_names), "Mismatched Features and Names"
    return X_reduced