Python sklearn.feature_selection.SelectFromModel() Examples
The following are 30 code examples of sklearn.feature_selection.SelectFromModel().
You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.feature_selection, or try the search function.
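Before the project examples, here is a minimal, self-contained sketch of the typical SelectFromModel workflow. It is illustrative only and not taken from any of the projects below; the synthetic dataset and the "mean" threshold are assumptions for the sake of the demo.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

# Synthetic data: 10 features, only 3 informative (chosen for illustration)
X, y = make_classification(n_samples=200, n_features=10, n_informative=3,
                           random_state=0)

# Wrap an estimator that exposes feature_importances_ (or coef_);
# features whose importance exceeds the threshold are kept.
selector = SelectFromModel(
    RandomForestClassifier(n_estimators=50, random_state=0),
    threshold="mean")
X_reduced = selector.fit_transform(X, y)

print(selector.get_support())   # boolean mask of the selected features
print(X_reduced.shape)          # fewer columns than X

Many of the examples below follow this same pattern, varying the wrapped estimator, the threshold expression (e.g. "0.5*mean"), max_features, or the prefit flag.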
Example #1
Source File: utils_feature_selection.py From auto_ml with MIT License | 7 votes |
def get_feature_selection_model_from_name(type_of_estimator, model_name):
    model_map = {
        'classifier': {
            'SelectFromModel': SelectFromModel(RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15), threshold='20*mean'),
            'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        },
        'regressor': {
            'SelectFromModel': SelectFromModel(RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15), threshold='0.7*mean'),
            'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        }
    }

    return model_map[type_of_estimator][model_name]
Example #2
Source File: test_from_model.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_sample_weight():
    # Ensure sample weights are passed to underlying estimator
    X, y = datasets.make_classification(
        n_samples=100, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0)

    # Check with sample weights
    sample_weight = np.ones(y.shape)
    sample_weight[y == 1] *= 100

    est = LogisticRegression(random_state=0, fit_intercept=False)
    transformer = SelectFromModel(estimator=est)
    transformer.fit(X, y, sample_weight=None)
    mask = transformer._get_support_mask()
    transformer.fit(X, y, sample_weight=sample_weight)
    weighted_mask = transformer._get_support_mask()
    assert not np.all(weighted_mask == mask)
    transformer.fit(X, y, sample_weight=3 * sample_weight)
    reweighted_mask = transformer._get_support_mask()
    assert np.all(weighted_mask == reweighted_mask)
Example #3
Source File: test_feature_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.feature_selection.GenericUnivariateSelect,
                  fs.GenericUnivariateSelect)
    self.assertIs(df.feature_selection.SelectPercentile,
                  fs.SelectPercentile)
    self.assertIs(df.feature_selection.SelectKBest, fs.SelectKBest)
    self.assertIs(df.feature_selection.SelectFpr, fs.SelectFpr)
    self.assertIs(df.feature_selection.SelectFromModel, fs.SelectFromModel)
    self.assertIs(df.feature_selection.SelectFdr, fs.SelectFdr)
    self.assertIs(df.feature_selection.SelectFwe, fs.SelectFwe)
    self.assertIs(df.feature_selection.RFE, fs.RFE)
    self.assertIs(df.feature_selection.RFECV, fs.RFECV)
    self.assertIs(df.feature_selection.VarianceThreshold,
                  fs.VarianceThreshold)
Example #4
Source File: text_models.py From mindmeld with Apache License 2.0 | 6 votes |
def _get_feature_selector(self):
    """Get a feature selector instance based on the feature_selector model
    parameter

    Returns:
        (Object): a feature selector which returns a reduced feature matrix, \
            given the full feature matrix, X and the class labels, y
    """
    if self.config.model_settings is None:
        selector_type = None
    else:
        selector_type = self.config.model_settings.get("feature_selector")
    selector = {
        "l1": SelectFromModel(LogisticRegression(penalty="l1", C=1)),
        "f": SelectPercentile(),
    }.get(selector_type)
    return selector
Example #5
Source File: benchmark_test.py From nni with MIT License | 6 votes |
def test_time(pipeline_name, name, path):
    if pipeline_name == "LR":
        pipeline = make_pipeline(LogisticRegression())

    if pipeline_name == "FGS":
        pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())

    if pipeline_name == "Tree":
        pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())

    test_benchmark = Benchmark()
    print("Dataset:\t", name)
    print("Pipeline:\t", pipeline_name)
    starttime = datetime.datetime.now()
    test_benchmark.run_test(pipeline, name, path)
    endtime = datetime.datetime.now()
    print("Used time: ", (endtime - starttime).microseconds/1000)
    print("")
Example #6
Source File: sklearn_test.py From nni with MIT License | 6 votes |
def test():
    url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2'
    urllib.request.urlretrieve(url_zip_train, filename='train.bz2')

    f_svm = open('train.svm', 'wt')
    with bz2.open('train.bz2', 'rb') as f_zip:
        data = f_zip.read()
        f_svm.write(data.decode('utf-8'))
    f_svm.close()

    X, y = load_svmlight_file('train.svm')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10), LogisticRegression())
    # pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())

    pipeline.fit(X_train, y_train)

    print("Pipeline Score: ", pipeline.score(X_train, y_train))
Example #7
Source File: test_from_model.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_prefit():
    # Test all possible combinations of the prefit parameter.

    # Passing a prefit parameter with the selected model
    # and fitting a unfit model with prefit=False should give same results.
    clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
                        random_state=0, tol=None)
    model = SelectFromModel(clf)
    model.fit(data, y)
    X_transform = model.transform(data)
    clf.fit(data, y)
    model = SelectFromModel(clf, prefit=True)
    assert_array_almost_equal(model.transform(data), X_transform)

    # Check that the model is rewritten if prefit=False and a fitted model is
    # passed
    model = SelectFromModel(clf, prefit=False)
    model.fit(data, y)
    assert_array_almost_equal(model.transform(data), X_transform)

    # Check that prefit=True and calling fit raises a ValueError
    model = SelectFromModel(clf, prefit=True)
    assert_raises(ValueError, model.fit, data, y)
Example #8
Source File: test_from_model.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_feature_importances():
    X, y = datasets.make_classification(
        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0)

    est = RandomForestClassifier(n_estimators=50, random_state=0)
    for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
        transformer = SelectFromModel(estimator=est, threshold=threshold)
        transformer.fit(X, y)
        assert_true(hasattr(transformer.estimator_, 'feature_importances_'))

        X_new = transformer.transform(X)
        assert_less(X_new.shape[1], X.shape[1])
        importances = transformer.estimator_.feature_importances_

        feature_mask = np.abs(importances) > func(importances)
        assert_array_almost_equal(X_new, X[:, feature_mask])
Example #9
Source File: test_from_model.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_partial_fit():
    est = PassiveAggressiveClassifier(random_state=0, shuffle=False,
                                      max_iter=5, tol=None)
    transformer = SelectFromModel(estimator=est)
    transformer.partial_fit(data, y, classes=np.unique(y))
    old_model = transformer.estimator_
    transformer.partial_fit(data, y, classes=np.unique(y))
    new_model = transformer.estimator_
    assert old_model is new_model

    X_transform = transformer.transform(data)
    transformer.fit(np.vstack((data, data)), np.concatenate((y, y)))
    assert_array_almost_equal(X_transform, transformer.transform(data))

    # check that if est doesn't have partial_fit, neither does SelectFromModel
    transformer = SelectFromModel(estimator=RandomForestClassifier())
    assert not hasattr(transformer, "partial_fit")
Example #10
Source File: test_from_model.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_sample_weight():
    # Ensure sample weights are passed to underlying estimator
    X, y = datasets.make_classification(
        n_samples=100, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0)

    # Check with sample weights
    sample_weight = np.ones(y.shape)
    sample_weight[y == 1] *= 100

    est = LogisticRegression(random_state=0, fit_intercept=False)
    transformer = SelectFromModel(estimator=est)
    transformer.fit(X, y, sample_weight=None)
    mask = transformer._get_support_mask()
    transformer.fit(X, y, sample_weight=sample_weight)
    weighted_mask = transformer._get_support_mask()
    assert not np.all(weighted_mask == mask)
    transformer.fit(X, y, sample_weight=3 * sample_weight)
    reweighted_mask = transformer._get_support_mask()
    assert np.all(weighted_mask == reweighted_mask)
Example #11
Source File: test_from_model.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_feature_importances():
    X, y = datasets.make_classification(
        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0)

    est = RandomForestClassifier(n_estimators=50, random_state=0)
    for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
        transformer = SelectFromModel(estimator=est, threshold=threshold)
        transformer.fit(X, y)
        assert hasattr(transformer.estimator_, 'feature_importances_')

        X_new = transformer.transform(X)
        assert_less(X_new.shape[1], X.shape[1])
        importances = transformer.estimator_.feature_importances_

        feature_mask = np.abs(importances) > func(importances)
        assert_array_almost_equal(X_new, X[:, feature_mask])
Example #12
Source File: test_from_model.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_threshold_and_max_features():
    X, y = datasets.make_classification(
        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0)
    est = RandomForestClassifier(n_estimators=50, random_state=0)

    transformer1 = SelectFromModel(estimator=est, max_features=3,
                                   threshold=-np.inf)
    X_new1 = transformer1.fit_transform(X, y)

    transformer2 = SelectFromModel(estimator=est, threshold=0.04)
    X_new2 = transformer2.fit_transform(X, y)

    transformer3 = SelectFromModel(estimator=est, max_features=3,
                                   threshold=0.04)
    X_new3 = transformer3.fit_transform(X, y)
    assert X_new3.shape[1] == min(X_new1.shape[1], X_new2.shape[1])
    selected_indices = transformer3.transform(
        np.arange(X.shape[1])[np.newaxis, :])
    assert_allclose(X_new3, X[:, selected_indices[0]])
Example #13
Source File: test_from_model.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_max_features_tiebreak():
    # Test if max_features can break tie among feature importance
    X, y = datasets.make_classification(
        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0)
    max_features = X.shape[1]

    feature_importances = np.array([4, 4, 4, 4, 3, 3, 3, 2, 2, 1])
    for n_features in range(1, max_features + 1):
        transformer = SelectFromModel(
            FixedImportanceEstimator(feature_importances),
            max_features=n_features,
            threshold=-np.inf)
        X_new = transformer.fit_transform(X, y)
        selected_feature_indices = np.where(transformer._get_support_mask())[0]
        assert_array_equal(selected_feature_indices, np.arange(n_features))
        assert X_new.shape[1] == n_features
Example #14
Source File: test_from_model.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_2d_coef():
    X, y = datasets.make_classification(
        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
        n_repeated=0, shuffle=False, random_state=0, n_classes=4)

    est = LogisticRegression()
    for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
        for order in [1, 2, np.inf]:
            # Fit SelectFromModel on a multi-class problem
            transformer = SelectFromModel(estimator=LogisticRegression(),
                                          threshold=threshold,
                                          norm_order=order)
            transformer.fit(X, y)
            assert_true(hasattr(transformer.estimator_, 'coef_'))
            X_new = transformer.transform(X)
            assert_less(X_new.shape[1], X.shape[1])

            # Manually check that the norm is correctly performed
            est.fit(X, y)
            importances = np.linalg.norm(est.coef_, axis=0, ord=order)
            feature_mask = importances > func(importances)
            assert_array_equal(X_new, X[:, feature_mask])
Example #15
Source File: test_from_model.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_partial_fit():
    est = PassiveAggressiveClassifier(random_state=0, shuffle=False,
                                      max_iter=5, tol=None)
    transformer = SelectFromModel(estimator=est)
    transformer.partial_fit(data, y, classes=np.unique(y))
    old_model = transformer.estimator_
    transformer.partial_fit(data, y, classes=np.unique(y))
    new_model = transformer.estimator_
    assert_true(old_model is new_model)

    X_transform = transformer.transform(data)
    transformer.fit(np.vstack((data, data)), np.concatenate((y, y)))
    assert_array_equal(X_transform, transformer.transform(data))

    # check that if est doesn't have partial_fit, neither does SelectFromModel
    transformer = SelectFromModel(estimator=RandomForestClassifier())
    assert_false(hasattr(transformer, "partial_fit"))
Example #16
Source File: export_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_set_param_recursive_2():
    """Assert that set_param_recursive sets \"random_state\" to 42 in nested estimator in SelectFromModel."""
    pipeline_string = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )
    tpot_obj = TPOTRegressor()
    tpot_obj._fit_init()
    deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
    set_param_recursive(sklearn_pipeline.steps, 'random_state', 42)

    assert getattr(getattr(sklearn_pipeline.steps[0][1], 'estimator'), 'random_state') == 42
    assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42
Example #17
Source File: test_from_model.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_prefit():
    # Test all possible combinations of the prefit parameter.

    # Passing a prefit parameter with the selected model
    # and fitting a unfit model with prefit=False should give same results.
    clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
                        random_state=0, tol=None)
    model = SelectFromModel(clf)
    model.fit(data, y)
    X_transform = model.transform(data)
    clf.fit(data, y)
    model = SelectFromModel(clf, prefit=True)
    assert_array_equal(model.transform(data), X_transform)

    # Check that the model is rewritten if prefit=False and a fitted model is
    # passed
    model = SelectFromModel(clf, prefit=False)
    model.fit(data, y)
    assert_array_equal(model.transform(data), X_transform)

    # Check that prefit=True and calling fit raises a ValueError
    model = SelectFromModel(clf, prefit=True)
    assert_raises(ValueError, model.fit, data, y)
Example #18
Source File: test_from_model.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_input_estimator_unchanged():
    # Test that SelectFromModel fits on a clone of the estimator.
    est = RandomForestClassifier()
    transformer = SelectFromModel(estimator=est)
    transformer.fit(data, y)
    assert_true(transformer.estimator is est)
Example #19
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def test_filter(self):
    selector = SelectFromModel(DecisionTreeRegressor(), prefit = False)
    self.assertIsInstance(selector, SelectFromModel)
    self.assertIsInstance(selector.estimator, DecisionTreeRegressor)
    self.assertFalse(hasattr(selector, "estimator_"))
    selector_proxy = _filter_steps([("selector", selector)])[0][1]
    self.assertIsInstance(selector_proxy, SelectorProxy)
    selector_proxy.fit(numpy.array([[0, 1], [0, 2], [0, 3]]), numpy.array([0.5, 1.0, 1.5]))
    self.assertEqual([0, 1], selector_proxy.support_mask_.tolist())
Example #20
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def test_filter_prefit(self):
    regressor = DecisionTreeRegressor()
    regressor.fit(numpy.array([[0, 1], [0, 2], [0, 3]]), numpy.array([0.5, 1.0, 1.5]))
    selector = SelectFromModel(regressor, prefit = True)
    self.assertTrue(hasattr(selector, "estimator"))
    self.assertFalse(hasattr(selector, "estimator_"))
    selector_proxy = _filter_steps([("selector", selector, {})])[0][1]
    self.assertIsInstance(selector_proxy, SelectorProxy)
    self.assertEqual([0, 1], selector_proxy.support_mask_.tolist())
Example #21
Source File: export_tests.py From tpot with GNU Lesser General Public License v3.0 | 5 votes |
def test_get_by_name():
    """Assert that the Operator class returns operators by name appropriately."""

    assert get_by_name("SelectPercentile", tpot_obj.operators).__class__ == TPOTSelectPercentile.__class__
    assert get_by_name("SelectFromModel", tpot_obj.operators).__class__ == TPOTSelectFromModel.__class__
Example #22
Source File: memm.py From mindmeld with Apache License 2.0 | 5 votes |
def _get_feature_selector(selector_type):
    """Get a feature selector instance based on the feature_selector model
    parameter.

    Returns:
        (Object): A feature selector which returns a reduced feature matrix, \
            given the full feature matrix, X and the class labels, y.
    """
    selector = {
        "l1": SelectFromModel(LogisticRegression(penalty="l1", C=1)),
        "f": SelectPercentile(),
    }.get(selector_type)
    return selector
Example #23
Source File: _thermal.py From CO2MPAS-TA with European Union Public License 1.1 | 5 votes |
def _get_support_mask(self):
    if self._cache_support_mask is not None:
        return self._cache_support_mask
    if self.prefit:
        estimator = self.estimator
    elif hasattr(self, 'estimator_'):
        estimator = self.estimator_
    else:
        raise ValueError(
            'Either fit the model before transform or set "prefit=True"'
            ' while passing the fitted estimator to the constructor.')
    try:
        with np.errstate(divide='ignore', invalid='ignore'):
            importances = getattr(estimator, "feature_importances_", None)
            if importances is not None and np.isnan(importances).all():
                mask = np.ones(importances.shape, bool)
            else:
                mask = super(_SelectFromModel, self)._get_support_mask()
    except ValueError:
        sfm = SelectFromModel(
            estimator.estimator_, self.threshold, True
        )
        mask = sfm._get_support_mask()

    for i in self._out_mask:
        mask[i] = False

    for i in self._in_mask:
        mask[i] = True

    self._cache_support_mask = mask
    return mask


# noinspection PyMissingOrEmptyDocstring,PyPep8Naming
Example #24
Source File: feature_selection.py From MLPrimitives with MIT License | 5 votes |
def __init__(self, estimator_class=None, bypass=False, threshold=None,
             norm_order=1, *args, **kwargs):

    self.bypass = bypass
    if not bypass:
        estimator = (self.ESTIMATOR or estimator_class)(*args, **kwargs)
        self.selector = SelectFromModel(estimator, threshold, False, norm_order)
Example #25
Source File: test_from_model.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_invalid_input():
    clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
                        random_state=None, tol=None)
    for threshold in ["gobbledigook", ".5 * gobbledigook"]:
        model = SelectFromModel(clf, threshold=threshold)
        model.fit(data, y)
        assert_raises(ValueError, model.transform, data)
Example #26
Source File: utils_feature_selection.py From auto_ml with MIT License | 5 votes |
def __init__(self, type_of_estimator, column_descriptions,
             feature_selection_model='SelectFromModel'):

    self.column_descriptions = column_descriptions
    self.type_of_estimator = type_of_estimator
    self.feature_selection_model = feature_selection_model
Example #27
Source File: test_from_model.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_calling_fit_reinitializes():
    est = LinearSVC(random_state=0)
    transformer = SelectFromModel(estimator=est)
    transformer.fit(data, y)
    transformer.set_params(estimator__C=100)
    transformer.fit(data, y)
    assert_equal(transformer.estimator_.C, 100)
Example #28
Source File: test_from_model.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_threshold_string():
    est = RandomForestClassifier(n_estimators=50, random_state=0)
    model = SelectFromModel(est, threshold="0.5*mean")
    model.fit(data, y)
    X_transform = model.transform(data)

    # Calculate the threshold from the estimator directly.
    est.fit(data, y)
    threshold = 0.5 * np.mean(est.feature_importances_)
    mask = est.feature_importances_ > threshold
    assert_array_equal(X_transform, data[:, mask])
Example #29
Source File: export_tests.py From tpot with GNU Lesser General Public License v3.0 | 5 votes |
def test_export_pipeline_5():
    """Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with SelectFromModel."""
    pipeline_string = (
        'DecisionTreeRegressor(SelectFromModel(input_matrix, '
        'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
        'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
        'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj_reg._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=None)

exported_pipeline = make_pipeline(
    SelectFromModel(estimator=ExtraTreesRegressor(max_features=0.05, n_estimators=100), threshold=0.05),
    DecisionTreeRegressor(max_depth=8, min_samples_leaf=5, min_samples_split=5)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert expected_code == export_pipeline(pipeline, tpot_obj_reg.operators, tpot_obj_reg._pset)
Example #30
Source File: xgb.py From speedml with MIT License | 5 votes |
def feature_selection(self):
    """
    Returns threshold and accuracy for ``n`` number of features.
    """
    Base.data_n()
    X = Base.train_n.drop([Base.target], axis=1)
    Y = Base.train[Base.target]

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=7)

    # Fit model on all training data
    model = xgb.XGBClassifier()
    model.fit(X_train, y_train)

    # Make predictions for test data and evaluate
    y_pred = model.predict(X_test)
    predictions = [round(value) for value in y_pred]
    accuracy = accuracy_score(y_test, predictions)
    self.feature_accuracy = round(accuracy * 100.0, 2)
    print("Accuracy: %f%%" % (self.feature_accuracy))

    # Fit model using each importance as a threshold
    thresholds = np.sort(model.feature_importances_)
    for thresh in thresholds:
        # Select features using threshold
        selection = SelectFromModel(model, threshold=thresh, prefit=True)
        select_X_train = selection.transform(X_train)

        # Train model
        selection_model = xgb.XGBClassifier()
        selection_model.fit(select_X_train, y_train)

        # Evaluate model
        select_X_test = selection.transform(X_test)
        y_pred = selection_model.predict(select_X_test)
        predictions = [round(value) for value in y_pred]
        accuracy = accuracy_score(y_test, predictions)
        print("Thresh=%.3f, n=%d, Accuracy: %.2f%%" % (thresh, select_X_train.shape[1], accuracy*100.0))