Python sklearn.feature_selection.RFECV Examples
The following are 14 code examples of sklearn.feature_selection.RFECV(). You may also want to check out all available functions/classes of the module sklearn.feature_selection.
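As a quick orientation before the project examples, here is a minimal, self-contained sketch of the RFECV API on synthetic data. The estimator, data, and parameter choices are illustrative assumptions, not taken from any of the projects below.

# Minimal RFECV sketch on synthetic data (illustrative, not project code).
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

# Synthetic data: 20 features, only 5 informative.
X, y = make_classification(n_samples=200, n_features=20, n_informative=5, random_state=0)

selector = RFECV(
    estimator=LogisticRegression(max_iter=1000),  # needs coef_ or feature_importances_
    step=1,                  # number (or fraction) of features removed per iteration
    cv=StratifiedKFold(5),   # CV used to score each candidate feature count
    scoring="accuracy",
)
selector.fit(X, y)
print("Optimal number of features:", selector.n_features_)
print("Selected feature mask:", selector.support_)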
Example #1
Source File: utils_feature_selection.py From auto_ml with MIT License
def get_feature_selection_model_from_name(type_of_estimator, model_name):
    model_map = {
        'classifier': {
            'SelectFromModel': SelectFromModel(RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15), threshold='20*mean'),
            'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        },
        'regressor': {
            'SelectFromModel': SelectFromModel(RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15), threshold='0.7*mean'),
            'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        }
    }
    return model_map[type_of_estimator][model_name]
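A hypothetical lookup illustrating the map above; the call site is an assumption for illustration, not auto_ml code.

# Hypothetical usage of the map above (assumed, not from auto_ml itself).
selector = get_feature_selection_model_from_name('classifier', 'RFECV')
# selector is RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
# usable like any scikit-learn transformer via fit/transform.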
Example #2
Source File: FSRegression.py From CausalDiscoveryToolbox with MIT License
def predict_features(self, df_features, df_target, idx=0, **kwargs):
    """For one variable, predict its neighbouring nodes.

    Args:
        df_features (pandas.DataFrame):
        df_target (pandas.Series):
        idx (int): (optional) for printing purposes
        kwargs (dict): additional options for algorithms

    Returns:
        list: scores of each feature relative to the target
    """
    estimator = SVR(kernel='linear')
    selector = RFECV(estimator, step=1)
    selector = selector.fit(df_features.values, np.ravel(df_target.values))
    return selector.grid_scores_
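Note that RFECV.grid_scores_ was deprecated in scikit-learn 1.0 and removed in 1.2; on recent versions the per-step mean CV scores are exposed through cv_results_. A minimal sketch of the migration for the final line above:

# On scikit-learn >= 1.2, grid_scores_ no longer exists; the mean CV score
# at each elimination step is in cv_results_ instead, so the last line of
# predict_features would become:
#     return selector.cv_results_["mean_test_score"]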
Example #3
Source File: test_feature_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.feature_selection.GenericUnivariateSelect, fs.GenericUnivariateSelect)
    self.assertIs(df.feature_selection.SelectPercentile, fs.SelectPercentile)
    self.assertIs(df.feature_selection.SelectKBest, fs.SelectKBest)
    self.assertIs(df.feature_selection.SelectFpr, fs.SelectFpr)
    self.assertIs(df.feature_selection.SelectFromModel, fs.SelectFromModel)
    self.assertIs(df.feature_selection.SelectFdr, fs.SelectFdr)
    self.assertIs(df.feature_selection.SelectFwe, fs.SelectFwe)
    self.assertIs(df.feature_selection.RFE, fs.RFE)
    self.assertIs(df.feature_selection.RFECV, fs.RFECV)
    self.assertIs(df.feature_selection.VarianceThreshold, fs.VarianceThreshold)
Example #4
Source File: PipeTasks.py From ProFET with GNU General Public License v3.0
def GetKFeatures(filename, method='RFE', kbest=30, alpha=0.01, reduceMatrix=True):
    '''
    Gets best features using chosen method (K-best, RFE, RFECV, 'L1'
    (RandomizedLogisticRegression), 'Tree' (ExtraTreesClassifier), mrmr),
    then prints the top K features' names (from featNames).
    If reduceMatrix = True, then also returns X reduced to the K best features.
    Available methods' names are: 'RFE', 'RFECV', 'RandomizedLogisticRegression',
    'K-best', 'ExtraTreesClassifier'. Note that effectively any scikit-learn
    method could be used, if correctly imported.
    '''
    # est = method()
    '''
    Gets the K-best features (filtered by FDR, then select best ranked by
    t-test; more advanced options can be implemented).
    Save the data/matrix with the resulting/kept features to a new output
    file, "REDUCED_Feat.csv"
    '''
    # NOTE: despite the `method` argument, this implementation only runs
    # SelectKBest; the other advertised methods are not dispatched here.
    features, labels, lb_encoder, featureNames = load_data(filename)
    X, y = features, labels
    # change the names as ints back to strings
    class_names = lb_encoder.inverse_transform(y)
    print("Data and labels imported. PreFilter Feature matrix shape:")
    print(X.shape)
    selectK = SelectKBest(k=kbest)
    selectK.fit(X, y)
    selectK_mask = selectK.get_support()
    K_featnames = featureNames[selectK_mask]
    print('X After K filter:', X.shape)
    print("K_featnames: %s" % (K_featnames))
    if reduceMatrix == True:
        Reduced_df = pd.read_csv(filename, index_col=0)
        Reduced_df = Reduced_df[Reduced_df.columns[selectK_mask]]
        Reduced_df.to_csv('REDUCED_Feat.csv')
        print('Saved to REDUCED_Feat.csv')
        return Reduced_df

# WORKS! But unreadable with too many features!
Example #5
Source File: QuincyLearn.py From quincy with GNU General Public License v3.0
def __select_features(self, X, y, feature_names):
    logging.info("Automagically extracting features with recursive feature elimination based on RandomForest")
    model = RandomForestClassifier(n_jobs=-1)
    rfe = RFECV(model, cv=QuincyConfig.CV, scoring=QuincyConfig.METRIC)
    fit = rfe.fit(X, y)
    logging.info("Number of selected features: %d" % fit.n_features_)
    discarded, selected = self.__get_discarded_and_selected_features(feature_names, fit)
    X = self.__drop_discarded_features(X, discarded)
    feature_selection_results = self.__get_feature_selection_results(X, discarded, feature_names, fit, model, selected, y)
    self._featureSelectionResults = feature_selection_results
    return X
Example #6
Source File: RFECV.py From simba with GNU Lesser General Public License v3.0
def perf_RFCVE(projectPath, RFCVE_CVs, RFCVE_step_size, clf, data_train, target_train):
    selector = RFECV(estimator=clf, step=RFCVE_step_size, cv=RFCVE_CVs, scoring='f1', verbose=1)
    selector = selector.fit(data_train, target_train)
    print(selector.support_)
Example #7
Source File: nested_cv.py From Nested-Cross-Validation with MIT License
def _fit_recursive_feature_elimination(self, X_train_outer, y_train_outer, X_test_outer):
    rfe = RFECV(estimator=self.model, min_features_to_select=self.rfe_n_features,
                cv=self.inner_cv, n_jobs=self.n_jobs)
    rfe.fit(X_train_outer, y_train_outer)
    log.info('Best number of features was: {0}'.format(rfe.n_features_))
    # Assign selected features to data
    return rfe.transform(X_train_outer), rfe.transform(X_test_outer)
Example #8
Source File: FeatureSelector.py From CDSS with GNU General Public License v3.0
def compute_ranks(self):
    if self._algorithm == FeatureSelector.SELECT_K_BEST:
        scores = self._selector.scores_
        sorted_scores = sorted(scores, reverse=True)
        ranks = [sorted_scores.index(i) + 1 for i in scores]
    elif self._algorithm == FeatureSelector.SELECT_PERCENTILE:
        scores = self._selector.scores_
        sorted_scores = sorted(scores, reverse=True)
        ranks = [sorted_scores.index(i) + 1 for i in scores]
    elif self._algorithm == FeatureSelector.RECURSIVE_ELIMINATION:
        n_selected = self._selector.n_features_
        support = self._selector.support_
        ranking = self._selector.ranking_
        # RFE and RFECV do not provide feature scores. Instead, they
        # provide a list of features which have been selected (support)
        # and an ascending list indicating when each other feature was
        # eliminated. Use these two to construct feature ranks, though
        # acknowledge that RFE and RFECV do not actually distinguish between
        # the weights of selected features.
        ranks = [0] * len(support)
        selected_count = 0
        for i in range(len(ranking)):
            if support[i]:
                # All selected features in ranking receive rank 1, so need
                # to iterate through list and add incrementing values so
                # that features ranked 1, 1, 1, become 1, 2, 3.
                ranks[i] = ranking[i] + selected_count
                selected_count += 1
            else:
                # Even if there are 5 selected features, the 6th feature
                # in ranking is given rank 2, so add (n_selected - 1).
                ranks[i] = ranking[i] + (n_selected - 1)
    return ranks
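A tiny worked example of the RECURSIVE_ELIMINATION branch, with made-up values (not from CDSS):

# Made-up inputs: two selected features (rank 1 in `ranking`) and two
# eliminated ones.
support = [True, False, True, False]
ranking = [1, 2, 1, 3]
n_selected = 2
# The loop above maps these to the distinct ranks [1, 3, 2, 4]:
# the tied rank-1 entries become 1 and 2 (each gains selected_count),
# and each eliminated feature is shifted up by (n_selected - 1) = 1.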
Example #9
Source File: FeatureSelector.py From CDSS with GNU General Public License v3.0
def _eliminate_recursively(self, k=None):
    if self._problem == FeatureSelector.CLASSIFICATION:
        estimator = RandomForestClassifier(random_state=self._random_state)
    else:
        estimator = LassoCV(random_state=self._random_state)

    # If k is not specified, then use RFECV to automatically decide on
    # optimal number of features. If specified, then use RFE.
    if k is None:
        self._selector = RFECV(estimator)
    else:
        self._selector = RFE(estimator, n_features_to_select=k, step=0.05)
Example #10
Source File: wine.py From UCI-Data-Analysis with Apache License 2.0
def recursiveFeatureSelectorCV(classifier_model, train_data, train_labels, test_data, number_of_features):
    # NOTE: RFECV's second positional argument is `step`, so
    # number_of_features here sets how many features are eliminated per
    # iteration, not the size of the final feature set (RFECV chooses
    # that via cross-validation).
    rfe = RFECV(classifier_model, number_of_features)
    transformed_train_data = rfe.fit_transform(train_data, train_labels)
    transformed_test_data = rfe.transform(test_data)
    return transformed_train_data, transformed_test_data

# Iterating over all feature preprocessors and classifiers in turn
Example #11
Source File: PipeTasks.py From ProFET with GNU General Public License v3.0
def plotRFECV(X, y, stepSize=0.05, scoring='f1'):
    '''
    Plot recursive feature elimination example with automatic tuning of the
    number of features selected with cross-validation.
    http://scikit-learn.org/stable/auto_examples/plot_rfe_with_cross_validation.html#example-plot-rfe-with-cross-validation-py
    '''
    from sklearn.svm import SVC
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.feature_selection import RFECV

    # Create the RFE object and compute a cross-validated score.
    # svc = SVC(kernel="linear")
    svc = SVC(kernel="linear", class_weight='auto', cache_size=1400)

    # The "accuracy" scoring is proportional to the number of correct
    # classifications
    rfecv = RFECV(estimator=svc, step=stepSize, cv=StratifiedKFold(y, 2), scoring=scoring)
    rfecv.fit(X, y)
    print("Optimal number of features : %d" % rfecv.n_features_)

    # Plot number of features VS. cross-validation scores
    import matplotlib.pyplot as plt
    plt.figure()
    plt.xlabel("Number of features selected")
    plt.ylabel("Cross validation score (nb of correct classifications)")
    plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
    plt.show()
    return rfecv
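This example targets a pre-0.20 scikit-learn: the sklearn.cross_validation module was removed in 0.20, StratifiedKFold no longer takes y in its constructor, and class_weight='auto' became 'balanced'. A sketch of the equivalent setup on a modern version, with parameter values mirroring the function above:

# Modern equivalent of the deprecated pieces above (a sketch, not ProFET code).
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC

svc = SVC(kernel="linear", class_weight="balanced", cache_size=1400)
# StratifiedKFold now takes n_splits and receives y at split time via RFECV's fit.
rfecv = RFECV(estimator=svc, step=0.05, cv=StratifiedKFold(n_splits=2), scoring="f1")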
Example #12
Source File: Model_Parameters_CV.py From ProFET with GNU General Public License v3.0
def plot_RFE(X, y):
    from sklearn.svm import SVC
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.feature_selection import RFECV
    from sklearn.datasets import make_classification
    from sklearn.metrics import zero_one_loss
    import pylab as pl
    import matplotlib.pylab as pl

    # Create the RFE object and compute a cross-validated score.
    # svc = SVC(kernel="linear", class_weight="auto", cache_size=1200, shrinking=True)
    svc = LinearSVC(penalty='l1', loss='l2', dual=False, class_weight='auto', multi_class='ovr')
    # SGD = SGDClassifier(penalty='elasticnet', class_weight='auto', n_jobs=-1, n_iter=10, l1_ratio=0.15)
    ## rfecv = RFECV(estimator=svc, step=0.1, cv=StratifiedKFold(y, 5), scoring='roc_auc')
    rfecv = RFECV(estimator=svc, step=0.2, cv=StratifiedKFold(y, 2), scoring='f1')
    X_RFE = rfecv.fit_transform(X, y)
    print("Optimal number of features in X_RFE : %d" % rfecv.n_features_)

    # Plot number of features VS. cross-validation scores
    pl.figure()
    pl.xlabel("Number of features selected")
    pl.ylabel("Cross validation score (nb of misclassifications)")
    pl.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
    pl.show()
    print('RFE Opt.shapes features CV score:')
    CV_multi_stats(X_RFE, y, svc)
    return (X_RFE, rfecv)
Example #13
Source File: feature_selection_insight.py From karura with Apache License 2.0
def adopt(self, dfe, interpreted=None):
    models = []
    # about scoring, please see the following document:
    # http://scikit-learn.org/stable/modules/model_evaluation.html#common-cases-predefined-values
    scoring = "accuracy"

    # todo: for now, text and datetime columns are ignored
    for t in (FType.text, FType.datetime):
        columns = dfe.get_columns(t, include_target=False)
        dfe.df.drop(columns, inplace=True, axis=1)
        dfe.sync()

    if dfe.get_target_ftype() == FType.categorical:
        # models = [RandomForestClassifier(), SVC(kernel="linear")]
        models = [RandomForestClassifier()]
        if self.is_binary_classification(dfe):
            scoring = "f1"
        else:
            # see the reference on the f1 score:
            # http://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
            # if prediction never assigns some label, macro averaging is too
            # harsh an evaluation, so use micro
            scoring = "f1_micro"
    elif dfe.get_target_ftype() == FType.numerical:
        # About the models used to select features, please refer to
        # http://scikit-learn.org/stable/modules/feature_selection.html
        models = [Lasso(alpha=.1), RandomForestRegressor()]
        scoring = "r2"
    else:
        raise Exception("Target type is None or un-predictable type.")

    features = dfe.get_features()
    target = dfe.get_target()
    best_rfecv = None
    feature_masks = []
    for m in models:
        rfecv = RFECV(estimator=m, step=1, cv=self.cv_count, scoring=scoring, n_jobs=self.n_jobs)
        rfecv.fit(features, target)
        feature_masks.append(rfecv.support_)

    selected_mask = []
    if len(feature_masks) < 2:
        selected_mask = feature_masks[0]
    else:
        # keep only the features that every model selected
        selected_mask = np.logical_and(*feature_masks)

    eliminates = features.columns[np.logical_not(selected_mask)]
    dfe.df.drop(eliminates, inplace=True, axis=1)
    dfe.sync()

    selected = features.columns[selected_mask].tolist()
    ss = self.a2t(selected)
    self.description = {
        "ja": "項目{}は予測に有効な項目です。これらを利用し、モデルを構築します。".format(ss),
        "en": "Columns {} are useful to predict. I'll use these to make model.".format(ss)
    }
    return True
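A tiny illustration of the mask intersection used above, with made-up masks:

# Two hypothetical support_ masks from two models; logical_and keeps
# only the features that both models selected.
import numpy as np

m1 = np.array([True, True, False])
m2 = np.array([True, False, False])
print(np.logical_and(m1, m2))  # [ True False False]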
Example #14
Source File: genericmodelclass.py From easyML with BSD 3-Clause "New" or "Revised" License
def recursive_feature_elimination_cv(self, step=1, inplace=False):
    """A method to implement recursive feature elimination on the model
    with cross-validation (CV). At each step, features are ranked as per
    the algorithm used and the lowest-ranked features are removed, as
    specified by the step argument. At each step, the CV score is
    determined using the scoring metric specified in the model. The set
    of features with the highest cross-validation score is then chosen.

    Parameters
    __________
    step : int or float, default=1
        If int, then step corresponds to the number of features to remove
        at each iteration. If float and within (0.0, 1.0), then step
        corresponds to the percentage (rounded down) of features to remove
        at each iteration. If float and greater than one, then the
        integral part will be considered as an integer input.

    inplace : bool, default=False
        If True, the predictors of the class are modified to those
        selected by the RFECV procedure.

    Returns
    _______
    selected : pandas series
        A series object containing the selected features as index and
        their rank in selection as values
    """
    rfecv = RFECV(
        self.alg, step=step, cv=self.cv_folds,
        scoring=self.scoring_metric, n_jobs=-1
    )
    rfecv.fit(
        self.datablock.train[self.predictors],
        self.datablock.train[self.datablock.target]
    )

    if step > 1:
        min_nfeat = len(self.predictors) - step * (len(rfecv.grid_scores_) - 1)
        plt.xlabel("Number of features selected")
        plt.ylabel("Cross validation score")
        plt.plot(
            range(min_nfeat, len(self.predictors) + 1, step),
            rfecv.grid_scores_
        )
        plt.show(block=False)

    ranks = pd.Series(rfecv.ranking_, index=self.predictors)
    selected = ranks.loc[rfecv.support_]

    if inplace:
        self.set_predictors(selected.index.tolist())
    return ranks
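A hypothetical call, assuming an easyML model wrapper instance named model (the wrapper setup is not shown in this example):

# Hypothetical usage (assumed; not from easyML's docs): drop 5% of the
# remaining features per iteration and adopt the selected predictors.
ranks = model.recursive_feature_elimination_cv(step=0.05, inplace=True)
print(ranks.sort_values().head())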