Python sklearn.feature_selection.f_regression() Examples
The following are 15 code examples of sklearn.feature_selection.f_regression(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the link above each example. You may also want to check out all available functions and classes of the sklearn.feature_selection module, or try the search function.
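Before the project examples, here is a minimal, self-contained sketch of what f_regression itself returns: one F statistic and one p-value per feature. The synthetic data and variable names below are illustrative only.

import numpy as np
from sklearn.feature_selection import f_regression

# Synthetic data: 100 samples, 3 features; only the first feature drives the target.
rng = np.random.RandomState(0)
X = rng.randn(100, 3)
y = 2.0 * X[:, 0] + 0.1 * rng.randn(100)

f_statistic, p_values = f_regression(X, y)
print(f_statistic)  # the first feature has by far the largest F value
print(p_values)     # and the smallest p-value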
Example #1
Source File: predict.py From Loan_Default_Prediction with BSD 3-Clause "New" or "Revised" License | 6 votes |
# Rank features by F statistic and return the indices of the top n_features.
import math
import operator
from sklearn.feature_selection import f_regression

def getTopFeatures(train_x, train_y, n_features=100):
    f_val, p_val = f_regression(train_x, train_y)
    f_val_dict = {}
    p_val_dict = {}
    for i in range(len(f_val)):
        # replace NaN scores (e.g. from constant columns) with 0 so they sort last
        if math.isnan(f_val[i]):
            f_val[i] = 0.0
        f_val_dict[i] = f_val[i]
        if math.isnan(p_val[i]):
            p_val[i] = 0.0
        p_val_dict[i] = p_val[i]

    # .items() used here; the original project used Python 2's dict.iteritems()
    sorted_f = sorted(f_val_dict.items(), key=operator.itemgetter(1), reverse=True)
    sorted_p = sorted(p_val_dict.items(), key=operator.itemgetter(1), reverse=True)

    feature_indexs = []
    for i in range(0, n_features):
        feature_indexs.append(sorted_f[i][0])

    return feature_indexs

# generate the new data, based on which features are generated, and used
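A hypothetical call to getTopFeatures on random data, assuming the imports above; the shapes and n_features value are made up for illustration and are not part of the original project.

import numpy as np

train_x = np.random.rand(50, 20)
train_y = np.random.rand(50)
top_idx = getTopFeatures(train_x, train_y, n_features=5)
print(top_idx)  # indices of the 5 columns with the largest F statistics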
Example #2
Source File: dominance.py From dominance-analysis with MIT License | 6 votes |
def get_top_k(self):
    columns = list(self.data.columns.values)
    columns.remove(self.target)
    # remove intercept from top_k
    if(self.objective):
        top_k_vars = SelectKBest(f_regression, k=self.top_k)
        top_k_vars.fit_transform(self.data[columns], self.data[self.target])
    else:
        columns.remove('intercept')
        try:
            top_k_vars = SelectKBest(chi2, k=self.top_k)
            top_k_vars.fit_transform(self.data[columns], self.data[self.target])
        except:
            top_k_vars = SelectKBest(f_classif, k=self.top_k)
            top_k_vars.fit_transform(self.data[columns], self.data[self.target])
    return [columns[i] for i in top_k_vars.get_support(indices=True)]
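The core pattern in get_top_k is SelectKBest plus get_support(indices=True) to map the selection back to column names. A small sketch of that pattern on a made-up DataFrame (the frame and column names are assumptions, not part of dominance-analysis):

import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_regression

df = pd.DataFrame(np.random.rand(50, 4), columns=['a', 'b', 'c', 'd'])
df['target'] = 3.0 * df['a'] + 0.1 * np.random.rand(50)

columns = ['a', 'b', 'c', 'd']
top_k = SelectKBest(f_regression, k=2)
top_k.fit_transform(df[columns], df['target'])
print([columns[i] for i in top_k.get_support(indices=True)])  # 'a' should be among the two selected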
Example #3
Source File: test_base.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_pipeline(self):
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import f_regression
    from sklearn.pipeline import Pipeline

    diabetes = datasets.load_diabetes()
    models = ['OLS', 'GLS', 'WLS', 'GLSAR', 'QuantReg', 'GLM', 'RLM']

    for model in models:
        klass = getattr(sm, model)

        selector = SelectKBest(f_regression, k=5)
        estimator = Pipeline([('selector', selector),
                              ('reg', base.StatsModelsRegressor(klass))])

        estimator.fit(diabetes.data, diabetes.target)
        result = estimator.predict(diabetes.data)

        data = SelectKBest(f_regression, k=5).fit_transform(diabetes.data, diabetes.target)
        expected = klass(diabetes.target, data).fit().predict(data)
        self.assert_numpy_array_almost_equal(result, expected)
Example #4
Source File: feature_selection.py From dataiku-contrib with Apache License 2.0 | 5 votes |
from sklearn.feature_selection import SelectPercentile, f_regression, f_classif, chi2

def univariate_feature_selection(mode, predictors, target):
    # keep every feature (percentile=100); the point is only to expose the per-feature p-values.
    # percentile is passed by keyword because recent scikit-learn versions make it keyword-only.
    if mode == 'f_regression':
        fselect = SelectPercentile(f_regression, percentile=100)
    if mode == 'f_classif':
        fselect = SelectPercentile(f_classif, percentile=100)
    if mode == 'chi2':
        fselect = SelectPercentile(chi2, percentile=100)
    fselect.fit_transform(predictors, target)
    return fselect.pvalues_
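A hypothetical call on random data, showing that the return value is the array of p-values for every column (the names and shapes are illustrative only):

import numpy as np

predictors = np.random.rand(30, 4)
target = np.random.rand(30)
pvals = univariate_feature_selection('f_regression', predictors, target)
print(pvals)  # one p-value per column of predictors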
Example #5
Source File: model_recommendation.py From DIVE-backend with GNU General Public License v3.0 | 5 votes |
def get_initial_regression_model_recommendation(project_id, dataset_id, dependent_variable_id=None,
        recommendation_type=MRT.LASSO.value, table_layout=MCT.LEAVE_ONE_OUT.value,
        data_size_cutoff=current_app.config['ANALYSIS_DATA_SIZE_CUTOFF'],
        categorical_value_limit=current_app.config['ANALYSIS_CATEGORICAL_VALUE_LIMIT']):
    df = get_data(project_id=project_id, dataset_id=dataset_id)
    if len(df) > data_size_cutoff:
        df = df.sample(data_size_cutoff)

    field_properties = db_access.get_field_properties(project_id, dataset_id)
    quantitative_field_properties = [ fp for fp in field_properties if fp['general_type'] == 'q']

    dependent_variable = next((f for f in field_properties if f['id'] == dependent_variable_id), None) \
        if dependent_variable_id \
        else np.random.choice(quantitative_field_properties, size=1)[0]

    independent_variables = []
    for fp in field_properties:
        if (fp['name'] != dependent_variable['name']):
            if (fp['general_type'] == 'c' and (fp['is_unique'] or len(fp['unique_values']) > categorical_value_limit)):
                continue
            independent_variables.append(fp)

    recommendationTypeToFunction = {
        MRT.FORWARD_R2.value: forward_r2,
        MRT.LASSO.value: lasso,
        MRT.RFE.value: recursive_feature_elimination,
        MRT.FORWARD_F.value: f_regression
    }

    result = recommendationTypeToFunction[recommendation_type](df, dependent_variable, independent_variables)

    return {
        'recommended': True,
        'table_layout': table_layout,
        'recommendation_type': recommendation_type,
        'dependent_variable_id': dependent_variable['id'],
        'independent_variables_ids': [ x['id'] for x in result ],
    }
Example #6
Source File: model_recommendation.py From DIVE-backend with GNU General Public License v3.0 | 5 votes |
from sklearn import feature_selection  # scikit-learn's univariate tests

def f_regression(df, dependent_variable, independent_variables, interaction_terms=[], model_limit=5):
    considered_independent_variables_per_model, patsy_models = \
        construct_models(df, dependent_variable, independent_variables, interaction_terms,
                         table_layout=MCT.ALL_VARIABLES.value)
    y, X = dmatrices(patsy_models[0], df, return_type='dataframe')

    # This is scikit-learn's f_regression; it is module-qualified here because this
    # wrapper function shares its name and would otherwise shadow it.
    f_test, r = feature_selection.f_regression(X, y, center=True)
    logger.info(f_test)
    logger.info(r)
    return
Example #7
Source File: test_core_pipeline.py From lale with Apache License 2.0 | 5 votes |
def test_import_from_sklearn_pipeline(self):
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import f_regression
    from sklearn import svm
    from sklearn.pipeline import Pipeline

    anova_filter = SelectKBest(f_regression, k=3)
    clf = svm.SVC(kernel='linear')
    sklearn_pipeline = Pipeline([('anova', anova_filter), ('svc', clf)])
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    for i, pipeline_step in enumerate(sklearn_pipeline.named_steps):
        sklearn_step_params = sklearn_pipeline.named_steps[pipeline_step].get_params()
        lale_sklearn_params = lale_pipeline.steps()[i]._impl._wrapped_model.get_params()
        self.assertEqual(sklearn_step_params, lale_sklearn_params)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
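The plain scikit-learn pipeline that these lale tests convert can be exercised on its own. A minimal sketch using the iris dataset so it runs without lale (the dataset choice is an assumption; the test class uses its own X_train/y_train):

from sklearn import datasets, svm
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import Pipeline

X, y = datasets.load_iris(return_X_y=True)
anova_filter = SelectKBest(f_regression, k=3)  # keep the 3 features with the highest F scores
clf = svm.SVC(kernel='linear')
pipeline = Pipeline([('anova', anova_filter), ('svc', clf)])
pipeline.fit(X, y)
print(pipeline.score(X, y))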
Example #8
Source File: test_core_pipeline.py From lale with Apache License 2.0 | 5 votes |
def test_import_from_sklearn_pipeline2(self):
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import f_regression
    from sklearn import svm
    from sklearn.pipeline import Pipeline

    anova_filter = SelectKBest(f_regression, k=3)
    clf = svm.SVC(kernel='linear')
    sklearn_pipeline = Pipeline([('anova', anova_filter), ('svc', clf)])
    sklearn_pipeline.fit(self.X_train, self.y_train)
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    lale_pipeline.predict(self.X_test)
Example #9
Source File: test_core_pipeline.py From lale with Apache License 2.0 | 5 votes |
def test_import_from_sklearn_pipeline3(self):
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import f_regression
    from sklearn import svm
    from sklearn.pipeline import Pipeline

    anova_filter = SelectKBest(f_regression, k=3)
    clf = svm.SVC(kernel='linear')
    sklearn_pipeline = Pipeline([('anova', anova_filter), ('svc', clf)])
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline, fitted=False)
    with self.assertRaises(ValueError):  # fitted=False returns a Trainable, so calling predict is invalid.
        lale_pipeline.predict(self.X_test)
Example #10
Source File: FieldSelector.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def decode(cls, obj):
    from sklearn.feature_selection import f_classif, f_regression, GenericUnivariateSelect

    new_obj = GenericUnivariateSelect.__new__(GenericUnivariateSelect)
    new_obj.__dict__ = obj['dict']

    if new_obj.score_func == 'f_classif':
        new_obj.score_func = f_classif
    elif new_obj.score_func == 'f_regression':
        new_obj.score_func = f_regression
    else:
        raise ValueError('Unsupported GenericUnivariateSelect.score_func "%s"' % new_obj.score_func)

    return new_obj
Example #11
Source File: FieldSelector.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        floats=['param'],
        strs=['type', 'mode'],
        aliases={'type': 'score_func'},
    )

    if 'score_func' not in out_params:
        out_params['score_func'] = f_classif
    else:
        if out_params['score_func'].lower() == 'categorical':
            out_params['score_func'] = f_classif
        elif out_params['score_func'].lower() in ['numerical', 'numeric']:
            out_params['score_func'] = f_regression
        else:
            raise RuntimeError('type can either be categorical or numeric.')

    if 'mode' in out_params:
        if out_params['mode'] not in ('k_best', 'fpr', 'fdr', 'fwe', 'percentile'):
            raise RuntimeError('mode can only be one of the following: fdr, fpr, fwe, k_best, and percentile')
        if out_params['mode'] in ['fpr', 'fdr', 'fwe']:
            if 'param' in out_params:
                if not 0 < out_params['param'] < 1:
                    msg = 'Invalid param value for mode {}: param must be between 0 and 1.'.format(out_params['mode'])
                    raise ValueError(msg)

    # k_best and percentile require integer param
    if 'param' in out_params and out_params.get('mode') not in ['fdr', 'fpr', 'fwe']:
        original_value = out_params['param']
        out_params['param'] = int(out_params['param'])
        if out_params['param'] != original_value:
            msg = 'param value {} is not an integer; mode={} requires an integer.'
            msg = msg.format(original_value, out_params.get('mode', 'percentile'))
            raise ValueError(msg)

    self.estimator = GenericUnivariateSelect(**out_params)
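Stripped of the Splunk option handling, the wrapper above builds a GenericUnivariateSelect whose score_func is f_regression for numeric targets. A minimal sketch of the equivalent direct call (the data and the k_best/param choices are made up for illustration):

import numpy as np
from sklearn.feature_selection import GenericUnivariateSelect, f_regression

X = np.random.rand(40, 6)
y = np.random.rand(40)

# roughly what type=numeric, mode=k_best, param=3 maps to in the wrapper above
selector = GenericUnivariateSelect(score_func=f_regression, mode='k_best', param=3)
X_reduced = selector.fit_transform(X, y)
print(X_reduced.shape)  # (40, 3)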
Example #12
Source File: filters.py From causallib with Apache License 2.0 | 5 votes |
def compute_pvals(self, X, y):
    # TODO: export to stats_utils?
    is_y_binary = (len(np.unique(y)) == 2)
    # is_binary_feature = np.sum(((X != np.nanmin(X, axis=0)[np.newaxis, :]) &
    #                             (X != np.nanmax(X, axis=0)[np.newaxis, :])), axis=0) == 0
    is_binary_feature = areColumnsBinary(X)
    p_vals = np.zeros(X.shape[1])
    if is_y_binary:
        # Process non-binary columns:
        for i in np.where(~is_binary_feature)[0]:
            x0 = X.loc[y == 0, i]
            x1 = X.loc[y == 1, i]
            if self.is_linear:
                _, p_vals[i] = stats.ttest_ind(x0, x1)
            else:
                _, p_vals[i] = stats.ks_2samp(x0, x1)

        # Process binary features:
        _, p_vals[is_binary_feature] = feature_selection.chi2(X.loc[:, is_binary_feature], y)

    else:
        # Process non-binary features:
        _, p_vals[~is_binary_feature] = feature_selection.f_regression(X.loc[:, ~is_binary_feature], y)

        # Process binary features:
        y_mat = np.row_stack(y)
        for i in np.where(is_binary_feature)[0]:
            _, p_vals[i] = feature_selection.f_regression(y_mat, X.loc[:, i])
    return p_vals
Example #13
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def test_init(self):
    selector = SelectKBest(score_func = f_regression, k = 1)
    selector.fit(numpy.array([[0, 0], [1.0, 2.0]]), numpy.array([0.5, 1.0]))
    self.assertEqual([0, 1], selector._get_support_mask().tolist())
    selector_proxy = SelectorProxy(selector)
    self.assertEqual([0, 1], selector_proxy.support_mask_.tolist())
Example #14
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def test_fit(self):
    selector = SelectKBest(score_func = f_regression, k = 1)
    selector_proxy = SelectorProxy(selector)
    self.assertFalse(hasattr(selector_proxy, "support_mask_"))
    selector_proxy.fit(numpy.array([[0, 0], [1.0, 2.0]]), numpy.array([0.5, 1.0]))
    self.assertEqual([0, 1], selector._get_support_mask().tolist())
    self.assertEqual([0, 1], selector_proxy.support_mask_.tolist())
Example #15
Source File: test_feature_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_f_regression(self):
    diabetes = datasets.load_diabetes()
    df = pdml.ModelFrame(diabetes)

    result = df.feature_selection.f_regression()
    expected = fs.f_regression(diabetes.data, diabetes.target)

    self.assertEqual(len(result), 2)
    self.assert_numpy_array_almost_equal(result[0], expected[0])
    self.assert_numpy_array_almost_equal(result[1], expected[1])