Python sklearn.feature_selection.f_regression() Examples

The following are 15 code examples of sklearn.feature_selection.f_regression(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.feature_selection, or try the search function.
Example #1
Source File:    From Loan_Default_Prediction with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def getTopFeatures(train_x, train_y, n_features=100):
    """Return the indices of the best-scoring features by univariate F-test.

    ``f_regression`` is run on the training data, NaN F-values are treated
    as 0.0, and feature indices are ranked by descending F-value. Ties keep
    their original column order.

    Args:
        train_x: feature matrix, passed unchanged to ``f_regression``.
        train_y: target values, passed unchanged to ``f_regression``.
        n_features: maximum number of feature indices to return.

    Returns:
        A list of at most ``n_features`` integer column indices, best first.
        Fewer are returned when the data has fewer columns.
    """
    # Only the F-values drive the ranking; the p-values are not needed.
    f_val, _ = f_regression(train_x, train_y)

    # NaN scores would make the sort order undefined, so score them as 0.0.
    scores = [0.0 if math.isnan(value) else value for value in f_val]

    # sorted() is stable, so equal scores stay in ascending index order.
    ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)

    # Slicing (rather than indexing in a loop) tolerates n_features larger
    # than the number of columns, and yields [] for non-positive requests.
    return ranked[:max(n_features, 0)]

# generate the new data, based on which features are generated, and used 
Example #2
Source File:    From dominance-analysis with MIT License 6 votes vote down vote up
# Builds a SelectKBest selector limited to `self.top_k` features and returns
# the entries of `columns` at the selected positions.
# NOTE(review): this excerpt has lost lines. The indentation jumps below imply
# enclosing branches that are not shown, and neither the definition of
# `columns` nor any fit call on `top_k_vars` is visible. As written it does
# not parse; recover the missing lines from the original project before use.
def get_top_k(self):
		# remove intercept from top_k
			# First visible alternative: score features with f_regression.
			top_k_vars=SelectKBest(f_regression, k=self.top_k)
				# Second visible alternative (one level deeper): score with chi2.
				top_k_vars=SelectKBest(chi2, k=self.top_k)
				# Third visible alternative at the same depth: score with f_classif.
				# Presumably a fallback for the chi2 case; confirm against the original.
				top_k_vars=SelectKBest(f_classif, k=self.top_k)
		# get_support(indices=True) yields integer positions, used to index `columns`.
		return [columns[i] for i in top_k_vars.get_support(indices=True)] 
Example #3
Source File:    From pandas-ml with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_pipeline(self):
        """Check a SelectKBest + StatsModelsRegressor pipeline against statsmodels.

        For every listed statsmodels model class, the pipeline's predictions
        on the diabetes data must match the predictions obtained by selecting
        the same five features by hand and fitting the model class directly.
        """
        from sklearn.feature_selection import SelectKBest
        from sklearn.feature_selection import f_regression
        from sklearn.pipeline import Pipeline

        diabetes = datasets.load_diabetes()
        models = ['OLS', 'GLS', 'WLS', 'GLSAR', 'QuantReg', 'GLM', 'RLM']

        for model in models:
            klass = getattr(sm, model)

            selector = SelectKBest(f_regression, k=5)
            estimator = Pipeline([('selector', selector),
                                  ('reg', base.StatsModelsRegressor(klass))])

            # A pipeline must be fitted before it can predict.
            estimator.fit(diabetes.data, diabetes.target)
            result = estimator.predict(diabetes.data)

            # Reference path: same feature selection, then the statsmodels
            # class used directly. statsmodels models take (endog, exog),
            # i.e. the target comes first.
            data = SelectKBest(f_regression, k=5).fit_transform(diabetes.data,
                                                                diabetes.target)
            expected = klass(diabetes.target, data).fit().predict(data)
            self.assert_numpy_array_almost_equal(result, expected)
Example #4
Source File:    From dataiku-contrib with Apache License 2.0 5 votes vote down vote up
def univariate_feature_selection(mode,predictors,target):
    """Return the univariate p-value of every predictor against the target.

    Args:
        mode: name of the scoring function: 'f_regression', 'f_classif'
            or 'chi2'.
        predictors: feature matrix handed to the selector's ``fit``.
        target: target values handed to the selector's ``fit``.

    Returns:
        The fitted selector's ``pvalues_`` attribute (one entry per predictor).

    Raises:
        ValueError: if ``mode`` is not one of the supported names.
    """
    if mode == 'f_regression':
        score_func = f_regression
    elif mode == 'f_classif':
        score_func = f_classif
    elif mode == 'chi2':
        score_func = chi2
    else:
        # Previously an unknown mode fell through and failed later with an
        # UnboundLocalError on `fselect`; fail early with a clear message.
        raise ValueError(
            "Unsupported mode %r: expected 'f_regression', 'f_classif' or 'chi2'" % (mode,)
        )

    # `percentile` is keyword-only in current scikit-learn releases.
    # 100 keeps every feature, so the selector is used purely for scoring.
    fselect = SelectPercentile(score_func, percentile=100)
    # Only the scores are needed, so fit() suffices; the transformed matrix
    # that fit_transform() would build was never used.
    fselect.fit(predictors, target)
    return fselect.pvalues_
Example #5
Source File:    From DIVE-backend with GNU General Public License v3.0 5 votes vote down vote up
# Builds an initial regression-model recommendation for a dataset: loads the
# data, picks a dependent variable, dispatches to one of several selection
# functions by `recommendation_type`, and returns a summary dict.
# NOTE(review): this excerpt has lost lines. The innermost `if` in the
# independent-variable loop has no body, the dispatch dict and the returned
# dict are never closed, and nothing visible ever appends to
# `independent_variables`. As written it does not parse.
# NOTE: the `current_app.config[...]` defaults are evaluated once, when the
# function is defined, not on each call.
def get_initial_regression_model_recommendation(project_id, dataset_id, dependent_variable_id=None, recommendation_type=MRT.LASSO.value, table_layout=MCT.LEAVE_ONE_OUT.value, data_size_cutoff=current_app.config['ANALYSIS_DATA_SIZE_CUTOFF'], categorical_value_limit=current_app.config['ANALYSIS_CATEGORICAL_VALUE_LIMIT']):
    df = get_data(project_id=project_id, dataset_id=dataset_id)
    # Cap the working set at `data_size_cutoff` entries via df.sample().
    if len(df) > data_size_cutoff:
        df = df.sample(data_size_cutoff)
    field_properties = db_access.get_field_properties(project_id, dataset_id)
    # Fields whose general_type is 'q' (presumably "quantitative"; confirm).
    quantitative_field_properties = [ fp for fp in field_properties if fp['general_type'] == 'q']

    # Use the requested field when an id is given (None if no field matches);
    # otherwise pick one of the 'q' fields at random.
    dependent_variable = next((f for f in field_properties if f['id'] == dependent_variable_id), None) \
        if dependent_variable_id \
        else np.random.choice(quantitative_field_properties, size=1)[0]

    independent_variables = []
    for fp in field_properties:
        # Never offer the dependent variable as its own predictor.
        if (fp['name'] != dependent_variable['name']):
            # Condition matches 'c' fields that are unique or have more than
            # `categorical_value_limit` distinct values.
            # NOTE(review): the body of this `if` is missing from the excerpt.
            if (fp['general_type'] == 'c' and (fp['is_unique'] or len(fp['unique_values']) > categorical_value_limit)):

    # Dispatch table: recommendation type value -> selection function.
    # NOTE(review): the closing brace is missing from the excerpt.
    recommendationTypeToFunction = {
        MRT.FORWARD_R2.value: forward_r2,
        MRT.LASSO.value: lasso,
        MRT.RFE.value: recursive_feature_elimination,
        MRT.FORWARD_F.value: f_regression

    # Raises KeyError if `recommendation_type` is not one of the keys above.
    result = recommendationTypeToFunction[recommendation_type](df, dependent_variable, independent_variables)

    # NOTE(review): the returned dict is truncated here; further keys and the
    # closing brace are not visible.
    return {
        'recommended': True,
        'table_layout': table_layout,
        'recommendation_type': recommendation_type,
        'dependent_variable_id': dependent_variable['id'],
        'independent_variables_ids': [ x['id'] for x in result ],
Example #6
Source File:    From DIVE-backend with GNU General Public License v3.0 5 votes vote down vote up
def f_regression(df, dependent_variable, independent_variables, interaction_terms=[], model_limit=5):
    """Score independent variables against the dependent variable.

    Builds the candidate models with ``construct_models`` (using the
    ALL_VARIABLES table layout), turns the first patsy model into design
    matrices and runs an ``f_regression`` call on them.

    NOTE(review): the excerpt stops after the scoring call, so what is
    returned and how ``model_limit`` is used cannot be seen here.
    NOTE(review): ``interaction_terms=[]`` is a mutable default shared across
    calls; safe only if neither this function nor ``construct_models``
    mutates it.
    """
    considered_independent_variables_per_model, patsy_models = \
        construct_models(df, dependent_variable, independent_variables, interaction_terms, table_layout=MCT.ALL_VARIABLES.value)
    # Only the first constructed model is turned into (y, X) dataframes.
    y, X = dmatrices(patsy_models[0], df, return_type='dataframe')

    # NOTE(review): inside this module the name `f_regression` is bound to this
    # very function, whose signature has no `center` parameter. Unless the
    # library scorer is imported under another name, this call would raise a
    # TypeError; confirm against the original file.
    f_test, r = f_regression(X, y, center=True)
Example #7
Source File:    From lale with Apache License 2.0 5 votes vote down vote up
def test_import_from_sklearn_pipeline(self):
        """Importing a two-step sklearn pipeline must preserve each step's params.

        Builds an ANOVA-filter + linear-SVC sklearn pipeline, imports it with
        ``import_from_sklearn_pipeline`` and checks that every imported step
        wraps a model with identical parameters, then compares predictions.
        """
        from sklearn import svm
        from sklearn.feature_selection import SelectKBest, f_regression
        from sklearn.pipeline import Pipeline

        steps = [
            ('anova', SelectKBest(f_regression, k=3)),
            ('svc', svm.SVC(kernel='linear')),
        ]
        sklearn_pipeline = Pipeline(steps)
        lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)

        # Walk the sklearn steps by name and compare each with the imported
        # step at the same position.
        for position, step_name in enumerate(sklearn_pipeline.named_steps):
            expected_params = sklearn_pipeline.named_steps[step_name].get_params()
            wrapped_model = lale_pipeline.steps()[position]._impl._wrapped_model
            self.assertEqual(expected_params, wrapped_model.get_params())

        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Example #8
Source File:    From lale with Apache License 2.0 5 votes vote down vote up
# Builds an ANOVA-filter + linear-SVC sklearn pipeline and imports it with
# import_from_sklearn_pipeline.
# NOTE(review): this excerpt is damaged and truncated. The Pipeline line below
# ends with the residue of a second call (`, self.y_train)`) whose beginning
# was lost, so the line does not parse, and no assertions are visible after
# the import step.
def test_import_from_sklearn_pipeline2(self):
        from sklearn.feature_selection import SelectKBest
        from sklearn.feature_selection import f_regression        
        from sklearn import svm
        from sklearn.pipeline import Pipeline
        # Keep the 3 best-scoring features according to f_regression.
        anova_filter = SelectKBest(f_regression, k=3)
        clf = svm.SVC(kernel='linear')        
        # NOTE(review): trailing `, self.y_train)` presumably belonged to a
        # separate fit call on the next line; confirm against the original.
        sklearn_pipeline = Pipeline([('anova', anova_filter), ('svc', clf)]), self.y_train)
        lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
Example #9
Source File:    From lale with Apache License 2.0 5 votes vote down vote up
# Imports an ANOVA-filter + linear-SVC sklearn pipeline with fitted=False and
# expects a ValueError from the statement(s) inside the assertRaises block.
# NOTE(review): the excerpt is truncated. The body of the `with` block is
# missing, so this does not parse as written.
def test_import_from_sklearn_pipeline3(self):
        from sklearn.feature_selection import SelectKBest
        from sklearn.feature_selection import f_regression        
        from sklearn import svm
        from sklearn.pipeline import Pipeline
        # Keep the 3 best-scoring features according to f_regression.
        anova_filter = SelectKBest(f_regression, k=3)
        clf = svm.SVC(kernel='linear')        
        sklearn_pipeline = Pipeline([('anova', anova_filter), ('svc', clf)])
        lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline, fitted=False)
        # Per the inline note, the missing body presumably calls predict on
        # `lale_pipeline`; confirm against the original.
        with self.assertRaises(ValueError):#fitted=False returns a Trainable, so calling predict is invalid.
Example #10
Source File:    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def decode(cls, obj):
        """Rebuild a GenericUnivariateSelect from its serialized dictionary.

        The instance is created without running ``__init__`` and its
        ``__dict__`` is taken from ``obj['dict']``. The score function is
        stored there by name and is mapped back to the real callable.

        Args:
            obj: mapping whose ``'dict'`` entry holds the attribute dictionary
                of the selector, with ``score_func`` stored as a string.

        Returns:
            The reconstructed GenericUnivariateSelect instance.

        Raises:
            ValueError: if the stored ``score_func`` name is neither
                ``'f_classif'`` nor ``'f_regression'``.
        """
        from sklearn.feature_selection import f_classif, f_regression, GenericUnivariateSelect

        new_obj = GenericUnivariateSelect.__new__(GenericUnivariateSelect)
        new_obj.__dict__ = obj['dict']

        if new_obj.score_func == 'f_classif':
            new_obj.score_func = f_classif
        elif new_obj.score_func == 'f_regression':
            new_obj.score_func = f_regression
        else:
            # Only unknown names are an error. Without this `else` the raise
            # sat inside the f_regression branch and fired on every such
            # object, while unsupported names passed through silently.
            raise ValueError('Unsupported GenericUnivariateSelect.score_func "%s"' % new_obj.score_func)

        return new_obj
Example #11
Source File:    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def __init__(self, options):
        """Validate the user options and build the GenericUnivariateSelect estimator.

        Args:
            options: mapping whose optional ``'params'`` entry holds the raw
                user parameters (``type``, ``mode``, ``param``).

        Raises:
            RuntimeError: if ``type`` is not categorical/numeric, or ``mode``
                is not one of the supported selection modes.
            ValueError: if ``param`` is out of range for fpr/fdr/fwe, or is
                not an integer for the remaining modes.
        """

        # NOTE(review): only the `strs` and `aliases` arguments were visible in
        # the damaged source; other keyword arguments (e.g. numeric conversion
        # of 'param') may have been lost. Confirm against the original.
        out_params = convert_params(
            options.get('params', {}),
            strs=['type', 'mode'],
            aliases={'type': 'score_func'},
        )

        # Translate the user-facing 'type' (aliased to score_func) into the
        # actual scoring callable; categorical is the default.
        if 'score_func' not in out_params:
            out_params['score_func'] = f_classif
        else:
            if out_params['score_func'].lower() == 'categorical':
                out_params['score_func'] = f_classif
            elif out_params['score_func'].lower() in ['numerical', 'numeric']:
                out_params['score_func'] = f_regression
            else:
                raise RuntimeError('type can either be categorical or numeric.')

        if 'mode' in out_params:
            if out_params['mode'] not in ('k_best', 'fpr', 'fdr', 'fwe', 'percentile'):
                raise RuntimeError('mode can only be one of the following: fdr, fpr, fwe, k_best, and percentile')

            # The error-rate based modes take a threshold strictly inside (0, 1).
            if out_params['mode'] in ['fpr', 'fdr', 'fwe']:
                if 'param' in out_params:
                    if not 0 < out_params['param'] < 1:
                        msg = 'Invalid param value for mode {}: param must be between 0 and 1.'.format(out_params['mode'])
                        raise ValueError(msg)

        # k_best and percentile require integer param
        if 'param' in out_params and out_params.get('mode') not in ['fdr', 'fpr', 'fwe']:
            original_value = out_params['param']
            out_params['param'] = int(out_params['param'])
            # int() truncates, so any difference means a fractional value was given.
            if out_params['param'] != original_value:
                msg = 'param value {} is not an integer; mode={} requires an integer.'
                msg = msg.format(original_value, out_params.get('mode', 'percentile'))
                raise ValueError(msg)

        self.estimator = GenericUnivariateSelect(**out_params)
Example #12
Source File:    From causallib with Apache License 2.0 5 votes vote down vote up
def compute_pvals(self, X, y):
        """Compute one univariate p-value per column of X against y.

        The test used depends on whether the target and the feature are binary:

        * binary y, non-binary feature: two-sample t-test when
          ``self.is_linear`` is true, otherwise a two-sample KS test, comparing
          the feature values of the ``y == 0`` and ``y == 1`` groups;
        * binary y, binary feature: chi-squared test;
        * non-binary y, non-binary feature: ``f_regression`` of y on the feature;
        * non-binary y, binary feature: ``f_regression`` of the feature on y.

        Args:
            X: feature table, indexed with ``.loc`` and exposing ``.shape``.
            y: target values aligned with the rows of X.

        Returns:
            Array of length ``X.shape[1]`` holding the p-value of each column.
        """
        # TODO: export to stats_utils?
        is_y_binary = (len(np.unique(y)) == 2)
        # is_binary_feature = np.sum(((X != np.nanmin(X, axis=0)[np.newaxis, :]) &
        #                             (X != np.nanmax(X, axis=0)[np.newaxis, :])), axis=0) == 0
        is_binary_feature = areColumnsBinary(X)
        p_vals = np.zeros(X.shape[1])
        if is_y_binary:
            # Process non-binary columns:
            # NOTE(review): `i` is a position from np.where but is used as a
            # label with .loc; this assumes integer column labels 0..n-1.
            for i in np.where(~is_binary_feature)[0]:
                x0 = X.loc[y == 0, i]
                x1 = X.loc[y == 1, i]
                if self.is_linear:
                    _, p_vals[i] = stats.ttest_ind(x0, x1)
                else:
                    # Without this `else` the KS result always overwrote
                    # the t-test result.
                    _, p_vals[i] = stats.ks_2samp(x0, x1)

            # Process binary features:
            _, p_vals[is_binary_feature] = feature_selection.chi2(X.loc[:, is_binary_feature], y)

        else:
            # Continuous target. Without this `else` the block below also ran
            # for binary targets, overwriting every p-value computed above,
            # and non-binary targets got all zeros.
            # Process non-binary features:
            _, p_vals[~is_binary_feature] = feature_selection.f_regression(X.loc[:, ~is_binary_feature], y)

            # Process binary features:
            # Roles are swapped: y becomes the single (n, 1) regressor and the
            # binary feature is the response. np.vstack replaces the
            # deprecated alias np.row_stack.
            y_mat = np.vstack(y)
            for i in np.where(is_binary_feature)[0]:
                _, p_vals[i] = feature_selection.f_regression(y_mat, X.loc[:, i])
        return p_vals
Example #13
Source File:    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_init(self):
		"""A SelectorProxy built from a fitted selector exposes its support mask."""
		selector = SelectKBest(score_func = f_regression, k = 1)
		# Fit on a 2x2 toy matrix; the assertions below expect only the second
		# column to be selected. The damaged source had lost the start of this call.
		selector.fit(numpy.array([[0, 0], [1.0, 2.0]]), numpy.array([0.5, 1.0]))
		self.assertEqual([0, 1], selector._get_support_mask().tolist())
		selector_proxy = SelectorProxy(selector)
		self.assertEqual([0, 1], selector_proxy.support_mask_.tolist())
Example #14
Source File:    From sklearn2pmml with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_fit(self):
		"""Fitting through the proxy fits the wrapped selector and sets support_mask_."""
		selector = SelectKBest(score_func = f_regression, k = 1)
		selector_proxy = SelectorProxy(selector)
		# Before fitting, the proxy must not expose a support mask yet.
		self.assertFalse(hasattr(selector_proxy, "support_mask_"))
		# NOTE(review): the start of this call was lost in the damaged source;
		# fitting via the proxy is inferred from the test name and from the
		# later assertion that the proxy gains `support_mask_`.
		selector_proxy.fit(numpy.array([[0, 0], [1.0, 2.0]]), numpy.array([0.5, 1.0]))
		self.assertEqual([0, 1], selector._get_support_mask().tolist())
		self.assertEqual([0, 1], selector_proxy.support_mask_.tolist())
Example #15
Source File:    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_f_regression(self):
        """ModelFrame's f_regression accessor must match the library function."""
        diabetes = datasets.load_diabetes()
        df = pdml.ModelFrame(diabetes)

        result = df.feature_selection.f_regression()
        # Reference values computed directly on the raw data and target.
        # The damaged source had lost both arguments of this call.
        expected = fs.f_regression(diabetes.data, diabetes.target)

        # f_regression yields a pair; compare both elements.
        self.assertEqual(len(result), 2)
        self.assert_numpy_array_almost_equal(result[0], expected[0])
        self.assert_numpy_array_almost_equal(result[1], expected[1])