Python sklearn.feature_selection.mutual_info_regression() Examples

The following are two code examples of sklearn.feature_selection.mutual_info_regression(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.feature_selection.
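As a quick orientation before the examples, here is a minimal, self-contained sketch of the basic call; the synthetic data and parameter values below are illustrative and not taken from either project.

import numpy as np
from sklearn.feature_selection import mutual_info_regression

# y depends only on the first column of X, so its estimated mutual
# information should dominate the other two features.
rng = np.random.RandomState(0)
X = rng.normal(size=(500, 3))
y = X[:, 0] ** 2 + 0.1 * rng.normal(size=500)

mi = mutual_info_regression(X, y, n_neighbors=3, random_state=0)
print(mi)  # non-negative scores, one per feature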
Example #1
Source File: eda.py    From xam with MIT License
from collections import defaultdict

import numpy as np
import pandas as pd
from scipy import stats
from sklearn import feature_selection


def feature_importance_regression(features, target, n_neighbors=3, random_state=None):
    """Return two DataFrames of importance scores: one for continuous (float)
    features and one for discrete (integer/boolean) features."""
    cont = features.select_dtypes(include=[np.floating])
    disc = features.select_dtypes(include=[np.integer, bool])

    cont_imp = pd.DataFrame(index=cont.columns)
    disc_imp = pd.DataFrame(index=disc.columns)

    # Continuous features
    if cont_imp.index.size > 0:

        # Pearson correlation
        pearson = np.array([stats.pearsonr(feature, target) for _, feature in cont.items()])
        cont_imp['pearson_r'] = pearson[:, 0]
        cont_imp['pearson_r_p_value'] = pearson[:, 1]

        # Mutual information
        mut_inf = feature_selection.mutual_info_regression(cont, target, discrete_features=False,
                                                           n_neighbors=n_neighbors,
                                                           random_state=random_state)
        cont_imp['mutual_information'] = mut_inf

    # Discrete features
    if disc_imp.index.size > 0:

        # F-test
        f_tests = defaultdict(dict)

        for feature in disc.columns:
            groups = [target[idxs] for idxs in disc.groupby(feature).groups.values()]
            statistic, p_value = stats.f_oneway(*groups)
            f_tests[feature]['f_statistic'] = statistic
            f_tests[feature]['f_p_value'] = p_value

        f_tests_df = pd.DataFrame.from_dict(f_tests, orient='index')
        disc_imp['f_statistic'] = f_tests_df['f_statistic']
        disc_imp['f_p_value'] = f_tests_df['f_p_value']

        # Mutual information
        mut_inf = feature_selection.mutual_info_regression(disc, target, discrete_features=True,
                                                           n_neighbors=n_neighbors,
                                                           random_state=random_state)
        disc_imp['mutual_information'] = mut_inf

    return cont_imp, disc_imp 
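A hypothetical usage sketch for the function above; the DataFrame, column names, and target are made up, chosen so the float column exercises the continuous branch and the integer column the discrete one.

import numpy as np
import pandas as pd

# Assumes feature_importance_regression from Example #1 is in scope.
rng = np.random.RandomState(42)
df = pd.DataFrame({
    'x_cont': rng.normal(size=200),          # float -> continuous branch
    'x_disc': rng.randint(0, 4, size=200),   # int   -> discrete branch
})
y = pd.Series(2 * df['x_cont'] + df['x_disc'] + rng.normal(size=200))

cont_imp, disc_imp = feature_importance_regression(df, y, random_state=0)
print(cont_imp)  # columns: pearson_r, pearson_r_p_value, mutual_information
print(disc_imp)  # columns: f_statistic, f_p_value, mutual_information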
Example #2
Source File: numerical.py    From CausalDiscoveryToolbox with MIT License
import numpy as np
from sklearn.feature_selection import mutual_info_regression


def predict(self, a, b):
    """Compute the test statistic

    Args:
        a (array-like): Variable 1
        b (array-like): Variable 2

    Returns:
        float: test statistic
    """
    a = np.array(a).reshape((-1, 1))
    b = np.array(b).reshape((-1, 1))
    # Mutual information is estimated in both directions and averaged,
    # which makes the score symmetric in a and b.
    return (mutual_info_regression(a, b.reshape((-1,)))
            + mutual_info_regression(b, a.reshape((-1,)))) / 2
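The method averages mutual_info_regression estimates in both directions, which makes the score symmetric. Purely as an illustration, the same symmetrized computation can be run outside the class on invented data:

import numpy as np
from sklearn.feature_selection import mutual_info_regression

rng = np.random.RandomState(0)
a = rng.normal(size=300)
b = a + 0.5 * rng.normal(size=300)   # dependent pair, so the score is well above zero

# Same symmetrized statistic as predict(): MI(a; b) averaged with MI(b; a).
score = (mutual_info_regression(a.reshape(-1, 1), b)
         + mutual_info_regression(b.reshape(-1, 1), a)) / 2
print(score[0])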