Python Examples of statsmodels.stats.multitest.multipletests

Source File: contrast.py From vnpy_crypto with MIT License

5 votes

def t_test_multi(result, contrasts, method='hs', alpha=0.05, ci_method=None,
                 contrast_names=None):
    """Perform t_test and add multiplicity correction to results dataframe.

    Parameters
    ----------
    result : results instance
        results of an estimated model
    contrasts : ndarray
        restriction matrix for t_test
    method : string or list/tuple of strings
        method for multiple testing p-value correction, default is 'hs'.
        A single method name is treated like a one-element list.
    alpha : float
        significance level for multiple testing reject decision.
    ci_method : None
        not used yet, will be for multiplicity corrected confidence intervals
    contrast_names : list of strings or None
        If contrast_names are provided, then they are used in the index of the
        returned dataframe, otherwise some generic default names are created.

    Returns
    -------
    res_df : pandas DataFrame
        The dataframe contains the results of the t_test and additional columns
        for multiplicity corrected p-values and boolean indicator for whether
        the Null hypothesis is rejected.  For every requested method two
        columns are added: ``pvalue-<method>`` and ``reject-<method>``.
    """
    tt = result.t_test(contrasts)
    res_df = tt.summary_frame(xname=contrast_names)

    # isinstance (rather than an exact type comparison) so that list
    # subclasses and tuples of method names are iterated instead of being
    # wrapped and handed to multipletests as a single bogus method.
    if not isinstance(method, (list, tuple)):
        method = [method]

    for meth in method:
        # multipletests returns (reject, pvals_corrected, ...); only the
        # first two entries are stored.
        mt = multipletests(tt.pvalue, method=meth, alpha=alpha)
        res_df['pvalue-%s' % meth] = mt[1]
        res_df['reject-%s' % meth] = mt[0]
    return res_df

Source File: base.py From vnpy_crypto with MIT License

5 votes

def pval_corrected(self, method=None):
        '''Return the p-values adjusted for multiple testing.

        Parameters
        ----------
        method : str or None
            Correction method handed to ``multipletests``.  When it is None
            the instance default stored in ``self.multitest_method`` is used.

        Returns
        -------
        The second element of the ``multipletests`` result computed from
        ``self.pvals_raw``.

        '''
        from statsmodels.stats.multitest import multipletests

        # TODO (carried over from the original): breaks with method=None
        chosen = self.multitest_method if method is None else method
        return multipletests(self.pvals_raw, method=chosen)[1]

Source File: enrich.py From pypath with GNU General Public License v3.0

5 votes

def correction(self, method='hommel', alpha=None):
        """Attach multiple-testing corrected p-values to every enrichment.

        The ``pvalue`` attributes of the objects in ``self.enrichments`` are
        passed to ``smm.multipletests`` and each corrected value is stored
        back on the same object as ``pval_adj``.

        :param str method: correction method name forwarded to
            ``multipletests``.
        :param alpha: significance level forwarded to ``multipletests``;
            falls back to ``self.alpha`` when None.
        """
        if alpha is None:
            alpha = self.alpha

        # Take the values once so that the p-values and the objects they
        # belong to are guaranteed to be in the same order.
        enrichments = list(self.enrichments.values())
        pvals_corr = smm.multipletests(
            [enr.pvalue for enr in enrichments],
            alpha=alpha,
            method=method)[1]

        for enr, pval_adj in zip(enrichments, pvals_corr):
            enr.pval_adj = pval_adj

Source File: base.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def pval_corrected(self, method=None):
        '''Compute multiple-testing corrected p-values from ``self.pvals_raw``.

        If ``method`` is None, the correction method stored on the instance
        in ``self.multitest_method`` is used.  The corrected p-values are the
        second element of the ``multipletests`` result.

        '''
        import statsmodels.stats.multitest as smt

        correction = method
        if correction is None:
            correction = self.multitest_method
        # TODO (carried over from the original): breaks with method=None
        outcome = smt.multipletests(self.pvals_raw, method=correction)
        return outcome[1]

Source File: correct.py From NiMARE with MIT License

5 votes

def _transform(self, result):
        """Correct the 'p' map of ``result`` and return the corrected maps.

        The values in ``result.maps['p']`` are passed through
        ``mc.multipletests`` with ``alpha=0.05`` and ``self.method``; the
        corrected p-values are stored under ``'p'`` in a new dict, which is
        handed to ``self._generate_secondary_maps`` before being returned.
        """
        p_values = result.maps['p']
        outcome = mc.multipletests(
            p_values, alpha=0.05, method=self.method, is_sorted=False
        )
        # Index 1 of the multipletests result holds the corrected p-values.
        corr_maps = dict(p=outcome[1])
        self._generate_secondary_maps(result, corr_maps)
        return corr_maps

Source File: MannWhitneyU.py From scattertext with Apache License 2.0

4 votes

def get_score_df(self, correction_method=None):
        '''
        Computes Mann Whitney corrected p, z-values.  Falls back to normal approximation when numerical limits are reached.

        One Mann-Whitney U test is run per column of the category matrix,
        and the results are collected into a data frame indexed by
        ``self.corpus_.get_terms()`` with columns ``mwu``, ``mwu_p``,
        ``mwu_z`` and ``valid``.  When ``correction_method`` is given, a
        ``mwu_p_corr`` column is added and ``mwu_z`` is recomputed from it.

        :param correction_method: str or None, correction method from statsmodels.stats.multitest.multipletests
         'fdr_bh' is recommended.
        :return: pd.DataFrame
        '''
        # Cast the matrix to float64 and divide it by its axis-1 sums.
        X = self._get_X().astype(np.float64)
        X = X / X.sum(axis=1)
        cat_X, ncat_X = self._get_cat_and_ncat(X)

        def normal_apx(u, x, y):
            # Normal approximation for the U statistic: z-score of u against
            # m_u and sigma_u, returned as 2 * norm.cdf(z).
            # from https://stats.stackexchange.com/questions/116315/problem-with-mann-whitney-u-test-in-scipy
            m_u = len(x) * len(y) / 2
            sigma_u = np.sqrt(len(x) * len(y) * (len(x) + len(y) + 1) / 12)
            z = (u - m_u) / sigma_u
            return 2*norm.cdf(z)
        scores = []
        # One test per column of cat_X.
        for i in range(cat_X.shape[1]):
            # .A1 presumably flattens a matrix row into a 1-D array -- TODO
            # confirm the type returned by _get_cat_and_ncat.
            cat_list = cat_X.T[i].A1
            ncat_list = ncat_X.T[i].A1
            try:
                # The sample with the larger mean is always passed first, so
                # the test is one-sided ('greater') in the observed direction.
                if cat_list.mean() > ncat_list.mean():
                    mw = mannwhitneyu(cat_list, ncat_list, alternative='greater')
                    if mw.pvalue in (0, 1):
                        # NOTE(review): `staistic` looks like a typo for
                        # `statistic`; if so this line raises AttributeError
                        # and the bare except below marks the term invalid
                        # instead of applying the fallback -- confirm.
                        mw.pvalue = normal_apx(mw.staistic, cat_list, ncat_list)

                    scores.append({'mwu': mw.statistic, 'mwu_p': mw.pvalue, 'mwu_z': norm.isf(float(mw.pvalue)), 'valid':True})

                else:
                    mw = mannwhitneyu(ncat_list, cat_list, alternative='greater')
                    if mw.pvalue in (0, 1):
                        # NOTE(review): same suspected `staistic` typo as above.
                        mw.pvalue = normal_apx(mw.staistic, ncat_list, cat_list)

                    # Reversed direction: statistic is negated and the
                    # p-value is stored as 1 - p.
                    scores.append({'mwu': -mw.statistic, 'mwu_p': 1 - mw.pvalue, 'mwu_z': 1. - norm.isf(float(mw.pvalue)), 'valid':True})
            except:
                # Any exception raised above records the term as invalid with
                # zeroed scores.  NOTE(review): bare except also swallows
                # KeyboardInterrupt/SystemExit.
                scores.append({'mwu': 0, 'mwu_p': 0, 'mwu_z': 0, 'valid':False})

        score_df = pd.DataFrame(scores, index=self.corpus_.get_terms()).fillna(0)
        if correction_method is not None:
            from statsmodels.stats.multitest import multipletests
            for method in ['mwu']:
                # Only rows flagged valid take part in the correction.
                valid_pvals = score_df[score_df.valid].mwu_p
                # Fold each p-value to min(p, 1 - p) before correcting.
                valid_pvals_abs = np.min([valid_pvals, 1-valid_pvals], axis=0)
                valid_pvals_abs_corr = multipletests(valid_pvals_abs, method=correction_method)[1]
                # Rows that are not valid keep this 0.5 default.
                score_df[method + '_p_corr'] = 0.5
                # Unfold: entries whose raw p-value was above 0.5 become
                # 1 - corrected value.
                valid_pvals_abs_corr[valid_pvals > 0.5] = 1. - valid_pvals_abs_corr[valid_pvals > 0.5]
                # The next line assigns the selected values to themselves.
                valid_pvals_abs_corr[valid_pvals < 0.5] = valid_pvals_abs_corr[valid_pvals < 0.5]
                score_df.loc[score_df.valid, method + '_p_corr'] = valid_pvals_abs_corr
                # Overwrite the z column with -norm.ppf of the corrected p.
                score_df[method + '_z'] = -norm.ppf(score_df[method + '_p_corr'])
        return score_df

Source File: _rank_genes_groups.py From scanpy with BSD 3-Clause "New" or "Revised" License

4 votes

def compute_statistics(self, method, corr_method, n_genes_user, rankby_abs, **kwds):
        """Run the selected test for every group and store results in ``self.stats``.

        Parameters
        ----------
        method
            ``'t-test'`` or ``'t-test_overestim_var'`` (uses ``self.t_test``),
            ``'wilcoxon'`` (uses ``self.wilcoxon``) or ``'logreg'`` (uses
            ``self.logreg``).
        corr_method
            ``'benjamini-hochberg'`` or ``'bonferroni'``; only consulted when
            the test yields p-values.
        n_genes_user
            If not None, only the top ``n_genes_user`` entries chosen by
            ``_select_top_n`` are stored per group, together with their
            names.  If None, everything is stored and the frame is indexed
            by ``self.var_names``.
        rankby_abs
            Rank by the absolute value of the scores when selecting the top
            entries.
        **kwds
            Forwarded to ``self.logreg``.

        Results are written as columns ``(group_name, field)`` of
        ``self.stats`` with fields ``names``, ``scores``, ``pvals``,
        ``pvals_adj`` and ``logfoldchanges`` where applicable.
        """

        if method in {'t-test', 't-test_overestim_var'}:
            generate_test_results = self.t_test(method)
        elif method == 'wilcoxon':
            generate_test_results = self.wilcoxon()
        elif method == 'logreg':
            generate_test_results = self.logreg(**kwds)

        for group_index, scores, pvals in generate_test_results:
            group_name = str(self.groups_order[group_index])

            if n_genes_user is not None:
                scores_sort = np.abs(scores) if rankby_abs else scores
                global_indices = _select_top_n(scores_sort, n_genes_user)
                first_col = 'names'
            else:
                # Keep every entry, in the original order.
                global_indices = slice(None)
                first_col = 'scores'

            # Lazily create the result frame with a (group, field) column index.
            if self.stats is None:
                idx = pd.MultiIndex.from_tuples([(group_name, first_col)])
                self.stats = pd.DataFrame(columns=idx)

            if n_genes_user is not None:
                self.stats[group_name, 'names'] = self.var_names[global_indices]

            self.stats[group_name, 'scores'] = scores[global_indices]

            if pvals is not None:
                self.stats[group_name, 'pvals'] = pvals[global_indices]
                if corr_method == 'benjamini-hochberg':
                    from statsmodels.stats.multitest import multipletests

                    # NaN p-values are replaced by 1 (in place) before the
                    # correction is applied.
                    pvals[np.isnan(pvals)] = 1
                    _, pvals_adj, _, _ = multipletests(
                        pvals, alpha=0.05, method='fdr_bh'
                    )
                elif corr_method == 'bonferroni':
                    # Bonferroni scales by the number of tests performed,
                    # i.e. one per p-value of this group.  ``n_genes`` was
                    # previously never defined, which raised a NameError.
                    n_genes = len(pvals)
                    pvals_adj = np.minimum(pvals * n_genes, 1.0)
                self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]

            if self.means is not None:
                mean_group = self.means[group_index]
                if self.ireference is None:
                    mean_rest = self.means_rest[group_index]
                else:
                    mean_rest = self.means[self.ireference]
                foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
                    self.expm1_func(mean_rest) + 1e-9
                )  # add small value to remove 0's
                self.stats[group_name, 'logfoldchanges'] = np.log2(
                    foldchanges[global_indices]
                )

        if n_genes_user is None:
            self.stats.index = self.var_names


# TODO: Make arguments after groupby keyword only

Python statsmodels.stats.multitest.multipletests() Examples