Python statsmodels.stats.multitest.multipletests() Examples

The following are 7 code examples of statsmodels.stats.multitest.multipletests(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module statsmodels.stats.multitest, or try the search function.
Example #1
Source File: contrast.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def t_test_multi(result, contrasts, method='hs', alpha=0.05, ci_method=None,
                 contrast_names=None):
    """Perform t_test and add multiplicity correction to results dataframe.

    Parameters
    ----------
    result : results instance
        Results of an estimated model; must provide ``t_test``.
    contrasts : ndarray
        Restriction matrix for t_test.
    method : str or list/tuple of str
        Method(s) for multiple testing p-value correction, default is 'hs'.
        A single method name is treated as a one-element list.
    alpha : float
        Significance level for multiple testing reject decision.
    ci_method : None
        Not used yet, will be for multiplicity corrected confidence intervals.
    contrast_names : list of strings or None
        If contrast_names are provided, then they are used in the index of the
        returned dataframe, otherwise some generic default names are created.

    Returns
    -------
    res_df : pandas DataFrame
        The dataframe contains the results of the t_test and, for every
        requested method, a ``pvalue-<method>`` column with the multiplicity
        corrected p-values and a ``reject-<method>`` column with the boolean
        indicator for whether the Null hypothesis is rejected.
    """
    tt = result.t_test(contrasts)
    res_df = tt.summary_frame(xname=contrast_names)

    # isinstance (rather than an exact ``type(...) is list`` test) so that
    # tuples and list subclasses are iterated instead of being wrapped whole
    # and handed to multipletests as a single, invalid "method".
    methods = method if isinstance(method, (list, tuple)) else [method]
    for meth in methods:
        # multipletests returns (reject, pvals_corrected, ...); only the first
        # two entries are stored.
        reject, pvals_corrected = multipletests(
            tt.pvalue, method=meth, alpha=alpha)[:2]
        res_df['pvalue-%s' % meth] = pvals_corrected
        res_df['reject-%s' % meth] = reject
    return res_df
Example #2
Source File: base.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def pval_corrected(self, method=None):
        '''Return the raw p-values adjusted for multiple testing.

        When ``method`` is None, the correction method stored on the instance
        in ``self.multitest_method`` is used instead.

        '''
        from statsmodels.stats import multitest as smt
        correction = self.multitest_method if method is None else method
        #TODO: breaks with method=None
        adjusted = smt.multipletests(self.pvals_raw, method=correction)
        # Index 1 of the multipletests result holds the corrected p-values.
        return adjusted[1]
Example #3
Source File: enrich.py    From pypath with GNU General Public License v3.0 5 votes vote down vote up
def correction(self, method='hommel', alpha=None):
        """Store multiple-testing corrected p-values on every enrichment.

        The p-values of all objects in ``self.enrichments`` are corrected
        together with ``smm.multipletests`` and each corrected value is
        written to the ``pval_adj`` attribute of the matching object.

        :param str method: Correction method passed on to ``multipletests``.
        :param alpha: Significance level passed on to ``multipletests``;
            ``self.alpha`` is used when None.
        """
        if alpha is None:
            alpha = self.alpha
        # Materialise the values once so the p-values handed to multipletests
        # and the objects receiving the results are guaranteed to line up.
        enrichments = list(self.enrichments.values())
        pvals_corr = smm.multipletests(
            [enr.pvalue for enr in enrichments],
            alpha=alpha,
            method=method)[1]
        for enr, pval_adj in zip(enrichments, pvals_corr):
            enr.pval_adj = pval_adj
Example #4
Source File: base.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def pval_corrected(self, method=None):
        '''Compute p-values corrected for the multiple testing problem.

        If ``method`` is None, the instance default kept in
        ``self.multitest_method`` is applied.

        '''
        from statsmodels.stats import multitest as smt
        chosen = self.multitest_method if method is None else method
        #TODO: breaks with method=None
        results = smt.multipletests(self.pvals_raw, method=chosen)
        # Second element of the multipletests result: corrected p-values.
        return results[1]
Example #5
Source File: correct.py    From NiMARE with MIT License 5 votes vote down vote up
def _transform(self, result):
        """Correct the 'p' map of ``result`` for multiple comparisons.

        The values in ``result.maps['p']`` are corrected with
        ``mc.multipletests`` using ``self.method`` at alpha 0.05. The
        corrected map is passed to ``self._generate_secondary_maps`` and the
        resulting dictionary of corrected maps is returned.
        """
        raw_p = result.maps['p']
        _reject, corrected_p, _alpha_sidak, _alpha_bonf = mc.multipletests(
            raw_p,
            alpha=0.05,
            method=self.method,
            is_sorted=False,
        )
        corr_maps = {'p': corrected_p}
        # Called with the dict so further maps can be added alongside 'p'.
        self._generate_secondary_maps(result, corr_maps)
        return corr_maps
Example #6
Source File: MannWhitneyU.py    From scattertext with Apache License 2.0 4 votes vote down vote up
def get_score_df(self, correction_method=None):
        '''
        Computes Mann-Whitney U statistics, p-values and z-values for every term,
        comparing the category rows against the non-category rows.  Intended to
        fall back to a normal approximation when the p-value is exactly 0 or 1
        (see the review notes in the loop below).

        :param correction_method: str or None, correction method from statsmodels.stats.multitest.multipletests
         'fdr_bh' is recommended.  When None, no corrected columns are added.
        :return: pd.DataFrame indexed by ``self.corpus_.get_terms()`` with columns
         'mwu', 'mwu_p', 'mwu_z', 'valid' and, if a correction method is given,
         'mwu_p_corr' (in which case 'mwu_z' is recomputed from the corrected p-values).
        '''
        X = self._get_X().astype(np.float64)
        # Divide every row by its row sum (axis=1).
        # presumably X is a document-by-term count matrix -- TODO confirm
        X = X / X.sum(axis=1)
        cat_X, ncat_X = self._get_cat_and_ncat(X)

        def normal_apx(u, x, y):
            # Normal approximation for the U statistic: standardise u with the
            # mean and standard deviation computed from the two sample sizes.
            # from https://stats.stackexchange.com/questions/116315/problem-with-mann-whitney-u-test-in-scipy
            m_u = len(x) * len(y) / 2
            sigma_u = np.sqrt(len(x) * len(y) * (len(x) + len(y) + 1) / 12)
            z = (u - m_u) / sigma_u
            # NOTE(review): this doubles the lower tail, so it can exceed 1 when
            # z > 0, while the tests below are one-sided ('greater') -- confirm
            # the intended formula.
            return 2*norm.cdf(z)
        scores = []
        # One test per column (term) of the category / non-category matrices.
        for i in range(cat_X.shape[1]):
            # .A1 flattens the selected row of the transpose to a 1-D array
            # (assumes a np.matrix-like object -- TODO confirm).
            cat_list = cat_X.T[i].A1
            ncat_list = ncat_X.T[i].A1
            try:
                # Always test the sample with the larger mean as "greater"; the
                # else branch flips the sign/direction of the stored values.
                if cat_list.mean() > ncat_list.mean():
                    mw = mannwhitneyu(cat_list, ncat_list, alternative='greater')
                    if mw.pvalue in (0, 1):
                        # NOTE(review): `staistic` looks like a typo for
                        # `statistic`; as written this presumably raises
                        # AttributeError, which the bare except below turns into
                        # an invalid row, so the fallback never takes effect.
                        # Assigning to `mw.pvalue` may also fail if the result
                        # object is immutable -- verify before fixing.
                        mw.pvalue = normal_apx(mw.staistic, cat_list, ncat_list)

                    scores.append({'mwu': mw.statistic, 'mwu_p': mw.pvalue, 'mwu_z': norm.isf(float(mw.pvalue)), 'valid':True})

                else:
                    mw = mannwhitneyu(ncat_list, cat_list, alternative='greater')
                    if mw.pvalue in (0, 1):
                        # NOTE(review): same `staistic` typo as in the branch above.
                        mw.pvalue = normal_apx(mw.staistic, ncat_list, cat_list)

                    # NOTE(review): 'mwu_z' is stored as `1. - isf(p)` here, not
                    # `-isf(p)` -- confirm this is intended.
                    scores.append({'mwu': -mw.statistic, 'mwu_p': 1 - mw.pvalue, 'mwu_z': 1. - norm.isf(float(mw.pvalue)), 'valid':True})
            except:
                # NOTE(review): bare except -- any error at all (including the
                # attribute typo above) is recorded as an all-zero row with
                # valid=False instead of being reported.
                scores.append({'mwu': 0, 'mwu_p': 0, 'mwu_z': 0, 'valid':False})

        # NaN entries are replaced by 0.
        score_df = pd.DataFrame(scores, index=self.corpus_.get_terms()).fillna(0)
        if correction_method is not None:
            from statsmodels.stats.multitest import multipletests
            # Single-element loop: `method` is only used as the column prefix 'mwu'.
            for method in ['mwu']:
                # Only rows flagged valid take part in the correction.
                valid_pvals = score_df[score_df.valid].mwu_p
                # Fold each p-value to min(p, 1 - p) before correcting.
                valid_pvals_abs = np.min([valid_pvals, 1-valid_pvals], axis=0)
                valid_pvals_abs_corr = multipletests(valid_pvals_abs, method=correction_method)[1]
                # Rows that are not valid keep this default of 0.5.
                score_df[method + '_p_corr'] = 0.5
                # Unfold: values whose original p was above 0.5 are mirrored back.
                valid_pvals_abs_corr[valid_pvals > 0.5] = 1. - valid_pvals_abs_corr[valid_pvals > 0.5]
                # NOTE(review): the next line assigns the values to themselves (no-op).
                valid_pvals_abs_corr[valid_pvals < 0.5] = valid_pvals_abs_corr[valid_pvals < 0.5]
                score_df.loc[score_df.valid, method + '_p_corr'] = valid_pvals_abs_corr
                # Overwrites the 'mwu_z' column computed in the loop above with
                # z-values derived from the corrected p-values.
                score_df[method + '_z'] = -norm.ppf(score_df[method + '_p_corr'])
        return score_df
Example #7
Source File: _rank_genes_groups.py    From scanpy with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def compute_statistics(self, method, corr_method, n_genes_user, rankby_abs, **kwds):
        """Run the selected test and collect per-group results in ``self.stats``.

        For every ``(group_index, scores, pvals)`` triple produced by the
        chosen test, columns keyed by ``(group_name, <field>)`` are written
        to the ``self.stats`` DataFrame: 'names' (only when ``n_genes_user``
        is given), 'scores', 'pvals' and 'pvals_adj' (only when the test
        yields p-values) and 'logfoldchanges' (only when ``self.means`` is
        not None).

        Parameters
        ----------
        method
            One of 't-test', 't-test_overestim_var', 'wilcoxon', 'logreg'.
        corr_method
            'benjamini-hochberg' or 'bonferroni'; only consulted when the
            test yields p-values.
        n_genes_user
            If not None, only the top entries chosen by ``_select_top_n``
            are stored; otherwise all are stored and ``self.stats`` is
            indexed by ``self.var_names``.
        rankby_abs
            If true, the top-n selection ranks by absolute score.
        **kwds
            Passed on to ``self.logreg`` when ``method == 'logreg'``.

        Raises
        ------
        ValueError
            If ``method`` is not recognised, or if p-values are available
            and ``corr_method`` is not recognised.
        """
        if method in {'t-test', 't-test_overestim_var'}:
            generate_test_results = self.t_test(method)
        elif method == 'wilcoxon':
            generate_test_results = self.wilcoxon()
        elif method == 'logreg':
            generate_test_results = self.logreg(**kwds)
        else:
            # Previously this fell through to an UnboundLocalError below.
            raise ValueError('Unknown method: %r' % (method,))

        for group_index, scores, pvals in generate_test_results:
            group_name = str(self.groups_order[group_index])

            if n_genes_user is not None:
                scores_sort = np.abs(scores) if rankby_abs else scores
                global_indices = _select_top_n(scores_sort, n_genes_user)
                first_col = 'names'
            else:
                global_indices = slice(None)
                first_col = 'scores'

            # Create the frame lazily, seeded with the first column written.
            if self.stats is None:
                idx = pd.MultiIndex.from_tuples([(group_name, first_col)])
                self.stats = pd.DataFrame(columns=idx)

            if n_genes_user is not None:
                self.stats[group_name, 'names'] = self.var_names[global_indices]

            self.stats[group_name, 'scores'] = scores[global_indices]

            if pvals is not None:
                self.stats[group_name, 'pvals'] = pvals[global_indices]
                if corr_method == 'benjamini-hochberg':
                    from statsmodels.stats.multitest import multipletests

                    # NaN p-values are set to 1 before the correction.
                    pvals[np.isnan(pvals)] = 1
                    _, pvals_adj, _, _ = multipletests(
                        pvals, alpha=0.05, method='fdr_bh'
                    )
                elif corr_method == 'bonferroni':
                    # The Bonferroni factor is the number of tests, i.e. the
                    # number of p-values for this group. The original used an
                    # undefined name ``n_genes`` here and raised NameError.
                    pvals_adj = np.minimum(pvals * len(pvals), 1.0)
                else:
                    # Avoid an unbound (or stale, from a previous group)
                    # ``pvals_adj`` being written below.
                    raise ValueError(
                        'Unknown corr_method: %r' % (corr_method,)
                    )
                self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]

            if self.means is not None:
                mean_group = self.means[group_index]
                if self.ireference is None:
                    mean_rest = self.means_rest[group_index]
                else:
                    mean_rest = self.means[self.ireference]
                foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
                    self.expm1_func(mean_rest) + 1e-9
                )  # add small value to remove 0's
                self.stats[group_name, 'logfoldchanges'] = np.log2(
                    foldchanges[global_indices]
                )

        if n_genes_user is None:
            self.stats.index = self.var_names


# TODO: Make arguments after groupby keyword only