Python scipy.stats.ttest_ind() Examples

The following are 30 code examples of scipy.stats.ttest_ind(), drawn from open-source projects. Each example notes its source file, the project it comes from, and that project's license. You may also want to check out the other functions and classes available in the scipy.stats module.
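Before the project examples, here is a minimal, self-contained sketch of a typical call; the synthetic arrays and variable names below are illustrative only and are not taken from any of the projects that follow.

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
a = rng.normal(loc=0.0, scale=1.0, size=50)   # first sample
b = rng.normal(loc=0.5, scale=1.0, size=50)   # second sample

# Student's t-test (assumes equal variances by default)
t_stat, p_value = stats.ttest_ind(a, b)

# Welch's t-test (equal_var=False drops the equal-variance assumption)
t_welch, p_welch = stats.ttest_ind(a, b, equal_var=False)

print(t_stat, p_value)
print(t_welch, p_welch)

Both calls return a result object that unpacks into the test statistic and the two-sided p-value; several of the examples below also use the nan_policy, axis, and equal_var keyword arguments.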
Example #1
Source File: param_sensitivity.py    From scanorama with MIT License
def test_knn(datasets_dimred, genes, labels, idx, distr, xlabels):
    knns = [ 5, 10, 50, 100 ]
    len_distr = len(distr)
    for knn in knns:
        integrated = assemble(datasets_dimred[:], knn=knn, sigma=150)
        X = np.concatenate(integrated)
        distr.append(sil(X[idx, :], labels[idx]))
        for d in distr[:len_distr]:
            print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
        xlabels.append(str(knn))
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('knn')) 
Example #2
Source File: param_sensitivity.py    From scanorama with MIT License
def test_sigma(datasets_dimred, genes, labels, idx, distr, xlabels):
    sigmas = [ 10, 50, 100, 200 ]
    len_distr = len(distr)
    for sigma in sigmas:
        integrated = assemble(datasets_dimred[:], sigma=sigma)
        X = np.concatenate(integrated)
        distr.append(sil(X[idx, :], labels[idx]))
        for d in distr[:len_distr]:
            print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
        xlabels.append(str(sigma))
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('sigma')) 
Example #3
Source File: scdiff.py    From scdiff with MIT License
def getDiffGeneTTest(self):
    cut = 5e-2
    AE = [item.E for item in self.fromNode.cells]
    BE = [item.E for item in self.toNode.cells]
    G = range(len(AE[0]))
    TT = []
    for i in G:
        X = [item[i] for item in AE]
        Y = [item[i] for item in BE]
        pxy = ttest_ind(X, Y)[-1]
        TT.append([pxy, i])
    TT.sort()
    TT = [item for item in TT if item[0] < cut]
    DG = [[self.GL[item[1]], item[0]] for item in TT if self.GL[item[1]]]
    return DG

        #------------------------------------------------------------------- 
Example #4
Source File: scdiff.py    From scdiff with MIT License
def tellDifference(nodeCells,nodePCells,geneIndex,fcut=0.6):
	if len(nodePCells)==0:
		return [0,1,0]
	X=[item.E[geneIndex] for item in nodeCells]
	Y=[item.E[geneIndex] for item in nodePCells]
	fc=sum(X)/len(X)-sum(Y)/len(Y)
	pv=ttest_ind(X,Y)[1]
	
	pcut=0.05
	if pv<pcut and fc>fcut:
		return [1,pv,fc]
	if pv<pcut and fc<-1*fcut:
		return [-1,pv,fc]	
	return [0,pv,fc]
#-----------------------------------------------------------------------------
# filter X 
Example #5
Source File: numerical_comparison.py    From DIVE-backend with GNU General Public License v3.0
def ttest(df, fields, indep, dep):
    # Ensure single field
    dep_field_name = dep[0]
    indep_field_name = indep[0]
    unique_indep_values = get_unique(df[indep_field_name])

    subsets = {}
    for v in unique_indep_values:
        subsets[v] = np.array(df[df[indep_field_name] == v][dep_field_name])

    result = {}
    for (x, y) in combinations(unique_indep_values, 2):
        (statistic, pvalue) = ttest_ind(subsets[x], subsets[y])
        result[str([x, y])] = {
            'statistic': statistic,
            'pvalue': pvalue
        }

    return result



##################
#Functions to determine which tests could be run
################## 
Example #6
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_ttest_ind_nan_2nd_arg():
    # regression test for gh-6134: nans in the second arg were not handled
    x = [np.nan, 2.0, 3.0, 4.0]
    y = [1.0, 2.0, 1.0, 2.0]

    r1 = stats.ttest_ind(x, y, nan_policy='omit')
    r2 = stats.ttest_ind(y, x, nan_policy='omit')
    assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
    assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)

    # NB: arguments are not paired when NaNs are dropped
    r3 = stats.ttest_ind(y, x[1:])
    assert_allclose(r2, r3, atol=1e-15)

    # .. and this is consistent with R. R code:
    # x = c(NA, 2.0, 3.0, 4.0)
    # y = c(1.0, 2.0, 1.0, 2.0)
    # t.test(x, y, var.equal=TRUE)
    assert_allclose(r2, (-2.5354627641855498, 0.052181400457057901), atol=1e-15) 
Example #7
Source File: statistics.py    From HPOlib with GNU General Public License v3.0
def calculate_statistics(best_dict, keys, round_=0):
    p_values = dict()
    for key in keys:
        p_values[key] = defaultdict(float)

    for idx, key0 in enumerate(keys):
        if len(keys) > 1:
            for j, key1 in enumerate(keys[idx+1:]):
                if round_ > 0:
                    t_true, p_true = stats.ttest_ind(numpy.round(best_dict[key0], round_),
                                                        numpy.round(best_dict[key1], round_))
                else:
                    t_true, p_true = stats.ttest_ind(best_dict[key0], best_dict[key1])
                p_values[key0][key1] = p_true

    return p_values 
Example #8
Source File: match.py    From pscore_match with BSD 2-Clause "Simplified" License
def t_test(covariates, groups):
    """ 
    Two sample t test for the distribution of treatment and control covariates
    
    Parameters
    ----------
    covariates : DataFrame 
        Dataframe with one covariate per column.
        If matches are with replacement, then duplicates should be 
        included as additional rows.
    groups : array-like
        treatment assignments, must be 2 groups
    
    Returns
    -------
    A list of p-values, one for each column in covariates
    """
    colnames = list(covariates.columns)
    J = len(colnames)
    pvalues = np.zeros(J)
    for j in range(J):
        var = covariates[colnames[j]]
        res = ttest_ind(var[groups == 1], var[groups == 0])
        pvalues[j] = res.pvalue
    return pvalues 
Example #9
Source File: test_stats.py    From scprep with GNU General Public License v3.0
def test_t_statistic():
    X = data.generate_positive_sparse_matrix(shape=(500, 3), seed=42, poisson_mean=0.2)
    Y = data.generate_positive_sparse_matrix(shape=(500, 3), seed=42, poisson_mean=0.3)
    u_stat = [
        stats.ttest_ind(X[:, i], Y[:, i], equal_var=False)[0] for i in range(X.shape[1])
    ]

    def test_fun(X):
        return scprep.stats.t_statistic(
            scprep.select.select_rows(X, idx=np.arange(500)),
            scprep.select.select_rows(X, idx=np.arange(500, 1000)),
        )

    matrix.test_all_matrix_types(
        np.vstack([X, Y]),
        utils.assert_transform_equals,
        Y=u_stat,
        transform=test_fun,
        check=partial(utils.assert_all_close, rtol=2e-3),
    ) 
Example #10
Source File: analyser.py    From spotpy with MIT License
def compare_different_objectivefunctions(like1,like2):
    """
    Performs Welch's t-test (aka the unequal variances t-test)

    :like1: objective function values
    :type: list

    :like2: other objective function values
    :type: list

    :return: t-statistic and p-value
    :rtype: tuple
    """
    from scipy import stats
    out = stats.ttest_ind(like1, like2, equal_var=False)
    print(out)
    if out[1]>0.05:
        print('like1 is NOT significantly different from like2: p>0.05')
    else:
        print('like1 is significantly different from like2: p<0.05')
    return out 
Example #11
Source File: loss.py    From CausalDiscoveryToolbox with MIT License
def loop(self, xy, yx):
        """ Tests the loop condition based on the new results and the
        parameters.

        Args:
            xy (list): list containing all the results for one set of samples
            yx (list): list containing all the results for the other set.

        Returns:
            bool: True if the loop has to continue, False otherwise.
        """
        if self.iter < 2:
            self.iter += self.runs_per_iter
            return True
        t_test, self.p_value = ttest_ind(xy, yx, equal_var=False)
        if self.p_value > self.threshold and self.iter < self.max_iter:
            self.iter += self.runs_per_iter
            return True
        else:
            return False 
Example #12
Source File: models.py    From redtide with MIT License
def price_trend(self, symbol=None, k=5, metric='rvalue', baseval=0):
        if k < 3:
            raise ValueError('Stocks.price_trend: k >= 3 is a must')
        if metric not in ['slope', 'rvalue', 'pvalue', 'stderr']:
            raise ValueError('Stocks.price_trend: invalid metric: {}'.format(metric))
        if symbol is None:
            values = []
            for s in self.stocks.values():
                r = s.price_trend(k)
                if r:
                    values.append(getattr(r, metric))
            n = len(values)
            if n >= 2:
                # n depends on how many stocks are being watched; meaningless if few
                return ttest_ind(values, [baseval]*n, equal_var=False)
        elif self.has_stock(symbol):
            return self.stocks[symbol].price_trend(k)
        return None 
Example #13
Source File: models.py    From redtide with MIT License
def volume_trend(self, symbol=None, k=5, metric='rvalue', baseval=0):
        if k < 3:
            raise ValueError('Stocks.volume_trend: k >= 3 is a must')
        if metric not in ['slope', 'rvalue', 'pvalue', 'stderr']:
            raise ValueError('Stocks.volume_trend: invalid metric: {}'.format(metric))
        if symbol is None:
            values = []
            for s in self.stocks.values():
                r = s.volume_trend(k)
                if r:
                    values.append(getattr(r, metric))
            n = len(values)
            if n >= 2:
                return ttest_ind(values, [baseval]*n, equal_var=False)
        elif self.has_stock(symbol):
            return self.stocks[symbol].volume_trend(k)
        return None 
Example #14
Source File: test_weightstats.py    From vnpy_crypto with MIT License
def test_weightstats_1(self):
        x1, x2 = self.x1, self.x2
        w1, w2 = self.w1, self.w2
        w1_ = 2. * np.ones(len(x1))
        w2_ = 2. * np.ones(len(x2))

        d1 = DescrStatsW(x1)
#        print ttest_ind(x1, x2)
#        print ttest_ind(x1, x2, usevar='unequal')
#        #print ttest_ind(x1, x2, usevar='unequal')
#        print stats.ttest_ind(x1, x2)
#        print ttest_ind(x1, x2, usevar='unequal', alternative='larger')
#        print ttest_ind(x1, x2, usevar='unequal', alternative='smaller')
#        print ttest_ind(x1, x2, usevar='unequal', weights=(w1_, w2_))
#        print stats.ttest_ind(np.r_[x1, x1], np.r_[x2,x2])
        assert_almost_equal(ttest_ind(x1, x2, weights=(w1_, w2_))[:2],
                            stats.ttest_ind(np.r_[x1, x1], np.r_[x2, x2])) 
Example #15
Source File: param_sensitivity.py    From scanorama with MIT License
def test_alpha(datasets_dimred, genes, labels, idx, distr, xlabels):
    alphas = [ 0, 0.05, 0.20, 0.50 ]
    len_distr = len(distr)
    for alpha in alphas:
        integrated = assemble(datasets_dimred[:], alpha=alpha, sigma=150)
        X = np.concatenate(integrated)
        distr.append(sil(X[idx, :], labels[idx]))
        for d in distr[:len_distr]:
            print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
        xlabels.append(str(alpha))
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('alpha')) 
Example #16
Source File: param_sensitivity.py    From scanorama with MIT License
def test_approx(datasets_dimred, genes, labels, idx, distr, xlabels):
    integrated = assemble(datasets_dimred[:], approx=False, sigma=150)
    X = np.concatenate(integrated)
    distr.append(sil(X[idx, :], labels[idx]))
    len_distr = len(distr)
    for d in distr[:len_distr]:
        print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
    xlabels.append('Exact NN')
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('approx')) 
Example #17
Source File: param_sensitivity.py    From scanorama with MIT License
def test_perplexity(datasets_dimred, genes, labels, idx,
                    distr, xlabels):
    X = np.concatenate(datasets_dimred)

    perplexities = [ 10, 100, 500, 2000 ]
    len_distr = len(distr)
    for perplexity in perplexities:
        embedding = fit_tsne(X, perplexity=perplexity)
        distr.append(sil(embedding[idx, :], labels[idx]))
        for d in distr[:len_distr]:
            print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
        xlabels.append(str(perplexity))
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('perplexity')) 
Example #18
Source File: compare_genomes.py    From mCaller with MIT License
def compare_by_position(bed1,bed2,xmfa):
    pos_dict = {}

    for i,bed in enumerate([bed1,bed2]):
        pos_dict[i] = {}
        with open(bed,'r') as fi:
            for line in fi:
                # example line: 2  1892198 1892199 TCMMTMTTMMM 0.5 -   16
                csome,start,end,motif,perc_meth,strand,num_reads,probabilities = tuple(line.split('\t'))
                pos_dict[i][(csome,start,end,strand)] = ((perc_meth,num_reads),np.asarray([float(p) for p in probabilities.strip().split(',')]))

    for pos in pos_dict[0]:
        if pos in pos_dict[1]:
            # compare the probability distributions from the two bed files at this position
            try:
                u,pval = mannwhitneyu(pos_dict[0][pos][1],pos_dict[1][pos][1],alternative='two-sided')
            except ValueError:
                u,pval = 'none','identical'
            u2,pval2 = ranksums(pos_dict[0][pos][1],pos_dict[1][pos][1])
            try:
                t,pval3 = ttest_ind(pos_dict[0][pos][1],pos_dict[1][pos][1])
            except:
                t,pval3 = 'none','missing df'
            d,pval4 = ks_2samp(pos_dict[0][pos][1],pos_dict[1][pos][1])
            if pval4 < 0.9:
                print(pos, pos_dict[0][pos][0], pos_dict[1][pos][0], pval, pval2, pval3, pval4)
Example #19
Source File: EDA.py    From exploripy with MIT License
def TTest(self):
		"""
		Calculate p-values using a two-sample t-test. Applicable only to each binary categorical variable paired with the continuous variables.
		"""
		temp_df = self.df.dropna()
		start = time.time()
		TList = []
		Insight1 = "At a significance level of 0.05, the distribution of the variable \"{0}\" varies significantly based on the categorical variable \"{1}\". "
		Insight2 = "At a significance level of 0.05, the distribution of the variable \"{0}\" does not vary significantly based on the categorical variable \"{1}\". "
		for CategoricalVar in self.BinaryCategoricalFeatures:
			for ContinuousVar in self.ContinuousFeatures:
				binary1 = temp_df[CategoricalVar].unique()[0]
				binary2 = temp_df[CategoricalVar].unique()[1]
				TStat,p = stats.ttest_ind(temp_df[temp_df[CategoricalVar]==binary1][ContinuousVar],temp_df[temp_df[CategoricalVar]==binary2][ContinuousVar])
				if p <= 0.05:
					Insight = Insight1.format(ContinuousVar,CategoricalVar)
				else:
					Insight = Insight2.format(ContinuousVar,CategoricalVar)
				TList.append(dict(Continuous=ContinuousVar,Categorical=CategoricalVar,TStat=TStat,PValue=p,Insight=Insight))
		
		end = time.time()
		if self.debug == 'YES':
			print('T-Test',end-start)
			print(pd.DataFrame(TList))
		return pd.DataFrame(TList) 
Example #20
Source File: basenji_test_reps.py    From basenji with Apache License 2.0
def stat_tests(ref_cors, exp_cors, alternative):
  _, mwp = mannwhitneyu(ref_cors, exp_cors, alternative=alternative)
  tt, tp = ttest_ind(ref_cors, exp_cors)

  if alternative == 'less':
    if tt > 0:
      tp = 1 - (1-tp)/2
    else:
      tp /= 2
  elif alternative == 'greater':
    if tt <= 0:
      tp /= 2
    else:
      tp = 1 - (1-tp)/2

  return mwp, tp

################################################################################
# __main__
################################################################################ 
Example #21
Source File: Statistics.py    From ClearMap with GNU General Public License v3.0
def tTestVoxelization(group1, group2, signed = False, removeNaN = True, pcutoff = None):
    """t-Test on differences between the individual voxels in group1 and group2, group is a array of voxelizations"""
    
    g1 = self.readDataGroup(group1);  
    g2 = self.readDataGroup(group2);  
    
    tvals, pvals = stats.ttest_ind(g1, g2, axis = 0, equal_var = True);

    #remove nans
    if removeNaN: 
        pi = numpy.isnan(pvals);
        pvals[pi] = 1.0;
        tvals[pi] = 0;

    pvals = self.cutoffPValues(pvals, pcutoff = pcutoff);

    #return
    if signed:
        return pvals, numpy.sign(tvals);
    else:
        return pvals; 
Example #22
Source File: Statistics.py    From ClearMap with GNU General Public License v3.0
def tTestPointsInRegions(pointCounts1, pointCounts2, labeledImage = lbl.DefaultLabeledImageFile, signed = False, removeNaN = True, pcutoff = None, equal_var = False):
    """t-Test on differences in counts of points in labeled regions"""
    
    #ids, p1 = countPointsGroupInRegions(pointGroup1, labeledImage = labeledImage, withIds = True);
    #p2 = countPointsGroupInRegions(pointGroup2,  labeledImage = labeledImage, withIds = False);   
    
    tvals, pvals = stats.ttest_ind(pointCounts1, pointCounts2, axis = 1, equal_var = equal_var);
    
    #remove nans
    if removeNaN: 
        pi = numpy.isnan(pvals);
        pvals[pi] = 1.0;
        tvals[pi] = 0;

    pvals = self.cutoffPValues(pvals, pcutoff = pcutoff);
    
    #pvals.shape = (1,) + pvals.shape;
    #ids.shape = (1,) + ids.shape;
    #pvals = numpy.concatenate((ids.T, pvals.T), axis = 1);
    
    if signed:
        return pvals, numpy.sign(tvals);
    else:
        return pvals; 
Example #23
Source File: data.py    From multipy with BSD 3-Clause "New" or "Revised" License
def two_group_model(N=25, m=1000, pi0=0.1, delta=0.7):
    """A two-group model for generating test data (described in [3] and
    elsewhere). The default input arguments can be used to reproduce the
    result reported by Reiss and colleagues in Figure 2.A.

    Input arguments:
    N     - Number of samples in each group or condition.
    m     - Number of variables
    pi0   - Proportion of null effects among the m variables.
    delta - Location parameter of the non-null part of the distribution of Y,
            which controls the effect size.

    Output arguments:
    tstat - Test statistics (Student's t's)
    pvals - P-values corresponding to the test statistics
    """
    X = normal(loc=0, scale=1, size=(N, m))
    Y = np.hstack([normal(loc=0, scale=1, size=(N, int(pi0*m))),
                   normal(loc=delta, scale=1, size=(N, int(round((1-pi0)*m, 1))))])
    # Two-sample t-test
    tstat, pvals = ttest_ind(X, Y, axis=0)
    return tstat, pvals 
Example #24
Source File: evaluation.py    From sktime with BSD 3-Clause "New" or "Revised" License
def t_test(self, metric_name=None):
        """
        Runs t-test on all possible combinations between the estimators.
        """
        self._check_is_evaluated()
        metric_name = self._validate_metric_name(metric_name)
        metrics_per_estimator_dataset = \
            self._get_metrics_per_estimator_dataset(
                metric_name)

        t_df = pd.DataFrame()
        perms = itertools.product(metrics_per_estimator_dataset.keys(),
                                  repeat=2)
        values = np.array([])
        for perm in perms:
            x = np.array(metrics_per_estimator_dataset[perm[0]])
            y = np.array(metrics_per_estimator_dataset[perm[1]])
            t_stat, p_val = ttest_ind(x, y)

            t_test = {
                "estimator_1": perm[0],
                "estimator_2": perm[1],
                "t_stat": t_stat,
                "p_val": p_val
            }

            t_df = t_df.append(t_test, ignore_index=True)
            values = np.append(values, t_stat)
            values = np.append(values, p_val)

        index = t_df["estimator_1"].unique()
        values_names = ["t_stat", "p_val"]
        col_idx = pd.MultiIndex.from_product([index, values_names])
        values_reshaped = values.reshape(len(index),
                                         len(values_names) * len(index))

        values_df_multiindex = pd.DataFrame(values_reshaped, index=index,
                                            columns=col_idx)

        return t_df, values_df_multiindex 
Example #25
Source File: evaluate.py    From lentil with Apache License 2.0
def compare_validation_aucs(self, model_a, model_b):
        """
        Use a two-sample t-test to check the statistical significance
        of the difference between the validation AUCs (across CV runs) of two models

        :param str model_a: The name of a model
        :param str model_b: The name of another model
        :rtype: float
        :return: p-value
        """

        return stats.ttest_ind(
                self.validation_aucs(model_a), self.validation_aucs(model_b), equal_var=True)[1] 
Example #26
Source File: continuous_mdr.py    From scikit-mdr with MIT License
def score(self, features, targets):
        """Estimates the quality of the ContinuousMDR model using a t-statistic.

        Parameters
        ----------
        features: array-like {n_samples, n_features}
            Feature matrix to predict from
        targets: array-like {n_samples}
            List of true target values

        Returns
        -------
        quality_score: float
            The estimated quality of the Continuous MDR model

        """
        if self.feature_map is None:
            raise ValueError('The Continuous MDR model must be fit before score() can be called.')

        group_0_trait_values = []
        group_1_trait_values = []

        for feature_instance in self.feature_map:
            if self.feature_map[feature_instance] == 0:
                group_0_trait_values.extend(self.mdr_matrix_values[feature_instance])
            else:
                group_1_trait_values.extend(self.mdr_matrix_values[feature_instance])

        return abs(ttest_ind(group_0_trait_values, group_1_trait_values).statistic) 
Example #27
Source File: ptest.py    From pregel with MIT License
def _compare_gaussians(dist1, dist2):
    '''Method to compare samples from two Gaussian distributions to determine whether they are likely to be drawn
    from the same distribution.
    Here we assume that we already know that the two distributions are indeed Gaussian.
    Returns True if the two lists of samples are likely to be drawn from the same Gaussian.'''

    _, pvalue = ttest_ind(dist1, dist2, equal_var=False)

    if (pvalue > 0.05):
        # Likely to be drawn from same gaussian
        return True
    else:
        return False 
Example #28
Source File: filters.py    From causallib with Apache License 2.0
def compute_pvals(self, X, y):
        # TODO: export to stats_utils?
        is_y_binary = (len(np.unique(y)) == 2)
        # is_binary_feature = np.sum(((X != np.nanmin(X, axis=0)[np.newaxis, :]) &
        #                             (X != np.nanmax(X, axis=0)[np.newaxis, :])), axis=0) == 0
        is_binary_feature = areColumnsBinary(X)
        p_vals = np.zeros(X.shape[1])
        if is_y_binary:
            # Process non-binary columns:
            for i in np.where(~is_binary_feature)[0]:
                x0 = X.loc[y == 0, i]
                x1 = X.loc[y == 1, i]
                if self.is_linear:
                    _, p_vals[i] = stats.ttest_ind(x0, x1)
                else:
                    _, p_vals[i] = stats.ks_2samp(x0, x1)

            # Process binary features:
            _, p_vals[is_binary_feature] = feature_selection.chi2(X.loc[:, is_binary_feature], y)

        else:
            # Process non-binary features:
            _, p_vals[~is_binary_feature] = feature_selection.f_regression(X.loc[:, ~is_binary_feature], y)

            # Process binary features:
            y_mat = np.row_stack(y)
            for i in np.where(is_binary_feature)[0]:
                _, p_vals[i] = feature_selection.f_regression(y_mat, X.loc[:, i])
        return p_vals 
Example #29
Source File: test_single_external_libs.py    From diffxpy with BSD 3-Clause "New" or "Revised" License
def test_t_test_ref(self, n_cells: int = 2000, n_genes: int = 100):
        """
        Test if de.test.t_test() generates the same p-value distribution as scipy t-test.

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        """
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        logging.getLogger("batchglm").setLevel(logging.WARNING)
        logging.getLogger("diffxpy").setLevel(logging.INFO)

        np.random.seed(1)
        sim = self._prepare_data(n_cells=n_cells, n_genes=n_genes)
        test = de.test.t_test(
            data=sim.input_data,
            grouping="condition",
            sample_description=sim.sample_description
        )

        # Run scipy t-tests as a reference.
        conds = np.unique(sim.sample_description["condition"].values)
        ind_a = np.where(sim.sample_description["condition"] == conds[0])[0]
        ind_b = np.where(sim.sample_description["condition"] == conds[1])[0]
        scipy_pvals = stats.ttest_ind(
            a=sim.x[ind_a, :],
            b=sim.x[ind_b, :],
            axis=0,
            equal_var=False
        ).pvalue
        self._eval(test=test, ref_pvals=scipy_pvals)
        return True 
Example #30
Source File: calc_Utilities.py    From IceVarFigs with MIT License
def calc_indttest(varx,vary):
    """
    Function calculates statistical difference for 2 independent
    sample t-test

    Parameters
    ----------
    varx : 3d array
    vary : 3d array
    
    Returns
    -------
    stat = calculated t-statistic
    pvalue = two-tailed p-value

    Usage
    -----
    stat,pvalue = calc_indttest(varx,vary)
    """
    print('\n>>> Using calc_ttest function!')
    
    ### Import modules
    import numpy as np
    import scipy.stats as sts
    
    ### 2-independent sample t-test
    stat,pvalue = sts.ttest_ind(varx,vary,nan_policy='omit')
    
    ### Significant at 95% confidence level
    pvalue[np.where(pvalue >= 0.05)] = np.nan
    pvalue[np.where(pvalue < 0.05)] = 1.
    
    print('*Completed: Finished calc_ttest function!')
    return stat,pvalue

###############################################################################
###############################################################################
###############################################################################