Python scipy.stats.kruskal() Examples
The following are 18 code examples of scipy.stats.kruskal().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_simple_tie(self): x = [1] y = [1, 2] h_uncorr = 1.5**2 + 2*2.25**2 - 12 corr = 0.75 expected = h_uncorr / corr # 0.5 h, p = stats.kruskal(x, y) # Since the expression is simple and the exact answer is 0.5, it # should be safe to use assert_equal(). assert_equal(h, expected)
Example #2
Source File: multicomp.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def kruskal(self, pairs=None, multimethod='T'):
    '''
    pairwise comparison for kruskal-wallis test

    This is just a reimplementation of scipy.stats.kruskal and does
    not yet use a multiple comparison correction.

    Parameters
    ----------
    pairs : optional
        Not read anywhere in this method.
    multimethod : str, optional
        Not read anywhere in this method.

    Returns
    -------
    The value ``stats.norm.sf(Q) * 2`` for a single pair of groups.

    Notes
    -----
    Diagnostic values are written to stdout with ``print``.

    NOTE(review): the original indentation was lost in this listing, so it
    is not certain whether the trailing ``print``/``return`` statements
    belong inside the ``for`` loop (result of the first pair) or after it
    (result of the last pair). They are shown inside the loop here --
    confirm against the upstream project before relying on this.
    '''
    # Refresh the rank information read from ``self.ranks`` below.
    self.getranks()
    tot = self.nobs
    meanranks = self.ranks.groupmean
    groupnobs = self.ranks.groupnobs

    # simultaneous/separate treatment of multiple tests
    # Scale factor shared by every pair: N(N+1)/12 divided by the tie
    # correction computed from the ranked data.
    f=(tot * (tot + 1.) / 12.) / stats.tiecorrect(self.rankdata) #(xranks)
    print('MultiComparison.kruskal')
    for i,j in zip(*self.pairindices):
        #pdiff = np.abs(mrs[i] - mrs[j])
        # Absolute difference of the two group mean ranks.
        pdiff = np.abs(meanranks[i] - meanranks[j])
        # Standard error from the shared factor and the two group sizes.
        se = np.sqrt(f * np.sum(1. / groupnobs[[i,j]] )) #np.array([8,8]))) #Fixme groupnobs[[i,j]] ))
        Q = pdiff / se

        # TODO : print(statments, fix
        print(i,j, pdiff, se, pdiff / se, pdiff / se > 2.6310)
        print(stats.norm.sf(Q) * 2)
        return stats.norm.sf(Q) * 2
Example #3
Source File: utils.py From dl-eeg-review with MIT License | 5 votes |
def run_kruskal(df, condition_col, value_col='acc_diff', min_n_obs=6,
                plot=False):
    """Run Kruskal-Wallis analysis of variance test.

    Args:
        df (pd.DataFrame): dataframe where each row is a paper.
        condition_col (str): name of column to use as condition.

    Keyword Args:
        value_col (str): name of column to use as the numerical value to run
            the test on.
        min_n_obs (int): minimum number of observations in each sample in
            order to run the test.
        plot (bool): if True, also draw a violin plot of the conditions that
            have at least `min_n_obs` observations.

    Returns:
        (dict): keys 'test' (always 'kruskal'), 'pvalue', 'stat' and 'fig'.
            'pvalue' and 'stat' are NaN when the test is not run; 'fig' is
            None unless `plot` is True.
    """
    # Keep only the per-condition samples that are large enough.
    groups = []
    for _name, values in df.groupby(condition_col)[value_col]:
        if len(values) >= min_n_obs:
            groups.append(values)

    # The test is only run when more than two samples remain.
    if len(groups) <= 2:
        stat, p = np.nan, np.nan
        print('Not enough samples with more than {} observations.'.format(min_n_obs))
    else:
        stat, p = kruskal(*groups)

    fig = None
    if plot:
        has_enough = df[condition_col].value_counts() >= min_n_obs
        kept_conditions = has_enough.index[has_enough].tolist()
        plotted_rows = df[df[condition_col].isin(kept_conditions)]

        fig, ax = plt.subplots()
        sns.violinplot(data=plotted_rows, x=condition_col, y=value_col, ax=ax)
        title = 'Kruskal-Wallis for {} vs. {}\n(pvalue={:0.4f})'.format(
            condition_col, value_col, p)
        ax.set_title(title)

    return {'test': 'kruskal', 'pvalue': p, 'stat': stat, 'fig': fig}
Example #4
Source File: columnar_tests.py From drifter_ml with MIT License | 5 votes |
def kruskal_similar_distribution(self, column, pvalue_threshold=0.05, num_rounds=3):
    """Compare a column of new data with historical data via a permutation test.

    The statistic passed to ``permutation_test`` is the Kruskal-Wallis
    statistic of the two samples. Returns False when the resulting p-value
    is below ``pvalue_threshold`` and True otherwise.
    """
    def kruskal_statistic(x, y):
        # Only the statistic of the Kruskal-Wallis result is used.
        return stats.kruskal(x, y).statistic

    p_value = permutation_test(
        self.new_data[column],
        self.historical_data[column],
        method="approximate",
        num_rounds=num_rounds,
        func=kruskal_statistic,
        seed=0)
    return not (p_value < pvalue_threshold)
Example #5
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_nan_policy(self): x = np.arange(10.) x[9] = np.nan assert_equal(stats.kruskal(x, x), (np.nan, np.nan)) assert_almost_equal(stats.kruskal(x, x, nan_policy='omit'), (0.0, 1.0)) assert_raises(ValueError, stats.kruskal, x, x, nan_policy='raise') assert_raises(ValueError, stats.kruskal, x, x, nan_policy='foobar')
Example #6
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_kruskal_result_attributes(self):
    """Pass the kruskal result and its expected field names to check_named_results."""
    odd_values = [1, 3, 5, 7, 9]
    even_values = [2, 4, 6, 8, 10]
    expected_fields = ('statistic', 'pvalue')
    check_named_results(stats.kruskal(odd_values, even_values),
                        expected_fields)
Example #7
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_three_groups(self): # A test of stats.kruskal with three groups, with ties. x = [1, 1, 1] y = [2, 2, 2] z = [2, 2] h_uncorr = (12. / 8. / 9.) * (3*2**2 + 3*6**2 + 2*6**2) - 3 * 9 # 5.0 corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8) expected = h_uncorr / corr # 7.0 h, p = stats.kruskal(x, y, z) assert_approx_equal(h, expected) assert_approx_equal(p, stats.distributions.chi2.sf(h, 2))
Example #8
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_another_tie(self): x = [1, 1, 1, 2] y = [2, 2, 2, 2] h_uncorr = (12. / 8. / 9.) * 4 * (3**2 + 6**2) - 3 * 9 corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8) expected = h_uncorr / corr h, p = stats.kruskal(x, y) assert_approx_equal(h, expected)
Example #9
Source File: multicomp.py From vnpy_crypto with MIT License | 5 votes |
def kruskal(self, pairs=None, multimethod='T'):
    '''
    pairwise comparison for kruskal-wallis test

    This is just a reimplementation of scipy.stats.kruskal and does
    not yet use a multiple comparison correction.

    Parameters
    ----------
    pairs : optional
        Not read anywhere in this method.
    multimethod : str, optional
        Not read anywhere in this method.

    Returns
    -------
    The value ``stats.norm.sf(Q) * 2`` for a single pair of groups.

    Notes
    -----
    Diagnostic values are written to stdout with ``print``.

    NOTE(review): the original indentation was lost in this listing, so it
    is not certain whether the trailing ``print``/``return`` statements
    belong inside the ``for`` loop (result of the first pair) or after it
    (result of the last pair). They are shown inside the loop here --
    confirm against the upstream project before relying on this.
    '''
    # Refresh the rank information read from ``self.ranks`` below.
    self.getranks()
    tot = self.nobs
    meanranks = self.ranks.groupmean
    groupnobs = self.ranks.groupnobs

    # simultaneous/separate treatment of multiple tests
    # Scale factor shared by every pair: N(N+1)/12 divided by the tie
    # correction computed from the ranked data.
    f=(tot * (tot + 1.) / 12.) / stats.tiecorrect(self.rankdata) #(xranks)
    print('MultiComparison.kruskal')
    for i,j in zip(*self.pairindices):
        #pdiff = np.abs(mrs[i] - mrs[j])
        # Absolute difference of the two group mean ranks.
        pdiff = np.abs(meanranks[i] - meanranks[j])
        # Standard error from the shared factor and the two group sizes.
        se = np.sqrt(f * np.sum(1. / groupnobs[[i,j]] )) #np.array([8,8]))) #Fixme groupnobs[[i,j]] ))
        Q = pdiff / se

        # TODO : print(statments, fix
        print(i,j, pdiff, se, pdiff / se, pdiff / se > 2.6310)
        print(stats.norm.sf(Q) * 2)
        return stats.norm.sf(Q) * 2
Example #10
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_basic(self): x = [1, 3, 5, 7, 9] y = [2, 4, 6, 8, 10] h, p = stats.kruskal(x, y) assert_approx_equal(h, 3./11, significant=10) assert_approx_equal(p, stats.distributions.chi2.sf(3./11, 1)) h, p = stats.kruskal(np.array(x), np.array(y)) assert_approx_equal(h, 3./11, significant=10) assert_approx_equal(p, stats.distributions.chi2.sf(3./11, 1))
Example #11
Source File: test_stats.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def test_simple(self): x = [1] y = [2] h, p = stats.kruskal(x, y) assert_equal(h, 1.0) assert_approx_equal(p, stats.distributions.chi2.sf(h, 1)) h, p = stats.kruskal(np.array(x), np.array(y)) assert_equal(h, 1.0) assert_approx_equal(p, stats.distributions.chi2.sf(h, 1))
Example #12
Source File: test_stats.py From Computable with MIT License | 5 votes |
def test_three_groups(self): # A test of stats.kruskal with three groups, with ties. x = [1, 1, 1] y = [2, 2, 2] z = [2, 2] h_uncorr = (12. / 8. / 9.) * (3*2**2 + 3*6**2 + 2*6**2) - 3 * 9 # 5.0 corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8) expected = h_uncorr / corr # 7.0 h, p = stats.kruskal(x, y, z) assert_approx_equal(h, expected) assert_approx_equal(p, stats.chisqprob(h, 2))
Example #13
Source File: test_stats.py From Computable with MIT License | 5 votes |
def test_another_tie(self): x = [1, 1, 1, 2] y = [2, 2, 2, 2] h_uncorr = (12. / 8. / 9.) * 4 * (3**2 + 6**2) - 3 * 9 corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8) expected = h_uncorr / corr h, p = stats.kruskal(x, y) assert_approx_equal(h, expected)
Example #14
Source File: test_stats.py From Computable with MIT License | 5 votes |
def test_simple_tie(self): x = [1] y = [1, 2] h_uncorr = 1.5**2 + 2*2.25**2 - 12 corr = 0.75 expected = h_uncorr / corr # 0.5 h, p = stats.kruskal(x, y) # Since the expression is simple and the exact answer is 0.5, it # should be safe to use assert_equal(). assert_equal(h, expected)
Example #15
Source File: test_stats.py From Computable with MIT License | 5 votes |
def test_basic(self): x = [1, 3, 5, 7, 9] y = [2, 4, 6, 8, 10] h, p = stats.kruskal(x, y) assert_approx_equal(h, 3./11, significant=10) assert_approx_equal(p, stats.chisqprob(3./11, 1)) h, p = stats.kruskal(np.array(x), np.array(y)) assert_approx_equal(h, 3./11, significant=10) assert_approx_equal(p, stats.chisqprob(3./11, 1))
Example #16
Source File: test_stats.py From Computable with MIT License | 5 votes |
def test_simple(self): x = [1] y = [2] h, p = stats.kruskal(x, y) assert_equal(h, 1.0) assert_approx_equal(p, stats.chisqprob(h, 1)) h, p = stats.kruskal(np.array(x), np.array(y)) assert_equal(h, 1.0) assert_approx_equal(p, stats.chisqprob(h, 1))
Example #17
Source File: FeatureSelector.py From FAE with GNU General Public License v3.0 | 5 votes |
def KruskalWallisAnalysis(self, array, label):
    """Run a Kruskal-Wallis test on every column between two label groups.

    Rows of ``array`` are grouped by the values of ``np.unique(label)``;
    only the first two of those groups are compared. Returns a pair of
    numpy arrays (statistics, p-values) with one entry per column of
    ``array``.
    """
    groups = [array[safe_mask(array, label == value)]
              for value in np.unique(label)]
    neg, pos = groups[0], groups[1]

    statistics = []
    p_values = []
    for column in range(array.shape[1]):
        statistic, p_value = kruskal(neg[:, column], pos[:, column])
        statistics.append(statistic)
        p_values.append(p_value)

    return np.array(statistics), np.array(p_values)
Example #18
Source File: numerical_comparison.py From DIVE-backend with GNU General Public License v3.0 | 4 votes |
def get_valid_tests(equal_var, independent, normal, num_samples):
    '''
    Get valid tests given number of samples and statistical characterization
    of samples:

    Equal variance
    Independence
    Normality

    Returns a dict mapping a test name to the corresponding scipy.stats
    function. The dict is empty when num_samples is less than 1.
    '''
    # Start from an empty mapping so every branch below has something to
    # extend and the function never returns an unbound name.
    valid_tests = {}

    if num_samples == 1:
        valid_tests = {
            'chisquare': stats.chisquare,
            'power_divergence': stats.power_divergence,
            'kstest': stats.kstest
        }
        if normal:
            # Stored at the top level like every other entry; the previous
            # nested 'input' key did not exist and raised KeyError.
            valid_tests['one_sample_ttest'] = stats.ttest_1samp

    elif num_samples == 2:
        if independent:
            valid_tests = {
                'mannwhitneyu': stats.mannwhitneyu,
                'kruskal': stats.kruskal,
                'ks_2samp': stats.ks_2samp
            }
            if normal:
                valid_tests['two_sample_ttest'] = stats.ttest_ind
                # NOTE(review): the original indentation was lost; nesting
                # this under `normal` mirrors the `normal and equal_var`
                # condition used for three or more samples -- confirm.
                if equal_var:
                    valid_tests['f_oneway'] = stats.f_oneway
        else:
            valid_tests = {
                'two_sample_ks': stats.ks_2samp,
                'wilcoxon': stats.wilcoxon
            }
            if normal:
                valid_tests['two_sample_related_ttest'] = stats.ttest_rel

    elif num_samples >= 3:
        if independent:
            valid_tests = {
                'kruskal': stats.kruskal
            }
            if normal and equal_var:
                valid_tests['f_oneway'] = stats.f_oneway
        else:
            # Previously assigned into a dict that had not been created on
            # this path.
            valid_tests['friedmanchisquare'] = stats.friedmanchisquare

    return valid_tests