Python Examples of scipy.stats.kruskal

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_simple_tie(self):
        x = [1]
        y = [1, 2]
        h_uncorr = 1.5**2 + 2*2.25**2 - 12
        corr = 0.75
        expected = h_uncorr / corr   # 0.5
        h, p = stats.kruskal(x, y)
        # Since the expression is simple and the exact answer is 0.5, it
        # should be safe to use assert_equal().
        assert_equal(h, expected)

Source File: multicomp.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def kruskal(self, pairs=None, multimethod='T'):
        '''
        pairwise comparison for kruskal-wallis test

        This is just a reimplementation of scipy.stats.kruskal and does
        not yet use a multiple comparison correction.

        '''
        self.getranks()
        tot = self.nobs
        meanranks = self.ranks.groupmean
        groupnobs = self.ranks.groupnobs


        # simultaneous/separate treatment of multiple tests
        f=(tot * (tot + 1.) / 12.) / stats.tiecorrect(self.rankdata) #(xranks)
        print('MultiComparison.kruskal')
        for i,j in zip(*self.pairindices):
            #pdiff = np.abs(mrs[i] - mrs[j])
            pdiff = np.abs(meanranks[i] - meanranks[j])
            se = np.sqrt(f * np.sum(1. / groupnobs[[i,j]] )) #np.array([8,8]))) #Fixme groupnobs[[i,j]] ))
            Q = pdiff / se

            # TODO : print(statments, fix
            print(i,j, pdiff, se, pdiff / se, pdiff / se > 2.6310)
            print(stats.norm.sf(Q) * 2)
            return stats.norm.sf(Q) * 2

Source File: utils.py From dl-eeg-review with MIT License

5 votes

def run_kruskal(df, condition_col, value_col='acc_diff', min_n_obs=6, 
                plot=False):
    """Run Kruskal-Wallis analysis of variance test.

    Args:
        df (pd.DataFrame): dataframe where each row is a paper.
        condition_col (str): name of column to use as condition.

    Keyword Args:
        value_col (str): name of column to use as the numerical value to run the
            test on.
        min_n_obs (int): minimum number of observations in each sample in order
            to run the test.

    Returns:
        (float): U statistic
        (float): p-value
    """
    data = [i for name, i in df.groupby(condition_col)[value_col]
            if len(i) >= min_n_obs]

    if len(data) > 2:
        stat, p = kruskal(*data)
    else:
        stat, p = np.nan, np.nan
        print('Not enough samples with more than {} observations.'.format(min_n_obs))

    if plot:
        enough_samples = df[condition_col].value_counts() >= min_n_obs
        enough_samples = enough_samples.index[enough_samples].tolist()
        fig, ax = plt.subplots()
        sns.violinplot(
            data=df[df[condition_col].isin(enough_samples)], x=condition_col, 
            y=value_col, ax=ax)
        ax.set_title('Kruskal-Wallis for {} vs. {}\n(pvalue={:0.4f})'.format(
            condition_col, value_col, p))
    else:
        fig = None

    return {'test': 'kruskal', 'pvalue': p, 'stat': stat, 'fig': fig}

Source File: columnar_tests.py From drifter_ml with MIT License

5 votes

def kruskal_similar_distribution(self, column,
                                      pvalue_threshold=0.05,
                                      num_rounds=3):
        p_value = permutation_test(
            self.new_data[column],
            self.historical_data[column],
            method="approximate",
            num_rounds=num_rounds,
            func=lambda x, y: stats.kruskal(x, y).statistic,
            seed=0)
        if p_value < pvalue_threshold:
            return False
        return True

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_nan_policy(self):
        x = np.arange(10.)
        x[9] = np.nan
        assert_equal(stats.kruskal(x, x), (np.nan, np.nan))
        assert_almost_equal(stats.kruskal(x, x, nan_policy='omit'), (0.0, 1.0))
        assert_raises(ValueError, stats.kruskal, x, x, nan_policy='raise')
        assert_raises(ValueError, stats.kruskal, x, x, nan_policy='foobar')

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_kruskal_result_attributes(self):
        x = [1, 3, 5, 7, 9]
        y = [2, 4, 6, 8, 10]
        res = stats.kruskal(x, y)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes)

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_three_groups(self):
        # A test of stats.kruskal with three groups, with ties.
        x = [1, 1, 1]
        y = [2, 2, 2]
        z = [2, 2]
        h_uncorr = (12. / 8. / 9.) * (3*2**2 + 3*6**2 + 2*6**2) - 3 * 9  # 5.0
        corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)
        expected = h_uncorr / corr  # 7.0
        h, p = stats.kruskal(x, y, z)
        assert_approx_equal(h, expected)
        assert_approx_equal(p, stats.distributions.chi2.sf(h, 2))

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_another_tie(self):
        x = [1, 1, 1, 2]
        y = [2, 2, 2, 2]
        h_uncorr = (12. / 8. / 9.) * 4 * (3**2 + 6**2) - 3 * 9
        corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)
        expected = h_uncorr / corr
        h, p = stats.kruskal(x, y)
        assert_approx_equal(h, expected)

Source File: multicomp.py From vnpy_crypto with MIT License

5 votes

def kruskal(self, pairs=None, multimethod='T'):
        '''
        pairwise comparison for kruskal-wallis test

        This is just a reimplementation of scipy.stats.kruskal and does
        not yet use a multiple comparison correction.

        '''
        self.getranks()
        tot = self.nobs
        meanranks = self.ranks.groupmean
        groupnobs = self.ranks.groupnobs


        # simultaneous/separate treatment of multiple tests
        f=(tot * (tot + 1.) / 12.) / stats.tiecorrect(self.rankdata) #(xranks)
        print('MultiComparison.kruskal')
        for i,j in zip(*self.pairindices):
            #pdiff = np.abs(mrs[i] - mrs[j])
            pdiff = np.abs(meanranks[i] - meanranks[j])
            se = np.sqrt(f * np.sum(1. / groupnobs[[i,j]] )) #np.array([8,8]))) #Fixme groupnobs[[i,j]] ))
            Q = pdiff / se

            # TODO : print(statments, fix
            print(i,j, pdiff, se, pdiff / se, pdiff / se > 2.6310)
            print(stats.norm.sf(Q) * 2)
            return stats.norm.sf(Q) * 2

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_basic(self):
        x = [1, 3, 5, 7, 9]
        y = [2, 4, 6, 8, 10]
        h, p = stats.kruskal(x, y)
        assert_approx_equal(h, 3./11, significant=10)
        assert_approx_equal(p, stats.distributions.chi2.sf(3./11, 1))
        h, p = stats.kruskal(np.array(x), np.array(y))
        assert_approx_equal(h, 3./11, significant=10)
        assert_approx_equal(p, stats.distributions.chi2.sf(3./11, 1))

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_simple(self):
        x = [1]
        y = [2]
        h, p = stats.kruskal(x, y)
        assert_equal(h, 1.0)
        assert_approx_equal(p, stats.distributions.chi2.sf(h, 1))
        h, p = stats.kruskal(np.array(x), np.array(y))
        assert_equal(h, 1.0)
        assert_approx_equal(p, stats.distributions.chi2.sf(h, 1))

Source File: test_stats.py From Computable with MIT License

5 votes

def test_three_groups(self):
        # A test of stats.kruskal with three groups, with ties.
        x = [1, 1, 1]
        y = [2, 2, 2]
        z = [2, 2]
        h_uncorr = (12. / 8. / 9.) * (3*2**2 + 3*6**2 + 2*6**2) - 3 * 9  # 5.0
        corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)
        expected = h_uncorr / corr  # 7.0
        h, p = stats.kruskal(x, y, z)
        assert_approx_equal(h, expected)
        assert_approx_equal(p, stats.chisqprob(h, 2))

Source File: test_stats.py From Computable with MIT License

5 votes

def test_another_tie(self):
        x = [1, 1, 1, 2]
        y = [2, 2, 2, 2]
        h_uncorr = (12. / 8. / 9.) * 4 * (3**2 + 6**2) - 3 * 9
        corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)
        expected = h_uncorr / corr
        h, p = stats.kruskal(x, y)
        assert_approx_equal(h, expected)

Source File: test_stats.py From Computable with MIT License

5 votes

def test_simple_tie(self):
        x = [1]
        y = [1, 2]
        h_uncorr = 1.5**2 + 2*2.25**2 - 12
        corr = 0.75
        expected = h_uncorr / corr   # 0.5
        h, p = stats.kruskal(x, y)
        # Since the expression is simple and the exact answer is 0.5, it
        # should be safe to use assert_equal().
        assert_equal(h, expected)

Source File: test_stats.py From Computable with MIT License

5 votes

def test_basic(self):
        x = [1, 3, 5, 7, 9]
        y = [2, 4, 6, 8, 10]
        h, p = stats.kruskal(x, y)
        assert_approx_equal(h, 3./11, significant=10)
        assert_approx_equal(p, stats.chisqprob(3./11, 1))
        h, p = stats.kruskal(np.array(x), np.array(y))
        assert_approx_equal(h, 3./11, significant=10)
        assert_approx_equal(p, stats.chisqprob(3./11, 1))

Source File: test_stats.py From Computable with MIT License

5 votes

def test_simple(self):
        x = [1]
        y = [2]
        h, p = stats.kruskal(x, y)
        assert_equal(h, 1.0)
        assert_approx_equal(p, stats.chisqprob(h, 1))
        h, p = stats.kruskal(np.array(x), np.array(y))
        assert_equal(h, 1.0)
        assert_approx_equal(p, stats.chisqprob(h, 1))

Source File: FeatureSelector.py From FAE with GNU General Public License v3.0

5 votes

def KruskalWallisAnalysis(self, array, label):
        args = [array[safe_mask(array, label == k)] for k in np.unique(label)]
        neg, pos = args[0], args[1]
        f_list, p_list = [], []
        for index in range(array.shape[1]):
            f, p = kruskal(neg[:, index], pos[:, index])
            f_list.append(f), p_list.append(p)
        return np.array(f_list), np.array(p_list)

Source File: numerical_comparison.py From DIVE-backend with GNU General Public License v3.0

4 votes

def get_valid_tests(equal_var, independent, normal, num_samples):
    '''
    Get valid tests given number of samples and statistical characterization of
    samples:

    Equal variance
    Indepenence
    Normality
    '''
    if num_samples == 1:
        valid_tests = {
            'chisquare': stats.chisquare,
            'power_divergence': stats.power_divergence,
            'kstest': stats.kstest
        }
        if normal:
            valid_tests['input']['one_sample_ttest'] = stats.ttest_1samp

    elif num_samples == 2:
        if independent:
            valid_tests = {
                'mannwhitneyu': stats.mannwhitneyu,
                'kruskal': stats.kruskal,
                'ks_2samp': stats.ks_2samp
            }
            if normal:
                valid_tests['two_sample_ttest'] = stats.ttest_ind
                if equal_var:
                    valid_tests['f_oneway'] = stats.f_oneway
        else:
            valid_tests = {
                'two_sample_ks': stats.ks_2samp,
                'wilcoxon': stats.wilcoxon
            }
            if normal:
                valid_tests['two_sample_related_ttest'] = stats.ttest_rel

    elif num_samples >= 3:
        if independent:
            valid_tests = {
                'kruskal': stats.kruskal
            }
            if normal and equal_var:
                valid_tests['f_oneway'] = stats.f_oneway

        else:
            valid_tests['friedmanchisquare'] = stats.friedmanchisquare

    return valid_tests

Python scipy.stats.kruskal() Examples