Python scipy.stats.mstats.rankdata() Examples
The following are 8 code examples of scipy.stats.mstats.rankdata(), collected from open-source projects. Each example notes its source file, originating project, and license. You may also want to check out the other functions and classes available in the scipy.stats.mstats module.
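
Before the project examples, a minimal orientation sketch (not taken from any of the projects below): mstats.rankdata() averages the ranks of tied values and, unlike the plain scipy.stats.rankdata(), understands masked arrays. Masked entries receive rank 0 by default, or the average rank of the unmasked values when use_missing=True.

import numpy.ma as ma
from scipy.stats import mstats

x = ma.array([10, 20, 20, 30, 40], mask=[0, 0, 0, 0, 1])

print(mstats.rankdata(x))                    # expected: [1.  2.5 2.5 4.  0. ] (masked value ranked 0)
print(mstats.rankdata(x, use_missing=True))  # expected: [1.  2.5 2.5 4.  2.5] (masked value gets the mean rank)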
Example #1
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License
def test_ranking(self):
    x = ma.array([0, 1, 1, 1, 2, 3, 4, 5, 5, 6])
    assert_almost_equal(mstats.rankdata(x),
                        [1, 3, 3, 3, 5, 6, 7, 8.5, 8.5, 10])
    x[[3, 4]] = masked
    assert_almost_equal(mstats.rankdata(x),
                        [1, 2.5, 2.5, 0, 0, 4, 5, 6.5, 6.5, 8])
    assert_almost_equal(mstats.rankdata(x, use_missing=True),
                        [1, 2.5, 2.5, 4.5, 4.5, 4, 5, 6.5, 6.5, 8])
    x = ma.array([0, 1, 5, 1, 2, 4, 3, 5, 1, 6])
    assert_almost_equal(mstats.rankdata(x),
                        [1, 3, 8.5, 3, 5, 7, 6, 8.5, 3, 10])
    x = ma.array([[0, 1, 1, 1, 2], [3, 4, 5, 5, 6]])
    assert_almost_equal(mstats.rankdata(x),
                        [[1, 3, 3, 3, 5], [6, 7, 8.5, 8.5, 10]])
    assert_almost_equal(mstats.rankdata(x, axis=1),
                        [[1, 3, 3, 3, 5], [1, 2, 3.5, 3.5, 5]])
    assert_almost_equal(mstats.rankdata(x, axis=0),
                        [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]])
Example #2
Source File: rank.py From stereo-magnification with Apache License 2.0
def compute_rank(data):
    print('\nRANK\n')
    # rankdata assigns rank 1 to the lowest element, so
    # we need to negate before ranking.
    ssim_rank = rankdata(np.array(data['ssim']) * -1.0, axis=1)
    psnr_rank = rankdata(np.array(data['psnr']) * -1.0, axis=1)
    # Rank mean + std.
    for i, m in enumerate(data['models']):
        print('%30s ssim-rank %.2f ± %.2f psnr-rank %.2f ± %.2f' % (
            m, np.mean(ssim_rank[:, i]), np.std(ssim_rank[:, i]),
            np.mean(psnr_rank[:, i]), np.std(psnr_rank[:, i])))
    # Rank frequencies
    print('\n SSIM rank freqs')
    print_rank_freqs(data, ssim_rank)
    print('\n PSNR rank freqs')
    print_rank_freqs(data, psnr_rank)
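
The negation in compute_rank is the detail worth remembering: rankdata() assigns rank 1 to the smallest value, so multiplying a "higher is better" score such as SSIM or PSNR by -1 makes rank 1 mean the best model. A small self-contained sketch with made-up scores (the array below is illustrative, not stereo-magnification data):

import numpy as np
from scipy.stats.mstats import rankdata

# Toy stand-in for data['ssim']: one row per test image, one column per model.
scores = np.array([[0.91, 0.85, 0.97],
                   [0.88, 0.90, 0.95]])

print(rankdata(scores, axis=1))    # rank 1 = lowest score in each row
print(rankdata(-scores, axis=1))   # rank 1 = highest score, roughly [[2. 3. 1.], [3. 2. 1.]]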
Example #3
Source File: test_mstats_basic.py From Computable with MIT License
def test_ranking(self):
    x = ma.array([0, 1, 1, 1, 2, 3, 4, 5, 5, 6])
    assert_almost_equal(mstats.rankdata(x),
                        [1, 3, 3, 3, 5, 6, 7, 8.5, 8.5, 10])
    x[[3, 4]] = masked
    assert_almost_equal(mstats.rankdata(x),
                        [1, 2.5, 2.5, 0, 0, 4, 5, 6.5, 6.5, 8])
    assert_almost_equal(mstats.rankdata(x, use_missing=True),
                        [1, 2.5, 2.5, 4.5, 4.5, 4, 5, 6.5, 6.5, 8])
    x = ma.array([0, 1, 5, 1, 2, 4, 3, 5, 1, 6])
    assert_almost_equal(mstats.rankdata(x),
                        [1, 3, 8.5, 3, 5, 7, 6, 8.5, 3, 10])
    x = ma.array([[0, 1, 1, 1, 2], [3, 4, 5, 5, 6]])
    assert_almost_equal(mstats.rankdata(x),
                        [[1, 3, 3, 3, 5], [6, 7, 8.5, 8.5, 10]])
    assert_almost_equal(mstats.rankdata(x, axis=1),
                        [[1, 3, 3, 3, 5], [1, 2, 3.5, 3.5, 5]])
    assert_almost_equal(mstats.rankdata(x, axis=0),
                        [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]])
Example #4
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License
def test_rankdata(self):
    # On unmasked data, scipy.stats.rankdata and scipy.stats.mstats.rankdata should agree.
    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        r = stats.rankdata(x)
        rm = stats.mstats.rankdata(x)
        assert_allclose(r, rm)
Example #5
Source File: costrank.py From ruptures with BSD 2-Clause "Simplified" License
def fit(self, signal):
    """Set parameters of the instance.

    Args:
        signal (array): signal. Shape (n_samples,) or (n_samples, n_features)

    Returns:
        self
    """
    if signal.ndim == 1:
        signal = signal.reshape(-1, 1)
    obs, vars = signal.shape

    # Convert signal data into ranks in the range [1, n]
    ranks = rankdata(signal, axis=0)
    # Center the ranks into the range [-(n+1)/2, (n+1)/2]
    centered_ranks = ranks - ((obs + 1) / 2)
    # Sigma is the covariance of these ranks.
    # If it's a scalar, reshape it into a 1x1 matrix
    cov = np.cov(centered_ranks, rowvar=False, bias=True).reshape(vars, vars)

    # Use the pseudoinverse to handle linear dependencies
    # see Lung-Yut-Fong, A., Lévy-Leduc, C., & Cappé, O. (2015)
    try:
        self.inv_cov = pinv(cov)
    except LinAlgError as e:
        raise LinAlgError(
            "The covariance matrix of the rank signal is not invertible and the "
            "pseudo-inverse computation did not converge."
        ) from e
    self.ranks = centered_ranks

    return self
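
The two commented steps in fit() are easy to check on a toy signal: with obs samples the column-wise ranks run from 1 to obs, so subtracting (obs + 1) / 2 centers them around zero, and the pseudo-inverse of their covariance tolerates linearly dependent features. A minimal sketch under those assumptions (toy signal, not ruptures code):

import numpy as np
from numpy.linalg import pinv
from scipy.stats.mstats import rankdata

signal = np.array([[1.0, 10.0],
                   [3.0, 30.0],
                   [2.0, 50.0],
                   [4.0, 20.0]])           # toy (n_samples, n_features) signal
obs, n_features = signal.shape

ranks = rankdata(signal, axis=0)           # per-column ranks in [1, obs]
centered = ranks - (obs + 1) / 2           # values drawn from {-1.5, -0.5, 0.5, 1.5} when obs == 4
cov = np.cov(centered, rowvar=False, bias=True).reshape(n_features, n_features)
inv_cov = pinv(cov)                        # pseudo-inverse handles a rank-deficient covariance
print(centered)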
Example #6
Source File: score_processor.py From tornado with MIT License
def rank_matrix(current_stats):
    # Rank each column of the statistics matrix independently (lowest value gets rank 1).
    ranked_stats = rankdata(current_stats, axis=0)
    return ranked_stats
Example #7
Source File: anova.py From seqc with GNU General Public License v2.0
def post_hoc_tests(self):
    """carries out post-hoc tests between genes with significant ANOVA results using
    Welch's U-test on ranked data.
    """
    if self._anova is None:
        self.anova()

    anova_significant = np.array(self._anova) < 1  # call array in case it is a Series

    # limit to significant data, convert to column-wise ranks.
    data = self.data[:, anova_significant]
    rank_data = np.apply_along_axis(rankdata, 0, data)
    # assignments = self.group_assignments[anova_significant]

    split_indices = np.where(np.diff(self.group_assignments))[0] + 1
    array_views = np.array_split(rank_data, split_indices, axis=0)

    # get mean and standard deviations of each
    fmean = partial(np.mean, axis=0)
    fvar = partial(np.var, axis=0)
    mu = np.vstack(list(map(fmean, array_views))).T  # transpose to get gene rows
    n = np.array(list(map(lambda x: x.shape[0], array_views)))
    s = np.vstack(list(map(fvar, array_views))).T
    s_norm = s / n  # transpose to get gene rows

    # calculate T
    numerator = mu[:, np.newaxis, :] - mu[:, :, np.newaxis]
    denominator = np.sqrt(s_norm[:, np.newaxis, :] + s_norm[:, :, np.newaxis])
    statistic = numerator / denominator

    # calculate df
    s_norm2 = s**2 / (n**2 * n-1)
    numerator = (s_norm[:, np.newaxis, :] + s_norm[:, :, np.newaxis]) ** 2
    denominator = (s_norm2[:, np.newaxis, :] + s_norm2[:, :, np.newaxis])
    df = np.floor(numerator / denominator)

    # get significance
    p = t.cdf(np.abs(statistic), df)  # note, two tailed test

    # calculate fdr correction; because above uses 2-tails, alpha here is halved
    # because each test is evaluated twice due to the symmetry of vectorization.
    p_adj = multipletests(np.ravel(p), alpha=self.alpha, method='fdr_tsbh')[1]
    p_adj = p_adj.reshape(*p.shape)

    phr = namedtuple('PostHocResults', ['p_adj', 'statistic', 'mu'])
    self.post_hoc = phr(p_adj, statistic, mu)

    if self.index is not None:
        p_adj = pd.Panel(
            p_adj, items=self.index[anova_significant],
            major_axis=self.groups, minor_axis=self.groups)
        statistic = pd.Panel(
            statistic, items=self.index[anova_significant],
            major_axis=self.groups, minor_axis=self.groups)
        mu = pd.DataFrame(mu, self.index[anova_significant], columns=self.groups)

    return p_adj, statistic, mu
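
One detail of post_hoc_tests worth isolating: np.apply_along_axis(rankdata, 0, data) ranks each gene (column) independently across observations. Assuming rankdata here is scipy.stats.mstats.rankdata, the same result should also be reachable through its axis argument, as in this small sketch with made-up numbers:

import numpy as np
from scipy.stats.mstats import rankdata

data = np.array([[5.0, 0.0],
                 [1.0, 2.0],
                 [3.0, 2.0]])              # toy observations x genes matrix

via_apply = np.apply_along_axis(rankdata, 0, data)
via_axis = rankdata(data, axis=0)
print(np.allclose(via_apply, via_axis))    # expected: True (ties averaged per column)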
Example #8
Source File: resampled_nonparametric.py From seqc with GNU General Public License v2.0
def _mannwhitneyu(x, y, use_continuity=True):
    """
    Computes the Mann-Whitney statistic

    Missing values in `x` and/or `y` are discarded.

    Parameters
    ----------
    x : ndarray,
        Input, vector or observations x features matrix
    y : ndarray,
        Input, vector or observations x features matrix. If matrix, must have
        same number of features as x
    use_continuity : {True, False}, optional
        Whether a continuity correction (1/2.) should be taken into account.

    Returns
    -------
    statistic : float
        The Mann-Whitney statistic
    approx z : float
        The normal-approximated z-score for U.
    pvalue : float
        Approximate p-value assuming a normal distribution.
    """
    if x.ndim == 1 and y.ndim == 1:
        x, y = x[:, np.newaxis], y[:, np.newaxis]
    ranks = rankdata(np.concatenate([x, y]), axis=0)
    nx, ny = x.shape[0], y.shape[0]
    nt = nx + ny
    U = ranks[:nx].sum(0) - nx * (nx + 1) / 2.

    mu = (nx * ny) / 2.
    u = np.amin([U, nx * ny - U], axis=0)  # get smaller U by convention

    sigsq = np.ones(ranks.shape[1]) * (nt ** 3 - nt) / 12.
    for i in np.arange(len(sigsq)):
        ties = count_tied_groups(ranks[:, i])
        sigsq[i] -= np.sum(v * (k ** 3 - k) for (k, v) in ties.items()) / 12.
    sigsq *= nx * ny / float(nt * (nt - 1))

    if use_continuity:
        z = (U - 1 / 2. - mu) / np.sqrt(sigsq)
    else:
        z = (U - mu) / np.sqrt(sigsq)

    prob = erfc(abs(z) / np.sqrt(2))
    return np.vstack([u, z, prob]).T
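
The loop over sigsq above applies the usual tie correction for the normal approximation of U: each group of k tied ranks removes (k**3 - k) / 12 from the variance before the nx*ny / (nt*(nt - 1)) scaling. count_tied_groups() comes from scipy.stats.mstats and returns a {group size: number of groups} dict. A sketch of that piece in isolation, on toy numbers (not seqc data):

import numpy as np
from scipy.stats.mstats import rankdata, count_tied_groups

column = np.array([3.0, 1.0, 1.0, 2.0, 2.0, 2.0])
ranks = rankdata(column)                  # [6.  1.5 1.5 4.  4.  4. ]
ties = count_tied_groups(ranks)           # expected: {2: 1, 3: 1} -> one pair, one triple
nt = len(column)

sigsq = (nt ** 3 - nt) / 12.0
sigsq -= sum(v * (k ** 3 - k) for k, v in ties.items()) / 12.0
print(ties, sigsq)                        # expected: {2: 1, 3: 1} 15.0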