Python scipy.stats.mstats.rankdata() Examples
The following are 8 code examples of scipy.stats.mstats.rankdata(), collected from open-source projects. Each example notes its source file, originating project, and license. You may also want to check out the other functions and classes available in the scipy.stats.mstats module.
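
Before the project examples, a minimal orientation sketch (not taken from any of the projects below): mstats.rankdata() averages the ranks of tied values and, unlike the plain scipy.stats.rankdata(), understands masked arrays. Masked entries receive rank 0 by default, or the average rank of the unmasked values when use_missing=True.

import numpy.ma as ma
from scipy.stats import mstats

x = ma.array([10, 20, 20, 30, 40], mask=[0, 0, 0, 0, 1])

print(mstats.rankdata(x))                    # expected: [1.  2.5 2.5 4.  0. ] (masked value ranked 0)
print(mstats.rankdata(x, use_missing=True))  # expected: [1.  2.5 2.5 4.  2.5] (masked value gets the mean rank)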
Example #1
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License
def test_ranking(self):
    x = ma.array([0, 1, 1, 1, 2, 3, 4, 5, 5, 6])
    assert_almost_equal(mstats.rankdata(x),
                        [1, 3, 3, 3, 5, 6, 7, 8.5, 8.5, 10])
    x[[3, 4]] = masked
    assert_almost_equal(mstats.rankdata(x),
                        [1, 2.5, 2.5, 0, 0, 4, 5, 6.5, 6.5, 8])
    assert_almost_equal(mstats.rankdata(x, use_missing=True),
                        [1, 2.5, 2.5, 4.5, 4.5, 4, 5, 6.5, 6.5, 8])
    x = ma.array([0, 1, 5, 1, 2, 4, 3, 5, 1, 6])
    assert_almost_equal(mstats.rankdata(x),
                        [1, 3, 8.5, 3, 5, 7, 6, 8.5, 3, 10])
    x = ma.array([[0, 1, 1, 1, 2], [3, 4, 5, 5, 6]])
    assert_almost_equal(mstats.rankdata(x),
                        [[1, 3, 3, 3, 5], [6, 7, 8.5, 8.5, 10]])
    assert_almost_equal(mstats.rankdata(x, axis=1),
                        [[1, 3, 3, 3, 5], [1, 2, 3.5, 3.5, 5]])
    assert_almost_equal(mstats.rankdata(x, axis=0),
                        [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]])
Example #2
Source File: rank.py From stereo-magnification with Apache License 2.0
def compute_rank(data):
    print('\nRANK\n')
    # rankdata assigns rank 1 to the lowest element, so
    # we need to negate before ranking.
    ssim_rank = rankdata(np.array(data['ssim']) * -1.0, axis=1)
    psnr_rank = rankdata(np.array(data['psnr']) * -1.0, axis=1)
    # Rank mean + std.
    for i, m in enumerate(data['models']):
        print('%30s ssim-rank %.2f ± %.2f psnr-rank %.2f ± %.2f' % (
            m, np.mean(ssim_rank[:, i]), np.std(ssim_rank[:, i]),
            np.mean(psnr_rank[:, i]), np.std(psnr_rank[:, i])))
    # Rank frequencies
    print('\n SSIM rank freqs')
    print_rank_freqs(data, ssim_rank)
    print('\n PSNR rank freqs')
    print_rank_freqs(data, psnr_rank)
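
The negation in compute_rank is the detail worth remembering: rankdata() assigns rank 1 to the smallest value, so multiplying a "higher is better" score such as SSIM or PSNR by -1 makes rank 1 mean the best model. A small self-contained sketch with made-up scores (the array below is illustrative, not stereo-magnification data):

import numpy as np
from scipy.stats.mstats import rankdata

# Toy stand-in for data['ssim']: one row per test image, one column per model.
scores = np.array([[0.91, 0.85, 0.97],
                   [0.88, 0.90, 0.95]])

print(rankdata(scores, axis=1))    # rank 1 = lowest score in each row
print(rankdata(-scores, axis=1))   # rank 1 = highest score, roughly [[2. 3. 1.], [3. 2. 1.]]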
Example #3
Source File: test_mstats_basic.py From Computable with MIT License
def test_ranking(self):
    x = ma.array([0, 1, 1, 1, 2, 3, 4, 5, 5, 6])
    assert_almost_equal(mstats.rankdata(x),
                        [1, 3, 3, 3, 5, 6, 7, 8.5, 8.5, 10])
    x[[3, 4]] = masked
    assert_almost_equal(mstats.rankdata(x),
                        [1, 2.5, 2.5, 0, 0, 4, 5, 6.5, 6.5, 8])
    assert_almost_equal(mstats.rankdata(x, use_missing=True),
                        [1, 2.5, 2.5, 4.5, 4.5, 4, 5, 6.5, 6.5, 8])
    x = ma.array([0, 1, 5, 1, 2, 4, 3, 5, 1, 6])
    assert_almost_equal(mstats.rankdata(x),
                        [1, 3, 8.5, 3, 5, 7, 6, 8.5, 3, 10])
    x = ma.array([[0, 1, 1, 1, 2], [3, 4, 5, 5, 6]])
    assert_almost_equal(mstats.rankdata(x),
                        [[1, 3, 3, 3, 5], [6, 7, 8.5, 8.5, 10]])
    assert_almost_equal(mstats.rankdata(x, axis=1),
                        [[1, 3, 3, 3, 5], [1, 2, 3.5, 3.5, 5]])
    assert_almost_equal(mstats.rankdata(x, axis=0),
                        [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]])
Example #4
Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License
def test_rankdata(self):
    # On unmasked data, scipy.stats.rankdata and scipy.stats.mstats.rankdata should agree.
    for n in self.get_n():
        x, y, xm, ym = self.generate_xy_sample(n)
        r = stats.rankdata(x)
        rm = stats.mstats.rankdata(x)
        assert_allclose(r, rm)
Example #5
Source File: costrank.py From ruptures with BSD 2-Clause "Simplified" License
def fit(self, signal):
    """Set parameters of the instance.

    Args:
        signal (array): signal. Shape (n_samples,) or (n_samples, n_features)

    Returns:
        self
    """
    if signal.ndim == 1:
        signal = signal.reshape(-1, 1)
    obs, vars = signal.shape

    # Convert signal data into ranks in the range [1, n]
    ranks = rankdata(signal, axis=0)
    # Center the ranks into the range [-(n+1)/2, (n+1)/2]
    centered_ranks = ranks - ((obs + 1) / 2)
    # Sigma is the covariance of these ranks.
    # If it's a scalar, reshape it into a 1x1 matrix
    cov = np.cov(centered_ranks, rowvar=False, bias=True).reshape(vars, vars)

    # Use the pseudoinverse to handle linear dependencies
    # see Lung-Yut-Fong, A., Lévy-Leduc, C., & Cappé, O. (2015)
    try:
        self.inv_cov = pinv(cov)
    except LinAlgError as e:
        raise LinAlgError(
            "The covariance matrix of the rank signal is not invertible and the "
            "pseudo-inverse computation did not converge."
        ) from e
    self.ranks = centered_ranks

    return self
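
The two commented steps in fit() are easy to check on a toy signal: with obs samples the column-wise ranks run from 1 to obs, so subtracting (obs + 1) / 2 centers them around zero, and the pseudo-inverse of their covariance tolerates linearly dependent features. A minimal sketch under those assumptions (toy signal, not ruptures code):

import numpy as np
from numpy.linalg import pinv
from scipy.stats.mstats import rankdata

signal = np.array([[1.0, 10.0],
                   [3.0, 30.0],
                   [2.0, 50.0],
                   [4.0, 20.0]])           # toy (n_samples, n_features) signal
obs, n_features = signal.shape

ranks = rankdata(signal, axis=0)           # per-column ranks in [1, obs]
centered = ranks - (obs + 1) / 2           # values drawn from {-1.5, -0.5, 0.5, 1.5} when obs == 4
cov = np.cov(centered, rowvar=False, bias=True).reshape(n_features, n_features)
inv_cov = pinv(cov)                        # pseudo-inverse handles a rank-deficient covariance
print(centered)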
Example #6
Source File: score_processor.py From tornado with MIT License
def rank_matrix(current_stats):
    # Rank each column of the statistics matrix independently (lowest value gets rank 1).
    ranked_stats = rankdata(current_stats, axis=0)
    return ranked_stats
Example #7
Source File: anova.py From seqc with GNU General Public License v2.0
def post_hoc_tests(self):
    """carries out post-hoc tests between genes with significant ANOVA results using
    Welch's U-test on ranked data.
    """
    if self._anova is None:
        self.anova()

    anova_significant = np.array(self._anova) < 1  # call array in case it is a Series

    # limit to significant data, convert to column-wise ranks.
    data = self.data[:, anova_significant]
    rank_data = np.apply_along_axis(rankdata, 0, data)
    # assignments = self.group_assignments[anova_significant]

    split_indices = np.where(np.diff(self.group_assignments))[0] + 1
    array_views = np.array_split(rank_data, split_indices, axis=0)

    # get mean and standard deviations of each
    fmean = partial(np.mean, axis=0)
    fvar = partial(np.var, axis=0)
    mu = np.vstack(list(map(fmean, array_views))).T  # transpose to get gene rows
    n = np.array(list(map(lambda x: x.shape[0], array_views)))
    s = np.vstack(list(map(fvar, array_views))).T
    s_norm = s / n  # transpose to get gene rows

    # calculate T
    numerator = mu[:, np.newaxis, :] - mu[:, :, np.newaxis]
    denominator = np.sqrt(s_norm[:, np.newaxis, :] + s_norm[:, :, np.newaxis])
    statistic = numerator / denominator

    # calculate df
    s_norm2 = s**2 / (n**2 * n-1)
    numerator = (s_norm[:, np.newaxis, :] + s_norm[:, :, np.newaxis]) ** 2
    denominator = (s_norm2[:, np.newaxis, :] + s_norm2[:, :, np.newaxis])
    df = np.floor(numerator / denominator)

    # get significance
    p = t.cdf(np.abs(statistic), df)  # note, two tailed test

    # calculate fdr correction; because above uses 2-tails, alpha here is halved
    # because each test is evaluated twice due to the symmetry of vectorization.
    p_adj = multipletests(np.ravel(p), alpha=self.alpha, method='fdr_tsbh')[1]
    p_adj = p_adj.reshape(*p.shape)

    phr = namedtuple('PostHocResults', ['p_adj', 'statistic', 'mu'])
    self.post_hoc = phr(p_adj, statistic, mu)

    if self.index is not None:
        p_adj = pd.Panel(
            p_adj, items=self.index[anova_significant],
            major_axis=self.groups, minor_axis=self.groups)
        statistic = pd.Panel(
            statistic, items=self.index[anova_significant],
            major_axis=self.groups, minor_axis=self.groups)
        mu = pd.DataFrame(mu, self.index[anova_significant], columns=self.groups)

    return p_adj, statistic, mu
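
One detail of post_hoc_tests worth isolating: np.apply_along_axis(rankdata, 0, data) ranks each gene (column) independently across observations. Assuming rankdata here is scipy.stats.mstats.rankdata, the same result should also be reachable through its axis argument, as in this small sketch with made-up numbers:

import numpy as np
from scipy.stats.mstats import rankdata

data = np.array([[5.0, 0.0],
                 [1.0, 2.0],
                 [3.0, 2.0]])              # toy observations x genes matrix

via_apply = np.apply_along_axis(rankdata, 0, data)
via_axis = rankdata(data, axis=0)
print(np.allclose(via_apply, via_axis))    # expected: True (ties averaged per column)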
Example #8
Source File: resampled_nonparametric.py From seqc with GNU General Public License v2.0
def _mannwhitneyu(x, y, use_continuity=True):
    """
    Computes the Mann-Whitney statistic

    Missing values in `x` and/or `y` are discarded.

    Parameters
    ----------
    x : ndarray,
        Input, vector or observations x features matrix
    y : ndarray,
        Input, vector or observations x features matrix. If matrix, must have
        same number of features as x
    use_continuity : {True, False}, optional
        Whether a continuity correction (1/2.) should be taken into account.

    Returns
    -------
    statistic : float
        The Mann-Whitney statistic
    approx z : float
        The normal-approximated z-score for U.
    pvalue : float
        Approximate p-value assuming a normal distribution.
    """
    if x.ndim == 1 and y.ndim == 1:
        x, y = x[:, np.newaxis], y[:, np.newaxis]
    ranks = rankdata(np.concatenate([x, y]), axis=0)
    nx, ny = x.shape[0], y.shape[0]
    nt = nx + ny
    U = ranks[:nx].sum(0) - nx * (nx + 1) / 2.

    mu = (nx * ny) / 2.
    u = np.amin([U, nx * ny - U], axis=0)  # get smaller U by convention

    sigsq = np.ones(ranks.shape[1]) * (nt ** 3 - nt) / 12.
    for i in np.arange(len(sigsq)):
        ties = count_tied_groups(ranks[:, i])
        sigsq[i] -= np.sum(v * (k ** 3 - k) for (k, v) in ties.items()) / 12.
    sigsq *= nx * ny / float(nt * (nt - 1))

    if use_continuity:
        z = (U - 1 / 2. - mu) / np.sqrt(sigsq)
    else:
        z = (U - mu) / np.sqrt(sigsq)

    prob = erfc(abs(z) / np.sqrt(2))
    return np.vstack([u, z, prob]).T
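
The loop over sigsq above applies the usual tie correction for the normal approximation of U: each group of k tied ranks removes (k**3 - k) / 12 from the variance before the nx*ny / (nt*(nt - 1)) scaling. count_tied_groups() comes from scipy.stats.mstats and returns a {group size: number of groups} dict. A sketch of that piece in isolation, on toy numbers (not seqc data):

import numpy as np
from scipy.stats.mstats import rankdata, count_tied_groups

column = np.array([3.0, 1.0, 1.0, 2.0, 2.0, 2.0])
ranks = rankdata(column)                  # [6.  1.5 1.5 4.  4.  4. ]
ties = count_tied_groups(ranks)           # expected: {2: 1, 3: 1} -> one pair, one triple
nt = len(column)

sigsq = (nt ** 3 - nt) / 12.0
sigsq -= sum(v * (k ** 3 - k) for k, v in ties.items()) / 12.0
print(ties, sigsq)                        # expected: {2: 1, 3: 1} 15.0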