Python Examples of scipy.stats.spearmanr

Source File: nanops.py From recruit with Apache License 2.0

7 votes

def get_corr_func(method):
    """
    Look up the pairwise correlation function for ``method``.

    ``method`` is either one of the names 'pearson', 'kendall' or
    'spearman', or a callable, which is handed back unchanged. Any other
    value fails on the final dictionary lookup.
    """
    if method in ('kendall', 'spearman'):
        # scipy is only imported for the two rank-based methods.
        from scipy.stats import kendalltau, spearmanr
    elif callable(method):
        return method

    def _pearson(x, y):
        matrix = np.corrcoef(x, y)
        return matrix[0, 1]

    def _kendall(x, y):
        result = kendalltau(x, y)
        # A tuple result carries the statistic first; anything else is
        # returned as-is.
        return result[0] if isinstance(result, tuple) else result

    def _spearman(x, y):
        result = spearmanr(x, y)
        return result[0]

    dispatch = dict(pearson=_pearson, kendall=_kendall, spearman=_spearman)
    return dispatch[method]

Source File: similarity_test.py From kor2vec with MIT License

6 votes

def word_sim_test(filename, pos_vectors):
    """
    Score word vectors against a file of comma-separated similarity pairs.

    Each line of ``filename`` is split into ``word1,word2,score``. The
    predicted similarity of a pair is the inner product of the two vectors
    built by ``create_word_vector``. Pairs for which a ``KeyError`` is
    raised are counted and left out.

    Returns a tuple ``(spearman, pearson, missed)``: the two correlation
    coefficients between the file's scores and the predictions, and the
    number of skipped pairs.
    """
    separator = ','
    gold_scores = []
    model_scores = []
    skipped = 0

    with open(filename, 'r') as handle:
        for line in handle:
            first, second, gold = line.strip().split(separator)

            try:
                vec_a = create_word_vector(first, pos_vectors)
                vec_b = create_word_vector(second, pos_vectors)
                score = float(np.inner(vec_a, vec_b))
            except KeyError:
                skipped += 1
            else:
                gold_scores.append(float(gold))
                model_scores.append(score)

    spearman = st.spearmanr(gold_scores, model_scores)[0]
    pearson = st.pearsonr(gold_scores, model_scores)[0]

    return spearman, pearson, skipped

Source File: nanops.py From Computable with MIT License

6 votes

def get_corr_func(method):
    """
    Return the pairwise correlation function registered under ``method``.

    Recognised names are 'pearson', 'kendall' and 'spearman'; any other
    value fails on the dictionary lookup at the end.
    """
    rank_based = ('kendall', 'spearman')
    if method in rank_based:
        # scipy is imported lazily, only when a rank-based method is asked for.
        from scipy.stats import kendalltau, spearmanr

    def _pearson(left, right):
        return np.corrcoef(left, right)[0, 1]

    def _kendall(left, right):
        outcome = kendalltau(left, right)
        if not isinstance(outcome, tuple):
            return outcome
        return outcome[0]

    def _spearman(left, right):
        outcome = spearmanr(left, right)
        return outcome[0]

    return {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman,
    }[method]

Source File: summary.py From neleval with Apache License 2.0

6 votes

def __call__(self):
        """
        Score every system on every measure and summarise the results.

        Fills one row of f-scores per entry of ``self.systems`` (one column
        per entry of ``self.measures``), stores the matrix on
        ``self.all_results``, then computes Pearson, Spearman and Kendall
        statistics for each pair of measures yielded by ``_pairs`` and the
        0/25/50/75/100 percentiles of each measure's scores.

        Returns whatever ``self.format`` returns for the resulting
        ``{'quartiles': ..., 'correlations': ...}`` dictionary.
        """
        all_results = np.empty((len(self.systems), len(self.measures)))
        # TODO: parallelise?
        for system, sys_results in zip(self.systems, all_results):
            if self.gold is None:
                # No gold standard: `system` is read as tab-formatted results.
                result_dict = Evaluate.read_tab_format(utf8_open(system))
            else:
                result_dict = Evaluate(system, self.gold, measures=self.measures, fmt='none')()
            # Writes through the row view into `all_results`.
            sys_results[...] = [result_dict[measure]['fscore'] for measure in self.measures]

        self.all_results = all_results

        correlations = {}
        # Materialise the pairs: on Python 3 zip() is a one-shot iterator, and
        # this sequence is walked twice (once by _pairs, once for quartiles).
        # Without list() the quartiles loop would see nothing.
        scores_by_measure = list(zip(self.measures, all_results.T))
        for (measure_i, scores_i), (measure_j, scores_j) in _pairs(scores_by_measure):
            correlations[measure_i, measure_j] = {'pearson': stats.pearsonr(scores_i, scores_j),
                                                  'spearman': stats.spearmanr(scores_i, scores_j),
                                                  'kendall': stats.kendalltau(scores_i, scores_j)}

        quartiles = {}
        for measure_i, scores_i in scores_by_measure:
            quartiles[measure_i] = np.percentile(scores_i, [0, 25, 50, 75, 100])

        return self.format(self, {'quartiles': quartiles, 'correlations': correlations})

Source File: spearman_rho.py From rankeval with Mozilla Public License 2.0

6 votes

def eval_per_query(self, y, y_pred):
        """
        Compute Spearman's rho for the instances of a single query.

        Parameters
        ----------
        y: numpy array
            Ground-truth labels of the instances belonging to one query of
            the dataset.
        y_pred: numpy array.
            Document scores predicted for that query during the model
            scoring phase.

        Returns
        -------
        rho: float
            The Spearman Rho per query.
        """
        return stats.spearmanr(y, y_pred).correlation

Source File: gimvi_tutorial.py From scVI with MIT License

6 votes

def imputation_score(trainer_both, data_spatial, gene_ids_test, normalized=True):
    """
    Median per-gene Spearman correlation between original and imputed values.

    The imputed matrix is the second item returned by
    ``trainer_both.get_imputed_values(normalized=normalized)``; the original
    matrix is ``data_spatial.X``. Both are restricted to the columns in
    ``gene_ids_test``. When ``normalized`` is true the selected original
    columns are divided in place by the row sums of ``data_spatial.X``.
    A gene whose imputed column is entirely zero scores 0.
    """
    _, fish_imputation = trainer_both.get_imputed_values(normalized=normalized)
    original = data_spatial.X[:, gene_ids_test]
    imputed = fish_imputation[:, gene_ids_test]

    if normalized:
        row_totals = data_spatial.X.sum(axis=1).reshape(-1, 1)
        original /= row_totals

    def _gene_score(gene):
        imputed_column = imputed[:, gene]
        if np.all(imputed_column == 0):
            return 0
        return spearmanr(original[:, gene], imputed_column)[0]

    per_gene = [_gene_score(gene) for gene in range(imputed.shape[1])]
    return np.median(np.array(per_gene))

Source File: test_stats.py From Computable with MIT License

6 votes

def test_tie1(self):
        # Data
        x = [1.0, 2.0, 3.0, 4.0]
        y = [1.0, 2.0, 2.0, 3.0]
        # Ranks of the data, with tie-handling.
        xr = [1.0, 2.0, 3.0, 4.0]
        yr = [1.0, 2.5, 2.5, 4.0]
        # Result of spearmanr should be the same as applying
        # pearsonr to the ranks.
        sr = stats.spearmanr(x, y)
        pr = stats.pearsonr(xr, yr)
        assert_almost_equal(sr, pr)


##    W.II.E.  Tabulate X against X, using BIG as a case weight.  The values
##    should appear on the diagonal and the total should be 899999955.
##    If the table cannot hold these values, forget about working with
##    census data.  You can also tabulate HUGE against TINY.  There is no
##    reason a tabulation program should not be able to distinguish
##    different values regardless of their magnitude.

### I need to figure out how to do this one.

Source File: accuracy.py From basenji with Apache License 2.0

6 votes

def spearmanr(self):
    """ Compute the vector of per-target Spearman correlations.

    For every target index, predictions and targets are either selected
    with the inverted ``targets_na`` mask (when one is set) or flattened
    over the first two axes, and their Spearman correlation is stored.
    """
    n_targets = self.num_targets
    scor = np.zeros(n_targets)

    for ti in range(n_targets):
      if self.targets_na is None:
        targets_ti = self.targets[:, :, ti].flatten()
        preds_ti = self.preds[:, :, ti].flatten()
      else:
        keep = ~self.targets_na
        preds_ti = self.preds[keep, ti]
        targets_ti = self.targets[keep, ti]

      scor[ti] = stats.spearmanr(targets_ti, preds_ti)[0]

    return scor


################################################################################
# __main__
################################################################################

Source File: rfpimp.py From malss with MIT License

6 votes

def feature_corr_matrix(df):
    """
    Return the Spearman's rank-order correlation between all pairs
    of features as a matrix with feature names as index and column names.
    The diagonal will be all 1.0 as features are self correlated.

    Spearman's correlation is the same thing as converting two variables
    to rank values and then running a standard Pearson's correlation
    on those ranked variables. Spearman's is nonparametric and does not
    assume a linear relationship between the variables; it looks for
    monotonic relationships.

    :param df: dataframe containing features as columns, and
               without the target variable.
    :return: a data frame with the correlation matrix, values rounded
             to 4 decimals
    """
    corr = np.round(spearmanr(df).correlation, 4)
    if np.ndim(corr) == 0:
        # With exactly two features scipy returns a single coefficient
        # rather than a 2x2 matrix. Passing that scalar to DataFrame would
        # broadcast it into every cell, including the diagonal, so build
        # the symmetric matrix explicitly.
        corr = np.array([[1.0, corr], [corr, 1.0]])
    df_corr = pd.DataFrame(data=corr, index=df.columns, columns=df.columns)
    return df_corr

Source File: rfpimp.py From malss with MIT License

6 votes

def feature_corr_matrix(df):
    """
    Return the Spearman's rank-order correlation between all pairs
    of features as a matrix with feature names as index and column names.
    The diagonal will be all 1.0 as features are self correlated.

    Spearman's correlation is the same thing as converting two variables
    to rank values and then running a standard Pearson's correlation
    on those ranked variables. Spearman's is nonparametric and does not
    assume a linear relationship between the variables; it looks for
    monotonic relationships.

    :param df: dataframe containing features as columns, and
               without the target variable.
    :return: a data frame with the correlation matrix, values rounded
             to 4 decimals
    """
    rho = np.round(spearmanr(df).correlation, 4)
    if np.ndim(rho) == 0:
        # Two-feature input: scipy hands back one coefficient, not a matrix.
        # A scalar given to DataFrame fills every cell (diagonal included),
        # so expand it into the proper symmetric 2x2 matrix first.
        rho = np.array([[1.0, rho], [rho, 1.0]])
    return pd.DataFrame(data=rho, index=df.columns, columns=df.columns)

Source File: nanops.py From vnpy_crypto with MIT License

6 votes

def get_corr_func(method):
    """
    Map a correlation method name to the function that computes it.

    ``method`` must be 'pearson', 'kendall' or 'spearman'; anything else
    fails on the dictionary lookup. scipy is imported only for the two
    rank-based methods.
    """
    if method in ['kendall', 'spearman']:
        from scipy.stats import kendalltau, spearmanr

    def _pearson(first, second):
        coefficients = np.corrcoef(first, second)
        return coefficients[0, 1]

    def _kendall(first, second):
        tau = kendalltau(first, second)
        # Tuple results hold the statistic in the first slot.
        if isinstance(tau, tuple):
            tau = tau[0]
        return tau

    def _spearman(first, second):
        return spearmanr(first, second)[0]

    available = {}
    available['pearson'] = _pearson
    available['kendall'] = _kendall
    available['spearman'] = _spearman
    return available[method]

Source File: evalrank.py From link-prediction_with_deep-learning with MIT License

6 votes

def evaluate(wv, reference):
    """Evaluate wv against reference, return (rho, count) where rho is
    Spearman's rho and count is the number of reference word pairs
    that could be evaluated against.

    Reference entries are visited in order of increasing similarity; a
    pair is skipped when looking up either word in wv raises KeyError.
    """
    gold_sims = []
    predicted_sims = []
    for words, sim in sorted(reference, key=lambda ws: ws[1]):
        try:
            first_vec = wv[words[0]]
            second_vec = wv[words[1]]
        except KeyError:
            continue
        gold_sims.append(sim)
        predicted_sims.append(cosine(first_vec, second_vec))
    rho = spearmanr(gold_sims, predicted_sims)[0]
    return (rho, len(gold_sims))

Source File: time_align.py From scanorama with MIT License

6 votes

def time_dist(datasets_dimred, time):
    """
    Print how pairwise time distances correlate with dataset distances.

    For every index pair ``i < j`` the Euclidean distance between
    ``time[i]`` and ``time[j]`` is paired with the mean of the pairwise
    Euclidean distances between ``datasets_dimred[i]`` and
    ``datasets_dimred[j]``. The Spearman and Pearson results over those
    pairs are printed; nothing is returned.
    """
    pairwise_time = euclidean_distances(time, time)
    n_rows = pairwise_time.shape[0]
    n_cols = pairwise_time.shape[1]

    time_dists = []
    scores = []
    for i in range(n_rows):
        # Only the strict upper triangle (i < j) is visited.
        for j in range(i + 1, n_cols):
            between = euclidean_distances(datasets_dimred[i], datasets_dimred[j])
            score = np.mean(between)
            time_dists.append(pairwise_time[i, j])
            scores.append(score)

    spearman_result = spearmanr(time_dists, scores)
    print('Spearman rho = {}'.format(spearman_result))
    pearson_result = pearsonr(time_dists, scores)
    print('Pearson rho = {}'.format(pearson_result))

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_sLITTLEROUND(self):
        # Spearman's rho between LITTLE and ROUND is expected to be 1.0.
        rho = stats.spearmanr(LITTLE, ROUND)[0]
        assert_approx_equal(rho, 1.0)

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_scalar(self):
        y = stats.spearmanr(4., 2.)
        assert_(np.isnan(y).all())

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_uneven_lengths(self):
        assert_raises(ValueError, stats.spearmanr, [1, 2, 1], [8, 9])
        assert_raises(ValueError, stats.spearmanr, [1, 2, 1], 8)

Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_spearmanr(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r, p = stats.spearmanr(x, y)
            rm, pm = stats.mstats.spearmanr(xm, ym)
            assert_almost_equal(r, rm, 14)
            assert_almost_equal(p, pm, 14)

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_sLITTLETINY(self):
        # Spearman's rho between LITTLE and TINY is expected to be 1.0.
        rho = stats.spearmanr(LITTLE, TINY)[0]
        assert_approx_equal(rho, 1.0)

Source File: test_mstats_basic.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_spearmanr(self):
        # Tests some computations of Spearman's rho
        def rho(a, b):
            return mstats.spearmanr(a, b)[0]

        def with_masked_nan(values):
            # Same sample with a trailing NaN, masked via fix_invalid.
            return ma.fix_invalid(values + [np.nan])

        small_x = [5.05, 6.75, 3.21, 2.66]
        small_y = [1.65, 2.64, 2.64, 6.95]
        assert_almost_equal(rho(small_x, small_y), -0.6324555)
        assert_almost_equal(
            rho(with_masked_nan(small_x), with_masked_nan(small_y)),
            -0.6324555)

        long_x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
                  1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
        long_y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
                  0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
        assert_almost_equal(rho(long_x, long_y), 0.6887299)
        assert_almost_equal(
            rho(with_masked_nan(long_x), with_masked_nan(long_y)),
            0.6887299)

        # Next test is to make sure calculation uses sufficient precision.
        # The denominator's value is ~n^3 and used to be represented as an
        # int. 2000**3 > 2**32 so these arrays would cause overflow on
        # some machines.
        x = list(range(2000))
        y = list(range(2000))
        for i, j in ((0, 9), (10, 434), (435, 1509)):
            y[i], y[j] = y[j], y[i]
        # rho = 1 - 6 * (2 * (9^2 + 424^2 + 1074^2))/(2000 * (2000^2 - 1))
        #     = 1 - (1 / 500)
        #     = 0.998
        assert_almost_equal(rho(x, y), 0.998)

        # test for namedtuple attributes
        check_named_results(mstats.spearmanr(x, y),
                            ('correlation', 'pvalue'), ma=True)

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_sHUGEHUGE(self):
        # Spearman's rho of HUGE with itself is expected to be 1.0.
        rho = stats.spearmanr(HUGE, HUGE)[0]
        assert_approx_equal(rho, 1.0)

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_sXROUND(self):
        # Spearman's rho between X and ROUND is expected to be 1.0.
        rho = stats.spearmanr(X, ROUND)[0]
        assert_approx_equal(rho, 1.0)

Source File: spearman_correlation.py From allennlp with Apache License 2.0

5 votes

def get_metric(self, reset: bool = False):
        """
        Compute the Spearman correlation of the accumulated values.

        The accumulated predictions and gold labels are moved to the CPU and
        converted to numpy before being correlated. When `reset` is true,
        `self.reset()` is called after the correlation has been computed.

        # Returns

        The accumulated sample Spearman correlation.
        """
        predictions = self.total_predictions.cpu().numpy()
        gold_labels = self.total_gold_labels.cpu().numpy()
        correlation = stats.spearmanr(predictions, gold_labels)[0]

        if reset:
            self.reset()

        return correlation

Source File: metrics.py From DL-text with MIT License

5 votes

def eval_sick(model,X_test_l,X_test_r,test_score):
    """
    Compare model predictions with reference scores.

    Both the first component of each prediction row and the reference
    scores are mapped through ``v * 4 + 1`` before comparison.

    Returns a tuple of the Spearman coefficient, the Pearson coefficient
    and the mean squared error between predictions and references.
    """
    raw = model.predict([X_test_l, X_test_r]) * 4 + 1
    pred = np.array([row[0] for row in raw])
    test_score = np.array(test_score) * 4 + 1

    sp_coef = measures.spearmanr(pred, test_score)[0]
    per_coef = measures.pearsonr(pred, test_score)[0]
    squared_error = np.square(pred - test_score)
    mse_coef = np.mean(squared_error)

    return sp_coef, per_coef, mse_coef

Source File: test_stats.py From Computable with MIT License

5 votes

def test_sROUNDROUND(self):
        # Spearman's rho of ROUND with itself is expected to be 1.0.
        rho = stats.spearmanr(ROUND, ROUND)[0]
        assert_approx_equal(rho, 1.0)

Source File: test_stats.py From Computable with MIT License

5 votes

def test_sTINYROUND(self):
        # Spearman's rho between TINY and ROUND is expected to be 1.0.
        rho = stats.spearmanr(TINY, ROUND)[0]
        assert_approx_equal(rho, 1.0)

Source File: test_stats.py From Computable with MIT License

5 votes

def test_sTINYTINY(self):
        # Spearman's rho of TINY with itself is expected to be 1.0.
        rho = stats.spearmanr(TINY, TINY)[0]
        assert_approx_equal(rho, 1.0)

Source File: test_stats.py From Computable with MIT License

5 votes

def test_sHUGEROUND(self):
        # Spearman's rho between HUGE and ROUND is expected to be 1.0.
        rho = stats.spearmanr(HUGE, ROUND)[0]
        assert_approx_equal(rho, 1.0)

Source File: test_stats.py From Computable with MIT License

5 votes

def test_sHUGEHUGE(self):
        # Correlating HUGE against itself should give a rho of 1.0.
        result = stats.spearmanr(HUGE, HUGE)
        assert_approx_equal(result[0], 1.0)

Source File: test_stats.py From Computable with MIT License

5 votes

def test_sLITTLEROUND(self):
        # Correlating LITTLE against ROUND should give a rho of 1.0.
        result = stats.spearmanr(LITTLE, ROUND)
        assert_approx_equal(result[0], 1.0)

Source File: test_stats.py From Computable with MIT License

5 votes

def test_sLITTLETINY(self):
        # Correlating LITTLE against TINY should give a rho of 1.0.
        result = stats.spearmanr(LITTLE, TINY)
        assert_approx_equal(result[0], 1.0)

Python scipy.stats.spearmanr() Examples