Python scipy.stats.pearsonr() Examples

The following are 30 code examples of scipy.stats.pearsonr(), taken from open-source projects. The project and source file for each example are noted above its code. You may also want to check out the other available functions and classes of the scipy.stats module. pearsonr(x, y) computes the Pearson product-moment correlation coefficient between two equal-length sequences and returns the coefficient together with a two-sided p-value.
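For reference, here is a minimal, self-contained call (the sample data is illustrative):

import numpy as np
from scipy.stats import pearsonr

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([1.1, 1.9, 3.2, 3.8, 5.1])

# pearsonr returns the correlation coefficient r and the two-sided p-value
r, p = pearsonr(x, y)
print(r, p)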
Example #1
Source File: conftest.py    From NiBetaSeries with MIT License
def betaseries_file(tmpdir_factory,
                    deriv_betaseries_fname=deriv_betaseries_fname):
    bfile = tmpdir_factory.mktemp("beta").ensure(deriv_betaseries_fname)
    np.random.seed(3)
    num_trials = 40
    tgt_corr = 0.1
    bs1 = np.random.rand(num_trials)
    # create another betaseries with a target correlation
    bs2 = minimize(lambda x: abs(tgt_corr - pearsonr(bs1, x)[0]),
                   np.random.rand(num_trials)).x

    # stack the two beta series into a 4-D array (1 x 1 x 2 x num_trials)
    bs_data = np.array([[[bs1, bs2]]])

    # the nifti image
    bs_img = nib.Nifti1Image(bs_data, np.eye(4))
    bs_img.to_filename(str(bfile))

    return bfile 
Example #2
Source File: plot_functions.py    From idea_relations with MIT License
def joint_plot(x, y, xlabel=None,
               ylabel=None, xlim=None, ylim=None,
               loc="best", color='#0485d1',
               size=8, markersize=50, kind="kde",
               scatter_color="r"):
    with sns.axes_style("darkgrid"):
        if xlabel and ylabel:
            g = SubsampleJointGrid(xlabel, ylabel,
                    data=DataFrame(data={xlabel: x, ylabel: y}),
                    space=0.1, ratio=2, size=size, xlim=xlim, ylim=ylim)
        else:
            g = SubsampleJointGrid(x, y, size=size,
                    space=0.1, ratio=2, xlim=xlim, ylim=ylim)
        g.plot_joint(sns.kdeplot, shade=True, cmap="Blues")
        g.plot_sub_joint(plt.scatter, 1000, s=20, c=scatter_color, alpha=0.3)
        g.plot_marginals(sns.distplot, kde=False, rug=False)
        g.annotate(ss.pearsonr, fontsize=25, template="{stat} = {val:.2g}\np = {p:.2g}")
        g.ax_joint.set_yticklabels(g.ax_joint.get_yticks())
        g.ax_joint.set_xticklabels(g.ax_joint.get_xticks())
    return g 
Example #3
Source File: test_stats.py    From Computable with MIT License
def test_tie1(self):
        # Data
        x = [1.0, 2.0, 3.0, 4.0]
        y = [1.0, 2.0, 2.0, 3.0]
        # Ranks of the data, with tie-handling.
        xr = [1.0, 2.0, 3.0, 4.0]
        yr = [1.0, 2.5, 2.5, 4.0]
        # Result of spearmanr should be the same as applying
        # pearsonr to the ranks.
        sr = stats.spearmanr(x, y)
        pr = stats.pearsonr(xr, yr)
        assert_almost_equal(sr, pr)
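
For reference, the identity this test relies on can be checked directly with scipy.stats.rankdata (a standalone sketch, separate from the test suite above):

import numpy as np
from scipy.stats import pearsonr, spearmanr, rankdata

x = [1.0, 2.0, 3.0, 4.0]
y = [1.0, 2.0, 2.0, 3.0]

# Spearman's rho is Pearson's r computed on the (tie-averaged) ranks
rho = spearmanr(x, y)[0]
r_on_ranks = pearsonr(rankdata(x), rankdata(y))[0]
print(np.isclose(rho, r_on_ranks))  # True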


Example #4
Source File: test_comparer.py    From rsmtool with Apache License 2.0
def test_compute_correlations_between_versions_default_columns(self):
        df_old = pd.DataFrame({'spkitemid': ['a', 'b', 'c'],
                               'feature1': [1.3, 1.5, 2.1],
                               'feature2': [1.1, 6.2, 2.1],
                               'sc1': [2, 3, 4]})
        df_new = pd.DataFrame({'spkitemid': ['a', 'b', 'c'],
                               'feature1': [-1.3, -1.5, -2.1],
                               'feature2': [1.1, 6.2, 2.1],
                               'sc1': [2, 3, 4]})
        df_cors = Comparer.compute_correlations_between_versions(df_old, df_new)
        assert_almost_equal(df_cors.at['feature1', 'old_new'], -1.0)
        assert_almost_equal(df_cors.at['feature2', 'old_new'], 1.0)
        assert_equal(df_cors.at['feature1', 'human_old'], pearsonr(df_old['feature1'],
                                                                   df_old['sc1'])[0])
        assert_equal(df_cors.at['feature1', 'human_new'], pearsonr(df_new['feature1'],
                                                                   df_new['sc1'])[0])
        assert_equal(df_cors.at['feature1', "N"], 3) 
Example #5
Source File: test_comparer.py    From rsmtool with Apache License 2.0
def test_compute_correlations_between_versions_custom_columns(self):
        df_old = pd.DataFrame({'id': ['a', 'b', 'c'],
                               'feature1': [1.3, 1.5, 2.1],
                               'feature2': [1.1, 6.2, 2.1],
                               'r1': [2, 3, 4]})
        df_new = pd.DataFrame({'id': ['a', 'b', 'c'],
                               'feature1': [-1.3, -1.5, -2.1],
                               'feature2': [1.1, 6.2, 2.1],
                               'r1': [2, 3, 4]})

        df_cors = Comparer.compute_correlations_between_versions(df_old,
                                                                 df_new,
                                                                 human_score='r1',
                                                                 id_column='id')

        assert_almost_equal(df_cors.at['feature1', 'old_new'], -1.0)
        assert_almost_equal(df_cors.at['feature2', 'old_new'], 1.0)
        assert_equal(df_cors.at['feature1', 'human_old'], pearsonr(df_old['feature1'],
                                                                   df_old['r1'])[0])
        assert_equal(df_cors.at['feature1', 'human_new'], pearsonr(df_new['feature1'],
                                                                   df_new['r1'])[0])
        assert_equal(df_cors.at['feature1', "N"], 3) 
Example #6
Source File: summary.py    From neleval with Apache License 2.0
def __call__(self):
        all_results = np.empty((len(self.systems), len(self.measures)))
        # TODO: parallelise?
        for system, sys_results in zip(self.systems, all_results):
            if self.gold is None:
                result_dict = Evaluate.read_tab_format(utf8_open(system))
            else:
                result_dict = Evaluate(system, self.gold, measures=self.measures, fmt='none')()
            sys_results[...] = [result_dict[measure]['fscore'] for measure in self.measures]

        self.all_results = all_results

        correlations = {}
        scores_by_measure = list(zip(self.measures, all_results.T))  # list: it is iterated twice below
        for (measure_i, scores_i), (measure_j, scores_j) in _pairs(scores_by_measure):
            correlations[measure_i, measure_j] = {'pearson': stats.pearsonr(scores_i, scores_j),
                                                  'spearman': stats.spearmanr(scores_i, scores_j),
                                                  'kendall': stats.kendalltau(scores_i, scores_j)}

        quartiles = {}
        for measure_i, scores_i in scores_by_measure:
            quartiles[measure_i] = np.percentile(scores_i, [0, 25, 50, 75, 100])

        return self.format(self, {'quartiles': quartiles, 'correlations': correlations}) 
Example #7
Source File: create_retrosynthesis_plots.py    From molecule-chef with GNU General Public License v3.0
def produce_the_kde_plot(cycles, color, save_name):
    ground_truth_and_suggested = [(eval_code.get_best_qed_from_smiles_bag(elem['ground_truth_product']),
                                   eval_code.get_best_qed_from_smiles_bag(elem['suggested_product']))
                                         for elem in cycles]
    len_out = len(ground_truth_and_suggested)
    ground_truth_and_suggested = [elem for elem in ground_truth_and_suggested if elem[1] != -np.inf]
    len_filter = len(ground_truth_and_suggested)
    num_discarding = len_out - len_filter
    if num_discarding:
        warnings.warn(f"Discarding {num_discarding} our of {len_out} as no successful reconstruction")
    ground_truth_and_suggested = np.array(ground_truth_and_suggested)
    ground_truth_product_qed = ground_truth_and_suggested[:, 0]
    suggested_product_qed = ground_truth_and_suggested[:, 1]

    g = sns.jointplot(x=ground_truth_product_qed, y=suggested_product_qed, kind="kde", color=color,
                      )
    g.set_axis_labels("product's QED", "reconstructed product's QED", fontsize=16)
    rsquare = lambda a, b: stats.pearsonr(ground_truth_product_qed, suggested_product_qed)[0] ** 2
    g = g.annotate(rsquare, template="{stat}: {val:.2f}",
                   stat="$R^2$", loc="upper left", fontsize=12)
    print(f"Rsquare: {stats.pearsonr(ground_truth_product_qed, suggested_product_qed)[0] ** 2}")
    print(f"scipystats: {stats.linregress(ground_truth_product_qed, suggested_product_qed)}")
    plt.tight_layout()
    plt.savefig(f"{save_name}.pdf") 
Example #8
Source File: hypothesis_test.py    From fairtest with Apache License 2.0
def permutation_test_corr(x, y, num_samples=10000):
    """
    Monte-Carlo permutation test for correlation

    Parameters
    ----------
    x :
        Values for the first dimension

    y :
        Values for the second dimension

    num_samples :
        the number of random permutations to perform

    Returns
    -------
    pval :
        the p-value

    References
    ----------
    https://en.wikipedia.org/wiki/Resampling_(statistics)
    """
    x = np.array(x, dtype='float')
    y = np.array(y, dtype='float')

    obs_0, _ = stats.pearsonr(x, y)
    k = 0
    z = np.concatenate([x, y])
    for _ in range(num_samples):
        np.random.shuffle(z)
        k += abs(obs_0) < abs(stats.pearsonr(z[:len(x)], z[len(x):])[0])
    pval = (1.0*k) / num_samples
    return max(pval, 1.0/num_samples) 
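
A quick illustrative call of the function above (assuming it is in scope), on synthetic data with a built-in positive correlation:

import numpy as np

rng = np.random.RandomState(0)
x = rng.normal(size=100)
y = 0.5 * x + rng.normal(size=100)  # correlated with x by construction

# the observed correlation is rarely matched by random permutations,
# so the returned p-value is small (bounded below by 1/num_samples)
print(permutation_test_corr(x, y, num_samples=1000))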
Example #9
Source File: random_test.py    From tick with BSD 3-Clause "New" or "Revised" License
def test_parallel_create_independant_random(self):
        """...Test that random number generator creates independant
        samples in a multithreaded environment
        """

        for thread_type in self.thread_types:
            samples = self._generate_samples_in_parallel(
                parallelization_type=thread_type)

            # We check that no generated sample line is identical to the one
            # that follows it
            following_samples_are_different = \
                np.prod(np.linalg.norm(samples[:-1] - samples[1:], axis=1) > 0)
            self.assertEqual(
                following_samples_are_different, 1,
                "Two samples generated in parallel are identical")

            # We check that our generated samples are not correlated
            for (i, sample_i), (j, sample_j) in \
                    itertools.product(enumerate(samples), enumerate(samples)):
                if i != j:
                    corr_coeff = stats.pearsonr(sample_i, sample_j)[0]
                    self.assertLess(np.abs(corr_coeff), 0.1) 
Example #10
Source File: accuracy.py    From basenji with Apache License 2.0
def pearsonr(self, log=False, pseudocount=1, clip=None):
    """ Compute target PearsonR vector. """

    pcor = np.zeros(self.num_targets)

    for ti in range(self.num_targets):
      if self.targets_na is not None:
        preds_ti = self.preds[~self.targets_na, ti].astype('float64')
        targets_ti = self.targets[~self.targets_na, ti].astype('float64')
      else:
        preds_ti = self.preds[:, :, ti].flatten().astype('float64')
        targets_ti = self.targets[:, :, ti].flatten().astype('float64')

      if clip is not None:
        preds_ti = np.clip(preds_ti, 0, clip)
        targets_ti = np.clip(targets_ti, 0, clip)

      if log:
        preds_ti = np.log2(preds_ti + pseudocount)
        targets_ti = np.log2(targets_ti + pseudocount)

      pc, _ = stats.pearsonr(targets_ti, preds_ti)
      pcor[ti] = pc

    return pcor 
Example #11
Source File: similarity_test.py    From kor2vec with MIT License
def word_sim_test(filename, pos_vectors):
    delim = ','
    actual_sim_list, pred_sim_list = [], []
    missed = 0

    with open(filename, 'r') as pairs:
        for pair in pairs:
            w1, w2, actual_sim = pair.strip().split(delim)

            try:
                w1_vec = create_word_vector(w1, pos_vectors)
                w2_vec = create_word_vector(w2, pos_vectors)
                pred = float(np.inner(w1_vec, w2_vec))
                actual_sim_list.append(float(actual_sim))
                pred_sim_list.append(pred)

            except KeyError:
                missed += 1

    spearman, _ = st.spearmanr(actual_sim_list, pred_sim_list)
    pearson, _ = st.pearsonr(actual_sim_list, pred_sim_list)

    return spearman, pearson, missed 
Example #12
Source File: time_align.py    From scanorama with MIT License
def time_dist(datasets_dimred, time):
    time_dist = euclidean_distances(time, time)

    time_dists, scores = [], []
    for i in range(time_dist.shape[0]):
        for j in range(time_dist.shape[1]):
            if i >= j:
                continue
            score = np.mean(euclidean_distances(
                datasets_dimred[i], datasets_dimred[j]
            ))
            time_dists.append(time_dist[i, j])
            scores.append(score)

    print('Spearman rho = {}'.format(spearmanr(time_dists, scores)))
    print('Pearson rho = {}'.format(pearsonr(time_dists, scores))) 
Example #13
Source File: test_stats.py    From Computable with MIT License
def test_pHUGETINY(self):
        y = stats.pearsonr(HUGE,TINY)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #14
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_pXROUND(self):
        y = stats.pearsonr(X,ROUND)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #15
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_pXBIG(self):
        y = stats.pearsonr(X,BIG)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #16
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_pXX(self):
        y = stats.pearsonr(X,X)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #17
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License
def test_pearsonr(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r, p = stats.pearsonr(x, y)
            rm, pm = stats.mstats.pearsonr(xm, ym)

            assert_almost_equal(r, rm, decimal=14)
            assert_almost_equal(p, pm, decimal=14) 
Example #18
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_pXLITTLE(self):
        y = stats.pearsonr(X,LITTLE)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #19
Source File: test_stats.py    From Computable with MIT License
def test_pLITTLEROUND(self):
        y = stats.pearsonr(LITTLE,ROUND)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #20
Source File: three_tissue_response.py    From dmipy with MIT License
def _cost_function(threshold, image):
    "The cost function used by the optimal_threshold function."
    rho = -pearsonr(image, image > threshold)[0]
    return rho 
Example #21
Source File: test_stats.py    From Computable with MIT License
def test_pHUGEHUGE(self):
        y = stats.pearsonr(HUGE,HUGE)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #22
Source File: metrics.py    From DL-text with MIT License
def eval_sick(model,X_test_l,X_test_r,test_score):
    #r = np.arange(1,6)
    pred = model.predict([X_test_l,X_test_r])*4+1
    pred = [i[0] for i in pred]
    pred = np.array(pred)
    test_score = np.array(test_score)*4+1
    sp_coef = measures.spearmanr(pred,test_score)[0]
    per_coef = measures.pearsonr(pred,test_score)[0]
    mse_coef = np.mean(np.square(pred-test_score))
    
    return sp_coef, per_coef, mse_coef 
Example #23
Source File: test_stats.py    From Computable with MIT License
def test_pTINYROUND(self):
        y = stats.pearsonr(TINY,ROUND)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #24
Source File: demo_corr.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License
def _plot_correlation_func(x, y):

    r, p = pearsonr(x, y)
    title = "Cor($X_1$, $X_2$) = %.3f" % r
    pylab.scatter(x, y)
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    f1 = scipy.poly1d(scipy.polyfit(x, y, 1))
    pylab.plot(x, f1(x), "r--", linewidth=2)
    # pylab.xticks([w*7*24 for w in [0,1,2,3,4]], ['week %i'%(w+1) for w in
    # [0,1,2,3,4]]) 
Example #25
Source File: test_stats.py    From Computable with MIT License
def test_pTINYTINY(self):
        y = stats.pearsonr(TINY,TINY)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #26
Source File: stat_models.py    From pyod with BSD 2-Clause "Simplified" License
def pearsonr_mat(mat, w=None):
    """Utility function to calculate pearson matrix (row-wise).

    Parameters
    ----------
    mat : numpy array of shape (n_samples, n_features)
        Input matrix.

    w : numpy array of shape (n_features,)
        Weights.

    Returns
    -------
    pear_mat : numpy array of shape (n_samples, n_samples)
        Row-wise pearson score matrix.

    """
    mat = check_array(mat)
    n_row = mat.shape[0]
    n_col = mat.shape[1]
    pear_mat = np.full([n_row, n_row], 1).astype(float)

    if w is not None:
        for cx in range(n_row):
            for cy in range(cx + 1, n_row):
                curr_pear = wpearsonr(mat[cx, :], mat[cy, :], w)
                pear_mat[cx, cy] = curr_pear
                pear_mat[cy, cx] = curr_pear
    else:
        for cx in range(n_row):  # iterate over sample rows (pairwise row correlations)
            for cy in range(cx + 1, n_row):
                curr_pear = pearsonr(mat[cx, :], mat[cy, :])[0]
                pear_mat[cx, cy] = curr_pear
                pear_mat[cy, cx] = curr_pear

    return pear_mat 
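
In the unweighted case the result should agree with NumPy's row-wise correlation matrix, which makes a convenient sanity check (a sketch that assumes pearsonr_mat above is importable):

import numpy as np

rng = np.random.RandomState(42)
mat = rng.rand(5, 10)

# np.corrcoef also computes pairwise Pearson correlations between rows
print(np.allclose(pearsonr_mat(mat), np.corrcoef(mat)))  # expected: True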
Example #27
Source File: binary.py    From brats_segmentation-pytorch with MIT License
def volume_change_correlation(results, references):
    r"""计算二值图像的体素改变的相关系数
    Volume change correlation.
    
    Computes the linear correlation of change in binary object volume between
    the contents of the successive binary images supplied. Measured through
    the Pearson product-moment correlation coefficient. 
    
    Parameters
    ----------
    results : sequence of array_like
        Ordered list of input data containing objects. Each array_like will be
        converted into binary: background where 0, object everywhere else.
    references : sequence of array_like
        Ordered list of input data containing objects. Each array_like will be
        converted into binary: background where 0, object everywhere else.
        The order must be the same as for ``results``.
    
    Returns
    -------
    r : float
        The correlation coefficient between -1 and 1.
    p : float
        The two-side p value.
        
    """
    # convert the inputs to at least 2-D binary arrays
    # (plain bool replaces the removed numpy.bool alias)
    results = numpy.atleast_2d(numpy.array(results).astype(bool))
    references = numpy.atleast_2d(numpy.array(references).astype(bool))
    
    results_volumes = numpy.asarray([numpy.count_nonzero(r) for r in results])
    references_volumes = numpy.asarray([numpy.count_nonzero(r) for r in references])
    
    results_volumes_changes = results_volumes[1:] - results_volumes[:-1]
    references_volumes_changes = references_volumes[1:] - references_volumes[:-1] 
    
    return pearsonr(results_volumes_changes, references_volumes_changes) # returns (Pearson's correlation coefficient, 2-tailed p-value) 
Example #28
Source File: test_stats.py    From Computable with MIT License
def test_r_exactly_neg1(self):
        a = arange(3.0)
        b = -a
        r, prob = stats.pearsonr(a,b)
        assert_equal(r, -1.0)
        assert_equal(prob, 0.0) 
Example #29
Source File: test_stats.py    From Computable with MIT License
def test_r_exactly_pos1(self):
        a = arange(3.0)
        b = a
        r, prob = stats.pearsonr(a,b)
        assert_equal(r, 1.0)
        assert_equal(prob, 0.0) 
Example #30
Source File: test_stats.py    From Computable with MIT License
def test_pROUNDROUND(self):
        y = stats.pearsonr(ROUND,ROUND)
        r = y[0]
        assert_approx_equal(r,1.0)