Python scipy.stats.pearsonr() Examples

The following are 30 code examples of scipy.stats.pearsonr(), taken from open-source projects. The project and source file for each example are noted above its code. You may also want to check out the other available functions and classes of the scipy.stats module. pearsonr(x, y) computes the Pearson product-moment correlation coefficient between two equal-length sequences and returns the coefficient together with a two-sided p-value.
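For reference, here is a minimal, self-contained call (the sample data is illustrative):

import numpy as np
from scipy.stats import pearsonr

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([1.1, 1.9, 3.2, 3.8, 5.1])

# pearsonr returns the correlation coefficient r and the two-sided p-value
r, p = pearsonr(x, y)
print(r, p)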
Example #1
Source File: conftest.py    From NiBetaSeries with MIT License
def betaseries_file(tmpdir_factory,
                    deriv_betaseries_fname=deriv_betaseries_fname):
    bfile = tmpdir_factory.mktemp("beta").ensure(deriv_betaseries_fname)
    np.random.seed(3)
    num_trials = 40
    tgt_corr = 0.1
    bs1 = np.random.rand(num_trials)
    # create another betaseries with a target correlation
    bs2 = minimize(lambda x: abs(tgt_corr - pearsonr(bs1, x)[0]),
                   np.random.rand(num_trials)).x

    # stack the two beta series into a 4-D array (1 x 1 x 2 x num_trials)
    bs_data = np.array([[[bs1, bs2]]])

    # the nifti image
    bs_img = nib.Nifti1Image(bs_data, np.eye(4))
    bs_img.to_filename(str(bfile))

    return bfile 
Example #2
Source File: plot_functions.py    From idea_relations with MIT License
def joint_plot(x, y, xlabel=None,
               ylabel=None, xlim=None, ylim=None,
               loc="best", color='#0485d1',
               size=8, markersize=50, kind="kde",
               scatter_color="r"):
    with sns.axes_style("darkgrid"):
        if xlabel and ylabel:
            g = SubsampleJointGrid(xlabel, ylabel,
                    data=DataFrame(data={xlabel: x, ylabel: y}),
                    space=0.1, ratio=2, size=size, xlim=xlim, ylim=ylim)
        else:
            g = SubsampleJointGrid(x, y, size=size,
                    space=0.1, ratio=2, xlim=xlim, ylim=ylim)
        g.plot_joint(sns.kdeplot, shade=True, cmap="Blues")
        g.plot_sub_joint(plt.scatter, 1000, s=20, c=scatter_color, alpha=0.3)
        g.plot_marginals(sns.distplot, kde=False, rug=False)
        g.annotate(ss.pearsonr, fontsize=25, template="{stat} = {val:.2g}\np = {p:.2g}")
        g.ax_joint.set_yticklabels(g.ax_joint.get_yticks())
        g.ax_joint.set_xticklabels(g.ax_joint.get_xticks())
    return g 
Example #3
Source File: test_stats.py    From Computable with MIT License
def test_tie1(self):
        # Data
        x = [1.0, 2.0, 3.0, 4.0]
        y = [1.0, 2.0, 2.0, 3.0]
        # Ranks of the data, with tie-handling.
        xr = [1.0, 2.0, 3.0, 4.0]
        yr = [1.0, 2.5, 2.5, 4.0]
        # Result of spearmanr should be the same as applying
        # pearsonr to the ranks.
        sr = stats.spearmanr(x, y)
        pr = stats.pearsonr(xr, yr)
        assert_almost_equal(sr, pr)
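
For reference, the identity this test relies on can be checked directly with scipy.stats.rankdata (a standalone sketch, separate from the test suite above):

import numpy as np
from scipy.stats import pearsonr, spearmanr, rankdata

x = [1.0, 2.0, 3.0, 4.0]
y = [1.0, 2.0, 2.0, 3.0]

# Spearman's rho is Pearson's r computed on the (tie-averaged) ranks
rho = spearmanr(x, y)[0]
r_on_ranks = pearsonr(rankdata(x), rankdata(y))[0]
print(np.isclose(rho, r_on_ranks))  # True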


Example #4
Source File: test_comparer.py    From rsmtool with Apache License 2.0
def test_compute_correlations_between_versions_default_columns(self):
        df_old = pd.DataFrame({'spkitemid': ['a', 'b', 'c'],
                               'feature1': [1.3, 1.5, 2.1],
                               'feature2': [1.1, 6.2, 2.1],
                               'sc1': [2, 3, 4]})
        df_new = pd.DataFrame({'spkitemid': ['a', 'b', 'c'],
                               'feature1': [-1.3, -1.5, -2.1],
                               'feature2': [1.1, 6.2, 2.1],
                               'sc1': [2, 3, 4]})
        df_cors = Comparer.compute_correlations_between_versions(df_old, df_new)
        assert_almost_equal(df_cors.at['feature1', 'old_new'], -1.0)
        assert_almost_equal(df_cors.at['feature2', 'old_new'], 1.0)
        assert_equal(df_cors.at['feature1', 'human_old'], pearsonr(df_old['feature1'],
                                                                   df_old['sc1'])[0])
        assert_equal(df_cors.at['feature1', 'human_new'], pearsonr(df_new['feature1'],
                                                                   df_new['sc1'])[0])
        assert_equal(df_cors.at['feature1', "N"], 3) 
Example #5
Source File: test_comparer.py    From rsmtool with Apache License 2.0
def test_compute_correlations_between_versions_custom_columns(self):
        df_old = pd.DataFrame({'id': ['a', 'b', 'c'],
                               'feature1': [1.3, 1.5, 2.1],
                               'feature2': [1.1, 6.2, 2.1],
                               'r1': [2, 3, 4]})
        df_new = pd.DataFrame({'id': ['a', 'b', 'c'],
                               'feature1': [-1.3, -1.5, -2.1],
                               'feature2': [1.1, 6.2, 2.1],
                               'r1': [2, 3, 4]})

        df_cors = Comparer.compute_correlations_between_versions(df_old,
                                                                 df_new,
                                                                 human_score='r1',
                                                                 id_column='id')

        assert_almost_equal(df_cors.at['feature1', 'old_new'], -1.0)
        assert_almost_equal(df_cors.at['feature2', 'old_new'], 1.0)
        assert_equal(df_cors.at['feature1', 'human_old'], pearsonr(df_old['feature1'],
                                                                   df_old['r1'])[0])
        assert_equal(df_cors.at['feature1', 'human_new'], pearsonr(df_new['feature1'],
                                                                   df_new['r1'])[0])
        assert_equal(df_cors.at['feature1', "N"], 3) 
Example #6
Source File: summary.py    From neleval with Apache License 2.0
def __call__(self):
        all_results = np.empty((len(self.systems), len(self.measures)))
        # TODO: parallelise?
        for system, sys_results in zip(self.systems, all_results):
            if self.gold is None:
                result_dict = Evaluate.read_tab_format(utf8_open(system))
            else:
                result_dict = Evaluate(system, self.gold, measures=self.measures, fmt='none')()
            sys_results[...] = [result_dict[measure]['fscore'] for measure in self.measures]

        self.all_results = all_results

        correlations = {}
        scores_by_measure = list(zip(self.measures, all_results.T))  # list: it is iterated twice below
        for (measure_i, scores_i), (measure_j, scores_j) in _pairs(scores_by_measure):
            correlations[measure_i, measure_j] = {'pearson': stats.pearsonr(scores_i, scores_j),
                                                  'spearman': stats.spearmanr(scores_i, scores_j),
                                                  'kendall': stats.kendalltau(scores_i, scores_j)}

        quartiles = {}
        for measure_i, scores_i in scores_by_measure:
            quartiles[measure_i] = np.percentile(scores_i, [0, 25, 50, 75, 100])

        return self.format(self, {'quartiles': quartiles, 'correlations': correlations}) 
Example #7
Source File: create_retrosynthesis_plots.py    From molecule-chef with GNU General Public License v3.0
def produce_the_kde_plot(cycles, color, save_name):
    ground_truth_and_suggested = [(eval_code.get_best_qed_from_smiles_bag(elem['ground_truth_product']),
                                   eval_code.get_best_qed_from_smiles_bag(elem['suggested_product']))
                                         for elem in cycles]
    len_out = len(ground_truth_and_suggested)
    ground_truth_and_suggested = [elem for elem in ground_truth_and_suggested if elem[1] != -np.inf]
    len_filter = len(ground_truth_and_suggested)
    num_discarding = len_out - len_filter
    if num_discarding:
        warnings.warn(f"Discarding {num_discarding} our of {len_out} as no successful reconstruction")
    ground_truth_and_suggested = np.array(ground_truth_and_suggested)
    ground_truth_product_qed = ground_truth_and_suggested[:, 0]
    suggested_product_qed = ground_truth_and_suggested[:, 1]

    g = sns.jointplot(x=ground_truth_product_qed, y=suggested_product_qed, kind="kde", color=color,
                      )
    g.set_axis_labels("product's QED", "reconstructed product's QED", fontsize=16)
    rsquare = lambda a, b: stats.pearsonr(ground_truth_product_qed, suggested_product_qed)[0] ** 2
    g = g.annotate(rsquare, template="{stat}: {val:.2f}",
                   stat="$R^2$", loc="upper left", fontsize=12)
    print(f"Rsquare: {stats.pearsonr(ground_truth_product_qed, suggested_product_qed)[0] ** 2}")
    print(f"scipystats: {stats.linregress(ground_truth_product_qed, suggested_product_qed)}")
    plt.tight_layout()
    plt.savefig(f"{save_name}.pdf") 
Example #8
Source File: hypothesis_test.py    From fairtest with Apache License 2.0
def permutation_test_corr(x, y, num_samples=10000):
    """
    Monte-Carlo permutation test for correlation

    Parameters
    ----------
    x :
        Values for the first dimension

    y :
        Values for the second dimension

    num_samples :
        the number of random permutations to perform

    Returns
    -------
    pval :
        the p-value

    References
    ----------
    https://en.wikipedia.org/wiki/Resampling_(statistics)
    """
    x = np.array(x, dtype='float')
    y = np.array(y, dtype='float')

    obs_0, _ = stats.pearsonr(x, y)
    k = 0
    z = np.concatenate([x, y])
    for _ in range(num_samples):
        np.random.shuffle(z)
        k += abs(obs_0) < abs(stats.pearsonr(z[:len(x)], z[len(x):])[0])
    pval = (1.0*k) / num_samples
    return max(pval, 1.0/num_samples) 
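
A quick illustrative call of the function above (assuming it is in scope), on synthetic data with a built-in positive correlation:

import numpy as np

rng = np.random.RandomState(0)
x = rng.normal(size=100)
y = 0.5 * x + rng.normal(size=100)  # correlated with x by construction

# the observed correlation is rarely matched by random permutations,
# so the returned p-value is small (bounded below by 1/num_samples)
print(permutation_test_corr(x, y, num_samples=1000))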
Example #9
Source File: random_test.py    From tick with BSD 3-Clause "New" or "Revised" License
def test_parallel_create_independant_random(self):
        """...Test that random number generator creates independant
        samples in a multithreaded environment
        """

        for thread_type in self.thread_types:
            samples = self._generate_samples_in_parallel(
                parallelization_type=thread_type)

            # We check that no generated sample line is identical to the one
            # that follows it
            following_samples_are_different = \
                np.prod(np.linalg.norm(samples[:-1] - samples[1:], axis=1) > 0)
            self.assertEqual(
                following_samples_are_different, 1,
                "Two samples generated in parallel are identical")

            # We check that our generated samples are not correlated
            for (i, sample_i), (j, sample_j) in \
                    itertools.product(enumerate(samples), enumerate(samples)):
                if i != j:
                    corr_coeff = stats.pearsonr(sample_i, sample_j)[0]
                    self.assertLess(np.abs(corr_coeff), 0.1) 
Example #10
Source File: accuracy.py    From basenji with Apache License 2.0
def pearsonr(self, log=False, pseudocount=1, clip=None):
    """ Compute target PearsonR vector. """

    pcor = np.zeros(self.num_targets)

    for ti in range(self.num_targets):
      if self.targets_na is not None:
        preds_ti = self.preds[~self.targets_na, ti].astype('float64')
        targets_ti = self.targets[~self.targets_na, ti].astype('float64')
      else:
        preds_ti = self.preds[:, :, ti].flatten().astype('float64')
        targets_ti = self.targets[:, :, ti].flatten().astype('float64')

      if clip is not None:
        preds_ti = np.clip(preds_ti, 0, clip)
        targets_ti = np.clip(targets_ti, 0, clip)

      if log:
        preds_ti = np.log2(preds_ti + pseudocount)
        targets_ti = np.log2(targets_ti + pseudocount)

      pc, _ = stats.pearsonr(targets_ti, preds_ti)
      pcor[ti] = pc

    return pcor 
Example #11
Source File: similarity_test.py    From kor2vec with MIT License
def word_sim_test(filename, pos_vectors):
    delim = ','
    actual_sim_list, pred_sim_list = [], []
    missed = 0

    with open(filename, 'r') as pairs:
        for pair in pairs:
            w1, w2, actual_sim = pair.strip().split(delim)

            try:
                w1_vec = create_word_vector(w1, pos_vectors)
                w2_vec = create_word_vector(w2, pos_vectors)
                pred = float(np.inner(w1_vec, w2_vec))
                actual_sim_list.append(float(actual_sim))
                pred_sim_list.append(pred)

            except KeyError:
                missed += 1

    spearman, _ = st.spearmanr(actual_sim_list, pred_sim_list)
    pearson, _ = st.pearsonr(actual_sim_list, pred_sim_list)

    return spearman, pearson, missed 
Example #12
Source File: time_align.py    From scanorama with MIT License
def time_dist(datasets_dimred, time):
    time_dist = euclidean_distances(time, time)

    time_dists, scores = [], []
    for i in range(time_dist.shape[0]):
        for j in range(time_dist.shape[1]):
            if i >= j:
                continue
            score = np.mean(euclidean_distances(
                datasets_dimred[i], datasets_dimred[j]
            ))
            time_dists.append(time_dist[i, j])
            scores.append(score)

    print('Spearman rho = {}'.format(spearmanr(time_dists, scores)))
    print('Pearson rho = {}'.format(pearsonr(time_dists, scores))) 
Example #13
Source File: test_stats.py    From Computable with MIT License
def test_pHUGETINY(self):
        y = stats.pearsonr(HUGE,TINY)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #14
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_pXROUND(self):
        y = stats.pearsonr(X,ROUND)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #15
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_pXBIG(self):
        y = stats.pearsonr(X,BIG)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #16
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_pXX(self):
        y = stats.pearsonr(X,X)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #17
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License
def test_pearsonr(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r, p = stats.pearsonr(x, y)
            rm, pm = stats.mstats.pearsonr(xm, ym)

            assert_almost_equal(r, rm, decimal=14)
            assert_almost_equal(p, pm, decimal=14) 
Example #18
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_pXLITTLE(self):
        y = stats.pearsonr(X,LITTLE)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #19
Source File: test_stats.py    From Computable with MIT License
def test_pLITTLEROUND(self):
        y = stats.pearsonr(LITTLE,ROUND)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #20
Source File: three_tissue_response.py    From dmipy with MIT License
def _cost_function(threshold, image):
    "The cost function used by the optimal_threshold function."
    rho = -pearsonr(image, image > threshold)[0]
    return rho 
Example #21
Source File: test_stats.py    From Computable with MIT License
def test_pHUGEHUGE(self):
        y = stats.pearsonr(HUGE,HUGE)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #22
Source File: metrics.py    From DL-text with MIT License
def eval_sick(model,X_test_l,X_test_r,test_score):
    #r = np.arange(1,6)
    pred = model.predict([X_test_l,X_test_r])*4+1
    pred = [i[0] for i in pred]
    pred = np.array(pred)
    test_score = np.array(test_score)*4+1
    sp_coef = measures.spearmanr(pred,test_score)[0]
    per_coef = measures.pearsonr(pred,test_score)[0]
    mse_coef = np.mean(np.square(pred-test_score))
    
    return sp_coef, per_coef, mse_coef 
Example #23
Source File: test_stats.py    From Computable with MIT License
def test_pTINYROUND(self):
        y = stats.pearsonr(TINY,ROUND)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #24
Source File: demo_corr.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License
def _plot_correlation_func(x, y):

    r, p = pearsonr(x, y)
    title = "Cor($X_1$, $X_2$) = %.3f" % r
    pylab.scatter(x, y)
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    f1 = scipy.poly1d(scipy.polyfit(x, y, 1))
    pylab.plot(x, f1(x), "r--", linewidth=2)
    # pylab.xticks([w*7*24 for w in [0,1,2,3,4]], ['week %i'%(w+1) for w in
    # [0,1,2,3,4]]) 
Example #25
Source File: test_stats.py    From Computable with MIT License
def test_pTINYTINY(self):
        y = stats.pearsonr(TINY,TINY)
        r = y[0]
        assert_approx_equal(r,1.0) 
Example #26
Source File: stat_models.py    From pyod with BSD 2-Clause "Simplified" License
def pearsonr_mat(mat, w=None):
    """Utility function to calculate pearson matrix (row-wise).

    Parameters
    ----------
    mat : numpy array of shape (n_samples, n_features)
        Input matrix.

    w : numpy array of shape (n_features,)
        Weights.

    Returns
    -------
    pear_mat : numpy array of shape (n_samples, n_samples)
        Row-wise pearson score matrix.

    """
    mat = check_array(mat)
    n_row = mat.shape[0]
    n_col = mat.shape[1]
    pear_mat = np.full([n_row, n_row], 1).astype(float)

    if w is not None:
        for cx in range(n_row):
            for cy in range(cx + 1, n_row):
                curr_pear = wpearsonr(mat[cx, :], mat[cy, :], w)
                pear_mat[cx, cy] = curr_pear
                pear_mat[cy, cx] = curr_pear
    else:
        for cx in range(n_row):  # iterate over sample rows (pairwise row correlations)
            for cy in range(cx + 1, n_row):
                curr_pear = pearsonr(mat[cx, :], mat[cy, :])[0]
                pear_mat[cx, cy] = curr_pear
                pear_mat[cy, cx] = curr_pear

    return pear_mat 
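
In the unweighted case the result should agree with NumPy's row-wise correlation matrix, which makes a convenient sanity check (a sketch that assumes pearsonr_mat above is importable):

import numpy as np

rng = np.random.RandomState(42)
mat = rng.rand(5, 10)

# np.corrcoef also computes pairwise Pearson correlations between rows
print(np.allclose(pearsonr_mat(mat), np.corrcoef(mat)))  # expected: True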
Example #27
Source File: binary.py    From brats_segmentation-pytorch with MIT License
def volume_change_correlation(results, references):
    r"""计算二值图像的体素改变的相关系数
    Volume change correlation.
    
    Computes the linear correlation of change in binary object volume between
    the contents of the successive binary images supplied. Measured through
    the Pearson product-moment correlation coefficient. 
    
    Parameters
    ----------
    results : sequence of array_like
        Ordered list of input data containing objects. Each array_like will be
        converted into binary: background where 0, object everywhere else.
    references : sequence of array_like
        Ordered list of input data containing objects. Each array_like will be
        converted into binary: background where 0, object everywhere else.
        The order must be the same as for ``results``.
    
    Returns
    -------
    r : float
        The correlation coefficient between -1 and 1.
    p : float
        The two-side p value.
        
    """
    # convert the inputs to at least 2-D binary arrays
    # (plain bool replaces the removed numpy.bool alias)
    results = numpy.atleast_2d(numpy.array(results).astype(bool))
    references = numpy.atleast_2d(numpy.array(references).astype(bool))
    
    results_volumes = numpy.asarray([numpy.count_nonzero(r) for r in results])
    references_volumes = numpy.asarray([numpy.count_nonzero(r) for r in references])
    
    results_volumes_changes = results_volumes[1:] - results_volumes[:-1]
    references_volumes_changes = references_volumes[1:] - references_volumes[:-1] 
    
    return pearsonr(results_volumes_changes, references_volumes_changes) # returns (Pearson's correlation coefficient, 2-tailed p-value) 
Example #28
Source File: test_stats.py    From Computable with MIT License
def test_r_exactly_neg1(self):
        a = arange(3.0)
        b = -a
        r, prob = stats.pearsonr(a,b)
        assert_equal(r, -1.0)
        assert_equal(prob, 0.0) 
Example #29
Source File: test_stats.py    From Computable with MIT License
def test_r_exactly_pos1(self):
        a = arange(3.0)
        b = a
        r, prob = stats.pearsonr(a,b)
        assert_equal(r, 1.0)
        assert_equal(prob, 0.0) 
Example #30
Source File: test_stats.py    From Computable with MIT License
def test_pROUNDROUND(self):
        y = stats.pearsonr(ROUND,ROUND)
        r = y[0]
        assert_approx_equal(r,1.0)