Python scipy.stats.ks_2samp() Examples
The following are 30 code examples of scipy.stats.ks_2samp(), drawn from open-source projects; the original project and source file are noted above each example. You may also want to check out all available functions and classes of the module scipy.stats.
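Before the project examples, here is a minimal, self-contained sketch of calling the two-sample test directly (not taken from any of the projects below; synthetic data, a reasonably recent NumPy/SciPy assumed):

import numpy as np
from scipy.stats import ks_2samp

rng = np.random.default_rng(0)
a = rng.normal(loc=0.0, scale=1.0, size=200)  # sample from N(0, 1)
b = rng.normal(loc=0.5, scale=1.0, size=200)  # sample from a shifted normal

statistic, p_value = ks_2samp(a, b)  # D statistic and two-sided p-value
print(statistic, p_value)  # a small p-value suggests the samples come from different distributions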
Example #1
Source File: test_mvknn.py From cgpm with Apache License 2.0 | 6 votes |
def test_conditional_indicator(knn_xz):
    # Simulate from the conditional distribution of x|z (see
    # generate_real_nominal_data) and perform a KS test at each of the
    # subpopulations at the six levels of z.
    data = np.asarray(knn_xz.data.values())
    indicators = sorted(set(data[:,1].astype(int)))
    _, ax = plt.subplots()
    ax.set_title('Conditional Simulation Of X Given Indicator Z')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data.
        samples_subpop = [s[0] for s in
            knn_xz.simulate(-1, [0], constraints={1:t}, N=len(data_subpop))]
        ax.scatter(
            np.repeat(t, len(data_subpop)) + .25,
            samples_subpop,
            color=gu.colors[t])
        # KS test.
        pvalue = ks_2samp(data_subpop[:,0], samples_subpop)[1]
        assert .1 < pvalue
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()
Example #2
Source File: compare_genomes.py From mCaller with MIT License | 6 votes |
def compare_by_position(bed1, bed2, xmfa):
    pos_dict = {}
    for i, bed in enumerate([bed1, bed2]):
        pos_dict[i] = {}
        with open(bed, 'r') as fi:
            for line in fi:
                # 2 1892198 1892199 TCMMTMTTMMM 0.5 - 16
                csome, start, end, motif, perc_meth, strand, num_reads, probabilities = tuple(line.split('\t'))
                pos_dict[i][(csome, start, end, strand)] = (
                    (perc_meth, num_reads),
                    np.asarray([float(p) for p in probabilities.strip().split(',')]))
    for pos in pos_dict[0]:
        if pos in pos_dict[1]:
            # Compare the per-read probabilities of the two samples at this position.
            try:
                u, pval = mannwhitneyu(pos_dict[0][pos][1], pos_dict[1][pos][1], alternative='two-sided')
            except ValueError:
                u, pval = 'none', 'identical'
            u2, pval2 = ranksums(pos_dict[0][pos][1], pos_dict[1][pos][1])
            try:
                t, pval3 = ttest_ind(pos_dict[0][pos][1], pos_dict[1][pos][1])
            except:
                t, pval3 = 'none', 'missing df'
            d, pval4 = ks_2samp(pos_dict[0][pos][1], pos_dict[1][pos][1])
            if pval4 < 0.9:
                print(pos, pos_dict[0][pos][0], pos_dict[1][pos][0], pval, pval2, pval3, pval4)
Example #3
Source File: test_weighted_statistics.py From pyABC with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_resample_deterministic():
    """Test the deterministic resampling routine."""
    nw = 50  # number of weighted points
    points = np.random.randn(nw)
    weights = np.random.rand(nw)
    weights /= np.sum(weights)

    n = 1000  # number of non-weighted points

    resampled_det = ws.resample_deterministic(points, weights, n, False)
    resampled = ws.resample(points, weights, n)

    # should be same distribution
    _, p = ks_2samp(resampled_det, resampled)
    assert p > 1e-2

    resampled_det2 = ws.resample_deterministic(points, weights, n, True)
    assert len(resampled_det2) == n
    _, p = ks_2samp(resampled_det2, resampled)
    assert p > 1e-2
Example #4
Source File: test_weighted_statistics.py From pyABC with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_resample():
    """Test that the resampling process yields consistent distributions,
    using a KS test.
    """
    nw = 50  # number of weighted points
    points = np.random.randn(nw)
    weights = np.random.rand(nw)
    weights /= np.sum(weights)

    n = 1000  # number of non-weighted points

    # sample twice from same samples
    resampled1 = ws.resample(points, weights, n)
    resampled2 = ws.resample(points, weights, n)

    # should be same distribution
    _, p = ks_2samp(resampled1, resampled2)
    assert p > 1e-2

    # use different points
    points3 = np.random.randn(nw)
    resampled3 = ws.resample(points3, weights, n)

    # should be different distributions
    _, p = ks_2samp(resampled1, resampled3)
    assert p < 1e-2
Example #5
Source File: stat_utils.py From causallib with Apache License 2.0 | 6 votes |
def calc_weighted_ks2samp(x, y, wx, wy):
    """
    Weighted Kolmogorov-Smirnov

    References:
        [1] https://stackoverflow.com/a/40059727
    """
    x_ix = np.argsort(x)
    y_ix = np.argsort(y)
    x, wx = x[x_ix], wx[x_ix]
    y, wy = y[y_ix], wy[y_ix]
    data = np.concatenate((x, y))
    wx_cum = np.hstack([0, wx.cumsum() / wx.sum()])
    wy_cum = np.hstack([0, wy.cumsum() / wy.sum()])
    # Align the "steps" between the two distributions so the differences will be well defined:
    x_align = wx_cum[[np.searchsorted(x, data, side="right")]]
    y_align = wy_cum[[np.searchsorted(y, data, side="right")]]
    stat = np.max(np.abs(x_align - y_align))
    # stat = ks_2samp(wx * x, wy * y)
    return stat
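A quick sanity check (an illustrative sketch, not part of causallib): with uniform weights the weighted ECDFs reduce to ordinary ECDFs, so the statistic above coincides with the plain scipy.stats.ks_2samp statistic. This assumes calc_weighted_ks2samp from the example above is in scope.

import numpy as np
from scipy.stats import ks_2samp

rng = np.random.default_rng(1)
x = rng.normal(size=300)
y = rng.normal(loc=0.3, size=200)
wx = np.ones_like(x)  # uniform weights
wy = np.ones_like(y)

weighted_stat = calc_weighted_ks2samp(x, y, wx, wy)  # helper defined above
plain_stat, _ = ks_2samp(x, y)
assert np.isclose(weighted_stat, plain_stat)  # equal-weight case matches the unweighted statistic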
Example #6
Source File: feature_selection.py From default-credit-card-prediction with MIT License | 6 votes |
def kolmogorov_smirnov_two_sample_test(X, y):
    """
    Performs the two sample Kolmogorov-Smirnov test, testing whether feature
    values of each class are drawn from identical distributions

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    """
    kolmogorov_smirnov = [[(0, 0)]] * len(X[0])
    # print kolmogorov_smirnov
    for feature_col in range(len(X[0])):
        ks_test_statistic, p_value = stats.ks_2samp(X[y == 0, feature_col], X[y == 1, feature_col])
        kolmogorov_smirnov[feature_col] = (ks_test_statistic, p_value)

    # debug
    for f in range(23):
        print(kolmogorov_smirnov[f])

    return kolmogorov_smirnov
Example #7
Source File: ks.py From alibi-detect with Apache License 2.0 | 6 votes |
def feature_score(self, X_ref: np.ndarray, X: np.ndarray) -> np.ndarray:
    """
    Compute K-S scores per feature.

    Parameters
    ----------
    X_ref
        Reference instances to compare distribution with.
    X
        Batch of instances.

    Returns
    -------
    Feature level drift scores.
    """
    X = X.reshape(X.shape[0], -1)
    X_ref = X_ref.reshape(X_ref.shape[0], -1)
    p_val = np.zeros(self.n_features, dtype=np.float32)
    for f in range(self.n_features):
        # TODO: update to 'exact' when bug fix is released in scipy 1.5
        p_val[f] = ks_2samp(X_ref[:, f], X[:, f],
                            alternative=self.alternative, mode='asymp')[1]
    return p_val
Example #8
Source File: test_multivariate.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def test_pairwise_distances(self):
    # Test that the distribution of pairwise distances is close to correct.
    np.random.seed(514)

    def random_ortho(dim):
        u, _s, v = np.linalg.svd(np.random.normal(size=(dim, dim)))
        return np.dot(u, v)

    for dim in range(2, 6):
        def generate_test_statistics(rvs, N=1000, eps=1e-10):
            stats = np.array([
                np.sum((rvs(dim=dim) - rvs(dim=dim))**2)
                for _ in range(N)
            ])
            # Add a bit of noise to account for numeric accuracy.
            stats += np.random.uniform(-eps, eps, size=stats.shape)
            return stats

        expected = generate_test_statistics(random_ortho)
        actual = generate_test_statistics(scipy.stats.ortho_group.rvs)

        _D, p = scipy.stats.ks_2samp(expected, actual)

        assert_array_less(.05, p)
Example #9
Source File: atlas3.py From ssbio with MIT License | 6 votes |
def get_pca_ks_stats(self, maxrange=5):
    """Get a dictionary of PC#: K-S test statistic for each pair of phenotypes."""
    pc_to_phenotype_pairs = {}

    num_components = self.principal_observations_df.shape[1]
    if num_components < maxrange:
        maxrange = num_components

    phenotypes = self.principal_observations_df.phenotype.unique().tolist()

    for i in range(0, maxrange):
        phenotype_pair_to_ks = {}
        for p1, p2 in combinations(phenotypes, 2):
            p1_pc = self.principal_observations_df[self.principal_observations_df.phenotype == p1].iloc[:, i].as_matrix()
            p2_pc = self.principal_observations_df[self.principal_observations_df.phenotype == p2].iloc[:, i].as_matrix()
            phenotype_pair_to_ks[(p1, p2)] = ks_2samp(p1_pc, p2_pc)
        pc_to_phenotype_pairs[i + 1] = phenotype_pair_to_ks

    return pc_to_phenotype_pairs
Example #10
Source File: test_mvknn.py From cgpm with Apache License 2.0 | 6 votes |
def test_joint(knn_xz):
    # Simulate from the joint distribution of x,z (see
    # generate_real_nominal_data) and perform a KS test at each of the
    # subpopulations at the six levels of z.
    data = np.asarray(knn_xz.data.values())
    indicators = sorted(set(data[:,1].astype(int)))
    joint_samples = knn_xz.simulate(-1, [0,1], N=len(data))
    _, ax = plt.subplots()
    ax.set_title('Joint Simulation')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data for indicator t.
        samples_subpop = [j[0] for j in joint_samples if j[1] == t]
        ax.scatter(
            np.add([t]*len(samples_subpop), .25),
            samples_subpop,
            color=gu.colors[t])
        # KS test.
        pvalue = ks_2samp(data_subpop[:,0], samples_subpop)[1]
        assert .05 < pvalue
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()
Example #11
Source File: test_mvkde.py From cgpm with Apache License 2.0 | 6 votes |
def test_joint(kde_xz):
    # Simulate from the joint distribution of x,z (see
    # generate_real_nominal_data) and perform a KS test at each of the
    # subpopulations at the six levels of z.
    data = np.asarray(kde_xz.data.values())
    indicators = sorted(set(data[:,1].astype(int)))
    joint_samples = kde_xz.simulate(-1, [0,1], N=len(data))
    _, ax = plt.subplots()
    ax.set_title('Joint Simulation')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data for indicator t.
        samples_subpop = [j[0] for j in joint_samples if j[1] == t]
        ax.scatter(
            np.add([t]*len(samples_subpop), .25),
            samples_subpop,
            color=gu.colors[t])
        # KS test.
        _, p = ks_2samp(data_subpop[:,0], samples_subpop)
        assert .05 < p
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()
Example #12
Source File: test_mvkde.py From cgpm with Apache License 2.0 | 6 votes |
def test_conditional_indicator(kde_xz):
    # Simulate from the conditional distribution of x|z (see
    # generate_real_nominal_data) and perform a KS test at each of the
    # subpopulations at the six levels of z.
    data = np.asarray(kde_xz.data.values())
    indicators = sorted(set(data[:,1].astype(int)))
    _, ax = plt.subplots()
    ax.set_title('Conditional Simulation Of X Given Indicator Z')
    for t in indicators:
        # Plot original data.
        data_subpop = data[data[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data.
        samples_subpop = [s[0] for s in
            kde_xz.simulate(-1, [0], {1:t}, None, N=len(data_subpop))]
        ax.scatter(
            np.repeat(t, len(data_subpop)) + .25,
            samples_subpop,
            color=gu.colors[t])
        # KS test.
        _, p = ks_2samp(data_subpop[:,0], samples_subpop)
        assert .1 < p
    ax.set_xlabel('z')
    ax.set_ylabel('x')
    ax.grid()
Example #13
Source File: test_normal_categorical.py From cgpm with Apache License 2.0 | 6 votes |
def test_joint(state):
    # Simulate from the joint distribution of (x,z).
    joint_samples = state.simulate(-1, [0,1], N=N_SAMPLES)
    _, ax = plt.subplots()
    ax.set_title('Joint Simulation')
    for t in INDICATORS:
        # Plot original data.
        data_subpop = DATA[DATA[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data for indicator t.
        samples_subpop = [j[0] for j in joint_samples if j[1] == t]
        ax.scatter(
            np.add([t]*len(samples_subpop), .25),
            samples_subpop,
            color=gu.colors[t])
        # KS test.
        pvalue = ks_2samp(data_subpop[:,0], samples_subpop)[1]
        assert .05 < pvalue
    ax.set_xlabel('Indicator')
    ax.set_ylabel('x')
    ax.grid()
Example #14
Source File: test_loom_simulate_bivariate_gaussian.py From bayeslite with Apache License 2.0 | 6 votes |
def test_simulate_y_from_partially_populated_fresh_row(seed):
    """Check that Loom conditions on partial observation in new rowid."""
    means = ((0, 20), (20, 0))
    sample_size = 50
    mix_ratio = [0.7, 0.3]
    table = 'data'
    with bayeslite.bayesdb_open(seed=seed) as bdb:
        sample_gaussians = axis_aligned_gaussians(
            means, sample_size, bdb._np_prng)
        samples = mix(sample_gaussians, mix_ratio, bdb._np_prng)

        register_loom(bdb)
        prepare_bdb(bdb, samples, table)

        rowid = insert_row(bdb, table, means[0][0], None)
        simulated_samples = simulate_from_rowid(
            bdb, table, 1, rowid, limit=sample_size)

        y_samples = [y for _x, y in sample_gaussians[0]]
        _statistic, p_value = stats.ks_2samp(y_samples, simulated_samples)
        assert 0.10 < p_value
Example #15
Source File: test_normal_categorical.py From cgpm with Apache License 2.0 | 6 votes |
def test_conditional_indicator(state):
    # Simulate from the conditional X|Z
    _, ax = plt.subplots()
    ax.set_title('Conditional Simulation Of Data X Given Indicator Z')
    for t in INDICATORS:
        # Plot original data.
        data_subpop = DATA[DATA[:,1] == t]
        ax.scatter(data_subpop[:,1], data_subpop[:,0], color=gu.colors[t])
        # Plot simulated data.
        samples_subpop = [s[0] for s in
            state.simulate(-1, [0], {1:t}, None, len(data_subpop))]
        ax.scatter(
            np.repeat(t, len(data_subpop)) + .25,
            samples_subpop,
            color=gu.colors[t])
        # KS test.
        pvalue = ks_2samp(data_subpop[:,0], samples_subpop)[1]
        assert .01 < pvalue
    ax.set_xlabel('Indicator')
    ax.set_ylabel('x')
    ax.grid()
Example #16
Source File: test_tableone.py From tableone with MIT License | 5 votes |
def mytest(*args):
    """
    Hypothesis test for test_self_defined_statistical_tests
    """
    mytest.__name__ = "Test name"
    _, pval = stats.ks_2samp(*args)
    return pval
Example #17
Source File: edgepy.py From edgePy with MIT License | 5 votes |
def ks_2_samples(self):
    """Run a 2-tailed Kolmogorov-Smirnov test on the DGEList object.

    Args:
        None.

    Returns:
        gene_details: a dictionary of dictionaries (key, gene), holding mean1
            and mean2 for the two groups
        gene_likelihood: a dictionary (key, gene), holding the p-value of the
            separation of the two groups
        group_types: list of the groups in order.
    """
    gene_likelihood1: Dict[Hashable, float] = {}
    group_types = set(self.dge_list.groups_list)
    group_types = list(group_types)

    group_filters: Dict[Hashable, Any] = {}
    gene_details: Dict[Hashable, Dict[Hashable, Any]] = {}
    for group in group_types:
        group_filters[group] = [g == group for g in self.dge_list.groups_list]

    for gene_idx, gene in enumerate(self.dge_list.genes):
        gene_row = self.dge_list.counts[gene_idx]
        if len(group_types) == 2:
            group_data1 = gene_row.compress(group_filters[group_types[0]])
            mean1 = np.mean(group_data1)
            group_data2 = gene_row.compress(group_filters[group_types[1]])
            mean2 = np.mean(group_data2)

            gene_likelihood1[gene] = ks_2samp(group_data1, group_data2)[1]

            gene_details[gene] = {'mean1': mean1, 'mean2': mean2}

    return gene_details, gene_likelihood1, group_types
Example #18
Source File: feature_selection.py From default-credit-card-prediction with MIT License | 5 votes |
def kolmogorov_smirnov_two_sample_test(sample_a, sample_b):
    """
    Performs the two sample Kolmogorov-Smirnov test, testing whether two
    samples are drawn from identical distributions

    Keyword arguments:
    sample_a -- The first sample
    sample_b -- The second sample
    """
    return stats.ks_2samp(sample_a, sample_b)
Example #19
Source File: metrics.py From toad with MIT License | 5 votes |
def KS(score, target):
    """calculate ks value

    Args:
        score (array-like): list of scores or probabilities that the model predicts
        target (array-like): list of real target

    Returns:
        float: the max KS value
    """
    mask = target == 1
    res = ks_2samp(score[mask], score[~mask])
    return res[0]
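An illustrative usage sketch (synthetic scores, not from toad): the KS metric above is simply the maximal ECDF separation between the scores of the two target classes.

import numpy as np
from scipy.stats import ks_2samp

rng = np.random.default_rng(42)
score = np.concatenate([rng.normal(0.35, 0.10, 500),   # scores for target == 0
                        rng.normal(0.65, 0.10, 500)])  # scores for target == 1
target = np.concatenate([np.zeros(500, dtype=int), np.ones(500, dtype=int)])

mask = target == 1
ks_value = ks_2samp(score[mask], score[~mask])[0]  # same computation as KS(score, target) above
print(ks_value)  # values near 1 indicate strong separation between the two classes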
Example #20
Source File: test_mvkde.py From cgpm with Apache License 2.0 | 5 votes |
def test_univariate_two_sample(i):
    # This test ensures posterior sampling of uni/bimodal dists on R. When the
    # plot is shown, a density curve overlays the samples which is useful for
    # seeing that logpdf/simulate agree.
    N_SAMPLES = 100

    rng = gu.gen_rng(2)

    # Synthetic samples.
    samples_train = SAMPLES[i](N_SAMPLES, rng)
    samples_test = SAMPLES[i](N_SAMPLES, rng)

    # Univariate KDE.
    kde = MultivariateKde([3], None, distargs={O: {ST: [N], SA: [{}]}}, rng=rng)

    # Incorporate observations.
    for rowid, x in enumerate(samples_train):
        kde.incorporate(rowid, {3: x})

    # Run inference.
    kde.transition()

    # Generate posterior samples.
    samples_gen = [s[3] for s in kde.simulate(-1, [3], N=N_SAMPLES)]

    # Plot comparison of all train, test, and generated samples.
    fig, ax = plt.subplots()
    ax.scatter(samples_train, [0]*len(samples_train), color='b', label='Train')
    ax.scatter(samples_gen, [1]*len(samples_gen), color='r', label='KDE')
    ax.scatter(samples_test, [2]*len(samples_test), color='g', label='Test')

    # Overlay the density function.
    xs = np.linspace(ax.get_xlim()[0], ax.get_xlim()[1], 200)
    pdfs = [kde.logpdf(-1, {3: x}) for x in xs]
    # Convert the pdfs from the range to 1 to 1.5 by rescaling.
    pdfs_plot = np.exp(pdfs) + 1
    pdfs_plot = (pdfs_plot / max(pdfs_plot)) * 1.5
    ax.plot(xs, pdfs_plot, color='k')

    # Clear up some labels.
    ax.set_title('Univariate KDE Posterior versus Generator')
    ax.set_xlabel('x')
    ax.set_yticklabels([])

    # Show the plot.
    ax.grid()
    plt.close()

    # KS test.
    _, p = ks_2samp(samples_test, samples_gen)
    assert .05 < p
Example #21
Source File: filters.py From causallib with Apache License 2.0 | 5 votes |
def compute_pvals(self, X, y):
    # TODO: export to stats_utils?
    is_y_binary = (len(np.unique(y)) == 2)
    # is_binary_feature = np.sum(((X != np.nanmin(X, axis=0)[np.newaxis, :]) &
    #                             (X != np.nanmax(X, axis=0)[np.newaxis, :])), axis=0) == 0
    is_binary_feature = areColumnsBinary(X)
    p_vals = np.zeros(X.shape[1])
    if is_y_binary:
        # Process non-binary columns:
        for i in np.where(~is_binary_feature)[0]:
            x0 = X.loc[y == 0, i]
            x1 = X.loc[y == 1, i]
            if self.is_linear:
                _, p_vals[i] = stats.ttest_ind(x0, x1)
            else:
                _, p_vals[i] = stats.ks_2samp(x0, x1)

        # Process binary features:
        _, p_vals[is_binary_feature] = feature_selection.chi2(X.loc[:, is_binary_feature], y)
    else:
        # Process non-binary features:
        _, p_vals[~is_binary_feature] = feature_selection.f_regression(X.loc[:, ~is_binary_feature], y)

        # Process binary features:
        y_mat = np.row_stack(y)
        for i in np.where(is_binary_feature)[0]:
            _, p_vals[i] = feature_selection.f_regression(y_mat, X.loc[:, i])
    return p_vals
Example #22
Source File: test_sample.py From pyPESTO with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_prior():
    """Check that priors are defined for sampling."""
    # define negative log posterior
    posterior_fun = pypesto.Objective(fun=negative_log_posterior)

    # define negative log prior
    prior_fun = pypesto.Objective(fun=negative_log_prior)

    # define pypesto prior object
    prior_object = pypesto.NegLogPriors(objectives=[prior_fun])

    # define pypesto problem using prior object
    test_problem = pypesto.Problem(objective=posterior_fun,
                                   x_priors_defs=prior_object,
                                   lb=-10, ub=10,
                                   x_names=['x'])

    sampler = sample.AdaptiveMetropolisSampler()

    result = sample.sample(test_problem, n_samples=1e4,
                           sampler=sampler, x0=np.array([0.]))

    # get log prior values of first chain
    logprior_trace = -result.sample_result.trace_neglogprior[0, :]

    # check that not all entries are zero
    assert (logprior_trace != 0.).any()

    # get samples of first chain
    samples = result.sample_result.trace_x[0, :, 0]

    # generate ground-truth samples
    rvs = norm.rvs(size=5000, loc=-1., scale=np.sqrt(0.7))

    # check sample distribution agreement with the ground-truth
    statistic, pval = ks_2samp(rvs, samples)
    print(statistic, pval)

    assert statistic < 0.1
Example #23
Source File: test_dc_stat_think.py From dc_stat_think with MIT License | 5 votes |
def test_ks_stat(x):
    theor_data = np.random.normal(0, 1, size=100)
    correct, _ = st.ks_2samp(x, theor_data)
    assert np.isclose(dcst.ks_stat(x, theor_data), correct)

    theor_data = np.random.exponential(1, size=100)
    correct, _ = st.ks_2samp(x, theor_data)
    assert np.isclose(dcst.ks_stat(x, theor_data), correct)

    theor_data = np.random.logistic(0, 1, size=100)
    correct, _ = st.ks_2samp(x, theor_data)
    assert np.isclose(dcst.ks_stat(x, theor_data), correct)
Example #24
Source File: test_dc_stat_think.py From dc_stat_think with MIT License | 5 votes |
def test_pandas_conversion(seed):
    df = pd.DataFrame({'a': [3, 2, 1, 4],
                       'b': [8, 6, 7, 5],
                       'c': [9.1, 10.1, 11.1, np.nan]})
    x, y = dcst.ecdf(df.loc[:, 'a'])
    assert (x == np.array([1, 2, 3, 4])).all()
    assert (y == np.array([0.25, 0.5, 0.75, 1.0])).all()

    x, y = dcst.ecdf(df.loc[:, 'c'])
    assert np.allclose(x, np.array([9.1, 10.1, 11.1]))
    assert np.allclose(y, np.array([1/3, 2/3, 1.0]))

    df = pd.DataFrame({
        'a': np.concatenate((np.random.normal(0, 1, size=10), [np.nan]*990)),
        'b': np.random.normal(0, 1, size=1000)})
    correct, _ = st.ks_2samp(df['a'].dropna(), df['b'])
    assert np.isclose(dcst.ks_stat(df['a'], df['b']), correct)

    df = pd.DataFrame({
        'a': np.concatenate((np.random.normal(0, 1, size=80), [np.nan]*20)),
        'b': np.random.normal(0, 1, size=100)})
    dcst_private._seed_numba(seed)
    correct = dcst.draw_bs_reps(df['a'].values, np.mean, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_bs_reps(df['a'], np.mean, size=100),
                       correct, atol=atol)

    dcst_private._seed_numba(seed)
    correct = dcst.draw_bs_reps(df['b'].values, np.mean, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_bs_reps(df['b'], np.mean, size=100),
                       correct, atol=atol)

    dcst_private._seed_numba(seed)
    correct = dcst.draw_perm_reps(df['a'].values, df['b'].values,
                                  dcst.diff_of_means, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_perm_reps(df['a'], df['b'],
                                           dcst.diff_of_means, size=100),
                       correct, atol=atol)
Example #25
Source File: _dp_verification.py From whitenoise-system with MIT License | 5 votes |
def ks_test(self, fD1, fD2):
    """
    K-S Two sample test between the repeated query results on neighboring datasets
    """
    return stats.ks_2samp(fD1, fD2)
Example #26
Source File: test_dissimilarity.py From flyingpigeon with Apache License 2.0 | 5 votes |
def test_1D_ks_2samp(self):
    # Compare with scipy.stats.ks_2samp
    x = np.random.randn(50) + 1
    y = np.random.randn(50)
    s, p = stats.ks_2samp(x, y)
    dm = dd.kolmogorov_smirnov(x, y)
    aaeq(dm, s, 3)
Example #27
Source File: sample_from_the_prior_test.py From bilby with MIT License | 5 votes |
def ks_2samp_wrapper(data1, data2):
    if version.parse(scipy.__version__) >= version.parse("1.3.0"):
        return ks_2samp(data1, data2, alternative="two-sided", mode="asymp")
    else:
        return ks_2samp(data1, data2)
Example #28
Source File: gw_utils_test.py From bilby with MIT License | 5 votes |
def test_conversion_gives_correct_prior(self) -> None:
    zeniths = self.samples["zenith"]
    azimuths = self.samples["azimuth"]
    times = self.samples["time"]
    args = zip(*[
        (zenith, azimuth, time, self.ifos)
        for zenith, azimuth, time in zip(zeniths, azimuths, times)
    ])
    ras, decs = zip(*map(bilby.gw.utils.zenith_azimuth_to_ra_dec, *args))
    self.assertGreaterEqual(ks_2samp(self.samples["ra"], ras).pvalue, 0.01)
    self.assertGreaterEqual(ks_2samp(self.samples["dec"], decs).pvalue, 0.01)
Example #29
Source File: burn_in.py From pycbc with GNU General Public License v3.0 | 5 votes |
def ks_test(samples1, samples2, threshold=0.9):
    """Applies a KS test to determine if two sets of samples are the same.

    The ks test is applied parameter-by-parameter. If the two-tailed p-value
    returned by the test is greater than ``threshold``, the samples are
    considered to be the same.

    Parameters
    ----------
    samples1 : dict
        Dictionary mapping parameters to the first set of samples.
    samples2 : dict
        Dictionary mapping parameters to the second set of samples.
    threshold : float
        The threshold to use for the p-value. Default is 0.9.

    Returns
    -------
    dict :
        Dictionary mapping parameter names to booleans indicating whether the
        given parameter passes the KS test.
    """
    is_the_same = {}
    assert set(samples1.keys()) == set(samples2.keys()), (
        "samples1 and 2 must have the same parameters")
    # iterate over the parameters
    for param in samples1:
        s1 = samples1[param]
        s2 = samples2[param]
        _, p_value = ks_2samp(s1, s2)
        is_the_same[param] = p_value > threshold
    return is_the_same
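An illustrative call (synthetic chains, assuming the ks_test function above is in scope); each parameter is tested separately and mapped to a boolean.

import numpy as np

rng = np.random.default_rng(7)
samples1 = {'mass': rng.normal(30.0, 2.0, 1000), 'spin': rng.uniform(0.0, 1.0, 1000)}
samples2 = {'mass': rng.normal(30.0, 2.0, 1000), 'spin': rng.uniform(0.0, 1.0, 1000)}

result = ks_test(samples1, samples2, threshold=0.9)
# result maps 'mass' and 'spin' to True where the two-sided p-value exceeds the threshold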
Example #30
Source File: kswin.py From scikit-multiflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def add_element(self, input_value):
    """ Add element to sliding window

    Adds an element on top of the sliding window and removes
    the oldest one from the window. Afterwards, the KS-test is performed.

    Parameters
    ----------
    input_value: ndarray
        New data sample the sliding window should add.
    """
    self.n += 1
    currentLength = self.window.shape[0]
    if currentLength >= self.window_size:
        self.window = np.delete(self.window, 0)
        rnd_window = np.random.choice(self.window[:-self.stat_size], self.stat_size)

        (st, self.p_value) = stats.ks_2samp(rnd_window,
                                            self.window[-self.stat_size:],
                                            mode="exact")

        if self.p_value <= self.alpha and st > 0.1:
            self.change_detected = True
            self.window = self.window[-self.stat_size:]
        else:
            self.change_detected = False
    else:  # Not enough samples in sliding window for a valid test
        self.change_detected = False

    self.window = np.concatenate([self.window, [input_value]])