Python scipy.stats.mstats.mquantiles() Examples
The following are 18 code examples of scipy.stats.mstats.mquantiles(), drawn from open source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions and classes of the module scipy.stats.mstats.
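For readers new to the function: mquantiles(a, prob, alphap, betap, axis, limit) computes empirical quantiles of array-like (possibly masked) data, with the plotting positions controlled by alphap and betap. A minimal sketch of a direct call; the array and probabilities below are made up purely for illustration:

import numpy as np
from scipy.stats.mstats import mquantiles

data = np.arange(1., 11.)                        # 1.0 .. 10.0, illustrative only
# Default plotting positions (alphap=0.4, betap=0.4)
print(mquantiles(data, prob=[0.25, 0.5, 0.75]))  # roughly [2.95, 5.5, 8.05]
# Quantiles down the columns of a 2-D array
table = data.reshape(5, 2)
print(mquantiles(table, prob=[0.1, 0.9], axis=0))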
Example #1
Source File: test_mstats_basic.py (from Computable, MIT License)

def test_mquantiles_limit_keyword(self):
    """Ticket #867"""
    data = np.array([[6., 7., 1.],
                     [47., 15., 2.],
                     [49., 36., 3.],
                     [15., 39., 4.],
                     [42., 40., -999.],
                     [41., 41., -999.],
                     [7., -999., -999.],
                     [39., -999., -999.],
                     [43., -999., -999.],
                     [40., -999., -999.],
                     [36., -999., -999.]])
    desired = [[19.2, 14.6, 1.45],
               [40.0, 37.5, 2.5],
               [42.8, 40.05, 3.55]]
    quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
    assert_almost_equal(quants, desired)
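The limit keyword used in this test makes mquantiles ignore values outside the given open interval, which is how the -999 sentinel entries are kept from distorting the column quantiles. A minimal, hypothetical 1-D illustration of the same idea:

import numpy as np
from scipy.stats.mstats import mquantiles

x = np.array([7., 36., 39., 40., 43., -999., -999.])         # -999 marks missing data
print(mquantiles(x, prob=[0.25, 0.5, 0.75]))                  # sentinels drag the quantiles down
print(mquantiles(x, prob=[0.25, 0.5, 0.75], limit=(0, 50)))   # sentinels ignored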
Example #2
Source File: test_mstats_basic.py (from GraphicDesignPatternByPython, MIT License)

def test_mquantiles_limit_keyword(self):
    # Regression test for Trac ticket #867
    data = np.array([[6., 7., 1.],
                     [47., 15., 2.],
                     [49., 36., 3.],
                     [15., 39., 4.],
                     [42., 40., -999.],
                     [41., 41., -999.],
                     [7., -999., -999.],
                     [39., -999., -999.],
                     [43., -999., -999.],
                     [40., -999., -999.],
                     [36., -999., -999.]])
    desired = [[19.2, 14.6, 1.45],
               [40.0, 37.5, 2.5],
               [42.8, 40.05, 3.55]]
    quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
    assert_almost_equal(quants, desired)
Example #3
Source File: kernel_extras.py (from vnpy_crypto, MIT License)

def _compute_sig(self):
    Y = self.endog
    X = self.exog
    b = self.estimator(Y, X)
    m = self.fform(X, b)
    n = np.shape(X)[0]
    resid = Y - m
    resid = resid - np.mean(resid)  # center residuals
    self.test_stat = self._compute_test_stat(resid)
    sqrt5 = np.sqrt(5.)
    fct1 = (1 - sqrt5) / 2.
    fct2 = (1 + sqrt5) / 2.
    u1 = fct1 * resid
    u2 = fct2 * resid
    r = fct2 / sqrt5
    I_dist = np.empty((self.nboot, 1))
    for j in range(self.nboot):
        u_boot = u2.copy()
        prob = np.random.uniform(0, 1, size=(n,))
        ind = prob < r
        u_boot[ind] = u1[ind]
        Y_boot = m + u_boot
        b_hat = self.estimator(Y_boot, X)
        m_hat = self.fform(X, b_hat)
        u_boot_hat = Y_boot - m_hat
        I_dist[j] = self._compute_test_stat(u_boot_hat)
    self.boots_results = I_dist
    sig = "Not Significant"
    if self.test_stat > mquantiles(I_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(I_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(I_dist, 0.99):
        sig = "***"
    return sig
Example #4
Source File: hmm.py (from sima, GNU General Public License v2.0)

def _threshold_gradient(im):
    """Indicate pixel locations with gradient below the bottom 10th percentile

    Parameters
    ----------
    im : array
        The mean intensity images for each channel.
        Size: (num_channels, num_rows, num_columns).

    Returns
    -------
    array
        Binary values indicating whether the magnitude of the gradient
        is below the 10th percentile. Same size as im.

    """
    if im.shape[0] > 1:
        # Calculate directional relative derivatives
        _, g_x, g_y = np.gradient(np.log(im))
    else:
        # Calculate directional relative derivatives
        g_x, g_y = np.gradient(np.log(im[0]))
        g_x = g_x.reshape([1, g_x.shape[0], g_x.shape[1]])
        g_y = g_y.reshape([1, g_y.shape[0], g_y.shape[1]])
    gradient_magnitudes = np.sqrt((g_x ** 2) + (g_y ** 2))
    below_threshold = []
    for chan in gradient_magnitudes:
        threshold = mquantiles(chan[np.isfinite(chan)].flatten(), [0.1])[0]
        below_threshold.append(chan < threshold)
    return np.array(below_threshold)
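The same percentile-as-threshold pattern works on any array: take the 10th percentile of the finite values with mquantiles and build a boolean mask from it. A small sketch with synthetic data (the array and names are illustrative, not from sima):

import numpy as np
from scipy.stats.mstats import mquantiles

grad = np.random.rand(64, 64)
grad[0, 0] = np.inf                                  # pretend one pixel is degenerate
finite = grad[np.isfinite(grad)]
threshold = mquantiles(finite.flatten(), [0.1])[0]   # bottom decile of finite values
mask = grad < threshold                              # True where the gradient is weak
print(mask.mean())                                   # roughly 0.1 of the pixels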
Example #5
Source File: _kernel_base.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion
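This is a robust scale estimate often used for normal-reference bandwidths: the interquartile range divided by 1.349 approximates the standard deviation of a normal sample, and taking the minimum with the plain standard deviation guards against heavy tails. A plain-NumPy cross-check of the same quantity; note that np.percentile's default interpolation differs slightly from mquantiles' default plotting positions, so small deviations are expected:

import numpy as np

data = np.random.randn(500, 3)
s1 = np.std(data, axis=0)
iqr = np.percentile(data, 75, axis=0) - np.percentile(data, 25, axis=0)
s2 = iqr / 1.349                 # IQR-based estimate of the standard deviation
dispersion = np.minimum(s1, s2)
print(dispersion)                # close to 1 for standard normal columns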
Example #6
Source File: kernel_regression.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _compute_sig(self):
    """Calculates the significance level of the variable tested"""
    m = self._est_cond_mean()
    Y = self.endog
    X = self.exog
    n = np.shape(X)[0]
    u = Y - m
    u = u - np.mean(u)  # center
    fct1 = (1 - 5**0.5) / 2.
    fct2 = (1 + 5**0.5) / 2.
    u1 = fct1 * u
    u2 = fct2 * u
    r = fct2 / (5 ** 0.5)
    I_dist = np.empty((self.nboot, 1))
    for j in range(self.nboot):
        u_boot = copy.deepcopy(u2)
        prob = np.random.uniform(0, 1, size=(n, 1))
        ind = prob < r
        u_boot[ind] = u1[ind]
        Y_boot = m + u_boot
        I_dist[j] = self._compute_test_stat(Y_boot, X)
    sig = "Not Significant"
    if self.test_stat > mquantiles(I_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(I_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(I_dist, 0.99):
        sig = "***"
    return sig
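The two constants here are the golden-ratio pair of Mammen's two-point wild-bootstrap distribution: each centered residual is multiplied by (1 - sqrt(5))/2 with probability (sqrt(5) + 1)/(2*sqrt(5)) and by (1 + sqrt(5))/2 otherwise, which keeps the perturbed residuals mean-zero with unchanged variance. A quick, purely illustrative empirical check of those two moments:

import numpy as np

sqrt5 = np.sqrt(5.)
fct1, fct2 = (1 - sqrt5) / 2., (1 + sqrt5) / 2.
r = fct2 / sqrt5                          # probability of drawing fct1

w = np.where(np.random.uniform(size=100000) < r, fct1, fct2)
print(w.mean(), (w**2).mean())            # approximately 0 and 1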
Example #7
Source File: kernel_regression.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _compute_sig(self):
    """
    Computes the significance value for the variable(s) tested.

    The empirical distribution of the test statistic is obtained through
    bootstrapping the sample. The null hypothesis is rejected if the test
    statistic is larger than the 90, 95, 99 percentiles.
    """
    t_dist = np.empty(shape=(self.nboot,))
    Y = self.endog
    X = copy.deepcopy(self.exog)
    n = np.shape(Y)[0]
    X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
    # Calculate the restricted mean. See p. 372 in [8]
    M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                  defaults=EstimatorSettings(efficient=False)).fit()[0]
    M = np.reshape(M, (n, 1))
    e = Y - M
    e = e - np.mean(e)  # recenter residuals
    for i in range(self.nboot):
        ind = np.random.random_integers(0, n - 1, size=(n, 1))
        e_boot = e[ind, 0]
        Y_boot = M + e_boot
        t_dist[i] = self._compute_test_stat(Y_boot, self.exog)
    self.t_dist = t_dist
    sig = "Not Significant"
    if self.test_stat > mquantiles(t_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(t_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(t_dist, 0.99):
        sig = "***"
    return sig
Example #8
Source File: kernel_extras.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _compute_sig(self):
    Y = self.endog
    X = self.exog
    b = self.estimator(Y, X)
    m = self.fform(X, b)
    n = np.shape(X)[0]
    resid = Y - m
    resid = resid - np.mean(resid)  # center residuals
    self.test_stat = self._compute_test_stat(resid)
    sqrt5 = np.sqrt(5.)
    fct1 = (1 - sqrt5) / 2.
    fct2 = (1 + sqrt5) / 2.
    u1 = fct1 * resid
    u2 = fct2 * resid
    r = fct2 / sqrt5
    I_dist = np.empty((self.nboot, 1))
    for j in range(self.nboot):
        u_boot = u2.copy()
        prob = np.random.uniform(0, 1, size=(n,))
        ind = prob < r
        u_boot[ind] = u1[ind]
        Y_boot = m + u_boot
        b_hat = self.estimator(Y_boot, X)
        m_hat = self.fform(X, b_hat)
        u_boot_hat = Y_boot - m_hat
        I_dist[j] = self._compute_test_stat(u_boot_hat)
    self.boots_results = I_dist
    sig = "Not Significant"
    if self.test_stat > mquantiles(I_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(I_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(I_dist, 0.99):
        sig = "***"
    return sig
Example #9
Source File: stat_qq_line.py (from plotnine, GNU General Public License v2.0)

def compute_group(cls, data, scales, **params):
    line_p = params['line_p']
    dparams = params['dparams']

    # Compute theoretical values
    df = stat_qq.compute_group(data, scales, **params)
    sample = df['sample'].values
    theoretical = df['theoretical'].values

    # Compute slope & intercept of the line through the quantiles
    cdist = get_continuous_distribution(params['distribution'])
    x_coords = cdist.ppf(line_p, *dparams)
    y_coords = mquantiles(sample, line_p)
    slope = (np.diff(y_coords) / np.diff(x_coords))[0]
    intercept = y_coords[0] - slope * x_coords[0]

    # Get x,y points that describe the line
    if params['fullrange'] and scales.x:
        x = scales.x.dimension()
    else:
        x = theoretical.min(), theoretical.max()

    x = np.asarray(x)
    y = slope * x + intercept
    data = pd.DataFrame({'x': x, 'y': y})
    return data
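Outside plotnine, the same slope/intercept construction needs only mquantiles for the sample quantiles and a distribution's ppf for the theoretical ones. A hedged sketch of a normal Q-Q line through the quartiles; the sample data here is synthetic:

import numpy as np
from scipy.stats import norm
from scipy.stats.mstats import mquantiles

sample = np.random.randn(200) * 2.0 + 5.0
line_p = [0.25, 0.75]                               # quartiles
x_coords = norm.ppf(line_p)                         # theoretical quantiles
y_coords = mquantiles(sample, line_p)               # sample quantiles
slope = (np.diff(y_coords) / np.diff(x_coords))[0]
intercept = y_coords[0] - slope * x_coords[0]
print(slope, intercept)                             # near 2.0 and 5.0 for this sample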
Example #10
Source File: stats_utils.py (from arviz, Apache License 2.0)

def quantile(ary, q, axis=None, limit=None):
    """Use same quantile function as R (Type 7)."""
    if limit is None:
        limit = tuple()
    return mquantiles(ary, q, alphap=1, betap=1, axis=axis, limit=limit)
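Setting alphap=1 and betap=1 selects the linear-interpolation plotting positions, i.e. R's default type 7 quantile, which is also what NumPy's percentile computes by default. A small, illustrative equivalence check:

import numpy as np
from scipy.stats.mstats import mquantiles

ary = np.random.rand(101)
q = [0.05, 0.5, 0.95]
a = np.asarray(mquantiles(ary, q, alphap=1, betap=1))
b = np.percentile(ary, [100 * p for p in q])   # NumPy default == R type 7
print(np.allclose(a, b))                       # True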
Example #11
Source File: _kernel_base.py (from vnpy_crypto, MIT License)

def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # IQR
    dispersion = np.minimum(s1, s2)
    return dispersion
Example #12
Source File: kernel_regression.py (from vnpy_crypto, MIT License)

def _compute_sig(self):
    """Calculates the significance level of the variable tested"""
    m = self._est_cond_mean()
    Y = self.endog
    X = self.exog
    n = np.shape(X)[0]
    u = Y - m
    u = u - np.mean(u)  # center
    fct1 = (1 - 5**0.5) / 2.
    fct2 = (1 + 5**0.5) / 2.
    u1 = fct1 * u
    u2 = fct2 * u
    r = fct2 / (5 ** 0.5)
    I_dist = np.empty((self.nboot, 1))
    for j in range(self.nboot):
        u_boot = copy.deepcopy(u2)
        prob = np.random.uniform(0, 1, size=(n, 1))
        ind = prob < r
        u_boot[ind] = u1[ind]
        Y_boot = m + u_boot
        I_dist[j] = self._compute_test_stat(Y_boot, X)
    sig = "Not Significant"
    if self.test_stat > mquantiles(I_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(I_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(I_dist, 0.99):
        sig = "***"
    return sig
Example #13
Source File: kernel_regression.py (from vnpy_crypto, MIT License)

def _compute_sig(self):
    """
    Computes the significance value for the variable(s) tested.

    The empirical distribution of the test statistic is obtained through
    bootstrapping the sample. The null hypothesis is rejected if the test
    statistic is larger than the 90, 95, 99 percentiles.
    """
    t_dist = np.empty(shape=(self.nboot,))
    Y = self.endog
    X = copy.deepcopy(self.exog)
    n = np.shape(Y)[0]
    X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
    # Calculate the restricted mean. See p. 372 in [8]
    M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
                  defaults=EstimatorSettings(efficient=False)).fit()[0]
    M = np.reshape(M, (n, 1))
    e = Y - M
    e = e - np.mean(e)  # recenter residuals
    for i in range(self.nboot):
        ind = np.random.random_integers(0, n - 1, size=(n, 1))
        e_boot = e[ind, 0]
        Y_boot = M + e_boot
        t_dist[i] = self._compute_test_stat(Y_boot, self.exog)
    self.t_dist = t_dist
    sig = "Not Significant"
    if self.test_stat > mquantiles(t_dist, 0.9):
        sig = "*"
    if self.test_stat > mquantiles(t_dist, 0.95):
        sig = "**"
    if self.test_stat > mquantiles(t_dist, 0.99):
        sig = "***"
    return sig
Example #14
Source File: partial_dependence.py (from Splunking-Crime, GNU Affero General Public License v3.0)

def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles`` of ``X``.

    The grid is generated by placing ``grid_resolution`` equally spaced
    points between the ``percentiles`` of each column of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes
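The mquantiles call is what pins the grid to the bulk of the data: with prob=(0.05, 0.95) and axis=0 it returns the per-column 5th and 95th percentiles, and np.linspace then fills grid_resolution points between them. A standalone sketch of just that step on random data (the array and names are illustrative):

import numpy as np
from scipy.stats.mstats import mquantiles

X = np.random.randn(200, 3)
emp_percentiles = mquantiles(X, prob=(0.05, 0.95), axis=0)
print(emp_percentiles.shape)        # (2, 3): one (low, high) pair per column
axis0 = np.linspace(emp_percentiles[0, 0], emp_percentiles[1, 0], num=100)
print(axis0[0], axis0[-1])          # grid endpoints for the first feature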
Example #15
Source File: partial_dependence.py (from Mastering-Elasticsearch-7.0, MIT License)

def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles`` of ``X``.

    The grid is generated by placing ``grid_resolution`` equally spaced
    points between the ``percentiles`` of each column of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes
Example #16
Source File: fixed_run.py (from opt-mmd, BSD 3-Clause "New" or "Revised" License)

def get_estimates(gen, sigmas=None, n_reps=100, n_null_samps=1000,
                  cache_size=64, rep_states=False, name=None,
                  save_samps=False, thresh_levels=(.2, .1, .05, .01)):
    if sigmas is None:
        sigmas = np.logspace(-1.7, 1.7, num=30)
    sigmas = np.asarray(sigmas)

    mmd = sg.QuadraticTimeMMD()
    mmd.set_num_null_samples(n_null_samps)
    mmd_mk = mmd.multikernel()
    for s in sigmas:
        mmd_mk.add_kernel(sg.GaussianKernel(cache_size, 2 * s**2))

    info = OrderedDict()
    for k in 'sigma rep mmd_est var_est p'.split():
        info[k] = []
    thresh_names = []
    for l in thresh_levels:
        s = 'thresh_{}'.format(l)
        thresh_names.append(s)
        info[s] = []
    if save_samps:
        info['samps'] = []
    thresh_prob = 1 - np.asarray(thresh_levels)

    bar = pb.ProgressBar()
    if name is not None:
        bar.start()
        bar.widgets.insert(0, '{} '.format(name))
    for rep in bar(xrange(n_reps)):
        if rep_states:
            rep = np.random.randint(0, 2**32)
            X, Y = gen(rs=rep)
        else:
            X, Y = gen()
        n = X.shape[0]
        assert Y.shape[0] == n

        mmd.set_p(sg.RealFeatures(X.T))
        mmd.set_q(sg.RealFeatures(Y.T))

        info['sigma'].extend(sigmas)
        info['rep'].extend([rep] * len(sigmas))

        stat = mmd_mk.compute_statistic()
        info['mmd_est'].extend(stat / (n / 2))

        samps = mmd_mk.sample_null()
        info['p'].extend(np.mean(samps >= stat, axis=0))
        if save_samps:
            info['samps'].extend(samps.T)

        info['var_est'].extend(mmd_mk.compute_variance_h1())

        threshes = np.asarray(mquantiles(samps, prob=thresh_prob, axis=0))
        for s, t in zip(thresh_names, threshes):
            info[s].extend(t)

    info = pd.DataFrame(info)
    info.set_index(['sigma', 'rep'], inplace=True)
    return info
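Here mquantiles turns the sampled null distribution into per-kernel rejection thresholds: samps has one column per kernel, and prob = 1 - thresh_levels picks the critical value for each significance level along axis 0. A simplified sketch with a fake Gaussian null; the numbers are illustrative, not produced by the MMD code above:

import numpy as np
from scipy.stats.mstats import mquantiles

thresh_levels = (.2, .1, .05, .01)
thresh_prob = 1 - np.asarray(thresh_levels)
samps = np.random.randn(1000, 30)              # null samples x kernels, made up
threshes = np.asarray(mquantiles(samps, prob=thresh_prob, axis=0))
print(threshes.shape)                          # (4, 30): one row per level
# A test statistic above threshes[2, k] would be significant at the 5% level
# for kernel k under this simulated null.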
Example #17
Source File: partial_dependence.py (from twitter-stock-recommendation, MIT License)

def _grid_from_X(X, percentiles=(0.05, 0.95), grid_resolution=100):
    """Generate a grid of points based on the ``percentiles`` of ``X``.

    The grid is generated by placing ``grid_resolution`` equally spaced
    points between the ``percentiles`` of each column of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    """
    if len(percentiles) != 2:
        raise ValueError('percentile must be tuple of len 2')
    if not all(0. <= x <= 1. for x in percentiles):
        raise ValueError('percentile values must be in [0, 1]')

    axes = []
    emp_percentiles = mquantiles(X, prob=percentiles, axis=0)
    for col in range(X.shape[1]):
        uniques = np.unique(X[:, col])
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution use unique vals
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            axis = np.linspace(emp_percentiles[0, col],
                               emp_percentiles[1, col],
                               num=grid_resolution, endpoint=True)
        axes.append(axis)

    return cartesian(axes), axes
Example #18
Source File: fithic.py (from fithic, MIT License)

def read_biases(infilename):
    global biasLowerBound
    global biasUpperBound
    startt = time.time()

    biasDic = {}
    rawBiases = []
    with gzip.open(infilename, 'rt') as infile:
        for line in infile:
            words = line.rstrip().split()
            chrom = words[0]; midPoint = int(words[1]); bias = float(words[2])
            if bias != 1.0:
                rawBiases.append(bias)

    botQ, med, topQ = mquantiles(rawBiases, prob=[0.05, 0.5, 0.95])
    with open(logfile, 'a') as log:
        log.write("5th quantile of biases: " + str(botQ) + "\n")
        log.write("50th quantile of biases: " + str(med) + "\n")
        log.write("95th quantile of biases: " + str(topQ) + "\n")

    totalC = 0
    discardC = 0
    with gzip.open(infilename, 'rt') as infile:
        for line in infile:
            words = line.rstrip().split()
            chrom = words[0]; midPoint = int(words[1]); bias = float(words[2])
            if bias < biasLowerBound or math.isnan(bias):
                bias = -1  # botQ
                discardC += 1
            elif bias > biasUpperBound:
                bias = -1  # topQ
                discardC += 1
            totalC += 1
            if chrom not in biasDic:
                biasDic[chrom] = {}
            if midPoint not in biasDic[chrom]:
                biasDic[chrom][midPoint] = bias

    with open(logfile, 'a') as log:
        log.write("Out of " + str(totalC) + " loci " + str(discardC) +
                  " were discarded with biases not in range [0.5 2]\n\n")

    endt = time.time()
    print("Bias file read. Time took %s" % (endt - startt))
    return biasDic  # from read_biases


#==================================
# function to compute the contact probabilities
# applied for intra-chromosomal interactions
#==================================
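The 5th/50th/95th percentiles are only logged here (the fixed bounds do the actual filtering), but the botQ/topQ comments suggest the quantiles could also serve as data-driven bounds. A compact sketch of using them that way; the data and the threshold policy are assumptions, not part of fithic:

import numpy as np
from scipy.stats.mstats import mquantiles

raw_biases = np.random.lognormal(mean=0.0, sigma=0.4, size=1000).tolist()
bot_q, med, top_q = mquantiles(raw_biases, prob=[0.05, 0.5, 0.95])
flagged = [b for b in raw_biases if b < bot_q or b > top_q]
print(len(flagged))                 # about 10% of the entries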