Python numpy.quantile() Examples
The following are 30 code examples of numpy.quantile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy, or try the search function.
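As a quick refresher before the examples: np.quantile(a, q) returns the q-th quantile of the data in a, where q is a probability (or sequence of probabilities) between 0 and 1, using linear interpolation by default. A minimal sketch:

import numpy as np

data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
print(np.quantile(data, 0.5))            # 3.0, the median
print(np.quantile(data, [0.25, 0.75]))   # [2. 4.], the lower and upper quartiles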
Example #1
Source File: classification_metric.py From FATE with Apache License 2.0 | 6 votes |
def prepare_confusion_mat(self, labels, scores, add_to_end=True):
    sorted_labels, sorted_scores = sort_score_and_label(labels, scores)
    score_threshold, cuts = None, None

    if self.cut_method == 'step':
        score_threshold, cuts = ThresholdCutter.cut_by_step(sorted_scores, steps=0.01)
        if add_to_end:
            score_threshold.append(min(score_threshold) - 0.001)
            cuts.append(1)

    elif self.cut_method == 'quantile':
        score_threshold = ThresholdCutter.cut_by_quantile(sorted_scores,
                                                          remove_duplicate=self.remove_duplicate)
        score_threshold = list(np.flip(score_threshold))

    confusion_mat = ConfusionMatrix.compute(sorted_labels, sorted_scores, score_threshold,
                                            ret=['tp', 'fp', 'fn', 'tn'])

    return confusion_mat, score_threshold, cuts
Example #2
Source File: base.py From kite with GNU General Public License v3.0 | 6 votes |
def setSymColormap(self):
    # the first colormap is defined and then immediately replaced by the second
    cmap = {'ticks':
            [[0., (0, 0, 0, 255)],
             [1e-3, (106, 0, 31, 255)],
             [.5, (255, 255, 255, 255)],
             [1., (8, 54, 104, 255)]],
            'mode': 'rgb'}
    cmap = {'ticks':
            [[0., (0, 0, 0)],
             [1e-3, (172, 56, 56)],
             [.5, (255, 255, 255)],
             [1., (51, 53, 120)]],
            'mode': 'rgb'}

    relevant_data = num.abs(self._plot.data[num.isfinite(self._plot.data)])
    if num.any(relevant_data):
        lvl_max = num.quantile(relevant_data, .999)
    else:
        lvl_max = 1.

    self.gradient.restoreState(cmap)
    self.setLevels(-lvl_max, lvl_max)
Example #3
Source File: clustering.py From retentioneering-tools with Mozilla Public License 2.0 | 6 votes |
def find_best_eps(data, q=0.05):
    """
    Find the best maximal distance (eps) between dots for DBSCAN clustering.

    Parameters
    -------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    q: float, optional
        Quantile of nearest neighbor positive distance between dots; this value is used as eps. Default: ``0.05``

    Returns
    -------
    Optimal eps

    Return type
    -------
    Float
    """
    nn = NearestNeighbors()
    nn.fit(data)
    dist = nn.kneighbors()[0]
    dist = dist.flatten()
    dist = dist[dist > 0]
    return np.quantile(dist, q)
Example #4
Source File: approximate_bayesian_computation.py From pygom with GNU General Public License v2.0 | 6 votes |
def get_tolerance(self, g):
    """
    Parameters
    ----------
    g: integer
        generation number of the ABC-SMC/MNN algorithm
    """
    # choose the tolerance given the generation number and how q and tol are defined
    if g == 0:
        if not hasattr(self.tol, "__len__"):
            return self.tol
        else:
            return self.tol[0]
    else:
        if self.q is not None:
            return np.quantile(self.dist, self.q)
        else:
            return self.tol[g]
Example #5
Source File: util.py From metrics-mvp with MIT License | 6 votes |
def quantile_sorted(sorted_arr, quantile):
    # For small arrays (less than about 4000 items) np.quantile is significantly
    # slower than sorting the array and picking the quantile out by index. Computing
    # quantiles this way significantly improves performance for computing
    # trip time stats across all stops.

    max_index = len(sorted_arr) - 1
    quantile_index = max_index * quantile
    quantile_index_int = int(quantile_index)
    quantile_index_fractional = quantile_index - quantile_index_int

    quantile_lower = sorted_arr[quantile_index_int]
    if quantile_index_fractional > 0:
        quantile_upper = sorted_arr[quantile_index_int + 1]
        return quantile_lower + (quantile_upper - quantile_lower) * quantile_index_fractional
    else:
        return quantile_lower
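A quick sanity check for the helper above (a sketch, assuming quantile_sorted is in scope): on an already-sorted array, the manual interpolation agrees with np.quantile's default linear interpolation.

import numpy as np

sorted_arr = np.sort(np.random.default_rng(42).random(1000))
for q in (0.0, 0.25, 0.5, 0.9, 1.0):
    # both compute the same linearly interpolated quantile
    assert np.isclose(quantile_sorted(sorted_arr, q), np.quantile(sorted_arr, q))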
Example #6
Source File: test_stats_utils.py From arviz with Apache License 2.0 | 6 votes |
def test_wrap_ufunc_output(quantile, arg):
    ary = np.random.randn(4, 100)
    n_output = len(quantile)
    if arg:
        res = wrap_xarray_ufunc(
            np.quantile, ary, ufunc_kwargs={"n_output": n_output}, func_args=(quantile,)
        )
    else:
        if n_output == 1:
            res = wrap_xarray_ufunc(np.quantile, ary, func_kwargs={"q": quantile})
        else:
            res = wrap_xarray_ufunc(
                np.quantile, ary, ufunc_kwargs={"n_output": n_output}, func_kwargs={"q": quantile}
            )
    if n_output == 1:
        assert not isinstance(res, tuple)
    else:
        assert isinstance(res, tuple)
        assert len(res) == n_output
Example #7
Source File: quantile.py From altair-transform with MIT License | 6 votes |
def visit_quantile(transform: alt.QuantileTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()
    quantile = transform["quantile"]
    groupby = transform.get("groupby")
    pname, vname = transform.get("as", ["prob", "value"])
    probs = transform.get("probs")
    if probs is None:
        step = transform.get("step", 0.01)
        probs = np.arange(0.5 * step, 1.0, step)

    def qq(s: pd.Series) -> pd.DataFrame:
        return pd.DataFrame({pname: probs, vname: np.quantile(s, probs)})

    if groupby:
        return (
            df.groupby(groupby)[quantile]
            .apply(qq)
            .reset_index(groupby)
            .reset_index(drop=True)
        )
    else:
        return qq(df[quantile]).reset_index(drop=True)
Example #8
Source File: normalize_by_quantile.py From spiketoolkit with MIT License | 6 votes |
def __init__(self, recording, scale=1.0, median=0.0, q1=0.01, q2=0.99, seed=0):
    if not isinstance(recording, RecordingExtractor):
        raise ValueError("'recording' must be a RecordingExtractor")
    self._recording = recording

    random_data = self._get_random_data_for_scaling(seed=seed).ravel()
    loc_q1, pre_median, loc_q2 = np.quantile(random_data, q=[q1, 0.5, q2])
    pre_scale = abs(loc_q2 - loc_q1)

    self._scalar = scale / pre_scale
    self._offset = median - pre_median * self._scalar

    RecordingExtractor.__init__(self)
    self.copy_channel_properties(recording=self._recording)
    self.is_filtered = self._recording.is_filtered

    self._kwargs = {'recording': recording.make_serialized_dict(), 'scale': scale,
                    'median': median, 'q1': q1, 'q2': q2, 'seed': seed}
Example #9
Source File: regression_tests.py From drifter_ml with MIT License | 6 votes |
def trimean(self, data):
    """
    I'm exposing this as a public method because
    the trimean is not implemented in enough packages.

    Formula:
    (25th percentile + 2*50th percentile + 75th percentile)/4

    Parameters
    ----------
    data : array-like
        an iterable, either a list or a numpy array

    Returns
    -------
    the trimean: float
    """
    q1 = np.quantile(data, 0.25)
    q3 = np.quantile(data, 0.75)
    median = np.median(data)
    return (q1 + 2*median + q3)/4
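A worked check of the formula (hypothetical data, not from the original test suite): for the integers 1 through 11, the quartiles are 3.5, 6 and 8.5, so the trimean is 6.0.

import numpy as np

data = np.arange(1, 12)  # 1..11
q1, median, q3 = np.quantile(data, [0.25, 0.5, 0.75])
print((q1 + 2 * median + q3) / 4)  # (3.5 + 2*6 + 8.5) / 4 = 6.0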
Example #10
Source File: classification_tests.py From drifter_ml with MIT License | 6 votes |
def trimean(self, data):
    """
    I'm exposing this as a public method because
    the trimean is not implemented in enough packages.

    Formula:
    (25th percentile + 2*50th percentile + 75th percentile)/4

    Parameters
    ----------
    data : array-like
        an iterable, either a list or a numpy array

    Returns
    -------
    the trimean: float
    """
    q1 = np.quantile(data, 0.25)
    q3 = np.quantile(data, 0.75)
    median = np.median(data)
    return (q1 + 2*median + q3)/4
Example #11
Source File: experiments.py From whynot with MIT License | 6 votes |
def pollution_confounded_propensity(intervention, untreated_runs, treatment_bias):
    """Probability of treating each unit.

    To generate confounding, we are more likely to treat worlds with high pollution.
    """

    def persistent_pollution(run):
        return run[intervention.time].persistent_pollution

    pollution = [persistent_pollution(run) for run in untreated_runs]
    upper_quantile = np.quantile(pollution, 0.9)

    def treatment_prob(idx):
        if pollution[idx] > upper_quantile:
            return treatment_bias
        return 1.0 - treatment_bias

    return np.array([treatment_prob(idx) for idx in range(len(untreated_runs))])


# pylint: disable-msg=invalid-name
#: An observational experiment with confounding. Polluted states are more likely to be treated.
Example #12
Source File: utilities.py From abcpy with BSD 3-Clause Clear License | 5 votes |
def compute_similarity_matrix(target, quantile=0.1, return_pairwise_distances=False):
    """Compute the similarity matrix between some values at a given quantile of the Euclidean distances.

    If return_pairwise_distances is True, it also returns a matrix with all the pairwise distances."""
    logger = logging.getLogger("Compute_similarity_matrix")

    n_samples = target.shape[0]
    pairwise_distances = np.zeros([n_samples] * 2)
    for i in range(n_samples):
        for j in range(n_samples):
            pairwise_distances[i, j] = dist2(target[i], target[j])

    q = np.quantile(pairwise_distances[~np.eye(n_samples, dtype=bool)].reshape(-1), quantile)
    similarity_set = pairwise_distances < q

    logger.info("Fraction of similar pairs (epurated by self-similarity): {}".format(
        (np.sum(similarity_set) - n_samples) / n_samples ** 2))
    if (np.sum(similarity_set) - n_samples) / n_samples ** 2 == 0:
        raise RuntimeError("The chosen quantile is too small, as there are no similar samples according to the "
                           "corresponding threshold.\nPlease increase the quantile.")

    return (similarity_set, pairwise_distances) if return_pairwise_distances else similarity_set
Example #13
Source File: hpat_pandas_series_rolling_functions.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def arr_quantile(arr, q):
    """Calculate quantile of values"""
    if len(arr) == 0:
        return numpy.nan

    return numpy.quantile(arr, q)
Example #14
Source File: test_quantile.py From altair-transform with MIT License | 5 votes |
def test_quantile_against_js(
    driver,
    data: pd.DataFrame,
    step: Optional[float],
    groupby: Optional[List[str]],
    probs: Optional[List[float]],
    as_: Optional[List[str]],
) -> None:
    transform: Dict[str, Any] = {"quantile": "x"}
    if step is not None:
        transform["step"] = step
    if groupby is not None:
        transform["groupby"] = groupby
    if probs is not None:
        transform["probs"] = probs
    if as_ is not None:
        transform["as"] = as_
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
Example #15
Source File: test_quantile.py From altair-transform with MIT License | 5 votes |
def test_quantile_transform_groupby(data: pd.DataFrame) -> None:
    group = "c"
    transform = {"quantile": "x", "step": 0.1, "groupby": [group]}
    out = altair_transform.apply(data, transform)
    assert list(out.columns) == ["c", "prob", "value"]
    for key in data[group].unique():
        out_group_1 = altair_transform.apply(data[data[group] == key], transform)
        out_group_2 = out[out[group] == key][out_group_1.columns].reset_index(drop=True)
        assert_frame_equal(out_group_1, out_group_2)
Example #16
Source File: test_function_base.py From pySINDy with MIT License | 5 votes |
def test_basic(self):
    x = np.arange(8) * 0.5
    assert_equal(np.quantile(x, 0), 0.)
    assert_equal(np.quantile(x, 1), 3.5)
    assert_equal(np.quantile(x, 0.5), 1.75)
Example #17
Source File: test_quantile.py From altair-transform with MIT License | 5 votes |
def test_quantile_transform(data: pd.DataFrame) -> None:
    transform = {"quantile": "x", "step": 0.1}
    out = altair_transform.apply(data, transform)
    assert list(out.columns) == ["prob", "value"]
    assert_allclose(out.prob, np.arange(0.05, 1, 0.1))
    assert_allclose(out.value, np.quantile(data.x, out.prob))
Example #18
Source File: pycoQC_plot.py From pycoQC with GNU General Public License v3.0 | 5 votes |
def _compute_percentiles(data):
    return list(np.quantile(data.dropna(), q=np.linspace(0, 1, 101)))
Example #19
Source File: binarize.py From sklearn-evaluation with MIT License | 5 votes |
def scores_at_quantile(y_score, quantile):
    """Binary scores at a certain quantile
    """
    cutoff_score = cutoff_score_at_quantile(y_score, quantile)
    y_score_binary = (y_score >= cutoff_score).astype(int)
    return y_score_binary
Example #20
Source File: spydrpick.py From panaroo with MIT License | 5 votes |
def tukey_outlier(hitsA, hitsB, mis):
    ids = np.unique(hitsA)
    max_hit_mis = np.zeros(len(ids))
    for i in range(len(ids)):
        max_hit_mis[i] = np.max(mis[hitsA == ids[i]])

    Q1, Q3 = np.quantile(max_hit_mis, [0.25, 0.75])

    outliers = np.zeros(len(mis))
    outliers[mis > (Q3 + 1.5 * (Q3 - Q1))] = 1
    outliers[mis > (Q3 + 3 * (Q3 - Q1))] = 2

    return outliers
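The fences here are the classic Tukey rule: values above Q3 + 1.5*IQR are flagged as outliers (1) and values above Q3 + 3*IQR as extreme outliers (2). A small sketch with hypothetical inputs (distinct ids, so the per-id maxima are just the scores themselves):

import numpy as np

mis = np.array([0.10, 0.11, 0.12, 0.12, 0.13, 0.90])  # one extreme score
hits = np.arange(len(mis))                            # one unique id per score

print(tukey_outlier(hits, hits, mis))  # [0. 0. 0. 0. 0. 2.]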
Example #21
Source File: statistics.py From B-SOID with GNU General Public License v3.0 | 5 votes |
def feat_dist(feats):
    feat_range = []
    feat_med = []
    p_cts = []
    edges = []
    for i in range(feats.shape[0]):
        feat_range.append([np.quantile(feats[i, :], 0.05), np.quantile(feats[i, :], 0.95)])
        feat_med.append(np.quantile(feats[i, :], 0.5))
        p_ct, edge = np.histogram(feats[i, :], 50, density=True)
        p_cts.append(p_ct)
        edges.append(edge)
    return feat_range, feat_med, p_cts, edges
Example #22
Source File: statistics.py From B-SOID with GNU General Public License v3.0 | 5 votes |
def feat_dist(feats):
    feat_range = []
    feat_med = []
    p_cts = []
    edges = []
    for i in range(feats.shape[0]):
        feat_range.append([np.quantile(feats[i, :], 0.05), np.quantile(feats[i, :], 0.95)])
        feat_med.append(np.quantile(feats[i, :], 0.5))
        p_ct, edge = np.histogram(feats[i, :], 50, density=True)
        p_cts.append(p_ct)
        edges.append(edge)
    return feat_range, feat_med, p_cts, edges
Example #23
Source File: test_function_base.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_no_p_overwrite(self):
    # this is worth retesting, because quantile does not make a copy
    p0 = np.array([0, 0.75, 0.25, 0.5, 1.0])
    p = p0.copy()
    np.quantile(np.arange(100.), p, interpolation="midpoint")
    assert_array_equal(p, p0)

    p0 = p0.tolist()
    p = p.tolist()
    np.quantile(np.arange(100.), p, interpolation="midpoint")
    assert_array_equal(p, p0)
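A side note on the keyword used in this test: NumPy 1.22 renamed interpolation to method, so on recent versions the equivalent call looks like this (a sketch):

import numpy as np

x = np.arange(100.0)
print(np.quantile(x, 0.5, method="midpoint"))  # 49.5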
Example #24
Source File: test_function_base.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_basic(self):
    x = np.arange(8) * 0.5
    assert_equal(np.quantile(x, 0), 0.)
    assert_equal(np.quantile(x, 1), 3.5)
    assert_equal(np.quantile(x, 0.5), 1.75)
Example #25
Source File: nanfunctions.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'):
    """
    Private function for rank 1 arrays. Compute quantile ignoring NaNs.
    See nanpercentile for parameter usage
    """
    arr1d, overwrite_input = _remove_nan_1d(arr1d, overwrite_input=overwrite_input)
    if arr1d.size == 0:
        return np.full(q.shape, np.nan)[()]  # convert to scalar

    return function_base._quantile_unchecked(
        arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation)
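For context, the public counterpart of this private helper is np.nanquantile, which ignores NaNs where plain np.quantile propagates them:

import numpy as np

a = np.array([1.0, np.nan, 3.0, 4.0])
print(np.nanquantile(a, 0.5))  # 3.0 -- the NaN is dropped before computing
print(np.quantile(a, 0.5))     # nan -- the NaN propagates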
Example #26
Source File: approximate_bayesian_computation.py From pygom with GNU General Public License v2.0 | 5 votes |
def continue_posterior_sample(self, N, tol, G=1, q=None, M=None, progress=False):
    """
    Parameters (same as get_posterior_sample)
    ----------
    N: integer
        the number of samples in each generation
    tol: float or array like
        the initial tolerance or sequence of decreasing tolerances
    G: integer
        the number of generations used in ABC SMC/ ABC SMC MNN
    q: float (0 < q < 1)
        the quantile used to specify the tolerance for future generations in ABC SMC/ ABC SMC MNN
    M: integer
        the number of nearest neighbours used in ABC SMC MNN (M < N)
    progress: bool
        if True, reports the generation number, acceptance rate and threshold after each generation
    """
    # perform checks
    assert N == self.N, "For now, set the sample size to be the same as the previous run"
    assert hasattr(self, "res"), "Use 'get_posterior_sample' before 'continue_posterior_sample'"
    if hasattr(tol, "__len__"):
        assert tol[0] <= self.final_tol, "The initial tolerance is greater than the final tolerance from the previous run"
    else:
        assert tol <= self.final_tol, "The initial tolerance is greater than the final tolerance from the previous run"

    self.get_posterior_sample(N, tol, G, q, M, progress, rerun=True)
Example #27
Source File: encodings.py From bindsnet with GNU Affero General Public License v3.0 | 5 votes |
def single(
    datum: torch.Tensor,
    time: int,
    dt: float = 1.0,
    sparsity: float = 0.5,
    device="cpu",
    **kwargs
) -> torch.Tensor:
    # language=rst
    """
    Generates timing based single-spike encoding. Spike occurs earlier if the
    intensity of the input feature is higher. Features whose value is below the
    threshold remain silent.

    :param datum: Tensor of shape ``[n_1, ..., n_k]``.
    :param time: Length of the input and output.
    :param dt: Simulation time step.
    :param sparsity: Sparsity of the input representation. 0 for no spikes and 1 for all spikes.
    :return: Tensor of shape ``[time, n_1, ..., n_k]``.
    """
    time = int(time / dt)
    shape = list(datum.shape)
    datum = np.copy(datum)
    quantile = np.quantile(datum, 1 - sparsity)
    s = np.zeros([time, *shape])  # np.zeros takes no device argument; build on CPU
    s[0] = np.where(datum > quantile, np.ones(shape), np.zeros(shape))
    return torch.Tensor(s).byte().to(device)  # move to the requested device at the end
Example #28
Source File: normalize_by_quantile.py From spiketoolkit with MIT License | 5 votes |
def normalize_by_quantile(recording, scale=1.0, median=0.0, q1=0.01, q2=0.99, seed=0):
    '''
    Rescale the traces from the given recording extractor with a scalar
    and offset. First, the median and quantiles of the distribution are estimated.
    Then the distribution is rescaled and offset so that the distance between the
    quantiles (1st and 99th by default) equals `scale` and the median equals the
    given `median`.

    Parameters
    ----------
    recording: RecordingExtractor
        The recording extractor to be transformed
    scale: float
        Scale for the output distribution
    median: float
        Median for the output distribution
    q1: float (default 0.01)
        Lower quantile used for measuring the scale
    q2: float (default 0.99)
        Upper quantile used for measuring the scale
    seed: int
        Random seed for reproducibility

    Returns
    -------
    rescaled_traces: NormalizeByQuantileRecording
        The rescaled traces recording extractor object
    '''
    return NormalizeByQuantileRecording(
        recording=recording, scale=scale, median=median, q1=q1, q2=q2, seed=seed
    )
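The arithmetic behind this transform, sketched with plain NumPy on hypothetical data (independent of spiketoolkit): the scalar maps the inter-quantile distance to the target scale, and the offset moves the median.

import numpy as np

traces = np.random.default_rng(0).normal(5.0, 2.0, 10_000)
loc_q1, pre_median, loc_q2 = np.quantile(traces, [0.01, 0.5, 0.99])

scalar = 1.0 / abs(loc_q2 - loc_q1)  # target scale = 1.0
offset = 0.0 - pre_median * scalar   # target median = 0.0
rescaled = traces * scalar + offset

print(np.median(rescaled))                                        # ~0.0
print(np.quantile(rescaled, 0.99) - np.quantile(rescaled, 0.01))  # ~1.0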
Example #29
Source File: blank_saturation.py From spiketoolkit with MIT License | 5 votes |
def __init__(self, recording, threshold=None, seed=0):
    if not isinstance(recording, RecordingExtractor):
        raise ValueError("'recording' must be a RecordingExtractor")
    self._recording = recording

    random_data = self._get_random_data_for_scaling(seed=seed).ravel()
    q = np.quantile(random_data, [0.001, 0.5, 1 - 0.001])
    if 2 * q[1] - q[0] - q[2] < 2 * np.min([q[1] - q[0], q[2] - q[1]]):
        print('Warning, narrow signal range suggests artefact-free data.')
    self._median = q[1]

    if threshold is None:
        if np.abs(q[1] - q[0]) > np.abs(q[1] - q[2]):
            self._threshold = q[0]
            self._lower = True
        else:
            self._threshold = q[2]
            self._lower = False
    else:
        self._threshold = threshold
        if q[1] - threshold < 0:
            self._lower = False
        else:
            self._lower = True

    RecordingExtractor.__init__(self)
    self.copy_channel_properties(recording=self._recording)
    self.is_filtered = self._recording.is_filtered

    self._kwargs = {'recording': recording.make_serialized_dict(),
                    'threshold': threshold, 'seed': seed}
Example #30
Source File: benchmark.py From CrypTen with MIT License | 5 votes |
def time_me(func=None, n_loops=10):
    """Decorator returning the median runtime in seconds over n_loops

    Args:
        func (function): invoked with given args / kwargs
        n_loops (int): number of times to invoke function for timing

    Returns:
        tuple of (Runtime(median, q1, q3) in seconds, function return value).
    """
    if func is None:
        return functools.partial(time_me, n_loops=n_loops)

    @functools.wraps(func)
    def timing_wrapper(*args, **kwargs):
        return_val = func(*args, **kwargs)
        times = []
        for _ in range(n_loops):
            start = timeit.default_timer()
            func(*args, **kwargs)
            times.append(timeit.default_timer() - start)
        mid_runtime = np.quantile(times, 0.5)
        q1_runtime = np.quantile(times, 0.25)
        q3_runtime = np.quantile(times, 0.75)
        runtime = Runtime(mid_runtime, q1_runtime, q3_runtime)
        return runtime, return_val

    return timing_wrapper
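A minimal usage sketch (assuming numpy, functools, and timeit are imported as in the original module; the Runtime namedtuple below is a hypothetical stand-in for CrypTen's actual definition):

import collections

Runtime = collections.namedtuple("Runtime", ["mid", "q1", "q3"])  # hypothetical stand-in

@time_me(n_loops=100)
def matmul(n=64):
    import numpy as np
    return np.random.rand(n, n) @ np.random.rand(n, n)

runtime, result = matmul()
print(f"median: {runtime.mid:.6f}s, IQR: [{runtime.q1:.6f}, {runtime.q3:.6f}]")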