Python numpy.quantile() Examples
The following are 30 code examples of numpy.quantile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy, or try the search function.
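As a quick refresher before the examples: np.quantile(a, q) returns the q-th quantile of the data in a, where q is a probability (or sequence of probabilities) between 0 and 1, using linear interpolation by default. A minimal sketch:

import numpy as np

data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
print(np.quantile(data, 0.5))            # 3.0, the median
print(np.quantile(data, [0.25, 0.75]))   # [2. 4.], the lower and upper quartiles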
Example #1
Source File: classification_metric.py From FATE with Apache License 2.0 | 6 votes |
def prepare_confusion_mat(self, labels, scores, add_to_end=True):
    sorted_labels, sorted_scores = sort_score_and_label(labels, scores)
    score_threshold, cuts = None, None

    if self.cut_method == 'step':
        score_threshold, cuts = ThresholdCutter.cut_by_step(sorted_scores, steps=0.01)
        if add_to_end:
            score_threshold.append(min(score_threshold) - 0.001)
            cuts.append(1)

    elif self.cut_method == 'quantile':
        score_threshold = ThresholdCutter.cut_by_quantile(sorted_scores,
                                                          remove_duplicate=self.remove_duplicate)
        score_threshold = list(np.flip(score_threshold))

    confusion_mat = ConfusionMatrix.compute(sorted_labels, sorted_scores, score_threshold,
                                            ret=['tp', 'fp', 'fn', 'tn'])

    return confusion_mat, score_threshold, cuts
Example #2
Source File: base.py From kite with GNU General Public License v3.0 | 6 votes |
def setSymColormap(self):
    # the first colormap is defined and then immediately replaced by the second
    cmap = {'ticks':
            [[0., (0, 0, 0, 255)],
             [1e-3, (106, 0, 31, 255)],
             [.5, (255, 255, 255, 255)],
             [1., (8, 54, 104, 255)]],
            'mode': 'rgb'}
    cmap = {'ticks':
            [[0., (0, 0, 0)],
             [1e-3, (172, 56, 56)],
             [.5, (255, 255, 255)],
             [1., (51, 53, 120)]],
            'mode': 'rgb'}

    relevant_data = num.abs(self._plot.data[num.isfinite(self._plot.data)])
    if num.any(relevant_data):
        lvl_max = num.quantile(relevant_data, .999)
    else:
        lvl_max = 1.

    self.gradient.restoreState(cmap)
    self.setLevels(-lvl_max, lvl_max)
Example #3
Source File: clustering.py From retentioneering-tools with Mozilla Public License 2.0 | 6 votes |
def find_best_eps(data, q=0.05):
    """
    Find the best maximal distance (eps) between dots for DBSCAN clustering.

    Parameters
    -------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    q: float, optional
        Quantile of nearest neighbor positive distance between dots; this value is used as eps. Default: ``0.05``

    Returns
    -------
    Optimal eps

    Return type
    -------
    Float
    """
    nn = NearestNeighbors()
    nn.fit(data)
    dist = nn.kneighbors()[0]
    dist = dist.flatten()
    dist = dist[dist > 0]
    return np.quantile(dist, q)
Example #4
Source File: approximate_bayesian_computation.py From pygom with GNU General Public License v2.0 | 6 votes |
def get_tolerance(self, g):
    """
    Parameters
    ----------
    g: integer
        generation number of the ABC-SMC/MNN algorithm
    """
    # choose the tolerance given the generation number and how q and tol are defined
    if g == 0:
        if not hasattr(self.tol, "__len__"):
            return self.tol
        else:
            return self.tol[0]
    else:
        if self.q is not None:
            return np.quantile(self.dist, self.q)
        else:
            return self.tol[g]
Example #5
Source File: util.py From metrics-mvp with MIT License | 6 votes |
def quantile_sorted(sorted_arr, quantile):
    # For small arrays (less than about 4000 items) np.quantile is significantly
    # slower than sorting the array and picking the quantile out by index. Computing
    # quantiles this way significantly improves performance for computing
    # trip time stats across all stops.

    max_index = len(sorted_arr) - 1
    quantile_index = max_index * quantile
    quantile_index_int = int(quantile_index)
    quantile_index_fractional = quantile_index - quantile_index_int

    quantile_lower = sorted_arr[quantile_index_int]
    if quantile_index_fractional > 0:
        quantile_upper = sorted_arr[quantile_index_int + 1]
        return quantile_lower + (quantile_upper - quantile_lower) * quantile_index_fractional
    else:
        return quantile_lower
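A quick sanity check for the helper above (a sketch, assuming quantile_sorted is in scope): on an already-sorted array, the manual interpolation agrees with np.quantile's default linear interpolation.

import numpy as np

sorted_arr = np.sort(np.random.default_rng(42).random(1000))
for q in (0.0, 0.25, 0.5, 0.9, 1.0):
    # both compute the same linearly interpolated quantile
    assert np.isclose(quantile_sorted(sorted_arr, q), np.quantile(sorted_arr, q))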
Example #6
Source File: test_stats_utils.py From arviz with Apache License 2.0 | 6 votes |
def test_wrap_ufunc_output(quantile, arg):
    ary = np.random.randn(4, 100)
    n_output = len(quantile)
    if arg:
        res = wrap_xarray_ufunc(
            np.quantile, ary, ufunc_kwargs={"n_output": n_output}, func_args=(quantile,)
        )
    else:
        if n_output == 1:
            res = wrap_xarray_ufunc(np.quantile, ary, func_kwargs={"q": quantile})
        else:
            res = wrap_xarray_ufunc(
                np.quantile, ary, ufunc_kwargs={"n_output": n_output}, func_kwargs={"q": quantile}
            )
    if n_output == 1:
        assert not isinstance(res, tuple)
    else:
        assert isinstance(res, tuple)
        assert len(res) == n_output
Example #7
Source File: quantile.py From altair-transform with MIT License | 6 votes |
def visit_quantile(transform: alt.QuantileTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()
    quantile = transform["quantile"]
    groupby = transform.get("groupby")
    pname, vname = transform.get("as", ["prob", "value"])
    probs = transform.get("probs")
    if probs is None:
        step = transform.get("step", 0.01)
        probs = np.arange(0.5 * step, 1.0, step)

    def qq(s: pd.Series) -> pd.DataFrame:
        return pd.DataFrame({pname: probs, vname: np.quantile(s, probs)})

    if groupby:
        return (
            df.groupby(groupby)[quantile]
            .apply(qq)
            .reset_index(groupby)
            .reset_index(drop=True)
        )
    else:
        return qq(df[quantile]).reset_index(drop=True)
Example #8
Source File: normalize_by_quantile.py From spiketoolkit with MIT License | 6 votes |
def __init__(self, recording, scale=1.0, median=0.0, q1=0.01, q2=0.99, seed=0):
    if not isinstance(recording, RecordingExtractor):
        raise ValueError("'recording' must be a RecordingExtractor")
    self._recording = recording

    random_data = self._get_random_data_for_scaling(seed=seed).ravel()
    loc_q1, pre_median, loc_q2 = np.quantile(random_data, q=[q1, 0.5, q2])
    pre_scale = abs(loc_q2 - loc_q1)

    self._scalar = scale / pre_scale
    self._offset = median - pre_median * self._scalar

    RecordingExtractor.__init__(self)
    self.copy_channel_properties(recording=self._recording)
    self.is_filtered = self._recording.is_filtered

    self._kwargs = {'recording': recording.make_serialized_dict(), 'scale': scale,
                    'median': median, 'q1': q1, 'q2': q2, 'seed': seed}
Example #9
Source File: regression_tests.py From drifter_ml with MIT License | 6 votes |
def trimean(self, data):
    """
    I'm exposing this as a public method because
    the trimean is not implemented in enough packages.

    Formula:
    (25th percentile + 2*50th percentile + 75th percentile)/4

    Parameters
    ----------
    data : array-like
        an iterable, either a list or a numpy array

    Returns
    -------
    the trimean: float
    """
    q1 = np.quantile(data, 0.25)
    q3 = np.quantile(data, 0.75)
    median = np.median(data)
    return (q1 + 2*median + q3)/4
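A worked check of the formula (hypothetical data, not from the original test suite): for the integers 1 through 11, the quartiles are 3.5, 6 and 8.5, so the trimean is 6.0.

import numpy as np

data = np.arange(1, 12)  # 1..11
q1, median, q3 = np.quantile(data, [0.25, 0.5, 0.75])
print((q1 + 2 * median + q3) / 4)  # (3.5 + 2*6 + 8.5) / 4 = 6.0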
Example #10
Source File: classification_tests.py From drifter_ml with MIT License | 6 votes |
def trimean(self, data):
    """
    I'm exposing this as a public method because
    the trimean is not implemented in enough packages.

    Formula:
    (25th percentile + 2*50th percentile + 75th percentile)/4

    Parameters
    ----------
    data : array-like
        an iterable, either a list or a numpy array

    Returns
    -------
    the trimean: float
    """
    q1 = np.quantile(data, 0.25)
    q3 = np.quantile(data, 0.75)
    median = np.median(data)
    return (q1 + 2*median + q3)/4
Example #11
Source File: experiments.py From whynot with MIT License | 6 votes |
def pollution_confounded_propensity(intervention, untreated_runs, treatment_bias):
    """Probability of treating each unit.

    To generate confounding, we are more likely to treat worlds with high pollution.
    """

    def persistent_pollution(run):
        return run[intervention.time].persistent_pollution

    pollution = [persistent_pollution(run) for run in untreated_runs]
    upper_quantile = np.quantile(pollution, 0.9)

    def treatment_prob(idx):
        if pollution[idx] > upper_quantile:
            return treatment_bias
        return 1.0 - treatment_bias

    return np.array([treatment_prob(idx) for idx in range(len(untreated_runs))])


# pylint: disable-msg=invalid-name
#: An observational experiment with confounding. Polluted states are more likely to be treated.
Example #12
Source File: utilities.py From abcpy with BSD 3-Clause Clear License | 5 votes |
def compute_similarity_matrix(target, quantile=0.1, return_pairwise_distances=False):
    """Compute the similarity matrix between some values at a given quantile of the Euclidean distances.

    If return_pairwise_distances is True, it also returns a matrix with all the pairwise distances."""
    logger = logging.getLogger("Compute_similarity_matrix")

    n_samples = target.shape[0]
    pairwise_distances = np.zeros([n_samples] * 2)
    for i in range(n_samples):
        for j in range(n_samples):
            pairwise_distances[i, j] = dist2(target[i], target[j])

    q = np.quantile(pairwise_distances[~np.eye(n_samples, dtype=bool)].reshape(-1), quantile)
    similarity_set = pairwise_distances < q

    logger.info("Fraction of similar pairs (epurated by self-similarity): {}".format(
        (np.sum(similarity_set) - n_samples) / n_samples ** 2))
    if (np.sum(similarity_set) - n_samples) / n_samples ** 2 == 0:
        raise RuntimeError("The chosen quantile is too small, as there are no similar samples according to the "
                           "corresponding threshold.\nPlease increase the quantile.")

    return (similarity_set, pairwise_distances) if return_pairwise_distances else similarity_set
Example #13
Source File: hpat_pandas_series_rolling_functions.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def arr_quantile(arr, q):
    """Calculate quantile of values"""
    if len(arr) == 0:
        return numpy.nan

    return numpy.quantile(arr, q)
Example #14
Source File: test_quantile.py From altair-transform with MIT License | 5 votes |
def test_quantile_against_js(
    driver,
    data: pd.DataFrame,
    step: Optional[float],
    groupby: Optional[List[str]],
    probs: Optional[List[float]],
    as_: Optional[List[str]],
) -> None:
    transform: Dict[str, Any] = {"quantile": "x"}
    if step is not None:
        transform["step"] = step
    if groupby is not None:
        transform["groupby"] = groupby
    if probs is not None:
        transform["probs"] = probs
    if as_ is not None:
        transform["as"] = as_
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
Example #15
Source File: test_quantile.py From altair-transform with MIT License | 5 votes |
def test_quantile_transform_groupby(data: pd.DataFrame) -> None:
    group = "c"
    transform = {"quantile": "x", "step": 0.1, "groupby": [group]}
    out = altair_transform.apply(data, transform)
    assert list(out.columns) == ["c", "prob", "value"]
    for key in data[group].unique():
        out_group_1 = altair_transform.apply(data[data[group] == key], transform)
        out_group_2 = out[out[group] == key][out_group_1.columns].reset_index(drop=True)
        assert_frame_equal(out_group_1, out_group_2)
Example #16
Source File: test_function_base.py From pySINDy with MIT License | 5 votes |
def test_basic(self):
    x = np.arange(8) * 0.5
    assert_equal(np.quantile(x, 0), 0.)
    assert_equal(np.quantile(x, 1), 3.5)
    assert_equal(np.quantile(x, 0.5), 1.75)
Example #17
Source File: test_quantile.py From altair-transform with MIT License | 5 votes |
def test_quantile_transform(data: pd.DataFrame) -> None:
    transform = {"quantile": "x", "step": 0.1}
    out = altair_transform.apply(data, transform)
    assert list(out.columns) == ["prob", "value"]
    assert_allclose(out.prob, np.arange(0.05, 1, 0.1))
    assert_allclose(out.value, np.quantile(data.x, out.prob))
Example #18
Source File: pycoQC_plot.py From pycoQC with GNU General Public License v3.0 | 5 votes |
def _compute_percentiles(data):
    return list(np.quantile(data.dropna(), q=np.linspace(0, 1, 101)))
Example #19
Source File: binarize.py From sklearn-evaluation with MIT License | 5 votes |
def scores_at_quantile(y_score, quantile):
    """Binary scores at a certain quantile
    """
    cutoff_score = cutoff_score_at_quantile(y_score, quantile)
    y_score_binary = (y_score >= cutoff_score).astype(int)
    return y_score_binary
Example #20
Source File: spydrpick.py From panaroo with MIT License | 5 votes |
def tukey_outlier(hitsA, hitsB, mis):
    ids = np.unique(hitsA)
    max_hit_mis = np.zeros(len(ids))
    for i in range(len(ids)):
        max_hit_mis[i] = np.max(mis[hitsA == ids[i]])

    Q1, Q3 = np.quantile(max_hit_mis, [0.25, 0.75])

    outliers = np.zeros(len(mis))
    outliers[mis > (Q3 + 1.5 * (Q3 - Q1))] = 1
    outliers[mis > (Q3 + 3 * (Q3 - Q1))] = 2

    return outliers
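The fences here are the classic Tukey rule: values above Q3 + 1.5*IQR are flagged as outliers (1) and values above Q3 + 3*IQR as extreme outliers (2). A small sketch with hypothetical inputs (distinct ids, so the per-id maxima are just the scores themselves):

import numpy as np

mis = np.array([0.10, 0.11, 0.12, 0.12, 0.13, 0.90])  # one extreme score
hits = np.arange(len(mis))                            # one unique id per score

print(tukey_outlier(hits, hits, mis))  # [0. 0. 0. 0. 0. 2.]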
Example #21
Source File: statistics.py From B-SOID with GNU General Public License v3.0 | 5 votes |
def feat_dist(feats):
    feat_range = []
    feat_med = []
    p_cts = []
    edges = []
    for i in range(feats.shape[0]):
        feat_range.append([np.quantile(feats[i, :], 0.05), np.quantile(feats[i, :], 0.95)])
        feat_med.append(np.quantile(feats[i, :], 0.5))
        p_ct, edge = np.histogram(feats[i, :], 50, density=True)
        p_cts.append(p_ct)
        edges.append(edge)
    return feat_range, feat_med, p_cts, edges
Example #22
Source File: statistics.py From B-SOID with GNU General Public License v3.0 | 5 votes |
def feat_dist(feats):
    feat_range = []
    feat_med = []
    p_cts = []
    edges = []
    for i in range(feats.shape[0]):
        feat_range.append([np.quantile(feats[i, :], 0.05), np.quantile(feats[i, :], 0.95)])
        feat_med.append(np.quantile(feats[i, :], 0.5))
        p_ct, edge = np.histogram(feats[i, :], 50, density=True)
        p_cts.append(p_ct)
        edges.append(edge)
    return feat_range, feat_med, p_cts, edges
Example #23
Source File: test_function_base.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_no_p_overwrite(self):
    # this is worth retesting, because quantile does not make a copy
    p0 = np.array([0, 0.75, 0.25, 0.5, 1.0])
    p = p0.copy()
    np.quantile(np.arange(100.), p, interpolation="midpoint")
    assert_array_equal(p, p0)

    p0 = p0.tolist()
    p = p.tolist()
    np.quantile(np.arange(100.), p, interpolation="midpoint")
    assert_array_equal(p, p0)
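A side note on the keyword used in this test: NumPy 1.22 renamed interpolation to method, so on recent versions the equivalent call looks like this (a sketch):

import numpy as np

x = np.arange(100.0)
print(np.quantile(x, 0.5, method="midpoint"))  # 49.5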
Example #24
Source File: test_function_base.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_basic(self):
    x = np.arange(8) * 0.5
    assert_equal(np.quantile(x, 0), 0.)
    assert_equal(np.quantile(x, 1), 3.5)
    assert_equal(np.quantile(x, 0.5), 1.75)
Example #25
Source File: nanfunctions.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'):
    """
    Private function for rank 1 arrays. Compute quantile ignoring NaNs.
    See nanpercentile for parameter usage
    """
    arr1d, overwrite_input = _remove_nan_1d(arr1d, overwrite_input=overwrite_input)
    if arr1d.size == 0:
        return np.full(q.shape, np.nan)[()]  # convert to scalar

    return function_base._quantile_unchecked(
        arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation)
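For context, the public counterpart of this private helper is np.nanquantile, which ignores NaNs where plain np.quantile propagates them:

import numpy as np

a = np.array([1.0, np.nan, 3.0, 4.0])
print(np.nanquantile(a, 0.5))  # 3.0 -- the NaN is dropped before computing
print(np.quantile(a, 0.5))     # nan -- the NaN propagates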
Example #26
Source File: approximate_bayesian_computation.py From pygom with GNU General Public License v2.0 | 5 votes |
def continue_posterior_sample(self, N, tol, G=1, q=None, M=None, progress=False):
    """
    Parameters (same as get_posterior_sample)
    ----------
    N: integer
        the number of samples in each generation
    tol: float or array like
        the initial tolerance or sequence of decreasing tolerances
    G: integer
        the number of generations used in ABC SMC/ ABC SMC MNN
    q: float (0 < q < 1)
        the quantile used to specify the tolerance for future generations in ABC SMC/ ABC SMC MNN
    M: integer
        the number of nearest neighbours used in ABC SMC MNN (M < N)
    progress: bool
        if True, reports the generation number, acceptance rate and threshold after each generation
    """
    # perform checks
    assert N == self.N, "For now, set the sample size to be the same as the previous run"
    assert hasattr(self, "res"), "Use 'get_posterior_sample' before 'continue_posterior_sample'"
    if hasattr(tol, "__len__"):
        assert tol[0] <= self.final_tol, "The initial tolerance is greater than the final tolerance from the previous run"
    else:
        assert tol <= self.final_tol, "The initial tolerance is greater than the final tolerance from the previous run"

    self.get_posterior_sample(N, tol, G, q, M, progress, rerun=True)
Example #27
Source File: encodings.py From bindsnet with GNU Affero General Public License v3.0 | 5 votes |
def single(
    datum: torch.Tensor,
    time: int,
    dt: float = 1.0,
    sparsity: float = 0.5,
    device="cpu",
    **kwargs
) -> torch.Tensor:
    # language=rst
    """
    Generates timing based single-spike encoding. Spike occurs earlier if the
    intensity of the input feature is higher. Features whose value is below the
    threshold remain silent.

    :param datum: Tensor of shape ``[n_1, ..., n_k]``.
    :param time: Length of the input and output.
    :param dt: Simulation time step.
    :param sparsity: Sparsity of the input representation. 0 for no spikes and 1 for all spikes.
    :return: Tensor of shape ``[time, n_1, ..., n_k]``.
    """
    time = int(time / dt)
    shape = list(datum.shape)
    datum = np.copy(datum)
    quantile = np.quantile(datum, 1 - sparsity)
    s = np.zeros([time, *shape])  # np.zeros takes no device argument; build on CPU
    s[0] = np.where(datum > quantile, np.ones(shape), np.zeros(shape))
    return torch.Tensor(s).byte().to(device)  # move to the requested device at the end
Example #28
Source File: normalize_by_quantile.py From spiketoolkit with MIT License | 5 votes |
def normalize_by_quantile(recording, scale=1.0, median=0.0, q1=0.01, q2=0.99, seed=0):
    '''
    Rescale the traces from the given recording extractor with a scalar
    and offset. First, the median and quantiles of the distribution are estimated.
    Then the distribution is rescaled and offset so that the distance between the
    quantiles (1st and 99th by default) equals `scale` and the median equals the
    given `median`.

    Parameters
    ----------
    recording: RecordingExtractor
        The recording extractor to be transformed
    scale: float
        Scale for the output distribution
    median: float
        Median for the output distribution
    q1: float (default 0.01)
        Lower quantile used for measuring the scale
    q2: float (default 0.99)
        Upper quantile used for measuring the scale
    seed: int
        Random seed for reproducibility

    Returns
    -------
    rescaled_traces: NormalizeByQuantileRecording
        The rescaled traces recording extractor object
    '''
    return NormalizeByQuantileRecording(
        recording=recording, scale=scale, median=median, q1=q1, q2=q2, seed=seed
    )
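The arithmetic behind this transform, sketched with plain NumPy on hypothetical data (independent of spiketoolkit): the scalar maps the inter-quantile distance to the target scale, and the offset moves the median.

import numpy as np

traces = np.random.default_rng(0).normal(5.0, 2.0, 10_000)
loc_q1, pre_median, loc_q2 = np.quantile(traces, [0.01, 0.5, 0.99])

scalar = 1.0 / abs(loc_q2 - loc_q1)  # target scale = 1.0
offset = 0.0 - pre_median * scalar   # target median = 0.0
rescaled = traces * scalar + offset

print(np.median(rescaled))                                        # ~0.0
print(np.quantile(rescaled, 0.99) - np.quantile(rescaled, 0.01))  # ~1.0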
Example #29
Source File: blank_saturation.py From spiketoolkit with MIT License | 5 votes |
def __init__(self, recording, threshold=None, seed=0):
    if not isinstance(recording, RecordingExtractor):
        raise ValueError("'recording' must be a RecordingExtractor")
    self._recording = recording

    random_data = self._get_random_data_for_scaling(seed=seed).ravel()
    q = np.quantile(random_data, [0.001, 0.5, 1 - 0.001])
    if 2 * q[1] - q[0] - q[2] < 2 * np.min([q[1] - q[0], q[2] - q[1]]):
        print('Warning, narrow signal range suggests artefact-free data.')
    self._median = q[1]

    if threshold is None:
        if np.abs(q[1] - q[0]) > np.abs(q[1] - q[2]):
            self._threshold = q[0]
            self._lower = True
        else:
            self._threshold = q[2]
            self._lower = False
    else:
        self._threshold = threshold
        if q[1] - threshold < 0:
            self._lower = False
        else:
            self._lower = True

    RecordingExtractor.__init__(self)
    self.copy_channel_properties(recording=self._recording)
    self.is_filtered = self._recording.is_filtered

    self._kwargs = {'recording': recording.make_serialized_dict(),
                    'threshold': threshold, 'seed': seed}
Example #30
Source File: benchmark.py From CrypTen with MIT License | 5 votes |
def time_me(func=None, n_loops=10):
    """Decorator returning the median runtime in seconds over n_loops

    Args:
        func (function): invoked with given args / kwargs
        n_loops (int): number of times to invoke function for timing

    Returns:
        tuple of (Runtime(median, q1, q3) in seconds, function return value).
    """
    if func is None:
        return functools.partial(time_me, n_loops=n_loops)

    @functools.wraps(func)
    def timing_wrapper(*args, **kwargs):
        return_val = func(*args, **kwargs)
        times = []
        for _ in range(n_loops):
            start = timeit.default_timer()
            func(*args, **kwargs)
            times.append(timeit.default_timer() - start)
        mid_runtime = np.quantile(times, 0.5)
        q1_runtime = np.quantile(times, 0.25)
        q3_runtime = np.quantile(times, 0.75)
        runtime = Runtime(mid_runtime, q1_runtime, q3_runtime)
        return runtime, return_val

    return timing_wrapper
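A minimal usage sketch (assuming numpy, functools, and timeit are imported as in the original module; the Runtime namedtuple below is a hypothetical stand-in for CrypTen's actual definition):

import collections

Runtime = collections.namedtuple("Runtime", ["mid", "q1", "q3"])  # hypothetical stand-in

@time_me(n_loops=100)
def matmul(n=64):
    import numpy as np
    return np.random.rand(n, n) @ np.random.rand(n, n)

runtime, result = matmul()
print(f"median: {runtime.mid:.6f}s, IQR: [{runtime.q1:.6f}, {runtime.q3:.6f}]")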