Python Examples of numpy.histogram_bin

Source File: test_quantity_non_ufuncs.py From Carnets with BSD 3-Clause "New" or "Revised" License

6 votes

def test_histogram_bin_edges(self):
        x = np.array([1.1, 1.2, 1.3, 2.1, 5.1]) * u.m
        out_b = np.histogram_bin_edges(x)
        expected_b = np.histogram_bin_edges(x.value) * x.unit
        assert np.all(out_b == expected_b)
        # With bins
        out2_b = np.histogram_bin_edges(x, [125, 200] * u.cm)
        expected2_b = np.histogram_bin_edges(x.value, [1.25, 2.]) * x.unit
        assert np.all(out2_b == expected2_b)
        with pytest.raises(u.UnitsError):
            np.histogram_bin_edges(x, [125, 200] * u.s)

        with pytest.raises(u.UnitsError):
            np.histogram_bin_edges(x, [125, 200])

        with pytest.raises(u.UnitsError):
            np.histogram_bin_edges(x.value, [125, 200] * u.s)

Source File: function_helpers.py From Carnets with BSD 3-Clause "New" or "Revised" License

5 votes

def histogram_bin_edges(a, bins=10, range=None, weights=None):
    # weights is currently unused
    a = _as_quantity(a)
    if not isinstance(bins, str):
        bins = _check_bins(bins, a.unit)

    return (a.value, bins, range, weights), {}, a.unit, None

Source File: histograms.py From recruit with Apache License 2.0

4 votes

def _hist_bin_auto(x, range):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x, range)
    sturges_bw = _hist_bin_sturges(x, range)
    del range  # unused
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Source File: test_statistics_execute.py From mars with Apache License 2.0

4 votes

def testHistogramBinEdgesExecution(self):
        rs = np.random.RandomState(0)

        raw = rs.randint(10, size=(20,))
        a = tensor(raw, chunk_size=3)

        # range provided
        for range_ in [(0, 10), (3, 11), (3, 7)]:
            bin_edges = histogram_bin_edges(a, range=range_)
            result = self.executor.execute_tensor(bin_edges)[0]
            expected = np.histogram_bin_edges(raw, range=range_)
            np.testing.assert_array_equal(result, expected)

        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            raw2 = rs.randint(10, size=(1,))
            b = tensor(raw2)
            raw3 = rs.randint(10, size=(0,))
            c = tensor(raw3)
            for t, r in [(a, raw), (b, raw2), (c, raw3), (sort(a), raw)]:
                test_bins = [10, 'stone', 'auto', 'doane', 'fd',
                             'rice', 'scott', 'sqrt', 'sturges']
                for bins in test_bins:
                    bin_edges = histogram_bin_edges(t, bins=bins)

                    if r.size > 0:
                        with self.assertRaises(TilesError):
                            executor.execute_tensor(bin_edges)

                    result = executor.execute_tensors([bin_edges])[0]
                    expected = np.histogram_bin_edges(r, bins=bins)
                    np.testing.assert_array_equal(result, expected)

                test_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
                for bins in test_bins:
                    bin_edges = histogram_bin_edges(t, bins=bins)
                    result = executor.execute_tensors([bin_edges])[0]
                    expected = np.histogram_bin_edges(r, bins=[0, 4, 8])
                    np.testing.assert_array_equal(result, expected)

            raw = np.arange(5)
            a = tensor(raw, chunk_size=3)
            bin_edges = histogram_bin_edges(a)
            result = executor.execute_tensors([bin_edges])[0]
            expected = np.histogram_bin_edges(raw)
            self.assertEqual(bin_edges.shape, expected.shape)
            np.testing.assert_array_equal(result, expected)

Source File: histograms.py From lambda-packs with MIT License

4 votes

def _hist_bin_auto(x):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x)
    sturges_bw = _hist_bin_sturges(x)
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Source File: histograms.py From Mastering-Elasticsearch-7.0 with MIT License

4 votes

def _hist_bin_auto(x, range):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x, range)
    sturges_bw = _hist_bin_sturges(x, range)
    del range  # unused
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Source File: histograms.py From GraphicDesignPatternByPython with MIT License

4 votes

def _hist_bin_auto(x):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x)
    sturges_bw = _hist_bin_sturges(x)
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Source File: histograms.py From predictive-maintenance-using-machine-learning with Apache License 2.0

4 votes

def _hist_bin_auto(x, range):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x, range)
    sturges_bw = _hist_bin_sturges(x, range)
    del range  # unused
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Source File: histograms.py From pySINDy with MIT License

4 votes

def _hist_bin_auto(x):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x)
    sturges_bw = _hist_bin_sturges(x)
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Source File: histograms.py From coffeegrindsize with MIT License

4 votes

def _hist_bin_auto(x, range):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x, range)
    sturges_bw = _hist_bin_sturges(x, range)
    del range  # unused
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Source File: histograms.py From Carnets with BSD 3-Clause "New" or "Revised" License

4 votes

def _hist_bin_auto(x, range):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x, range)
    sturges_bw = _hist_bin_sturges(x, range)
    del range  # unused
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Source File: histograms.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License

4 votes

def _hist_bin_auto(x):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x)
    sturges_bw = _hist_bin_sturges(x)
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Source File: histograms.py From twitter-stock-recommendation with MIT License

4 votes

def _hist_bin_auto(x):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bandwidth is non zero
    and the Sturges estimator if the FD bandwidth is 0.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small `x` and bad for data with limited
    variance. The Sturges estimator is quite good for small (<1000) datasets
    and is the default in the R language. This method gives good off the shelf
    behaviour.

    .. versionchanged:: 1.15.0
    If there is limited variance the IQR can be 0, which results in the
    FD bin width being 0 too. This is not a valid bin width, so
    ``np.histogram_bin_edges`` chooses 1 bin instead, which may not be optimal.
    If the IQR is 0, it's unlikely any variance based estimators will be of
    use, so we revert to the sturges estimator, which only uses the size of the
    dataset in its calculation.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.

    See Also
    --------
    _hist_bin_fd, _hist_bin_sturges
    """
    fd_bw = _hist_bin_fd(x)
    sturges_bw = _hist_bin_sturges(x)
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance, so we return a len dependent bw estimator
        return sturges_bw

# Private dict initialized at module load time

Python numpy.histogram_bin_edges() Examples