Skip to content

Commit

Permalink
Make bins definition consistent with np.histogram (#20)
Browse files Browse the repository at this point in the history
* force last bin to be right edge inclusive

* added explanation to docstrings
  • Loading branch information
dougiesquire authored Sep 24, 2020
1 parent 4ae9f0a commit 7ba35ba
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 0 deletions.
6 changes: 6 additions & 0 deletions doc/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ GitHub repo: `https://github.com/xgcm/xhistogram <https://github.com/xgcm/xhisto
Release History
---------------

v0.1.?
~~~~~

- Aligned definition of `bins` with `numpy.histogram` (:pr:`???`)
By `Dougie Squire <https://github.com/dougiesquire>`_.

v0.1.1
~~~~~~

Expand Down
12 changes: 12 additions & 0 deletions xhistogram/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,15 @@ def _histogram_2d_vectorized(*args, bins=None, weights=None, density=False,
# https://github.com/numpy/numpy/blob/9c98662ee2f7daca3f9fae9d5144a9a8d3cabe8c/numpy/lib/histograms.py#L864-L882
# for now we stick with `digitize` because it's easy to understand how it works

# Add small increment to the last bin edge to make the final bin right-edge inclusive
# Note, this is the approach taken by sklearn, e.g.
# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/calibration.py#L592
# but a better approach would be to use something like _search_sorted_inclusive() in
# numpy histogram. This is an additional motivation for moving to searchsorted
bins = [np.concatenate((
b[:-1],
b[-1:] + 1e-8)) for b in bins]

# the maximum possible value of of digitize is nbins
# for right=False:
# - 0 corresponds to a < b[0]
Expand Down Expand Up @@ -154,6 +163,9 @@ def histogram(*args, bins=None, axis=None, weights=None, density=False,
* A combination [int, array] or [array, int], where int
is the number of bins and array is the bin edges.
When bin edges are specified, all but the last (righthand-most) bin include
the left edge and exclude the right edge. The last bin includes both edges.
A ``TypeError`` will be raised if ``args`` contains dask arrays and
``bins`` are not specified explicitly as a list of arrays.
axis : None or int or tuple of ints, optional
Expand Down
21 changes: 21 additions & 0 deletions xhistogram/test/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,27 @@ def test_histogram_results_1d_weighted_broadcasting(block_size):
np.testing.assert_array_equal(2*h, h_w)


@pytest.mark.parametrize('block_size', [None, 1, 2])
def test_histogram_right_edge(block_size):
"""Test that last bin is both left- and right-edge inclusive as it
is for numpy.histogram
"""
nrows, ncols = 5, 20
data = np.ones((nrows, ncols))
bins = np.array([0, 0.5, 1]) # All data at rightmost edge

h = histogram(data, bins=bins, axis=1, block_size=block_size)
assert h.shape == (nrows, len(bins)-1)

# make sure we get the same thing as histogram (all data in the last bin)
hist, _ = np.histogram(data, bins=bins)
np.testing.assert_array_equal(hist, h.sum(axis=0))

# now try with no axis
h_na = histogram(data, bins=bins, block_size=block_size)
np.testing.assert_array_equal(hist, h_na)


def test_histogram_results_2d():
nrows, ncols = 5, 20
data_a = np.random.randn(nrows, ncols)
Expand Down
3 changes: 3 additions & 0 deletions xhistogram/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def histogram(*args, bins=None, dim=None, weights=None, density=False,
* A combination [int, array] or [array, int], where int
is the number of bins and array is the bin edges.
When bin edges are specified, all but the last (righthand-most) bin include
the left edge and exclude the right edge. The last bin includes both edges.
A ``TypeError`` will be raised if ``args`` contains dask arrays and
``bins`` are not specified explicitly as a list of arrays.
dim : tuple of strings, optional
Expand Down

0 comments on commit 7ba35ba

Please sign in to comment.