From e8be25e8f69ddfa6990aad2dbdd7bc255ec48c22 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 1 Aug 2023 13:29:48 -0400 Subject: [PATCH 01/12] Very early POC --- zarr/core.py | 33 +++++++++------- zarr/indexing.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 17 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 43ccdbaf7d..afe0e3a9be 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -2067,13 +2067,16 @@ def _process_chunk( fields, out_selection, partial_read_decode=False, + coords=None ): """Take binary data from storage and fill output array""" + shape = [c if isinstance(c, int) else c[coords[i]] + for i, c in enumerate(self._chunks)] if ( out_is_ndarray and not fields and is_contiguous_selection(out_selection) - and is_total_slice(chunk_selection, self._chunks) + and is_total_slice(chunk_selection, shape) and not self._filters and self._dtype != object ): @@ -2100,7 +2103,7 @@ def _process_chunk( if isinstance(cdata, UncompressedPartialReadBufferV3): cdata = cdata.read_full() chunk = ensure_ndarray_like(cdata).view(self._dtype) - chunk = chunk.reshape(self._chunks, order=self._order) + chunk = chunk.reshape(shape, order=self._order) np.copyto(dest, chunk) return @@ -2109,7 +2112,7 @@ def _process_chunk( if partial_read_decode: cdata.prepare_chunk() # size of chunk - tmp = np.empty_like(self._meta_array, shape=self._chunks, dtype=self.dtype) + tmp = np.empty_like(self._meta_array, shape=shape, dtype=self.dtype) index_selection = PartialChunkIterator(chunk_selection, self.chunks) for start, nitems, partial_out_selection in index_selection: expected_shape = [ @@ -2138,7 +2141,7 @@ def _process_chunk( return except ArrayIndexError: cdata = cdata.read_full() - chunk = self._decode_chunk(cdata) + chunk = self._decode_chunk(cdata, expected_shape=shape) # select data from chunk if fields: @@ -2223,7 +2226,7 @@ def _chunk_getitems( contexts = ConstantMap(ckeys, constant=Context(meta_array=self._meta_array)) cdatas = self.chunk_store.getitems(ckeys, contexts=contexts) - for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection): + for ckey, coords, chunk_select, out_select in zip(ckeys, lchunk_coords, lchunk_selection, lout_selection): if ckey in cdatas: self._process_chunk( out, @@ -2234,6 +2237,7 @@ def _chunk_getitems( fields, out_select, partial_read_decode=partial_read_decode, + coords=coords ) else: # check exception type @@ -2305,7 +2309,8 @@ def _chunk_setitem(self, chunk_coords, chunk_selection, value, fields=None): def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=None): ckey = self._chunk_key(chunk_coords) - cdata = self._process_for_setitem(ckey, chunk_selection, value, fields=fields) + cdata = self._process_for_setitem(ckey, chunk_selection, value, coords=chunk_coords, + fields=fields) # attempt to delete chunk if it only contains the fill value if (not self.write_empty_chunks) and all_equal(self.fill_value, cdata): @@ -2313,8 +2318,10 @@ def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=Non else: self.chunk_store[ckey] = self._encode_chunk(cdata) - def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): - if is_total_slice(chunk_selection, self._chunks) and not fields: + def _process_for_setitem(self, ckey, chunk_selection, value, coords=None, fields=None): + shape = [c if isinstance(c, int) else c[coords[i]] + for i, c in enumerate(self._chunks)] + if is_total_slice(chunk_selection, shape) and not fields: # totally replace chunk 
# optimization: we are completely replacing the chunk, so no need @@ -2324,7 +2331,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # setup array filled with value chunk = np.empty_like( - self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order + self._meta_array, shape=shape, dtype=self._dtype, order=self._order ) chunk.fill(value) @@ -2346,22 +2353,22 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # chunk not initialized if self._fill_value is not None: chunk = np.empty_like( - self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order + self._meta_array, shape=shape, dtype=self._dtype, order=self._order ) chunk.fill(self._fill_value) elif self._dtype == object: - chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) + chunk = np.empty(self.shape, dtype=self._dtype, order=self._order) else: # N.B., use zeros here so any region beyond the array has consistent # and compressible data chunk = np.zeros_like( - self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order + self._meta_array, shape=self.shape, dtype=self._dtype, order=self._order ) else: # decode chunk - chunk = self._decode_chunk(cdata) + chunk = self._decode_chunk(cdata, expected_shape=shape) if not chunk.flags.writeable: chunk = chunk.copy(order="K") diff --git a/zarr/indexing.py b/zarr/indexing.py index 487cc8b9d9..c8e0842bd5 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -163,6 +163,85 @@ def __iter__(self): yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) +class VarSliceDimIndexer: + def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: list[int]): + # normalize + start = dim_sel.start + if start is None: + start = 0 + elif start < 0: + start = dim_len - start + + stop = dim_sel.stop + if stop is None: + stop = dim_len + elif stop < 0: + stop = dim_len - stop + step = dim_sel.step or 1 + if step < 0: + raise NotImplementedError + + # store attributes + self.offsets = np.cumsum([0] + chunk_lengths) + self.dim_len = dim_len + self.dim_sel = dim_sel + self.projections = [] + + remainder = 0 + nfilled = 0 + for i in range(len(chunk_lengths)): + if start > self.offsets[i + 1]: + # not yet at first chunk + continue + if stop < self.offsets[i]: + # past last valid chunk + break + slice_start = max(start - self.offsets[i], 0 + remainder) + slice_end = min( + stop - self.offsets[i], self.offsets[i + 1] - self.offsets[i] + ) + if slice_start == slice_end: + continue + remainder = ( + (self.offsets[i] + slice_start) - self.offsets[i + 1] + ) % step + nelem = (slice_end - slice_start) // step + self.projections.append( + ChunkDimProjection( + i, + slice(slice_start, slice_end, step), + slice(nfilled, nfilled + nelem) + ) + ) + nfilled += nelem + + self.nitems = nfilled + + def __iter__(self): + yield from self.projections + + +class VarIntDimIndexer: + def __init__(self, dim_sel: int, dim_len: int, chunk_lengths: list[int]): + + self.offsets = np.cumsum([0] + chunk_lengths) + self.dim_len = dim_len + + # normalize + dim_sel = normalize_integer_selection(dim_sel, self.dim_len) + + # store attributes + self.dim_sel = dim_sel + self.nitems = 1 + + def __iter__(self): + for ix, off in enumerate(self.offsets): + if off > self.dim_sel: + break + ix -= 1 + yield ChunkDimProjection(ix, self.dim_sel - self.offsets[ix], None) + + def ceildiv(a, b): return math.ceil(a / b) @@ -339,10 +418,16 @@ def __init__(self, selection, array): for dim_sel, dim_len, dim_chunk_len in zip(selection, 
array._shape, array._chunks): if is_integer(dim_sel): - dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) + if isinstance(dim_chunk_len, int): + dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) + else: + dim_indexer = VarIntDimIndexer(dim_sel, dim_len, dim_chunk_len) elif is_slice(dim_sel): - dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) + if isinstance(dim_chunk_len, int): + dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) + else: + dim_indexer = VarSliceDimIndexer(dim_sel, dim_len, dim_chunk_len) else: raise IndexError( @@ -614,10 +699,16 @@ def __init__(self, selection, array): for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): if is_integer(dim_sel): - dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) + if isinstance(dim_chunk_len, int): + dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) + else: + dim_indexer = VarIntDimIndexer(dim_sel, dim_len, dim_chunk_len) elif isinstance(dim_sel, slice): - dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) + if isinstance(dim_chunk_len, int): + dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) + else: + dim_indexer = VarSliceDimIndexer(dim_sel, dim_len, dim_chunk_len) elif is_integer_array(dim_sel): dim_indexer = IntArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) From c50ee29b0b70e6d21559a20d915da5c767c91964 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Aug 2023 17:34:26 +0000 Subject: [PATCH 02/12] style: pre-commit fixes --- zarr/core.py | 19 ++++++++++--------- zarr/indexing.py | 12 +++--------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index afe0e3a9be..5d39ef2d18 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -2067,11 +2067,10 @@ def _process_chunk( fields, out_selection, partial_read_decode=False, - coords=None + coords=None, ): """Take binary data from storage and fill output array""" - shape = [c if isinstance(c, int) else c[coords[i]] - for i, c in enumerate(self._chunks)] + shape = [c if isinstance(c, int) else c[coords[i]] for i, c in enumerate(self._chunks)] if ( out_is_ndarray and not fields @@ -2226,7 +2225,9 @@ def _chunk_getitems( contexts = ConstantMap(ckeys, constant=Context(meta_array=self._meta_array)) cdatas = self.chunk_store.getitems(ckeys, contexts=contexts) - for ckey, coords, chunk_select, out_select in zip(ckeys, lchunk_coords, lchunk_selection, lout_selection): + for ckey, coords, chunk_select, out_select in zip( + ckeys, lchunk_coords, lchunk_selection, lout_selection + ): if ckey in cdatas: self._process_chunk( out, @@ -2237,7 +2238,7 @@ def _chunk_getitems( fields, out_select, partial_read_decode=partial_read_decode, - coords=coords + coords=coords, ) else: # check exception type @@ -2309,8 +2310,9 @@ def _chunk_setitem(self, chunk_coords, chunk_selection, value, fields=None): def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=None): ckey = self._chunk_key(chunk_coords) - cdata = self._process_for_setitem(ckey, chunk_selection, value, coords=chunk_coords, - fields=fields) + cdata = self._process_for_setitem( + ckey, chunk_selection, value, coords=chunk_coords, fields=fields + ) # attempt to delete chunk if it only contains the fill value if (not self.write_empty_chunks) and all_equal(self.fill_value, cdata): @@ -2319,8 +2321,7 @@ def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=Non self.chunk_store[ckey] = 
self._encode_chunk(cdata) def _process_for_setitem(self, ckey, chunk_selection, value, coords=None, fields=None): - shape = [c if isinstance(c, int) else c[coords[i]] - for i, c in enumerate(self._chunks)] + shape = [c if isinstance(c, int) else c[coords[i]] for i, c in enumerate(self._chunks)] if is_total_slice(chunk_selection, shape) and not fields: # totally replace chunk diff --git a/zarr/indexing.py b/zarr/indexing.py index c8e0842bd5..a1e8fb1b01 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -197,20 +197,14 @@ def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: list[int]): # past last valid chunk break slice_start = max(start - self.offsets[i], 0 + remainder) - slice_end = min( - stop - self.offsets[i], self.offsets[i + 1] - self.offsets[i] - ) + slice_end = min(stop - self.offsets[i], self.offsets[i + 1] - self.offsets[i]) if slice_start == slice_end: continue - remainder = ( - (self.offsets[i] + slice_start) - self.offsets[i + 1] - ) % step + remainder = ((self.offsets[i] + slice_start) - self.offsets[i + 1]) % step nelem = (slice_end - slice_start) // step self.projections.append( ChunkDimProjection( - i, - slice(slice_start, slice_end, step), - slice(nfilled, nfilled + nelem) + i, slice(slice_start, slice_end, step), slice(nfilled, nfilled + nelem) ) ) nfilled += nelem From 4f890ecac27874632e82e202ad7b371b6b5e6336 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 7 Aug 2023 14:12:35 +0200 Subject: [PATCH 03/12] Fix slices with steps, add tests --- zarr/indexing.py | 73 +++++++++++++++++++++++++++++-------- zarr/tests/test_indexing.py | 21 +++++++++++ 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/zarr/indexing.py b/zarr/indexing.py index a1e8fb1b01..9674990103 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -177,6 +177,9 @@ def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: list[int]): stop = dim_len elif stop < 0: stop = dim_len - stop + else: + stop = min(stop, dim_len) + step = dim_sel.step or 1 if step < 0: raise NotImplementedError @@ -187,27 +190,65 @@ def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: list[int]): self.dim_sel = dim_sel self.projections = [] - remainder = 0 - nfilled = 0 - for i in range(len(chunk_lengths)): - if start > self.offsets[i + 1]: - # not yet at first chunk - continue - if stop < self.offsets[i]: - # past last valid chunk - break - slice_start = max(start - self.offsets[i], 0 + remainder) - slice_end = min(stop - self.offsets[i], self.offsets[i + 1] - self.offsets[i]) - if slice_start == slice_end: + first_chunk, last_chunk = np.searchsorted(self.offsets[1:], [start, stop]) + + # Special case: single chunk + if first_chunk == last_chunk: + slice_start = start - self.offsets[first_chunk] + slice_end = stop - self.offsets[first_chunk] + + n_filled = ceildiv((slice_end - slice_start), step) + + self.projections.append( + ChunkDimProjection( + first_chunk, + slice( + start - self.offsets[first_chunk], + stop - self.offsets[first_chunk], + step + ), + slice(0, n_filled) + ) + ) + self.nitems = n_filled + return + + # First chunk, don't need to worry about step for starting pos + slice_start = start - self.offsets[first_chunk] + slice_end = chunk_lengths[first_chunk] + nfilled = ceildiv((slice_end - slice_start), step) + self.projections.append( + ChunkDimProjection( + first_chunk, + slice( + slice_start, + slice_end, + step, + ), + slice(0, nfilled) + ) + ) + for i in range(first_chunk + 1, last_chunk + 1): + rem = (self.offsets[i] - start) % step + + if rem != 0: + 
slice_start = step - rem + else: + slice_start = 0 + + slice_end = min(chunk_lengths[i], stop-self.offsets[i]) + slice_len = ceildiv((slice_end - slice_start), step) + if slice_end <= slice_start: continue - remainder = ((self.offsets[i] + slice_start) - self.offsets[i + 1]) % step - nelem = (slice_end - slice_start) // step + cur_nfilled = nfilled + slice_len self.projections.append( ChunkDimProjection( - i, slice(slice_start, slice_end, step), slice(nfilled, nfilled + nelem) + i, + slice(slice_start, slice_end, step), + slice(nfilled, cur_nfilled), ) ) - nfilled += nelem + nfilled = cur_nfilled self.nitems = nfilled diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 8a34c1e715..726b70d9c6 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1790,3 +1790,24 @@ def test_accessed_chunks(shape, chunks, ops): ) == 1 # Check that no other chunks were accessed assert len(delta_counts) == 0 + +@pytest.mark.parametrize( + "chunking, index", + [ + ([[10]], (slice(5, 7),)), + ([[1, 2, 2, 5]], (slice(None),)), + ([[1, 2, 2, 5]], (slice(2, 8),)), + ([[1, 2, 2, 5]], (slice(None, None, 2),)), + ([[1, 2, 2, 5]], (slice(1, None, 2),)), + ([[1, 2, 2, 5], 5], (slice(1, None, 2), slice(None))), + ] +) +def test_varchunk_indexing(chunking, index): + from functools import reduce + from operator import mul + shape = tuple(x * 2 if isinstance(x, int) else sum(x) for x in chunking) + np_array = np.arange(reduce(mul, shape)).reshape(shape) + var_chunked = zarr.array(np_array, chunks=chunking) + regular_chunked = zarr.array(np_array) + + np.testing.assert_equal(var_chunked[index], regular_chunked[index]) \ No newline at end of file From b9ba867e14286732acd1ab669617c7d5e63eee52 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 7 Aug 2023 14:37:04 +0200 Subject: [PATCH 04/12] More indexing test cases --- zarr/tests/test_indexing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 726b70d9c6..bbb6ddd12c 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1795,6 +1795,8 @@ def test_accessed_chunks(shape, chunks, ops): "chunking, index", [ ([[10]], (slice(5, 7),)), + ([[1, 9]], (slice(5, 7),)), + ([[1, 5, 4]], (slice(5, 7),)), ([[1, 2, 2, 5]], (slice(None),)), ([[1, 2, 2, 5]], (slice(2, 8),)), ([[1, 2, 2, 5]], (slice(None, None, 2),)), From e12a84920780e8c409ef9da4f3476f349e630831 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 7 Aug 2023 15:04:13 +0200 Subject: [PATCH 05/12] Fix indexing with offsets and steps --- zarr/indexing.py | 57 +++++++++++-------------------------- zarr/tests/test_indexing.py | 12 +++++++- 2 files changed, 27 insertions(+), 42 deletions(-) diff --git a/zarr/indexing.py b/zarr/indexing.py index 9674990103..81a3f2d3ed 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -192,54 +192,29 @@ def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: list[int]): first_chunk, last_chunk = np.searchsorted(self.offsets[1:], [start, stop]) - # Special case: single chunk - if first_chunk == last_chunk: - slice_start = start - self.offsets[first_chunk] - slice_end = stop - self.offsets[first_chunk] + rem = 0 + nfilled = 0 + next_slice_start = start - self.offsets[first_chunk] - n_filled = ceildiv((slice_end - slice_start), step) - - self.projections.append( - ChunkDimProjection( - first_chunk, - slice( - start - self.offsets[first_chunk], - stop - self.offsets[first_chunk], - step - ), - slice(0, n_filled) - ) - ) - self.nitems = n_filled - 
return - - # First chunk, don't need to worry about step for starting pos - slice_start = start - self.offsets[first_chunk] - slice_end = chunk_lengths[first_chunk] - nfilled = ceildiv((slice_end - slice_start), step) - self.projections.append( - ChunkDimProjection( - first_chunk, - slice( - slice_start, - slice_end, - step, - ), - slice(0, nfilled) - ) - ) - for i in range(first_chunk + 1, last_chunk + 1): - rem = (self.offsets[i] - start) % step + for i in range(first_chunk, last_chunk + 1): + # Setup slice for this chunk + slice_start = next_slice_start + slice_end = min(chunk_lengths[i], stop-self.offsets[i]) + slice_len = ceildiv((slice_end - slice_start), step) + # Prepare for next iteration + # We do this ahead of the next iteration so that we don't need to special case the first chunk + rem = (self.offsets[i + 1] - start) % step if rem != 0: - slice_start = step - rem + next_slice_start = step - rem else: - slice_start = 0 + next_slice_start = 0 - slice_end = min(chunk_lengths[i], stop-self.offsets[i]) - slice_len = ceildiv((slice_end - slice_start), step) + # Skip iteration if slice is empty if slice_end <= slice_start: continue + + # Add projection for this chunk + update nfilled cur_nfilled = nfilled + slice_len self.projections.append( ChunkDimProjection( diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index bbb6ddd12c..98b06b245f 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1794,13 +1794,23 @@ def test_accessed_chunks(shape, chunks, ops): @pytest.mark.parametrize( "chunking, index", [ + # Single chunk ([[10]], (slice(5, 7),)), + # Sample from single chunk ([[1, 9]], (slice(5, 7),)), + # Slice across multiple chunks ([[1, 5, 4]], (slice(5, 7),)), + # Full slice across multiple chunks ([[1, 2, 2, 5]], (slice(None),)), + # slice across multiple chunks ([[1, 2, 2, 5]], (slice(2, 8),)), + # slice across multiple chunks with step ([[1, 2, 2, 5]], (slice(None, None, 2),)), + # slice across multiple chunks with start and step ([[1, 2, 2, 5]], (slice(1, None, 2),)), + # Skip many chunks + ([[3, 3, 1, 1, 1, 1, 3, 1, 3]], (slice(1, None, 4),)), + # Slice 2d case ([[1, 2, 2, 5], 5], (slice(1, None, 2), slice(None))), ] ) @@ -1812,4 +1822,4 @@ def test_varchunk_indexing(chunking, index): var_chunked = zarr.array(np_array, chunks=chunking) regular_chunked = zarr.array(np_array) - np.testing.assert_equal(var_chunked[index], regular_chunked[index]) \ No newline at end of file + np.testing.assert_equal(var_chunked[index], regular_chunked[index]) From 467293703e5ae2c624e8e380adb262d0517b85a7 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 7 Aug 2023 17:09:48 +0200 Subject: [PATCH 06/12] Get boolean + integer indexing working --- zarr/indexing.py | 148 +++++++++++++++++++++++++++++++++++- zarr/tests/test_indexing.py | 10 ++- 2 files changed, 155 insertions(+), 3 deletions(-) diff --git a/zarr/indexing.py b/zarr/indexing.py index 81a3f2d3ed..3eb41ef1b5 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -522,6 +522,55 @@ def __iter__(self): yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) +class VarBoolArrayDimIndexer: + def __init__(self, dim_sel, dim_len, dim_chunk_lens: list[int]): + + # check number of dimensions + if not is_bool_array(dim_sel, 1): + raise IndexError( + "Boolean arrays in an orthogonal selection must " "be 1-dimensional only" + ) + + # check shape + if dim_sel.shape[0] != dim_len: + raise IndexError( + "Boolean array has the wrong length for dimension; " + "expected {}, got 
{}".format(dim_len, dim_sel.shape[0]) + ) + + # store attributes + self.dim_sel = dim_sel + self.dim_len = dim_len + self.dim_chunk_lens = dim_chunk_lens + self.nchunks = len(dim_chunk_lens) + self.offsets = np.cumsum([0] + dim_chunk_lens) + + # precompute number of selected items for each chunk + self.chunk_nitems = np.zeros(self.nchunks, dtype="i8") + np.add.reduceat(dim_sel, self.offsets[:-1], out=self.chunk_nitems) + self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) + self.nitems = self.chunk_nitems_cumsum[-1] + self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] + + def __iter__(self): + + # iterate over chunks with at least one item + for dim_chunk_ix in self.dim_chunk_ixs: + + # find region in chunk + dim_offset = self.offsets[dim_chunk_ix] + dim_chunk_sel = self.dim_sel[dim_offset : self.offsets[dim_chunk_ix + 1]] + + # find region in output + if dim_chunk_ix == 0: + start = 0 + else: + start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] + stop = self.chunk_nitems_cumsum[dim_chunk_ix] + dim_out_sel = slice(start, stop) + + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + class Order: UNKNOWN = 0 INCREASING = 1 @@ -555,6 +604,95 @@ def boundscheck_indices(x, dim_len): raise BoundsCheckError(dim_len) +class VarIntArrayDimIndexer: + """Integer array selection against a single dimension.""" + + def __init__( + self, + dim_sel, + dim_len, + dim_chunk_lens: list[int], + wraparound=True, + boundscheck=True, + order=Order.UNKNOWN, + ): + + # ensure 1d array + dim_sel = np.asanyarray(dim_sel) + if not is_integer_array(dim_sel, 1): + raise IndexError( + "integer arrays in an orthogonal selection must be " "1-dimensional only" + ) + + # handle wraparound + if wraparound: + wraparound_indices(dim_sel, dim_len) + + # handle out of bounds + if boundscheck: + boundscheck_indices(dim_sel, dim_len) + + # store attributes + self.dim_len = dim_len + self.dim_chunk_lens = dim_chunk_lens + self.nchunks = len(dim_chunk_lens) + self.nitems = len(dim_sel) + self.offsets = np.cumsum([0] + dim_chunk_lens) + + # determine which chunk is needed for each selection item + # note: for dense integer selections, the division operation here is the + # bottleneck + dim_sel_chunk = np.digitize(dim_sel, self.offsets[1:]) + + # determine order of indices + if order == Order.UNKNOWN: + order = Order.check(dim_sel) + self.order = order + + if self.order == Order.INCREASING: + self.dim_sel = dim_sel + self.dim_out_sel = None + elif self.order == Order.DECREASING: + self.dim_sel = dim_sel[::-1] + # TODO should be possible to do this without creating an arange + self.dim_out_sel = np.arange(self.nitems - 1, -1, -1) + else: + # sort indices to group by chunk + self.dim_out_sel = np.argsort(dim_sel_chunk) + self.dim_sel = np.take(dim_sel, self.dim_out_sel) + + # precompute number of selected items for each chunk + self.chunk_nitems = np.bincount(dim_sel_chunk, minlength=self.nchunks) + + # find chunks that we need to visit + self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] + + # compute offsets into the output array + self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) + + def __iter__(self): + + for dim_chunk_ix in self.dim_chunk_ixs: + + # find region in output + if dim_chunk_ix == 0: + start = 0 + else: + start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] + stop = self.chunk_nitems_cumsum[dim_chunk_ix] + if self.order == Order.INCREASING: + dim_out_sel = slice(start, stop) + else: + dim_out_sel = self.dim_out_sel[start:stop] + + # find region in chunk + dim_offset = 
self.offsets[dim_chunk_ix] + dim_chunk_sel = self.dim_sel[start:stop] - dim_offset + + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + + + class IntArrayDimIndexer: """Integer array selection against a single dimension.""" @@ -721,10 +859,16 @@ def __init__(self, selection, array): dim_indexer = VarSliceDimIndexer(dim_sel, dim_len, dim_chunk_len) elif is_integer_array(dim_sel): - dim_indexer = IntArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) + if isinstance(dim_chunk_len, int): + dim_indexer = IntArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) + else: + dim_indexer = VarIntArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) elif is_bool_array(dim_sel): - dim_indexer = BoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) + if isinstance(dim_chunk_len, int): + dim_indexer = BoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) + else: + dim_indexer = VarBoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) else: raise IndexError( diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 98b06b245f..894570d1b3 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1812,6 +1812,12 @@ def test_accessed_chunks(shape, chunks, ops): ([[3, 3, 1, 1, 1, 1, 3, 1, 3]], (slice(1, None, 4),)), # Slice 2d case ([[1, 2, 2, 5], 5], (slice(1, None, 2), slice(None))), + # Fancy indexing + ([[1, 2, 2, 5], 1], (np.array([0, 3, 7]),)), + # out of order fancy indexing + ([[1, 3, 1, 1, 5], 1], (np.array([6, 6, 6, 0, 10]),)), + # boolean indexing + ([[2, 1, 2], 2], ([True, False, False, True, True], slice(None))), ] ) def test_varchunk_indexing(chunking, index): @@ -1822,4 +1828,6 @@ def test_varchunk_indexing(chunking, index): var_chunked = zarr.array(np_array, chunks=chunking) regular_chunked = zarr.array(np_array) - np.testing.assert_equal(var_chunked[index], regular_chunked[index]) + expected = regular_chunked[index] + actual = var_chunked[index] + np.testing.assert_equal(actual, expected) From 8033a0a24b3cf330cf9173dca618fbb991cafb94 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Aug 2023 14:17:21 +0000 Subject: [PATCH 07/12] style: pre-commit fixes --- zarr/indexing.py | 6 +++--- zarr/tests/test_indexing.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/zarr/indexing.py b/zarr/indexing.py index 3eb41ef1b5..31185d3fe2 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -199,7 +199,7 @@ def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: list[int]): for i in range(first_chunk, last_chunk + 1): # Setup slice for this chunk slice_start = next_slice_start - slice_end = min(chunk_lengths[i], stop-self.offsets[i]) + slice_end = min(chunk_lengths[i], stop - self.offsets[i]) slice_len = ceildiv((slice_end - slice_start), step) # Prepare for next iteration @@ -218,7 +218,7 @@ def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: list[int]): cur_nfilled = nfilled + slice_len self.projections.append( ChunkDimProjection( - i, + i, slice(slice_start, slice_end, step), slice(nfilled, cur_nfilled), ) @@ -571,6 +571,7 @@ def __iter__(self): yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + class Order: UNKNOWN = 0 INCREASING = 1 @@ -692,7 +693,6 @@ def __iter__(self): yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - class IntArrayDimIndexer: """Integer array selection against a single dimension.""" diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 894570d1b3..eb9e5a60cb 100644 
--- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1791,6 +1791,7 @@ def test_accessed_chunks(shape, chunks, ops): # Check that no other chunks were accessed assert len(delta_counts) == 0 + @pytest.mark.parametrize( "chunking, index", [ @@ -1818,11 +1819,12 @@ def test_accessed_chunks(shape, chunks, ops): ([[1, 3, 1, 1, 5], 1], (np.array([6, 6, 6, 0, 10]),)), # boolean indexing ([[2, 1, 2], 2], ([True, False, False, True, True], slice(None))), - ] + ], ) def test_varchunk_indexing(chunking, index): from functools import reduce from operator import mul + shape = tuple(x * 2 if isinstance(x, int) else sum(x) for x in chunking) np_array = np.arange(reduce(mul, shape)).reshape(shape) var_chunked = zarr.array(np_array, chunks=chunking) From 11245007a560522b6ff2b9bf6b45a7861ae670c1 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 9 Aug 2023 10:54:36 -0400 Subject: [PATCH 08/12] types --- zarr/indexing.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/zarr/indexing.py b/zarr/indexing.py index 3eb41ef1b5..22d3b18f8b 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -1,7 +1,10 @@ +from __future__ import annotations + import collections import itertools import math import numbers +from typing import List import numpy as np @@ -164,7 +167,7 @@ def __iter__(self): class VarSliceDimIndexer: - def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: list[int]): + def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: List[int]): # normalize start = dim_sel.start if start is None: @@ -232,7 +235,7 @@ def __iter__(self): class VarIntDimIndexer: - def __init__(self, dim_sel: int, dim_len: int, chunk_lengths: list[int]): + def __init__(self, dim_sel: int, dim_len: int, chunk_lengths: List[int]): self.offsets = np.cumsum([0] + chunk_lengths) self.dim_len = dim_len @@ -523,7 +526,7 @@ def __iter__(self): class VarBoolArrayDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_lens: list[int]): + def __init__(self, dim_sel, dim_len, dim_chunk_lens: List[int]): # check number of dimensions if not is_bool_array(dim_sel, 1): @@ -611,7 +614,7 @@ def __init__( self, dim_sel, dim_len, - dim_chunk_lens: list[int], + dim_chunk_lens: List[int], wraparound=True, boundscheck=True, order=Order.UNKNOWN, From 28315217ff3a1445a6b6a1ae888d550e24989831 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 14 Aug 2023 11:10:47 -0400 Subject: [PATCH 09/12] some fix --- zarr/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 5d39ef2d18..06d848558b 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -2070,7 +2070,7 @@ def _process_chunk( coords=None, ): """Take binary data from storage and fill output array""" - shape = [c if isinstance(c, int) else c[coords[i]] for i, c in enumerate(self._chunks)] + shape = tuple(c if isinstance(c, int) else c[coords[i]] for i, c in enumerate(self._chunks)) if ( out_is_ndarray and not fields @@ -2321,7 +2321,7 @@ def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=Non self.chunk_store[ckey] = self._encode_chunk(cdata) def _process_for_setitem(self, ckey, chunk_selection, value, coords=None, fields=None): - shape = [c if isinstance(c, int) else c[coords[i]] for i, c in enumerate(self._chunks)] + shape = tuple(c if isinstance(c, int) else c[coords[i]] for i, c in enumerate(self._chunks)) if is_total_slice(chunk_selection, shape) and not fields: # totally replace chunk @@ -2358,12 +2358,12 @@ def _process_for_setitem(self, 
ckey, chunk_selection, value, coords=None, fields ) chunk.fill(self._fill_value) elif self._dtype == object: - chunk = np.empty(self.shape, dtype=self._dtype, order=self._order) + chunk = np.empty(shape, dtype=self._dtype, order=self._order) else: # N.B., use zeros here so any region beyond the array has consistent # and compressible data chunk = np.zeros_like( - self._meta_array, shape=self.shape, dtype=self._dtype, order=self._order + self._meta_array, shape=shape, dtype=self._dtype, order=self._order ) else: From 0d10ccb2fdf6db1bd49a1c495a252957a7247a9c Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 14 Aug 2023 11:21:57 -0400 Subject: [PATCH 10/12] fix more --- zarr/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/util.py b/zarr/util.py index ea0dd9fcec..ad7f29a009 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -175,7 +175,7 @@ def normalize_chunks(chunks: Any, shape: Tuple[int, ...], typesize: int) -> Tupl if -1 in chunks or None in chunks: chunks = tuple(s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks)) - chunks = tuple(int(c) for c in chunks) + chunks = tuple(list(int(_) for _ in c) if isinstance(c, list) else int(c) for c in chunks) return chunks From 882f301ec0813d9b983111771ee6c94fa57db927 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 14 Aug 2023 15:12:06 -0400 Subject: [PATCH 11/12] some coverage --- zarr/indexing.py | 8 ++++---- zarr/tests/test_indexing.py | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/zarr/indexing.py b/zarr/indexing.py index e06d624e44..26ef6d900b 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -173,13 +173,13 @@ def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: List[int]): if start is None: start = 0 elif start < 0: - start = dim_len - start + start = dim_len + start stop = dim_sel.stop if stop is None: stop = dim_len elif stop < 0: - stop = dim_len - stop + stop = dim_len + stop else: stop = min(stop, dim_len) @@ -205,8 +205,8 @@ def __init__(self, dim_sel: slice, dim_len: int, chunk_lengths: List[int]): slice_end = min(chunk_lengths[i], stop - self.offsets[i]) slice_len = ceildiv((slice_end - slice_start), step) - # Prepare for next iteration - # We do this ahead of the next iteration so that we don't need to special case the first chunk + # Prepare for next iteration. 
We do this ahead of the next iteration + # so that we don't need to special case the first chunk rem = (self.offsets[i + 1] - start) % step if rem != 0: next_slice_start = step - rem diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index eb9e5a60cb..66c194396f 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1801,6 +1801,8 @@ def test_accessed_chunks(shape, chunks, ops): ([[1, 9]], (slice(5, 7),)), # Slice across multiple chunks ([[1, 5, 4]], (slice(5, 7),)), + # Slice across multiple chunks, negative + ([[1, 5, 4]], (slice(-6, -2),)), # Full slice across multiple chunks ([[1, 2, 2, 5]], (slice(None),)), # slice across multiple chunks @@ -1819,6 +1821,8 @@ def test_accessed_chunks(shape, chunks, ops): ([[1, 3, 1, 1, 5], 1], (np.array([6, 6, 6, 0, 10]),)), # boolean indexing ([[2, 1, 2], 2], ([True, False, False, True, True], slice(None))), + # single int indexing + ([[2, 1, 2]], (1,)), ], ) def test_varchunk_indexing(chunking, index): @@ -1833,3 +1837,26 @@ def test_varchunk_indexing(chunking, index): expected = regular_chunked[index] actual = var_chunked[index] np.testing.assert_equal(actual, expected) + + +# single int indexing +@pytest.mark.parametrize( + "chunking, index, err", + [ + # negative step + ([[2, 1, 2]], (slice(None, None, -1)), NotImplementedError), + # boolean, wrong length + ([[2, 1, 2], 2], ([True, False, False, True], slice(None)), IndexError), + # boolean orthogonal + ([[2, 1, 2], 2], ([True, False, False, True, True], [True, False]), IndexError), + ] +) +def test_varchunk_errors(chunking, index, err): + from functools import reduce + from operator import mul + + shape = tuple(x * 2 if isinstance(x, int) else sum(x) for x in chunking) + np_array = np.arange(reduce(mul, shape)).reshape(shape) + var_chunked = zarr.array(np_array, chunks=chunking) + with pytest.raises(err): + var_chunked[index] From f8d9bf2672a80846a866aa9967ce5331fd1f1e87 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 14 Aug 2023 19:12:47 +0000 Subject: [PATCH 12/12] style: pre-commit fixes --- zarr/tests/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 66c194396f..3d43780610 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1849,7 +1849,7 @@ def test_varchunk_indexing(chunking, index): ([[2, 1, 2], 2], ([True, False, False, True], slice(None)), IndexError), # boolean orthogonal ([[2, 1, 2], 2], ([True, False, False, True, True], [True, False]), IndexError), - ] + ], ) def test_varchunk_errors(chunking, index, err): from functools import reduce
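
A minimal usage sketch of the variable-chunking behaviour this patch series exercises, modelled on the new test_varchunk_indexing cases in zarr/tests/test_indexing.py (the list-of-lists chunks= form and the exact chunk lengths below are taken from those tests and are assumptions, not a settled public API):

    import numpy as np
    import zarr

    # Per-dimension chunk specification: a list of chunk lengths for a
    # variably chunked axis (the lengths must sum to the array extent
    # along that axis), a plain int for regularly chunked axes.
    data = np.arange(50).reshape(10, 5)
    z = zarr.array(data, chunks=[[1, 2, 2, 5], 5])

    # Slices with a start and step along the variable axis go through the
    # VarSliceDimIndexer added in zarr/indexing.py; results should match
    # indexing the plain NumPy array.
    np.testing.assert_equal(z[1::2, :], data[1::2, :])

    # Boolean masks on the variable axis are handled by VarBoolArrayDimIndexer,
    # mirroring the ([True, False, ...], slice(None)) test case.
    mask = np.zeros(10, dtype=bool)
    mask[[0, 3, 7]] = True
    np.testing.assert_equal(z[mask, :], data[mask, :])

Integer-array (fancy) selections along a variable axis are routed to VarIntArrayDimIndexer in the same way; negative slice steps are not implemented by VarSliceDimIndexer and raise NotImplementedError, as covered by test_varchunk_errors.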