Skip to content

Commit

Permalink
Show nchunks on visualization
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite committed Feb 17, 2025
1 parent a6d12a3 commit be0ecd2
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 33 deletions.
2 changes: 2 additions & 0 deletions cubed/core/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,8 @@ def visualize(
tooltip += f"chunk memory: {chunkmem}\n"
if hasattr(target, "nbytes"):
tooltip += f"nbytes: {memory_repr(target.nbytes)}\n"
if hasattr(target, "nchunks"):
tooltip += f"nchunks: {target.nchunks}\n"

del d["target"]

Expand Down
45 changes: 45 additions & 0 deletions cubed/storage/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import math
from functools import reduce
from itertools import starmap
from operator import mul

import numpy as np

from cubed.types import T_DType, T_RegularChunks, T_Shape


class ArrayMetadata:
def __init__(
self,
shape: T_Shape,
dtype: T_DType,
chunks: T_RegularChunks,
):
self.shape = shape
self.dtype = np.dtype(dtype)
self.chunks = chunks

@property
def size(self) -> int:
"""Number of elements in the array."""
return reduce(mul, self.shape, 1)

@property
def nbytes(self) -> int:
"""Number of bytes in array"""
return self.size * self.dtype.itemsize

@property
def _cdata_shape(self) -> T_Shape:
"""The shape of the chunk grid for this array."""
return tuple(
starmap(
lambda s, c: math.ceil(s / c),
zip(self.shape, self.chunks, strict=False),
)
)

@property
def nchunks(self) -> int:
"""Number of chunks in array"""
return reduce(mul, self._cdata_shape, 1)
25 changes: 9 additions & 16 deletions cubed/storage/virtual.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@

from cubed.backend_array_api import namespace as nxp
from cubed.backend_array_api import numpy_array_to_backend_array
from cubed.storage.types import ArrayMetadata
from cubed.types import T_DType, T_RegularChunks, T_Shape
from cubed.utils import array_memory, broadcast_trick, memory_repr


class VirtualEmptyArray:
class VirtualEmptyArray(ArrayMetadata):
"""An array that is never materialized (in memory or on disk) and contains empty values."""

def __init__(
Expand All @@ -19,9 +20,7 @@ def __init__(
dtype: T_DType,
chunks: T_RegularChunks,
):
self.shape = shape
self.dtype = np.dtype(dtype)
self.chunks = chunks
super().__init__(shape, dtype, chunks)

def __getitem__(self, key):
idx = ndindex[key]
Expand All @@ -35,7 +34,7 @@ def chunkmem(self):
return array_memory(self.dtype, (1,))


class VirtualFullArray:
class VirtualFullArray(ArrayMetadata):
"""An array that is never materialized (in memory or on disk) and contains a single fill value."""

def __init__(
Expand All @@ -45,9 +44,7 @@ def __init__(
chunks: T_RegularChunks,
fill_value: Any = None,
):
self.shape = shape
self.dtype = np.dtype(dtype)
self.chunks = chunks
super().__init__(shape, dtype, chunks)
self.fill_value = fill_value

def __getitem__(self, key):
Expand All @@ -64,15 +61,13 @@ def chunkmem(self):
return array_memory(self.dtype, (1,))


class VirtualOffsetsArray:
class VirtualOffsetsArray(ArrayMetadata):
"""An array that is never materialized (in memory or on disk) and contains sequentially incrementing integers."""

def __init__(self, shape: T_Shape):
dtype = nxp.int32
chunks = (1,) * len(shape)
self.shape = shape
self.dtype = np.dtype(dtype)
self.chunks = chunks
super().__init__(shape, dtype, chunks)

def __getitem__(self, key):
if key == () and self.shape == ():
Expand All @@ -82,7 +77,7 @@ def __getitem__(self, key):
)


class VirtualInMemoryArray:
class VirtualInMemoryArray(ArrayMetadata):
"""A small array that is held in memory but never materialized on disk."""

def __init__(
Expand All @@ -96,9 +91,7 @@ def __init__(
f"Size of in memory array is {memory_repr(array.nbytes)} which exceeds maximum of {memory_repr(max_nbytes)}. Consider loading the array from storage using `from_array`."
)
self.array = array
self.shape = array.shape
self.dtype = array.dtype
self.chunks = chunks
super().__init__(array.shape, array.dtype, chunks)

def __getitem__(self, key):
return self.array.__getitem__(key)
Expand Down
20 changes: 3 additions & 17 deletions cubed/storage/zarr.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
from operator import mul
from typing import Optional, Union

import numpy as np
import zarr
from toolz import reduce

from cubed.storage.backend import open_backend_array
from cubed.storage.types import ArrayMetadata
from cubed.types import T_DType, T_RegularChunks, T_Shape, T_Store


class LazyZarrArray:
class LazyZarrArray(ArrayMetadata):
"""A Zarr array that may not have been written to storage yet.
On creation, a normal Zarr array's metadata is immediately written to storage,
Expand All @@ -27,23 +25,11 @@ def __init__(
**kwargs,
):
"""Create a Zarr array lazily in memory."""
self.shape = shape
self.dtype = np.dtype(dtype)
self.chunks = chunks
super().__init__(shape, dtype, chunks)
self.store = store
self.path = path
self.kwargs = kwargs

@property
def size(self):
"""Number of elements in the array."""
return reduce(mul, self.shape, 1)

@property
def nbytes(self) -> int:
"""Number of bytes in array"""
return self.size * self.dtype.itemsize

def create(self, mode: str = "w-") -> zarr.Array:
"""Create the Zarr array in storage.
Expand Down

0 comments on commit be0ecd2

Please sign in to comment.