Skip to content

Commit

Permalink
new layout for metadata classes; begin dataclassing; add hierarchy mo…
Browse files Browse the repository at this point in the history
…dels
  • Loading branch information
d-v-b committed Jan 22, 2024
1 parent 1a98886 commit f357773
Show file tree
Hide file tree
Showing 31 changed files with 823 additions and 708 deletions.
11 changes: 6 additions & 5 deletions zarr/tests/test_codecs_v3.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,31 @@
from __future__ import annotations
from dataclasses import dataclass

import json
from typing import Iterator, List, Literal, Optional
from attr import frozen

import numpy as np
import pytest
import zarr
from zarr.v3 import codecs
from zarr.v3.array import Array, AsyncArray
from zarr.v3.common import Selection
from zarr.v3.types import Selection
from zarr.v3.indexing import morton_order_iter
from zarr.v3.metadata import CodecMetadata, ShardingCodecIndexLocation, runtime_configuration
from zarr.v3.common import runtime_configuration
from zarr.v3.metadata.v3.array import CodecMetadata, ShardingCodecIndexLocation

from zarr.v3.store import MemoryStore, Store


@frozen
@dataclass(frozen=True)
class _AsyncArrayProxy:
array: AsyncArray

def __getitem__(self, selection: Selection) -> _AsyncArraySelectionProxy:
return _AsyncArraySelectionProxy(self.array, selection)


@frozen
@dataclass(frozen=True)
class _AsyncArraySelectionProxy:
array: AsyncArray
selection: Selection
Expand Down
12 changes: 6 additions & 6 deletions zarr/v3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

import zarr.v3.codecs # noqa: F401
from zarr.v3.array import Array # noqa: F401
from zarr.v3.array_v2 import ArrayV2 # noqa: F401
# from zarr.v3.array_v2 import ArrayV2
from zarr.v3.common import RuntimeConfiguration # noqa: F401
from zarr.v3.group import Group # noqa: F401
from zarr.v3.group_v2 import GroupV2 # noqa: F401
from zarr.v3.metadata import RuntimeConfiguration, runtime_configuration # noqa: F401
from zarr.v3.common import runtime_configuration # noqa: F401
from zarr.v3.store import ( # noqa: F401
LocalStore,
RemoteStore,
Expand All @@ -22,18 +22,18 @@
async def open_auto_async(
store: StoreLike,
runtime_configuration_: RuntimeConfiguration = RuntimeConfiguration(),
) -> Union[Array, ArrayV2, Group, GroupV2]:
) -> Union[Array, Group]:
store_path = make_store_path(store)
try:
return await Group.open_or_array(store_path, runtime_configuration=runtime_configuration_)
except KeyError:
return await GroupV2.open_or_array(store_path, runtime_configuration_)
return await Group.open_or_array(store_path, runtime_configuration_)


def open_auto(
store: StoreLike,
runtime_configuration_: RuntimeConfiguration = RuntimeConfiguration(),
) -> Union[Array, ArrayV2, Group, GroupV2]:
) -> Union[Array, Group]:
return _sync(
open_auto_async(store, runtime_configuration_),
runtime_configuration_.asyncio_loop,
Expand Down
25 changes: 15 additions & 10 deletions zarr/v3/abc/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
import numpy as np

from zarr.v3.abc.store import ReadStore, WriteStore
from zarr.v3.common import Selection

from zarr.v3.types import Selection

class BaseArray(ABC):
@abstractproperty
@property
@abstractmethod
def store_path(self) -> str: # TODO: rename to `path`?
"""Path to this array in the underlying store."""
...

@abstractproperty
@property
@abstractmethod
def dtype(self) -> np.dtype:
"""Data type of the array elements.
Expand All @@ -25,7 +26,8 @@ def dtype(self) -> np.dtype:
"""
...

@abstractproperty
@property
@abstractmethod
def ndim(self) -> int:
"""Number of array dimensions (axes).
Expand All @@ -36,7 +38,8 @@ def ndim(self) -> int:
"""
...

@abstractproperty
@property
@abstractmethod
def shape(self) -> Tuple[int, ...]:
"""Array dimensions.
Expand All @@ -47,7 +50,8 @@ def shape(self) -> Tuple[int, ...]:
"""
...

@abstractproperty
@property
@abstractmethod
def size(self) -> int:
"""Number of elements in the array.
Expand All @@ -57,7 +61,8 @@ def size(self) -> int:
number of elements in an array.
"""

@abstractproperty
@property
@abstractmethod
def attrs(self) -> Dict[str, Any]:
"""Array attributes.
Expand All @@ -68,7 +73,8 @@ def attrs(self) -> Dict[str, Any]:
"""
...

@abstractproperty
@property
@abstractmethod
def info(self) -> Any:
"""Report some diagnostic information about the array.
Expand All @@ -78,7 +84,6 @@ def info(self) -> Any:
"""
...


class AsynchronousArray(BaseArray):
"""This class can be implemented as a v2 or v3 array"""

Expand Down
6 changes: 4 additions & 2 deletions zarr/v3/abc/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@

import numpy as np

from zarr.v3.common import BytesLike, SliceSelection
from zarr.v3.types import SliceSelection
from zarr.v3.metadata.v3.array import CodecMetadata
from zarr.v3.store import StorePath
from zarr.v3.types import BytesLike


if TYPE_CHECKING:
from zarr.v3.metadata import CoreArrayMetadata, CodecMetadata
from zarr.v3.metadata.v3.array import CoreArrayMetadata


class Codec(ABC):
Expand Down
47 changes: 31 additions & 16 deletions zarr/v3/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,61 @@
# 2. Do we really need runtime_configuration? Specifically, the asyncio_loop seems problematic

from __future__ import annotations
from dataclasses import dataclass, replace

import json
from typing import Any, Dict, Iterable, Literal, Optional, Tuple, Union

import numpy as np
from attr import evolve, frozen

from zarr.v3.abc.array import SynchronousArray, AsynchronousArray
from zarr.v3.abc.codec import ArrayBytesCodecPartialDecodeMixin

# from zarr.v3.array_v2 import ArrayV2
from zarr.v3.codecs import CodecMetadata, CodecPipeline, bytes_codec
from zarr.v3.codecs import CodecPipeline, bytes_codec
from zarr.v3.common import (
ZARR_JSON,
ChunkCoords,
Selection,
SliceSelection,
concurrent_map,
)
from zarr.v3.indexing import BasicIndexer, all_chunk_coords, is_total_slice
from zarr.v3.metadata import (
from zarr.v3.common import (
RuntimeConfiguration,
)
from zarr.v3.codecs.sharding import ShardingCodec
from zarr.v3.metadata.v3.array import (
ArrayMetadata,
CodecMetadata,
DataType,
DefaultChunkKeyEncodingConfigurationMetadata,
DefaultChunkKeyEncodingMetadata,
RegularChunkGridConfigurationMetadata,
RegularChunkGridMetadata,
RuntimeConfiguration,
V2ChunkKeyEncodingConfigurationMetadata,
V2ChunkKeyEncodingMetadata,
dtype_to_data_type,
)
from zarr.v3.codecs.sharding import ShardingCodec
from zarr.v3.store import StoreLike, StorePath, make_store_path
from zarr.v3.sync import sync
from zarr.v3.types import ChunkCoords, Selection, SliceSelection


@frozen
class AsyncArray(AsynchronousArray):
metadata: ArrayMetadata
store_path: StorePath
_store_path: StorePath
runtime_configuration: RuntimeConfiguration
codec_pipeline: CodecPipeline

def __init__(
self,
metadata: ArrayMetadata,
store_path: StorePath,
runtime_configuration: RuntimeConfiguration,
codec_pipeline: CodecPipeline):

self.metadata = metadata
self._store_path = store_path
self.runtime_configuration = runtime_configuration
self.codec_pipeline = codec_pipeline

@classmethod
async def create(
cls,
Expand Down Expand Up @@ -198,6 +209,10 @@ def dtype(self) -> np.dtype:
@property
def attrs(self) -> dict:
return self.metadata.attributes

@property
def store_path(self) -> str:
return self._store_path

async def getitem(self, selection: Selection):
indexer = BasicIndexer(
Expand Down Expand Up @@ -376,7 +391,7 @@ async def _write_chunk_to_store(self, store_path: StorePath, chunk_array: np.nda

async def resize(self, new_shape: ChunkCoords) -> AsyncArray:
assert len(new_shape) == len(self.metadata.shape)
new_metadata = evolve(self.metadata, shape=new_shape)
new_metadata = replace(self.metadata, shape=new_shape)

# Remove all chunks outside of the new shape
chunk_shape = self.metadata.chunk_grid.configuration.chunk_shape
Expand All @@ -398,14 +413,14 @@ async def _delete_key(key: str) -> None:

# Write new metadata
await (self.store_path / ZARR_JSON).set_async(new_metadata.to_bytes())
return evolve(self, metadata=new_metadata)
return replace(self, metadata=new_metadata)

async def update_attributes(self, new_attributes: Dict[str, Any]) -> Array:
new_metadata = evolve(self.metadata, attributes=new_attributes)
new_metadata = replace(self.metadata, attributes=new_attributes)

# Write new metadata
await (self.store_path / ZARR_JSON).set_async(new_metadata.to_bytes())
return evolve(self, metadata=new_metadata)
return replace(self, metadata=new_metadata)

def __repr__(self):
return f"<AsyncArray {self.store_path} shape={self.shape} dtype={self.dtype}>"
Expand All @@ -414,7 +429,7 @@ async def info(self):
return NotImplemented


@frozen
@dataclass(frozen=True)
class Array(SynchronousArray):
_async_array: AsyncArray

Expand Down
Loading

0 comments on commit f357773

Please sign in to comment.