diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml
index 78945e97aa..7d36730538 100644
--- a/.github/workflows/windows-testing.yml
+++ b/.github/workflows/windows-testing.yml
@@ -50,7 +50,6 @@ jobs:
           azurite -l ~/blob_emulator --debug debug.log 2>&1 > stdouterr.log &
           pytest -sv --timeout=300
         env:
-          ZARR_TEST_ABS: 1
           ZARR_V3_EXPERIMENTAL_API: 1
           ZARR_V3_SHARDING: 1
       - name: Conda info
diff --git a/docs/api.rst b/docs/api.rst
index e200dd908d..b784f0d006 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -8,7 +8,6 @@ API reference
     api/core
     api/hierarchy
     api/storage
-    api/n5
     api/convenience
     api/codecs
     api/attrs
diff --git a/docs/api/n5.rst b/docs/api/n5.rst
deleted file mode 100644
index b6a8d8c61e..0000000000
--- a/docs/api/n5.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-N5 (``zarr.n5``)
-================
-.. automodule:: zarr.n5
-
-.. autoclass:: N5Store
diff --git a/docs/api/storage.rst b/docs/api/storage.rst
index 4321837449..7df93c4c8c 100644
--- a/docs/api/storage.rst
+++ b/docs/api/storage.rst
@@ -11,30 +11,12 @@ Storage (``zarr.storage``)
     .. automethod:: close
     .. automethod:: flush
 
-.. autoclass:: DBMStore
-
-    .. automethod:: close
-    .. automethod:: flush
-
-.. autoclass:: LMDBStore
-
-    .. automethod:: close
-    .. automethod:: flush
-
-.. autoclass:: SQLiteStore
-
-    .. automethod:: close
-
-.. autoclass:: MongoDBStore
-.. autoclass:: RedisStore
 .. autoclass:: LRUStoreCache
 
     .. automethod:: invalidate
     .. automethod:: invalidate_values
     .. automethod:: invalidate_keys
 
-.. autoclass:: ABSStore
-
 .. autoclass:: FSStore
 
 .. autoclass:: ConsolidatedMetadataStore
diff --git a/docs/api/v3.rst b/docs/api/v3.rst
index 7665b2ddd1..dce07ace5f 100644
--- a/docs/api/v3.rst
+++ b/docs/api/v3.rst
@@ -50,11 +50,6 @@ You can also check ``Store type`` here (which indicates Zarr V3).
 .. autoclass:: MemoryStoreV3
 .. autoclass:: DirectoryStoreV3
 .. autoclass:: ZipStoreV3
-.. autoclass:: RedisStoreV3
-.. autoclass:: MongoDBStoreV3
-.. autoclass:: DBMStoreV3
-.. autoclass:: LMDBStoreV3
-.. autoclass:: SQLiteStoreV3
 .. autoclass:: LRUStoreCacheV3
 .. autoclass:: ConsolidatedMetadataStoreV3
 
diff --git a/docs/contributing.rst b/docs/contributing.rst
index a65b3d104d..d078a82d9b 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -161,18 +161,6 @@ optional dependencies to be installed), run::
 
     $ python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst
 
-Note that some tests also require storage services to be running
-locally. To run the Azure Blob Service storage tests, run an Azure
-storage emulator (e.g., azurite) and set the environment variable
-``ZARR_TEST_ABS=1``. If you're using Docker to run azurite, start the service with::
-
-    docker run --rm -p 10000:10000 mcr.microsoft.com/azure-storage/azurite azurite-blob --loose --blobHost 0.0.0.0
-
-To run the Mongo DB storage tests, run a Mongo
-server locally and set the environment variable ``ZARR_TEST_MONGO=1``.
-To run the Redis storage tests, run a Redis server locally on port
-6379 and set the environment variable ``ZARR_TEST_REDIS=1``.
-
 All tests are automatically run via GitHub Actions for every pull
 request and must pass before code can be accepted.  Test coverage is
 also collected automatically via the Codecov service, and total
diff --git a/docs/release.rst b/docs/release.rst
index 3ed47ff9f5..33302e14ae 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -18,6 +18,13 @@ Release notes
 Unreleased (v3)
 ---------------
 
+Refactoring
+~~~~~~~~~~~
+
+* Remove storage classes for the following backends: N5, Redis, MongoDB, ABS, LMDB, DBM, and SQLite.
+  The intention is that these storage classes will be developed external to ``zarr-python``.
+  By :user:`Davis Bennett ` :issue:`1791`.
+
 Maintenance
 ~~~~~~~~~~~
 
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 4099bac1c8..1d20b73966 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -805,60 +805,6 @@ boundaries. Note also that the ``close()`` method must be called after writing
 any data to the store, otherwise essential records will not be written to the
 underlying zip file.
 
-Another storage alternative is the :class:`zarr.storage.DBMStore` class, added
-in Zarr version 2.2. This class allows any DBM-style database to be used for
-storing an array or group. Here is an example using a Berkeley DB B-tree
-database for storage (requires `bsddb3
-`_ to be installed)::
-
-    >>> import bsddb3
-    >>> store = zarr.DBMStore('data/example.bdb', open=bsddb3.btopen)
-    >>> root = zarr.group(store=store, overwrite=True)
-    >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4')
-    >>> z[:] = 42
-    >>> store.close()
-
-Also added in Zarr version 2.2 is the :class:`zarr.storage.LMDBStore` class which
-enables the lightning memory-mapped database (LMDB) to be used for storing an array or
-group (requires `lmdb `_ to be installed)::
-
-    >>> store = zarr.LMDBStore('data/example.lmdb')
-    >>> root = zarr.group(store=store, overwrite=True)
-    >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4')
-    >>> z[:] = 42
-    >>> store.close()
-
-In Zarr version 2.3 is the :class:`zarr.storage.SQLiteStore` class which
-enables the SQLite database to be used for storing an array or group (requires
-Python is built with SQLite support)::
-
-    >>> store = zarr.SQLiteStore('data/example.sqldb')
-    >>> root = zarr.group(store=store, overwrite=True)
-    >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4')
-    >>> z[:] = 42
-    >>> store.close()
-
-Also added in Zarr version 2.3 are two storage classes for interfacing with server-client
-databases. The :class:`zarr.storage.RedisStore` class interfaces `Redis `_
-(an in memory data structure store), and the :class:`zarr.storage.MongoDB` class interfaces
-with `MongoDB `_ (an object oriented NoSQL database). These stores
-respectively require the `redis-py `_ and
-`pymongo `_ packages to be installed.
-
-For compatibility with the `N5 `_ data format, Zarr also provides
-an N5 backend (this is currently an experimental feature). Similar to the zip storage class, an
-:class:`zarr.n5.N5Store` can be instantiated directly::
-
-    >>> store = zarr.N5Store('data/example.n5')
-    >>> root = zarr.group(store=store)
-    >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4')
-    >>> z[:] = 42
-
-For convenience, the N5 backend will automatically be chosen when the filename
-ends with `.n5`::
-
-    >>> root = zarr.open('data/example.n5', mode='w')
-
 Distributed/cloud storage
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -902,23 +848,6 @@ Here is an example using S3Map to read an array created previously::
     >>> z[:].tobytes()
     b'Hello from the cloud!'
 
-Zarr now also has a builtin storage backend for Azure Blob Storage.
-The class is :class:`zarr.storage.ABSStore` (requires
-`azure-storage-blob `_
-to be installed)::
-
-    >>> import azure.storage.blob
-    >>> container_client = azure.storage.blob.ContainerClient(...) # doctest: +SKIP
-    >>> store = zarr.ABSStore(client=container_client, prefix='zarr-testing') # doctest: +SKIP
-    >>> root = zarr.group(store=store, overwrite=True) # doctest: +SKIP
-    >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') # doctest: +SKIP
-    >>> z[:] = 42 # doctest: +SKIP
-
-When using an actual storage account, provide ``account_name`` and
-``account_key`` arguments to :class:`zarr.storage.ABSStore`, the
-above client is just testing against the emulator. Please also note
-that this is an experimental feature.
-
 Note that retrieving data from a remote service via the network can be significantly
 slower than retrieving data from a local file system, and will depend on network
 latency and bandwidth between the client and server systems. If you are experiencing poor
@@ -1590,8 +1519,8 @@ storage.
 Note that if an array or group is backed by an in-memory store like a ``dict`` or
 :class:`zarr.storage.MemoryStore`, then when it is pickled all of the store data will be
 included in the pickled data. However, if an array or group is backed by a persistent
-store like a :class:`zarr.storage.DirectoryStore`, :class:`zarr.storage.ZipStore` or
-:class:`zarr.storage.DBMStore` then the store data **are not** pickled. The only thing
+store like a :class:`zarr.storage.DirectoryStore` or :class:`zarr.storage.ZipStore`
+then the store data **are not** pickled. The only thing
 that is pickled is the necessary parameters to allow the store to re-open any underlying
 files or databases upon being unpickled.
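The tutorial text retained above already points users toward fsspec-based storage. As a purely illustrative sketch of the migration path for the removed Azure backend (not part of this patch; it assumes the third-party ``adlfs`` package, which provides an fsspec filesystem for Azure Blob Storage, and uses a made-up container name), data previously reached through ``ABSStore`` could instead be addressed with :class:`zarr.storage.FSStore`, which forwards extra keyword arguments to fsspec::

    >>> import zarr  # doctest: +SKIP
    >>> store = zarr.storage.FSStore('abfs://zarr-testing', account_name='...', account_key='...')  # doctest: +SKIP
    >>> root = zarr.group(store=store, overwrite=True)  # doctest: +SKIP
    >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4')  # doctest: +SKIP
    >>> z[:] = 42  # doctest: +SKIP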
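In the same spirit, the release note's intention that the dropped classes "will be developed external to ``zarr-python``" rests on the fact that any ``MutableMapping`` can act as a Zarr store. The sketch below is not taken from this patch; the ``ToyKVStore`` name is hypothetical and a plain dict stands in for whatever database an external backend would actually use::

    >>> from collections.abc import MutableMapping
    >>> class ToyKVStore(MutableMapping):
    ...     # A real external backend (DBM, LMDB, SQLite, Redis, ...) would do its
    ...     # I/O inside these methods; here an in-memory dict plays the database.
    ...     def __init__(self):
    ...         self._db = {}
    ...     def __getitem__(self, key):
    ...         return self._db[key]
    ...     def __setitem__(self, key, value):
    ...         self._db[key] = bytes(value)
    ...     def __delitem__(self, key):
    ...         del self._db[key]
    ...     def __iter__(self):
    ...         return iter(self._db)
    ...     def __len__(self):
    ...         return len(self._db)
    >>> import zarr
    >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=ToyKVStore(), overwrite=True)
    >>> z[...] = 42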
diff --git a/notebooks/store_benchmark.ipynb b/notebooks/store_benchmark.ipynb index 869e7df608..014f895c3e 100644 --- a/notebooks/store_benchmark.ipynb +++ b/notebooks/store_benchmark.ipynb @@ -35,48 +35,6 @@ "zarr.__version__" ] }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'6.2.5'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import bsddb3\n", - "bsddb3.__version__" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'0.93'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import lmdb\n", - "lmdb.__version__" - ] - }, { "cell_type": "code", "execution_count": 4, @@ -86,16 +44,6 @@ "import numpy as np" ] }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import dbm.gnu\n", - "import dbm.ndbm" - ] - }, { "cell_type": "code", "execution_count": 6, @@ -119,21 +67,11 @@ " clean()\n", " fdict_root = zarr.group(store=dict())\n", " hdict_root = zarr.group(store=zarr.DictStore())\n", - " lmdb_root = zarr.group(store=zarr.LMDBStore(os.path.join(bench_dir, 'lmdb')))\n", - " gdbm_root = zarr.group(store=zarr.DBMStore(os.path.join(bench_dir, 'gdbm'), open=dbm.gnu.open))\n", - " ndbm_root = zarr.group(store=zarr.DBMStore(os.path.join(bench_dir, 'ndbm'), open=dbm.ndbm.open))\n", - " bdbm_btree_root = zarr.group(store=zarr.DBMStore(os.path.join(bench_dir, 'bdbm_btree'), open=bsddb3.btopen))\n", - " bdbm_hash_root = zarr.group(store=zarr.DBMStore(os.path.join(bench_dir, 'bdbm_hash'), open=bsddb3.hashopen))\n", " zip_root = zarr.group(store=zarr.ZipStore(os.path.join(bench_dir, 'zip'), mode='w'))\n", " dir_root = zarr.group(store=zarr.DirectoryStore(os.path.join(bench_dir, 'dir')))\n", "\n", " fdict_z = fdict_root.empty_like(name, a)\n", " hdict_z = hdict_root.empty_like(name, a)\n", - " lmdb_z = lmdb_root.empty_like(name, a)\n", - " gdbm_z = gdbm_root.empty_like(name, a)\n", - " ndbm_z = ndbm_root.empty_like(name, a)\n", - " bdbm_btree_z = bdbm_btree_root.empty_like(name, a)\n", - " bdbm_hash_z = bdbm_hash_root.empty_like(name, a)\n", " zip_z = zip_root.empty_like(name, a)\n", " dir_z = dir_root.empty_like(name, a)\n", "\n", @@ -254,91 +192,6 @@ "%timeit save(a, hdict_z)" ] }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "316 ms ± 12.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit save(a, lmdb_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "938 ms ± 111 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit save(a, gdbm_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "406 ms ± 8.93 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit save(a, ndbm_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.43 s ± 156 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit save(a, bdbm_btree_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.24 s ± 260 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit save(a, bdbm_hash_z)" - ] - }, { "cell_type": "code", "execution_count": 16, @@ -416,91 +269,6 @@ "%timeit load(hdict_z, a)" ] }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "429 ms ± 19.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit load(lmdb_z, a)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "459 ms ± 10 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit load(gdbm_z, a)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "473 ms ± 5.71 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit load(ndbm_z, a)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "504 ms ± 8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit load(bdbm_btree_z, a)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "519 ms ± 9.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit load(bdbm_hash_z, a)" - ] - }, { "cell_type": "code", "execution_count": 25, @@ -620,91 +388,6 @@ "%timeit -r3 save(a, hdict_z)" ] }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "846 ms ± 24 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 save(a, lmdb_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6.35 s ± 785 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 save(a, gdbm_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4.62 s ± 1.09 s per loop (mean ± std. dev. of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 save(a, ndbm_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7.84 s ± 1.66 s per loop (mean ± std. dev. of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 save(a, bdbm_btree_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6.49 s ± 808 ms per loop (mean ± std. dev. 
of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 save(a, bdbm_hash_z)" - ] - }, { "cell_type": "code", "execution_count": 36, @@ -780,91 +463,6 @@ "%timeit -r3 load(hdict_z, a)" ] }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "532 ms ± 16.1 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 load(lmdb_z, a)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.2 s ± 10.9 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 load(gdbm_z, a)" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.18 s ± 13.2 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 load(ndbm_z, a)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.59 s ± 16.7 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 load(bdbm_btree_z, a)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.61 s ± 7.31 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%timeit -r3 load(bdbm_hash_z, a)" - ] - }, { "cell_type": "code", "execution_count": 46, @@ -980,96 +578,6 @@ "%time dask_op(hdict_z, fdict_z)" ] }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 15.1 s, sys: 524 ms, total: 15.6 s\n", - "Wall time: 3.02 s\n" - ] - } - ], - "source": [ - "%time dask_op(lmdb_z, fdict_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 16.5 s, sys: 712 ms, total: 17.2 s\n", - "Wall time: 3.13 s\n" - ] - } - ], - "source": [ - "%time dask_op(gdbm_z, fdict_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 16.3 s, sys: 604 ms, total: 16.9 s\n", - "Wall time: 3.22 s\n" - ] - } - ], - "source": [ - "%time dask_op(ndbm_z, fdict_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 19.6 s, sys: 1.24 s, total: 20.9 s\n", - "Wall time: 3.27 s\n" - ] - } - ], - "source": [ - "%time dask_op(bdbm_btree_z, fdict_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 20.3 s, sys: 1.08 s, total: 21.4 s\n", - "Wall time: 3.53 s\n" - ] - } - ], - "source": [ - "%time dask_op(bdbm_hash_z, fdict_z)" - ] - }, { "cell_type": "code", "execution_count": 83, @@ -1131,96 +639,6 @@ "%time dask_op(fdict_z, hdict_z)" ] }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: 
user 16.2 s, sys: 1.6 s, total: 17.8 s\n", - "Wall time: 2.71 s\n" - ] - } - ], - "source": [ - "%time dask_op(fdict_z, lmdb_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 16.8 s, sys: 3.05 s, total: 19.8 s\n", - "Wall time: 8.01 s\n" - ] - } - ], - "source": [ - "%time dask_op(fdict_z, gdbm_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 17.9 s, sys: 3.01 s, total: 20.9 s\n", - "Wall time: 5.46 s\n" - ] - } - ], - "source": [ - "%time dask_op(fdict_z, ndbm_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 13.8 s, sys: 3.39 s, total: 17.2 s\n", - "Wall time: 7.87 s\n" - ] - } - ], - "source": [ - "%time dask_op(fdict_z, bdbm_btree_z)" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 13.9 s, sys: 3.27 s, total: 17.2 s\n", - "Wall time: 6.73 s\n" - ] - } - ], - "source": [ - "%time dask_op(fdict_z, bdbm_hash_z)" - ] - }, { "cell_type": "code", "execution_count": 57, @@ -1263,11 +681,6 @@ "metadata": {}, "outputs": [], "source": [ - "lmdb_z.store.close()\n", - "gdbm_z.store.close()\n", - "ndbm_z.store.close()\n", - "bdbm_btree_z.store.close()\n", - "bdbm_hash_z.store.close()\n", "zip_z.store.close()" ] }, diff --git a/src/zarr/__init__.py b/src/zarr/__init__.py index 601b1295ab..cbbdfdaf27 100644 --- a/src/zarr/__init__.py +++ b/src/zarr/__init__.py @@ -30,20 +30,13 @@ ) from zarr.errors import CopyError, MetadataError from zarr.hierarchy import Group, group, open_group -from zarr.n5 import N5Store, N5FSStore from zarr.storage import ( - ABSStore, - DBMStore, DictStore, DirectoryStore, KVStore, - LMDBStore, LRUStoreCache, MemoryStore, - MongoDBStore, NestedDirectoryStore, - RedisStore, - SQLiteStore, TempStore, ZipStore, ) diff --git a/src/zarr/_storage/absstore.py b/src/zarr/_storage/absstore.py deleted file mode 100644 index d8e292535c..0000000000 --- a/src/zarr/_storage/absstore.py +++ /dev/null @@ -1,224 +0,0 @@ -"""This module contains storage classes related to Azure Blob Storage (ABS)""" - -import warnings -from numcodecs.compat import ensure_bytes -from zarr.util import normalize_storage_path -from zarr._storage.store import Store - -__doctest_requires__ = { - ("ABSStore", "ABSStore.*"): ["azure.storage.blob"], -} - - -class ABSStore(Store): - """Storage class using Azure Blob Storage (ABS). - - Parameters - ---------- - container : string - The name of the ABS container to use. - - .. deprecated:: - Use ``client`` instead. - - prefix : string - Location of the "directory" to use as the root of the storage hierarchy - within the container. - - account_name : string - The Azure blob storage account name. - - .. deprecated:: 2.8.3 - Use ``client`` instead. - - account_key : string - The Azure blob storage account access key. - - .. deprecated:: 2.8.3 - Use ``client`` instead. - - blob_service_kwargs : dictionary - Extra arguments to be passed into the azure blob client, for e.g. when - using the emulator, pass in blob_service_kwargs={'is_emulated': True}. - - .. deprecated:: 2.8.3 - Use ``client`` instead. 
- - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - - client : azure.storage.blob.ContainerClient, optional - And ``azure.storage.blob.ContainerClient`` to connect with. See - `here `_ # noqa - for more. - - .. versionadded:: 2.8.3 - - Notes - ----- - In order to use this store, you must install the Microsoft Azure Storage SDK for Python, - ``azure-storage-blob>=12.5.0``. - """ # noqa: E501 - - def __init__( - self, - container=None, - prefix="", - account_name=None, - account_key=None, - blob_service_kwargs=None, - dimension_separator=None, - client=None, - ): - self._dimension_separator = dimension_separator - self.prefix = normalize_storage_path(prefix) - if client is None: - # deprecated option, try to construct the client for them - msg = ( - "Providing 'container', 'account_name', 'account_key', and 'blob_service_kwargs'" - "is deprecated. Provide and instance of 'azure.storage.blob.ContainerClient' " - "'client' instead." - ) - warnings.warn(msg, FutureWarning, stacklevel=2) - from azure.storage.blob import ContainerClient - - blob_service_kwargs = blob_service_kwargs or {} - client = ContainerClient( - "https://{}.blob.core.windows.net/".format(account_name), - container, - credential=account_key, - **blob_service_kwargs, - ) - - self.client = client - self._container = container - self._account_name = account_name - self._account_key = account_key - - @staticmethod - def _warn_deprecated(property_): - msg = ( - "The {} property is deprecated and will be removed in a future " - "version. Get the property from 'ABSStore.client' instead." - ) - warnings.warn(msg.format(property_), FutureWarning, stacklevel=3) - - @property - def container(self): - self._warn_deprecated("container") - return self._container - - @property - def account_name(self): - self._warn_deprecated("account_name") - return self._account_name - - @property - def account_key(self): - self._warn_deprecated("account_key") - return self._account_key - - def _append_path_to_prefix(self, path): - if self.prefix == "": - return normalize_storage_path(path) - else: - return "/".join([self.prefix, normalize_storage_path(path)]) - - @staticmethod - def _strip_prefix_from_path(path, prefix): - # normalized things will not have any leading or trailing slashes - path_norm = normalize_storage_path(path) - prefix_norm = normalize_storage_path(prefix) - if prefix: - return path_norm[(len(prefix_norm) + 1) :] - else: - return path_norm - - def __getitem__(self, key): - from azure.core.exceptions import ResourceNotFoundError - - blob_name = self._append_path_to_prefix(key) - try: - return self.client.download_blob(blob_name).readall() - except ResourceNotFoundError: - raise KeyError("Blob %s not found" % blob_name) - - def __setitem__(self, key, value): - value = ensure_bytes(value) - blob_name = self._append_path_to_prefix(key) - self.client.upload_blob(blob_name, value, overwrite=True) - - def __delitem__(self, key): - from azure.core.exceptions import ResourceNotFoundError - - try: - self.client.delete_blob(self._append_path_to_prefix(key)) - except ResourceNotFoundError: - raise KeyError("Blob %s not found" % key) - - def __eq__(self, other): - return ( - isinstance(other, ABSStore) - and self.client == other.client - and self.prefix == other.prefix - ) - - def keys(self): - return list(self.__iter__()) - - def __iter__(self): - if self.prefix: - list_blobs_prefix = self.prefix + "/" - else: - list_blobs_prefix = None - for blob in self.client.list_blobs(list_blobs_prefix): - 
yield self._strip_prefix_from_path(blob.name, self.prefix) - - def __len__(self): - return len(self.keys()) - - def __contains__(self, key): - blob_name = self._append_path_to_prefix(key) - return self.client.get_blob_client(blob_name).exists() - - def listdir(self, path=None): - dir_path = normalize_storage_path(self._append_path_to_prefix(path)) - if dir_path: - dir_path += "/" - items = [ - self._strip_prefix_from_path(blob.name, dir_path) - for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter="/") - ] - return items - - def rmdir(self, path=None): - dir_path = normalize_storage_path(self._append_path_to_prefix(path)) - if dir_path: - dir_path += "/" - for blob in self.client.list_blobs(name_starts_with=dir_path): - self.client.delete_blob(blob) - - def getsize(self, path=None): - store_path = normalize_storage_path(path) - fs_path = self._append_path_to_prefix(store_path) - if fs_path: - blob_client = self.client.get_blob_client(fs_path) - else: - blob_client = None - - if blob_client and blob_client.exists(): - return blob_client.get_blob_properties().size - else: - size = 0 - if fs_path == "": - fs_path = None - elif not fs_path.endswith("/"): - fs_path += "/" - for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter="/"): - blob_client = self.client.get_blob_client(blob) - if blob_client.exists(): - size += blob_client.get_blob_properties().size - return size - - def clear(self): - self.rmdir() diff --git a/src/zarr/n5.py b/src/zarr/n5.py deleted file mode 100644 index 79bab20576..0000000000 --- a/src/zarr/n5.py +++ /dev/null @@ -1,896 +0,0 @@ -"""This module contains a storage class and codec to support the N5 format. -""" -import os -import struct -import sys -from typing import Any, Dict, Optional, cast -import warnings - -import numpy as np -from numcodecs.abc import Codec -from numcodecs.compat import ndarray_copy -from numcodecs.registry import get_codec, register_codec - -from .meta import ZARR_FORMAT, json_dumps, json_loads -from .storage import FSStore -from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path -from .storage import array_meta_key as zarr_array_meta_key -from .storage import attrs_key as zarr_attrs_key -from .storage import group_meta_key as zarr_group_meta_key - -N5_FORMAT = "2.0.0" - -zarr_to_n5_keys = [ - ("chunks", "blockSize"), - ("dtype", "dataType"), - ("compressor", "compression"), - ("shape", "dimensions"), -] -n5_attrs_key = "attributes.json" -n5_keywords = ["n5", "dataType", "dimensions", "blockSize", "compression"] - - -class N5Store(NestedDirectoryStore): - """Storage class using directories and files on a standard file system, - following the N5 format (https://github.com/saalfeldlab/n5). - - Parameters - ---------- - path : string - Location of directory to use as the root of the storage hierarchy. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-sensitive and - case-insensitive file system. Default value is False. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.N5Store('data/array.n5') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] 
= 42 - - Store a group:: - - >>> store = zarr.N5Store('data/group.n5') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - - Notes - ----- - - This is an experimental feature. - - Safe to write in multiple threads or processes. - - """ - - def __getitem__(self, key: str) -> bytes: - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) - value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) - - return json_dumps(value) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) - top_level = key == zarr_array_meta_key - value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) - return json_dumps(value) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) - value = attrs_to_zarr(self._load_n5_attrs(key_new)) - - if len(value) == 0: - raise KeyError(key_new) - else: - return json_dumps(value) - - elif is_chunk_key(key): - key_new = invert_chunk_coords(key) - - else: - key_new = key - - return super().__getitem__(key_new) - - def __setitem__(self, key: str, value: Any): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) - - n5_attrs = self._load_n5_attrs(key_new) - n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - - value = json_dumps(n5_attrs) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) - top_level = key == zarr_array_meta_key - n5_attrs = self._load_n5_attrs(key_new) - n5_attrs.update(**array_metadata_to_n5(json_loads(value), top_level=top_level)) - value = json_dumps(n5_attrs) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) - - n5_attrs = self._load_n5_attrs(key_new) - zarr_attrs = json_loads(value) - - for k in n5_keywords: - if k in zarr_attrs: - warnings.warn(f"Attribute {k} is a reserved N5 keyword", UserWarning) - - # remove previous user attributes - for k in list(n5_attrs.keys()): - if k not in n5_keywords: - del n5_attrs[k] - - # add new user attributes - n5_attrs.update(**zarr_attrs) - - value = json_dumps(n5_attrs) - - elif is_chunk_key(key): - key_new = invert_chunk_coords(key) - - else: - key_new = key - - super().__setitem__(key_new, value) - - def __delitem__(self, key: str): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) - elif is_chunk_key(key): - key_new = invert_chunk_coords(key) - else: - key_new = key - - super().__delitem__(key_new) - - def __contains__(self, key): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) - if key_new not in self: - return False - # group if not a dataset (attributes do not contain 'dimensions') - return "dimensions" not in self._load_n5_attrs(key_new) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) - # array if attributes contain 'dimensions' - return "dimensions" in self._load_n5_attrs(key_new) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) - return self._contains_attrs(key_new) - - elif is_chunk_key(key): - key_new = 
invert_chunk_coords(key) - else: - key_new = key - - return super().__contains__(key_new) - - def __eq__(self, other): - return isinstance(other, N5Store) and self.path == other.path - - def listdir(self, path: Optional[str] = None): - if path is not None: - path = invert_chunk_coords(path) - path = cast(str, path) - # We can't use NestedDirectoryStore's listdir, as it requires - # array_meta_key to be present in array directories, which this store - # doesn't provide. - children = super().listdir(path=path) - - if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files - children.remove(n5_attrs_key) - children.append(zarr_array_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) - - # special handling of directories containing an array to map - # inverted nested chunk keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and os.path.isdir(entry_path): - for dir_path, _, file_names in os.walk(entry_path): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path + os.path.sep)[1] - new_child = rel_path.replace(os.path.sep, ".") - new_children.append(invert_chunk_coords(new_child)) - else: - new_children.append(entry) - - return sorted(new_children) - - elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files - children.remove(n5_attrs_key) - children.append(zarr_group_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) - - return sorted(children) - - else: - return children - - def _load_n5_attrs(self, path: str) -> Dict[str, Any]: - try: - s = super().__getitem__(path) - return json_loads(s) - except KeyError: - return {} - - def _is_group(self, path: str): - if path is None: - attrs_key = n5_attrs_key - else: - attrs_key = os.path.join(path, n5_attrs_key) - - n5_attrs = self._load_n5_attrs(attrs_key) - return len(n5_attrs) > 0 and "dimensions" not in n5_attrs - - def _is_array(self, path: str): - if path is None: - attrs_key = n5_attrs_key - else: - attrs_key = os.path.join(path, n5_attrs_key) - - return "dimensions" in self._load_n5_attrs(attrs_key) - - def _contains_attrs(self, path: str): - if path is None: - attrs_key = n5_attrs_key - else: - if not path.endswith(n5_attrs_key): - attrs_key = os.path.join(path, n5_attrs_key) - else: - attrs_key = path - - attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) - return len(attrs) > 0 - - -class N5FSStore(FSStore): - """Implementation of the N5 format (https://github.com/saalfeldlab/n5) - using `fsspec`, which allows storage on a variety of filesystems. Based - on `zarr.N5Store`. - Parameters - ---------- - path : string - Location of directory to use as the root of the storage hierarchy. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-sensitive and - case-insensitive file system. Default value is False. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] 
= 42 - - Store a group:: - - >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - - Notes - ----- - This is an experimental feature. - Safe to write in multiple threads or processes. - - Be advised that the `_dimension_separator` property of this store - (and arrays it creates) is ".", but chunks saved by this store will - in fact be "/" separated, as proscribed by the N5 format. - - This is counter-intuitive (to say the least), but not arbitrary. - Chunks in N5 format are stored with reversed dimension order - relative to Zarr chunks: a chunk of a 3D Zarr array would be stored - on a file system as `/0/1/2`, but in N5 the same chunk would be - stored as `/2/1/0`. Therefore, stores targeting N5 must intercept - chunk keys and flip the order of the dimensions before writing to - storage, and this procedure requires chunk keys with "." separated - dimensions, hence the Zarr arrays targeting N5 have the deceptive - "." dimension separator. - """ - - _array_meta_key = "attributes.json" - _group_meta_key = "attributes.json" - _attrs_key = "attributes.json" - - def __init__(self, *args, **kwargs): - if "dimension_separator" in kwargs: - warnings.warn("Keyword argument `dimension_separator` will be ignored") - kwargs["dimension_separator"] = "." - super().__init__(*args, **kwargs) - - @staticmethod - def _swap_separator(key: str): - segments = list(key.split("/")) - if segments: - last_segment = segments[-1] - if _prog_ckey.match(last_segment): - coords = list(last_segment.split(".")) - last_segment = "/".join(coords[::-1]) - segments = segments[:-1] + [last_segment] - key = "/".join(segments) - return key - - def _normalize_key(self, key: str): - if is_chunk_key(key): - key = invert_chunk_coords(key) - - key = normalize_storage_path(key).lstrip("/") - if key: - *bits, end = key.split("/") - - if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): - end = end.replace(".", "/") - key = "/".join(bits + [end]) - return key.lower() if self.normalize_keys else key - - def __getitem__(self, key: str) -> bytes: - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) - value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) - - return json_dumps(value) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) - top_level = key == zarr_array_meta_key - value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) - return json_dumps(value) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) - value = attrs_to_zarr(self._load_n5_attrs(key_new)) - - if len(value) == 0: - raise KeyError(key_new) - else: - return json_dumps(value) - - elif is_chunk_key(key): - key_new = self._swap_separator(key) - - else: - key_new = key - - return super().__getitem__(key_new) - - def __setitem__(self, key: str, value: Any): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) - - n5_attrs = self._load_n5_attrs(key_new) - n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - - value = json_dumps(n5_attrs) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) - top_level = key == zarr_array_meta_key - n5_attrs = self._load_n5_attrs(key_new) - 
n5_attrs.update(**array_metadata_to_n5(json_loads(value), top_level=top_level)) - - value = json_dumps(n5_attrs) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) - - n5_attrs = self._load_n5_attrs(key_new) - zarr_attrs = json_loads(value) - - for k in n5_keywords: - if k in zarr_attrs.keys(): - warnings.warn(f"Attribute {k} is a reserved N5 keyword", UserWarning) - - # replace previous user attributes - for k in list(n5_attrs.keys()): - if k not in n5_keywords: - del n5_attrs[k] - - # add new user attributes - n5_attrs.update(**zarr_attrs) - - value = json_dumps(n5_attrs) - - elif is_chunk_key(key): - key_new = self._swap_separator(key) - - else: - key_new = key - - super().__setitem__(key_new, value) - - def __delitem__(self, key: str): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) - elif is_chunk_key(key): - key_new = self._swap_separator(key) - else: - key_new = key - super().__delitem__(key_new) - - def __contains__(self, key: Any): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) - if key_new not in self: - return False - # group if not a dataset (attributes do not contain 'dimensions') - return "dimensions" not in self._load_n5_attrs(key_new) - - elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) - # array if attributes contain 'dimensions' - return "dimensions" in self._load_n5_attrs(key_new) - - elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) - return self._contains_attrs(key_new) - - elif is_chunk_key(key): - key_new = self._swap_separator(key) - - else: - key_new = key - return super().__contains__(key_new) - - def __eq__(self, other: Any): - return isinstance(other, N5FSStore) and self.path == other.path - - def listdir(self, path: Optional[str] = None): - if path is not None: - path = invert_chunk_coords(path) - - # We can't use NestedDirectoryStore's listdir, as it requires - # array_meta_key to be present in array directories, which this store - # doesn't provide. 
- children = super().listdir(path=path) - if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files - children.remove(self._array_meta_key) - children.append(zarr_array_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) - - # special handling of directories containing an array to map - # inverted nested chunk keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and self.fs.isdir(entry_path): - for file_name in self.fs.find(entry_path): - file_path = os.path.join(root_path, file_name) - rel_path = file_path.split(root_path)[1] - new_child = rel_path.lstrip("/").replace("/", ".") - new_children.append(invert_chunk_coords(new_child)) - else: - new_children.append(entry) - return sorted(new_children) - - elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files - children.remove(self._group_meta_key) - children.append(zarr_group_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) - return sorted(children) - else: - return children - - def _load_n5_attrs(self, path: str): - try: - s = super().__getitem__(path) - return json_loads(s) - except KeyError: - return {} - - def _is_group(self, path: Optional[str]): - if path is None: - attrs_key = self._attrs_key - else: - attrs_key = os.path.join(path, self._attrs_key) - - n5_attrs = self._load_n5_attrs(attrs_key) - return len(n5_attrs) > 0 and "dimensions" not in n5_attrs - - def _is_array(self, path: Optional[str]): - if path is None: - attrs_key = self._attrs_key - else: - attrs_key = os.path.join(path, self._attrs_key) - - return "dimensions" in self._load_n5_attrs(attrs_key) - - def _contains_attrs(self, path: Optional[str]): - if path is None: - attrs_key = self._attrs_key - else: - if not path.endswith(self._attrs_key): - attrs_key = os.path.join(path, self._attrs_key) - else: - attrs_key = path - - attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) - return len(attrs) > 0 - - -def is_chunk_key(key: str): - rv = False - segments = list(key.split("/")) - if segments: - last_segment = segments[-1] - rv = bool(_prog_ckey.match(last_segment)) - return rv - - -def invert_chunk_coords(key: str): - segments = list(key.split("/")) - if segments: - last_segment = segments[-1] - if _prog_ckey.match(last_segment): - coords = list(last_segment.split(".")) - last_segment = "/".join(coords[::-1]) - segments = segments[:-1] + [last_segment] - key = "/".join(segments) - return key - - -def group_metadata_to_n5(group_metadata: Dict[str, Any]) -> Dict[str, Any]: - """Convert group metadata from zarr to N5 format.""" - del group_metadata["zarr_format"] - # TODO: This should only exist at the top-level - group_metadata["n5"] = N5_FORMAT - return group_metadata - - -def group_metadata_to_zarr(group_metadata: Dict[str, Any]) -> Dict[str, Any]: - """Convert group metadata from N5 to zarr format.""" - # This only exists at the top level - group_metadata.pop("n5", None) - group_metadata["zarr_format"] = ZARR_FORMAT - return group_metadata - - -def array_metadata_to_n5(array_metadata: Dict[str, Any], top_level=False) -> Dict[str, Any]: - """Convert array metadata from zarr to N5 format. 
If the `top_level` keyword argument is True, - then the `N5` : N5_FORMAT key : value pair will be inserted into the metadata.""" - - for f, t in zarr_to_n5_keys: - array_metadata[t] = array_metadata.pop(f) - del array_metadata["zarr_format"] - if top_level: - array_metadata["n5"] = N5_FORMAT - try: - dtype = np.dtype(array_metadata["dataType"]) - except TypeError: - raise TypeError(f"Data type {array_metadata['dataType']} is not supported by N5") - - array_metadata["dataType"] = dtype.name - array_metadata["dimensions"] = array_metadata["dimensions"][::-1] - array_metadata["blockSize"] = array_metadata["blockSize"][::-1] - - if "fill_value" in array_metadata: - if array_metadata["fill_value"] != 0 and array_metadata["fill_value"] is not None: - raise ValueError( - f"""Received fill_value = {array_metadata['fill_value']}, - but N5 only supports fill_value = 0""" - ) - del array_metadata["fill_value"] - - if "order" in array_metadata: - if array_metadata["order"] != "C": - raise ValueError( - f"Received order = {array_metadata['order']}, but N5 only supports order = C" - ) - del array_metadata["order"] - - if "filters" in array_metadata: - if array_metadata["filters"] != [] and array_metadata["filters"] is not None: - raise ValueError("Received filters, but N5 storage does not support zarr filters") - del array_metadata["filters"] - - assert "compression" in array_metadata - compressor_config = array_metadata["compression"] - compressor_config = compressor_config_to_n5(compressor_config) - array_metadata["compression"] = compressor_config - - if "dimension_separator" in array_metadata: - del array_metadata["dimension_separator"] - - return array_metadata - - -def array_metadata_to_zarr( - array_metadata: Dict[str, Any], top_level: bool = False -) -> Dict[str, Any]: - """Convert array metadata from N5 to zarr format. - If the `top_level` keyword argument is True, then the `N5` key will be removed from metadata""" - for t, f in zarr_to_n5_keys: - array_metadata[t] = array_metadata.pop(f) - if top_level: - array_metadata.pop("n5") - array_metadata["zarr_format"] = ZARR_FORMAT - - array_metadata["shape"] = array_metadata["shape"][::-1] - array_metadata["chunks"] = array_metadata["chunks"][::-1] - array_metadata["fill_value"] = 0 # also if None was requested - array_metadata["order"] = "C" - array_metadata["filters"] = [] - array_metadata["dimension_separator"] = "." 
- array_metadata["dtype"] = np.dtype(array_metadata["dtype"]).str - - compressor_config = array_metadata["compressor"] - compressor_config = compressor_config_to_zarr(compressor_config) - array_metadata["compressor"] = { - "id": N5ChunkWrapper.codec_id, - "compressor_config": compressor_config, - "dtype": array_metadata["dtype"], - "chunk_shape": array_metadata["chunks"], - } - - return array_metadata - - -def attrs_to_zarr(attrs: Dict[str, Any]) -> Dict[str, Any]: - """Get all zarr attributes from an N5 attributes dictionary (i.e., - all non-keyword attributes).""" - - # remove all N5 keywords - for n5_key in n5_keywords: - if n5_key in attrs: - del attrs[n5_key] - - return attrs - - -def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict[str, Any]: - if compressor_config is None: - return {"type": "raw"} - else: - _compressor_config = compressor_config - - # peel wrapper, if present - if _compressor_config["id"] == N5ChunkWrapper.codec_id: - _compressor_config = _compressor_config["compressor_config"] - - codec_id = _compressor_config["id"] - n5_config = {"type": codec_id} - - if codec_id == "bz2": - n5_config["type"] = "bzip2" - n5_config["blockSize"] = _compressor_config["level"] - - elif codec_id == "blosc": - n5_config["cname"] = _compressor_config["cname"] - n5_config["clevel"] = _compressor_config["clevel"] - n5_config["shuffle"] = _compressor_config["shuffle"] - n5_config["blocksize"] = _compressor_config["blocksize"] - - elif codec_id == "lzma": - # Switch to XZ for N5 if we are using the default XZ format. - # Note: 4 is the default, which is lzma.CHECK_CRC64. - if _compressor_config["format"] == 1 and _compressor_config["check"] in [-1, 4]: - n5_config["type"] = "xz" - else: - warnings.warn( - "Not all N5 implementations support lzma compression (yet). You " - "might not be able to open the dataset with another N5 library.", - RuntimeWarning, - ) - n5_config["format"] = _compressor_config["format"] - n5_config["check"] = _compressor_config["check"] - n5_config["filters"] = _compressor_config["filters"] - - # The default is lzma.PRESET_DEFAULT, which is 6. 
- if _compressor_config["preset"]: - n5_config["preset"] = _compressor_config["preset"] - else: - n5_config["preset"] = 6 - - elif codec_id == "zlib": - n5_config["type"] = "gzip" - n5_config["level"] = _compressor_config["level"] - n5_config["useZlib"] = True - - elif codec_id == "gzip": - n5_config["type"] = "gzip" - n5_config["level"] = _compressor_config["level"] - n5_config["useZlib"] = False - - else: - n5_config.update({k: v for k, v in _compressor_config.items() if k != "type"}) - - return n5_config - - -def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dict[str, Any]]: - codec_id = compressor_config["type"] - zarr_config = {"id": codec_id} - - if codec_id == "bzip2": - zarr_config["id"] = "bz2" - zarr_config["level"] = compressor_config["blockSize"] - - elif codec_id == "blosc": - zarr_config["cname"] = compressor_config["cname"] - zarr_config["clevel"] = compressor_config["clevel"] - zarr_config["shuffle"] = compressor_config["shuffle"] - zarr_config["blocksize"] = compressor_config["blocksize"] - - elif codec_id == "lzma": - zarr_config["format"] = compressor_config["format"] - zarr_config["check"] = compressor_config["check"] - zarr_config["preset"] = compressor_config["preset"] - zarr_config["filters"] = compressor_config["filters"] - - elif codec_id == "xz": - zarr_config["id"] = "lzma" - zarr_config["format"] = 1 # lzma.FORMAT_XZ - zarr_config["check"] = -1 - zarr_config["preset"] = compressor_config["preset"] - zarr_config["filters"] = None - - elif codec_id == "gzip": - if "useZlib" in compressor_config and compressor_config["useZlib"]: - zarr_config["id"] = "zlib" - zarr_config["level"] = compressor_config["level"] - else: - zarr_config["id"] = "gzip" - zarr_config["level"] = compressor_config["level"] - - elif codec_id == "raw": - return None - - else: - zarr_config.update({k: v for k, v in compressor_config.items() if k != "type"}) - - return zarr_config - - -class N5ChunkWrapper(Codec): - codec_id = "n5_wrapper" - - def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): - self.dtype = np.dtype(dtype) - self.chunk_shape = tuple(chunk_shape) - # is the dtype a little endian format? 
- self._little_endian = self.dtype.byteorder == "<" or ( - self.dtype.byteorder == "=" and sys.byteorder == "little" - ) - - if compressor: - if compressor_config is not None: - raise ValueError("Only one of compressor_config or compressor should be given.") - compressor_config = compressor.get_config() - - if compressor_config is None and compressor is None or compressor_config["id"] == "raw": - self.compressor_config = None - self._compressor = None - else: - self._compressor = get_codec(compressor_config) - self.compressor_config = self._compressor.get_config() - - def get_config(self): - config = {"id": self.codec_id, "compressor_config": self.compressor_config} - return config - - def encode(self, chunk): - assert chunk.flags.c_contiguous - - header = self._create_header(chunk) - chunk = self._to_big_endian(chunk) - - if self._compressor: - return header + self._compressor.encode(chunk) - else: - return header + chunk.tobytes(order="A") - - def decode(self, chunk, out=None) -> bytes: - len_header, chunk_shape = self._read_header(chunk) - chunk = chunk[len_header:] - - if out is not None: - # out should only be used if we read a complete chunk - assert chunk_shape == self.chunk_shape, "Expected chunk of shape {}, found {}".format( - self.chunk_shape, chunk_shape - ) - - if self._compressor: - self._compressor.decode(chunk, out) - else: - ndarray_copy(chunk, out) - - # we can byteswap in-place - if self._little_endian: - out.byteswap(True) - - return out - - else: - if self._compressor: - chunk = self._compressor.decode(chunk) - - # more expensive byteswap - chunk = self._from_big_endian(chunk) - - # read partial chunk - if chunk_shape != self.chunk_shape: - chunk = np.frombuffer(chunk, dtype=self.dtype) - chunk = chunk.reshape(chunk_shape) - complete_chunk = np.zeros(self.chunk_shape, dtype=self.dtype) - target_slices = tuple(slice(0, s) for s in chunk_shape) - complete_chunk[target_slices] = chunk - chunk = complete_chunk - - return chunk - - @staticmethod - def _create_header(chunk): - mode = struct.pack(">H", 0) - num_dims = struct.pack(">H", len(chunk.shape)) - shape = b"".join(struct.pack(">I", d) for d in chunk.shape[::-1]) - - return mode + num_dims + shape - - @staticmethod - def _read_header(chunk): - num_dims = struct.unpack(">H", chunk[2:4])[0] - shape = tuple( - struct.unpack(">I", chunk[i : i + 4])[0] for i in range(4, num_dims * 4 + 4, 4) - )[::-1] - - len_header = 4 + num_dims * 4 - - return len_header, shape - - def _to_big_endian(self, data): - # assumes data is ndarray - - if self._little_endian: - return data.byteswap() - return data - - def _from_big_endian(self, data): - # assumes data is byte array in big endian - - if not self._little_endian: - return data - - a = np.frombuffer(data, self.dtype.newbyteorder(">")) - return a.astype(self.dtype) - - -register_codec(N5ChunkWrapper, N5ChunkWrapper.codec_id) diff --git a/src/zarr/storage.py b/src/zarr/storage.py index a7bd22a6b9..9b2e9db92e 100644 --- a/src/zarr/storage.py +++ b/src/zarr/storage.py @@ -17,25 +17,21 @@ import atexit import errno import glob -import multiprocessing -import operator import os import re import shutil -import sys import tempfile import warnings import zipfile from collections import OrderedDict from collections.abc import MutableMapping from os import scandir -from pickle import PicklingError from threading import Lock, RLock from typing import Sequence, Mapping, Optional, Union, List, Tuple, Dict, Any import uuid import time -from numcodecs.compat import ensure_bytes, ensure_text, 
ensure_contiguous_ndarray_like +from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray_like from numcodecs.registry import codec_registry from zarr.context import Context @@ -51,7 +47,6 @@ from zarr.util import ( buffer_size, json_loads, - nolock, normalize_chunks, normalize_dimension_separator, normalize_dtype, @@ -63,7 +58,6 @@ ensure_contiguous_ndarray_or_bytes, ) -from zarr._storage.absstore import ABSStore # noqa: F401 from zarr._storage.store import ( # noqa: F401 _listdir_from_keys, _rename_from_keys, @@ -80,8 +74,6 @@ ) __doctest_requires__ = { - ("RedisStore", "RedisStore.*"): ["redis"], - ("MongoDBStore", "MongoDBStore.*"): ["pymongo"], ("LRUStoreCache", "LRUStoreCache.*"): ["s3fs"], } @@ -144,10 +136,6 @@ def normalize_store_arg(store: Any, storage_options=None, mode="r") -> BaseStore raise ValueError("storage_options passed with non-fsspec path") if store.endswith(".zip"): return ZipStore(store, mode=mode) - elif store.endswith(".n5"): - from zarr.n5 import N5Store - - return N5Store(store) else: return DirectoryStore(store) else: @@ -1813,386 +1801,6 @@ def migrate_1to2(store): del store["attrs"] -# noinspection PyShadowingBuiltins -class DBMStore(Store): - """Storage class using a DBM-style database. - - Parameters - ---------- - path : string - Location of database file. - flag : string, optional - Flags for opening the database file. - mode : int - File mode used if a new file is created. - open : function, optional - Function to open the database file. If not provided, :func:`dbm.open` will be - used on Python 3, and :func:`anydbm.open` will be used on Python 2. - write_lock: bool, optional - Use a lock to prevent concurrent writes from multiple threads (True by default). - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk.e - **open_kwargs - Keyword arguments to pass the `open` function. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.DBMStore('data/array.db') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - >>> store.close() # don't forget to call this when you're done - - Store a group:: - - >>> store = zarr.DBMStore('data/group.db') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - >>> store.close() # don't forget to call this when you're done - - After modifying a DBMStore, the ``close()`` method must be called, otherwise - essential data may not be written to the underlying database file. The - DBMStore class also supports the context manager protocol, which ensures the - ``close()`` method is called on leaving the context, e.g.:: - - >>> with zarr.DBMStore('data/array.db') as store: - ... z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - ... z[...] = 42 - ... # no need to call store.close() - - A different database library can be used by passing a different function to - the `open` parameter. For example, if the `bsddb3 - `_ package is installed, a - Berkeley DB database can be used:: - - >>> import bsddb3 - >>> store = zarr.DBMStore('data/array.bdb', open=bsddb3.btopen) - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - >>> store.close() - - Notes - ----- - Please note that, by default, this class will use the Python standard - library `dbm.open` function to open the database file (or `anydbm.open` on - Python 2). 
There are up to three different implementations of DBM-style - databases available in any Python installation, and which one is used may - vary from one system to another. Database file formats are not compatible - between these different implementations. Also, some implementations are - more efficient than others. In particular, the "dumb" implementation will be - the fall-back on many systems, and has very poor performance for some usage - scenarios. If you want to ensure a specific implementation is used, pass the - corresponding open function, e.g., `dbm.gnu.open` to use the GNU DBM - library. - - Safe to write in multiple threads. May be safe to write in multiple processes, - depending on which DBM implementation is being used, although this has not been - tested. - - """ - - def __init__( - self, - path, - flag="c", - mode=0o666, - open=None, - write_lock=True, - dimension_separator=None, - **open_kwargs, - ): - if open is None: - import dbm - - open = dbm.open - path = os.path.abspath(path) - # noinspection PyArgumentList - self.db = open(path, flag, mode, **open_kwargs) - self.path = path - self.flag = flag - self.mode = mode - self.open = open - self.write_lock = write_lock - if write_lock: - # This may not be required as some dbm implementations manage their own - # locks, but err on the side of caution. - self.write_mutex = Lock() - else: - self.write_mutex = nolock - self.open_kwargs = open_kwargs - self._dimension_separator = dimension_separator - - def __getstate__(self): - try: - self.flush() # needed for ndbm - except Exception: - # flush may fail if db has already been closed - pass - return (self.path, self.flag, self.mode, self.open, self.write_lock, self.open_kwargs) - - def __setstate__(self, state): - path, flag, mode, open, write_lock, open_kws = state - if flag[0] == "n": - flag = "c" + flag[1:] # don't clobber an existing database - self.__init__(path=path, flag=flag, mode=mode, open=open, write_lock=write_lock, **open_kws) - - def close(self): - """Closes the underlying database file.""" - if hasattr(self.db, "close"): - with self.write_mutex: - self.db.close() - - def flush(self): - """Synchronizes data to the underlying database file.""" - if self.flag[0] != "r": - with self.write_mutex: - if hasattr(self.db, "sync"): - self.db.sync() - else: # pragma: no cover - # we don't cover this branch anymore as ndbm (oracle) is not packaged - # by conda-forge on non-mac OS: - # https://github.com/conda-forge/staged-recipes/issues/4476 - # fall-back, close and re-open, needed for ndbm - flag = self.flag - if flag[0] == "n": - flag = "c" + flag[1:] # don't clobber an existing database - self.db.close() - # noinspection PyArgumentList - self.db = self.open(self.path, flag, self.mode, **self.open_kwargs) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def __getitem__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - return self.db[key] - - def __setitem__(self, key, value): - if isinstance(key, str): - key = key.encode("ascii") - value = ensure_bytes(value) - with self.write_mutex: - self.db[key] = value - - def __delitem__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - with self.write_mutex: - del self.db[key] - - def __eq__(self, other): - return ( - isinstance(other, DBMStore) - and self.path == other.path - and - # allow flag and mode to differ - self.open == other.open - and self.open_kwargs == other.open_kwargs - ) - - def keys(self): - return (ensure_text(k, "ascii") for k in 
iter(self.db.keys())) - - def __iter__(self): - return self.keys() - - def __len__(self): - return sum(1 for _ in self.keys()) - - def __contains__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - return key in self.db - - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys(self, path) - - -class LMDBStore(Store): - """Storage class using LMDB. Requires the `lmdb `_ - package to be installed. - - - Parameters - ---------- - path : string - Location of database file. - buffers : bool, optional - If True (default) use support for buffers, which should increase performance by - reducing memory copies. - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - **kwargs - Keyword arguments passed through to the `lmdb.open` function. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.LMDBStore('data/array.mdb') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - >>> store.close() # don't forget to call this when you're done - - Store a group:: - - >>> store = zarr.LMDBStore('data/group.mdb') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - >>> store.close() # don't forget to call this when you're done - - After modifying a DBMStore, the ``close()`` method must be called, otherwise - essential data may not be written to the underlying database file. The - DBMStore class also supports the context manager protocol, which ensures the - ``close()`` method is called on leaving the context, e.g.:: - - >>> with zarr.LMDBStore('data/array.mdb') as store: - ... z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - ... z[...] = 42 - ... # no need to call store.close() - - Notes - ----- - By default writes are not immediately flushed to disk to increase performance. You - can ensure data are flushed to disk by calling the ``flush()`` or ``close()`` methods. - - Should be safe to write in multiple threads or processes due to the synchronization - support within LMDB, although writing from multiple processes has not been tested. 
- - """ - - def __init__(self, path, buffers=True, dimension_separator=None, **kwargs): - import lmdb - - # set default memory map size to something larger than the lmdb default, which is - # very likely to be too small for any moderate array (logic copied from zict) - map_size = 2**40 if sys.maxsize >= 2**32 else 2**28 - kwargs.setdefault("map_size", map_size) - - # don't initialize buffers to zero by default, shouldn't be necessary - kwargs.setdefault("meminit", False) - - # decide whether to use the writemap option based on the operating system's - # support for sparse files - writemap requires sparse file support otherwise - # the whole# `map_size` may be reserved up front on disk (logic copied from zict) - writemap = sys.platform.startswith("linux") - kwargs.setdefault("writemap", writemap) - - # decide options for when data are flushed to disk - choose to delay syncing - # data to filesystem, otherwise pay a large performance penalty (zict also does - # this) - kwargs.setdefault("metasync", False) - kwargs.setdefault("sync", False) - kwargs.setdefault("map_async", False) - - # set default option for number of cached transactions - max_spare_txns = multiprocessing.cpu_count() - kwargs.setdefault("max_spare_txns", max_spare_txns) - - # normalize path - path = os.path.abspath(path) - - # open database - self.db = lmdb.open(path, **kwargs) - - # store properties - self.buffers = buffers - self.path = path - self.kwargs = kwargs - self._dimension_separator = dimension_separator - - def __getstate__(self): - try: - self.flush() # just in case - except Exception: - # flush may fail if db has already been closed - pass - return self.path, self.buffers, self.kwargs - - def __setstate__(self, state): - path, buffers, kwargs = state - self.__init__(path=path, buffers=buffers, **kwargs) - - def close(self): - """Closes the underlying database.""" - self.db.close() - - def flush(self): - """Synchronizes data to the file system.""" - self.db.sync() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def __getitem__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - # use the buffers option, should avoid a memory copy - with self.db.begin(buffers=self.buffers) as txn: - value = txn.get(key) - if value is None: - raise KeyError(key) - return value - - def __setitem__(self, key, value): - if isinstance(key, str): - key = key.encode("ascii") - with self.db.begin(write=True, buffers=self.buffers) as txn: - txn.put(key, value) - - def __delitem__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - with self.db.begin(write=True) as txn: - if not txn.delete(key): - raise KeyError(key) - - def __contains__(self, key): - if isinstance(key, str): - key = key.encode("ascii") - with self.db.begin(buffers=self.buffers) as txn: - with txn.cursor() as cursor: - return cursor.set_key(key) - - def items(self): - with self.db.begin(buffers=self.buffers) as txn: - with txn.cursor() as cursor: - for k, v in cursor.iternext(keys=True, values=True): - yield ensure_text(k, "ascii"), v - - def keys(self): - with self.db.begin(buffers=self.buffers) as txn: - with txn.cursor() as cursor: - for k in cursor.iternext(keys=True, values=False): - yield ensure_text(k, "ascii") - - def values(self): - with self.db.begin(buffers=self.buffers) as txn: - with txn.cursor() as cursor: - yield from cursor.iternext(keys=False, values=True) - - def __iter__(self): - return self.keys() - - def __len__(self): - return self.db.stat()["entries"] - - class 
LRUStoreCache(Store): """Storage class that implements a least-recently-used (LRU) cache layer over some other store. Intended primarily for use with stores that can be slow to @@ -2392,358 +2000,6 @@ def __delitem__(self, key): self._invalidate_value(key) -class SQLiteStore(Store): - """Storage class using SQLite. - - Parameters - ---------- - path : string - Location of database file. - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - **kwargs - Keyword arguments passed through to the `sqlite3.connect` function. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.SQLiteStore('data/array.sqldb') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - >>> store.close() # don't forget to call this when you're done - - Store a group:: - - >>> store = zarr.SQLiteStore('data/group.sqldb') - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - >>> store.close() # don't forget to call this when you're done - """ - - def __init__(self, path, dimension_separator=None, **kwargs): - import sqlite3 - - self._dimension_separator = dimension_separator - - # normalize path - if path != ":memory:": - path = os.path.abspath(path) - - # store properties - self.path = path - self.kwargs = kwargs - - # allow threading if SQLite connections are thread-safe - # - # ref: https://www.sqlite.org/releaselog/3_3_1.html - # ref: https://github.com/python/cpython/issues/71377 - check_same_thread = True - if sqlite3.sqlite_version_info >= (3, 3, 1): - check_same_thread = False - - # keep a lock for serializing mutable operations - self.lock = Lock() - - # open database - self.db = sqlite3.connect( - self.path, - detect_types=0, - isolation_level=None, - check_same_thread=check_same_thread, - **self.kwargs, - ) - - # handle keys as `str`s - self.db.text_factory = str - - # get a cursor to read/write to the database - self.cursor = self.db.cursor() - - # initialize database with our table if missing - with self.lock: - self.cursor.execute("CREATE TABLE IF NOT EXISTS zarr(k TEXT PRIMARY KEY, v BLOB)") - - def __getstate__(self): - if self.path == ":memory:": - raise PicklingError("Cannot pickle in-memory SQLite databases") - return self.path, self.kwargs - - def __setstate__(self, state): - path, kwargs = state - self.__init__(path=path, **kwargs) - - def close(self): - """Closes the underlying database.""" - - # close cursor and db objects - self.cursor.close() - self.db.close() - - def __getitem__(self, key): - value = self.cursor.execute("SELECT v FROM zarr WHERE (k = ?)", (key,)) - for (v,) in value: - return v - raise KeyError(key) - - def __setitem__(self, key, value): - self.update({key: value}) - - def __delitem__(self, key): - with self.lock: - self.cursor.execute("DELETE FROM zarr WHERE (k = ?)", (key,)) - if self.cursor.rowcount < 1: - raise KeyError(key) - - def __contains__(self, key): - cs = self.cursor.execute("SELECT COUNT(*) FROM zarr WHERE (k = ?)", (key,)) - for (has,) in cs: - has = bool(has) - return has - - def items(self): - kvs = self.cursor.execute("SELECT k, v FROM zarr") - yield from kvs - - def keys(self): - ks = self.cursor.execute("SELECT k FROM zarr") - for (k,) in ks: - yield k - - def values(self): - vs = self.cursor.execute("SELECT v FROM zarr") - for (v,) in vs: - yield v - - def __iter__(self): - return self.keys() - - def __len__(self): - cs = 
self.cursor.execute("SELECT COUNT(*) FROM zarr") - for (c,) in cs: - return c - - def update(self, *args, **kwargs): - args += (kwargs,) - - kv_list = [] - for dct in args: - for k, v in dct.items(): - v = ensure_contiguous_ndarray_like(v) - - # Accumulate key-value pairs for storage - kv_list.append((k, v)) - - with self.lock: - self.cursor.executemany("REPLACE INTO zarr VALUES (?, ?)", kv_list) - - def listdir(self, path=None): - path = normalize_storage_path(path) - sep = "_" if path == "" else "/" - keys = self.cursor.execute( - """ - SELECT DISTINCT SUBSTR(m, 0, INSTR(m, "/")) AS l FROM ( - SELECT LTRIM(SUBSTR(k, LENGTH(?) + 1), "/") || "/" AS m - FROM zarr WHERE k LIKE (? || "{sep}%") - ) ORDER BY l ASC - """.format(sep=sep), - (path, path), - ) - keys = list(map(operator.itemgetter(0), keys)) - return keys - - def getsize(self, path=None): - path = normalize_storage_path(path) - size = self.cursor.execute( - """ - SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr - WHERE k LIKE (? || "%") AND - 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/") - """, - (path, path), - ) - for (s,) in size: - return s - - def rmdir(self, path=None): - path = normalize_storage_path(path) - if path: - with self.lock: - self.cursor.execute('DELETE FROM zarr WHERE k LIKE (? || "/%")', (path,)) - else: - self.clear() - - def clear(self): - with self.lock: - self.cursor.executescript( - """ - BEGIN TRANSACTION; - DROP TABLE zarr; - CREATE TABLE zarr(k TEXT PRIMARY KEY, v BLOB); - COMMIT TRANSACTION; - """ - ) - - -class MongoDBStore(Store): - """Storage class using MongoDB. - - .. note:: This is an experimental feature. - - Requires the `pymongo `_ - package to be installed. - - Parameters - ---------- - database : string - Name of database - collection : string - Name of collection - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - **kwargs - Keyword arguments passed through to the `pymongo.MongoClient` function. - - Notes - ----- - The maximum chunksize in MongoDB documents is 16 MB. 
- - """ - - _key = "key" - _value = "value" - - def __init__( - self, - database="mongodb_zarr", - collection="zarr_collection", - dimension_separator=None, - **kwargs, - ): - import pymongo - - self._database = database - self._collection = collection - self._dimension_separator = dimension_separator - self._kwargs = kwargs - - self.client = pymongo.MongoClient(**self._kwargs) - self.db = self.client.get_database(self._database) - self.collection = self.db.get_collection(self._collection) - - def __getitem__(self, key): - doc = self.collection.find_one({self._key: key}) - - if doc is None: - raise KeyError(key) - else: - return doc[self._value] - - def __setitem__(self, key, value): - value = ensure_bytes(value) - self.collection.replace_one( - {self._key: key}, {self._key: key, self._value: value}, upsert=True - ) - - def __delitem__(self, key): - result = self.collection.delete_many({self._key: key}) - if not result.deleted_count == 1: - raise KeyError(key) - - def __iter__(self): - for f in self.collection.find({}): - yield f[self._key] - - def __len__(self): - return self.collection.count_documents({}) - - def __getstate__(self): - return self._database, self._collection, self._kwargs - - def __setstate__(self, state): - database, collection, kwargs = state - self.__init__(database=database, collection=collection, **kwargs) - - def close(self): - """Cleanup client resources and disconnect from MongoDB.""" - self.client.close() - - def clear(self): - """Remove all items from store.""" - self.collection.delete_many({}) - - -class RedisStore(Store): - """Storage class using Redis. - - .. note:: This is an experimental feature. - - Requires the `redis `_ - package to be installed. - - Parameters - ---------- - prefix : string - Name of prefix for Redis keys - dimension_separator : {'.', '/'}, optional - Separator placed between the dimensions of a chunk. - **kwargs - Keyword arguments passed through to the `redis.Redis` function. - - """ - - def __init__(self, prefix="zarr", dimension_separator=None, **kwargs): - import redis - - self._prefix = prefix - self._kwargs = kwargs - self._dimension_separator = dimension_separator - - self.client = redis.Redis(**kwargs) - - def _key(self, key): - return "{prefix}:{key}".format(prefix=self._prefix, key=key) - - def __getitem__(self, key): - return self.client[self._key(key)] - - def __setitem__(self, key, value): - value = ensure_bytes(value) - self.client[self._key(key)] = value - - def __delitem__(self, key): - count = self.client.delete(self._key(key)) - if not count: - raise KeyError(key) - - def keylist(self): - offset = len(self._key("")) # length of prefix - return [key[offset:].decode("utf-8") for key in self.client.keys(self._key("*"))] - - def keys(self): - yield from self.keylist() - - def __iter__(self): - yield from self.keys() - - def __len__(self): - return len(self.keylist()) - - def __getstate__(self): - return self._prefix, self._kwargs - - def __setstate__(self, state): - prefix, kwargs = state - self.__init__(prefix=prefix, **kwargs) - - def clear(self): - for key in self.keys(): - del self[key] - - class ConsolidatedMetadataStore(Store): """A layer over other storage, where the metadata has been consolidated into a single key. 
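
The DBM, LMDB, SQLite, MongoDB and Redis stores removed from ``src/zarr/storage.py`` above all follow the same pattern: a thin mapping adapter that encodes string keys and byte values for the backing database behind ``__getitem__``/``__setitem__``/``__delitem__``. Equivalent functionality can therefore be maintained in an external package without touching ``zarr-python`` itself. The sketch below is illustrative only and is not part of this change: it assumes that zarr 2.x continues to accept plain dict-like stores (wrapping them in ``KVStore``) and that ``numcodecs`` is installed; the class name ``ExternalDBMStore`` is hypothetical::

    import dbm
    from collections.abc import MutableMapping

    from numcodecs.compat import ensure_bytes


    class ExternalDBMStore(MutableMapping):
        """Minimal dict-style store backed by the stdlib ``dbm`` module."""

        def __init__(self, path, flag="c"):
            # dbm keys and values must be bytes; encode/decode at the boundary
            self.db = dbm.open(path, flag)

        @staticmethod
        def _key(key):
            return key.encode("ascii") if isinstance(key, str) else key

        def __getitem__(self, key):
            return self.db[self._key(key)]

        def __setitem__(self, key, value):
            # values arrive as bytes or buffer-like objects (e.g. ndarrays)
            self.db[self._key(key)] = ensure_bytes(value)

        def __delitem__(self, key):
            del self.db[self._key(key)]

        def __iter__(self):
            return (k.decode("ascii") for k in self.db.keys())

        def __len__(self):
            return len(self.db.keys())

        def close(self):
            self.db.close()

Usage would mirror the examples in the deleted docstrings, e.g. ``root = zarr.group(store=ExternalDBMStore('data/example.db'), overwrite=True)`` followed by ``store.close()`` when writing is finished.
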
diff --git a/tests/test_core.py b/tests/test_core.py index 6303371793..d996af5563 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,6 +1,4 @@ import atexit -import os -import sys import pickle import shutil from typing import Any, Literal, Optional, Tuple, Union @@ -37,26 +35,19 @@ from zarr.core import Array from zarr.meta import json_loads -from zarr.n5 import N5Store, N5FSStore, n5_keywords from zarr.storage import ( - ABSStore, - DBMStore, DirectoryStore, FSStore, KVStore, - LMDBStore, LRUStoreCache, NestedDirectoryStore, - SQLiteStore, - atexit_rmglob, - atexit_rmtree, init_array, init_group, normalize_store_arg, ) from zarr.util import buffer_size -from .util import abs_container, skip_test_env_var, have_fsspec, mktemp +from .util import have_fsspec # noinspection PyMethodMayBeStatic @@ -1655,24 +1646,6 @@ def test_array_init_from_dict(): assert isinstance(a.store, KVStore) -@skip_test_env_var("ZARR_TEST_ABS") -class TestArrayWithABSStore(TestArray): - def create_store(self): - client = abs_container() - store = ABSStore(client=client) - store.rmdir() - return store - - @pytest.mark.xfail - def test_nbytes_stored(self): - return super().test_nbytes_stored() - - @pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36") - def test_pickle(self): - # internal attribute on ContainerClient isn't serializable for py36 and earlier - super().test_pickle() - - class TestArrayWithNestedDirectoryStore(TestArrayWithDirectoryStore): def create_store(self): path = mkdtemp() @@ -1690,366 +1663,6 @@ def expected(self): ] -class TestArrayWithN5Store(TestArrayWithDirectoryStore): - def create_store(self): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - store = N5Store(path) - return store - - def test_array_0d(self): - # test behaviour for array with 0 dimensions - - # setup - a = np.zeros(()) - z = self.create_array(shape=(), dtype=a.dtype, fill_value=0) - - # check properties - assert a.ndim == z.ndim - assert a.shape == z.shape - assert a.size == z.size - assert a.dtype == z.dtype - assert a.nbytes == z.nbytes - with pytest.raises(TypeError): - len(z) - assert () == z.chunks - assert 1 == z.nchunks - assert (1,) == z.cdata_shape - # compressor always None - no point in compressing a single value - assert z.compressor.compressor_config is None - - # check __getitem__ - b = z[...] - assert isinstance(b, np.ndarray) - assert a.shape == b.shape - assert a.dtype == b.dtype - assert_array_equal(a, np.array(z)) - assert_array_equal(a, z[...]) - assert a[()] == z[()] - with pytest.raises(IndexError): - z[0] - with pytest.raises(IndexError): - z[:] - - # check __setitem__ - z[...] = 42 - assert 42 == z[()] - z[()] = 43 - assert 43 == z[()] - with pytest.raises(IndexError): - z[0] = 42 - with pytest.raises(IndexError): - z[:] = 42 - with pytest.raises(ValueError): - z[...] 
= np.array([1, 2, 3]) - - def test_array_1d_fill_value(self): - nvalues = 1050 - dtype = np.int32 - for fill_value in 0, None: - a = np.arange(nvalues, dtype=dtype) - f = np.empty_like(a) - f.fill(fill_value or 0) - z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, fill_value=fill_value) - z[190:310] = a[190:310] - - assert_array_equal(f[:190], z[:190]) - assert_array_equal(a[190:310], z[190:310]) - assert_array_equal(f[310:], z[310:]) - - with pytest.raises(ValueError): - z = self.create_array(shape=(nvalues,), chunks=100, dtype=dtype, fill_value=1) - - def test_nchunks_initialized(self): - fill_value = 0 - dtype = "int" - z = self.create_array( - shape=100, chunks=10, fill_value=fill_value, dtype=dtype, write_empty_chunks=True - ) - - assert 0 == z.nchunks_initialized - # manually put something into the store to confuse matters - z.store["foo"] = b"bar" - assert 0 == z.nchunks_initialized - z[:] = 42 - assert 10 == z.nchunks_initialized - # manually remove a chunk from the store - del z.chunk_store[z._chunk_key((0,))] - assert 9 == z.nchunks_initialized - - # second round of similar tests with write_empty_chunks set to - # False - z = self.create_array( - shape=100, chunks=10, fill_value=fill_value, dtype=dtype, write_empty_chunks=False - ) - z[:] = 42 - assert 10 == z.nchunks_initialized - # manually remove a chunk from the store - del z.chunk_store[z._chunk_key((0,))] - assert 9 == z.nchunks_initialized - z[:] = z.fill_value - assert 0 == z.nchunks_initialized - - def test_array_order(self): - # N5 only supports 'C' at the moment - with pytest.raises(ValueError): - self.create_array(shape=(10, 11), chunks=(10, 11), dtype="i8", order="F") - - # 1D - a = np.arange(1050) - z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, order="C") - assert z.order == "C" - assert z[:].flags.c_contiguous - z[:] = a - assert_array_equal(a, z[:]) - - # 2D - a = np.arange(10000).reshape((100, 100)) - z = self.create_array(shape=a.shape, chunks=(10, 10), dtype=a.dtype, order="C") - - assert z.order == "C" - assert z[:].flags.c_contiguous - z[:] = a - actual = z[:] - assert_array_equal(a, actual) - - def test_structured_array(self): - d = np.array( - [(b"aaa", 1, 4.2), (b"bbb", 2, 8.4), (b"ccc", 3, 12.6)], - dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], - ) - fill_values = None, b"", (b"zzz", 42, 16.8) - with pytest.raises(TypeError): - self.check_structured_array(d, fill_values) - - def test_structured_array_subshapes(self): - d = np.array( - [ - (0, ((0, 1, 2), (1, 2, 3)), b"aaa"), - (1, ((1, 2, 3), (2, 3, 4)), b"bbb"), - (2, ((2, 3, 4), (3, 4, 5)), b"ccc"), - ], - dtype=[("foo", "i8"), ("bar", "(2, 3)f4"), ("baz", "S3")], - ) - fill_values = None, b"", (0, ((0, 0, 0), (1, 1, 1)), b"zzz") - with pytest.raises(TypeError): - self.check_structured_array(d, fill_values) - - def test_structured_array_nested(self): - d = np.array( - [ - (0, (0, ((0, 1), (1, 2), (2, 3)), 0), b"aaa"), - (1, (1, ((1, 2), (2, 3), (3, 4)), 1), b"bbb"), - (2, (2, ((2, 3), (3, 4), (4, 5)), 2), b"ccc"), - ], - dtype=[ - ("foo", "i8"), - ("bar", [("foo", "i4"), ("bar", "(3, 2)f4"), ("baz", "u1")]), - ("baz", "S3"), - ], - ) - fill_values = None, b"", (0, (0, ((0, 0), (1, 1), (2, 2)), 0), b"zzz") - with pytest.raises(TypeError): - self.check_structured_array(d, fill_values) - - def test_dtypes(self): - # integers - for dtype in "u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8": - z = self.create_array(shape=10, chunks=3, dtype=dtype) - assert z.dtype == np.dtype(dtype) - a = np.arange(z.shape[0], 
dtype=dtype) - z[:] = a - assert_array_equal(a, z[:]) - - # floats - for dtype in "f2", "f4", "f8": - z = self.create_array(shape=10, chunks=3, dtype=dtype) - assert z.dtype == np.dtype(dtype) - a = np.linspace(0, 1, z.shape[0], dtype=dtype) - z[:] = a - assert_array_almost_equal(a, z[:]) - - # check that datetime generic units are not allowed - with pytest.raises(ValueError): - self.create_array(shape=100, dtype="M8") - with pytest.raises(ValueError): - self.create_array(shape=100, dtype="m8") - - def test_object_arrays(self): - # an object_codec is required for object arrays - with pytest.raises(ValueError): - self.create_array(shape=10, chunks=3, dtype=object) - - # an object_codec is required for object arrays, but allow to be provided via - # filters to maintain API backwards compatibility - with pytest.raises(ValueError): - with pytest.warns(FutureWarning): - self.create_array(shape=10, chunks=3, dtype=object, filters=[MsgPack()]) - - # create an object array using an object codec - with pytest.raises(ValueError): - self.create_array(shape=10, chunks=3, dtype=object, object_codec=MsgPack()) - - def test_object_arrays_vlen_text(self): - data = np.array(greetings * 1000, dtype=object) - - with pytest.raises(ValueError): - self.create_array(shape=data.shape, dtype=object, object_codec=VLenUTF8()) - - # convenience API - with pytest.raises(ValueError): - self.create_array(shape=data.shape, dtype=str) - - def test_object_arrays_vlen_bytes(self): - greetings_bytes = [g.encode("utf8") for g in greetings] - data = np.array(greetings_bytes * 1000, dtype=object) - - with pytest.raises(ValueError): - self.create_array(shape=data.shape, dtype=object, object_codec=VLenBytes()) - - # convenience API - with pytest.raises(ValueError): - self.create_array(shape=data.shape, dtype=bytes) - - def test_object_arrays_vlen_array(self): - data = np.array( - [np.array([1, 3, 7]), np.array([5]), np.array([2, 8, 12])] * 1000, dtype=object - ) - - codecs = VLenArray(int), VLenArray(" 2 else "" -# # setup some values -# store[prefix + "a"] = b"aaa" -# store[prefix + "b"] = b"bbb" -# store[prefix + "c/d"] = b"ddd" -# store[prefix + "c/e/f"] = b"fff" - -# # test iterators on store with data -# assert 4 == len(store) -# keys = [prefix + "a", prefix + "b", prefix + "c/d", prefix + "c/e/f"] -# values = [b"aaa", b"bbb", b"ddd", b"fff"] -# items = list(zip(keys, values)) -# assert set(keys) == set(store) -# assert set(keys) == set(store.keys()) -# assert set(values) == set(store.values()) -# assert set(items) == set(store.items()) - -# def test_getsize(self): -# return super().test_getsize() - -# def test_hierarchy(self): -# return super().test_hierarchy() - -# @pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36") -# def test_pickle(self): -# # internal attribute on ContainerClient isn't serializable for py36 and earlier -# super().test_pickle() - - # class TestConsolidatedMetadataStore: # version = 2 @@ -2574,7 +2095,7 @@ def create_store(self, normalize_keys=False, dimension_separator=".", **kwargs): # with pytest.raises(ValueError): # normalize_store_arg(dict(), zarr_version=4) -# for ext, Class in [(".zip", ZipStore), (".n5", N5Store)]: +# for ext, Class in [(".zip", ZipStore),]: # fn = tmpdir.join("store" + ext) # store = normalize_store_arg(str(fn), zarr_version=2, mode="w") # assert isinstance(store, Class) diff --git a/tests/test_storage_v3.py b/tests/test_storage_v3.py index 3d8024de70..671bfeee2a 100644 --- a/tests/test_storage_v3.py +++ b/tests/test_storage_v3.py @@ -30,19 
+30,13 @@ # normalize_store_arg, # ) # from zarr._storage.v3 import ( -# ABSStoreV3, # ConsolidatedMetadataStoreV3, -# DBMStoreV3, # DirectoryStoreV3, # FSStoreV3, # KVStore, # KVStoreV3, -# LMDBStoreV3, # LRUStoreCacheV3, # MemoryStoreV3, -# MongoDBStoreV3, -# RedisStoreV3, -# SQLiteStoreV3, # StoreV3, # ZipStoreV3, # ) @@ -50,20 +44,11 @@ # # pytest will fail to run if the following fixtures aren't imported here # from .test_storage import StoreTests as _StoreTests -# from .test_storage import TestABSStore as _TestABSStore # from .test_storage import TestConsolidatedMetadataStore as _TestConsolidatedMetadataStore -# from .test_storage import TestDBMStore as _TestDBMStore -# from .test_storage import TestDBMStoreBerkeleyDB as _TestDBMStoreBerkeleyDB -# from .test_storage import TestDBMStoreDumb as _TestDBMStoreDumb -# from .test_storage import TestDBMStoreGnu as _TestDBMStoreGnu -# from .test_storage import TestDBMStoreNDBM as _TestDBMStoreNDBM # from .test_storage import TestDirectoryStore as _TestDirectoryStore # from .test_storage import TestFSStore as _TestFSStore -# from .test_storage import TestLMDBStore as _TestLMDBStore # from .test_storage import TestLRUStoreCache as _TestLRUStoreCache # from .test_storage import TestMemoryStore as _TestMemoryStore -# from .test_storage import TestSQLiteStore as _TestSQLiteStore -# from .test_storage import TestSQLiteStoreInMemory as _TestSQLiteStoreInMemory # from .test_storage import TestZipStore as _TestZipStore # from .test_storage import dimension_separator_fixture, s3, skip_if_nested_chunks # noqa @@ -419,105 +404,6 @@ # return store -# class TestDBMStoreV3(_TestDBMStore, StoreV3Tests): -# def create_store(self, dimension_separator=None): -# path = mktemp(suffix=".anydbm") -# atexit.register(atexit_rmglob, path + "*") -# # create store using default dbm implementation -# store = DBMStoreV3(path, flag="n", dimension_separator=dimension_separator) -# return store - - -# class TestDBMStoreV3Dumb(_TestDBMStoreDumb, StoreV3Tests): -# def create_store(self, **kwargs): -# path = mktemp(suffix=".dumbdbm") -# atexit.register(atexit_rmglob, path + "*") - -# import dbm.dumb as dumbdbm - -# store = DBMStoreV3(path, flag="n", open=dumbdbm.open, **kwargs) -# return store - - -# class TestDBMStoreV3Gnu(_TestDBMStoreGnu, StoreV3Tests): -# def create_store(self, **kwargs): -# gdbm = pytest.importorskip("dbm.gnu") -# path = mktemp(suffix=".gdbm") # pragma: no cover -# atexit.register(os.remove, path) # pragma: no cover -# store = DBMStoreV3( -# path, flag="n", open=gdbm.open, write_lock=False, **kwargs -# ) # pragma: no cover -# return store # pragma: no cover - - -# class TestDBMStoreV3NDBM(_TestDBMStoreNDBM, StoreV3Tests): -# def create_store(self, **kwargs): -# ndbm = pytest.importorskip("dbm.ndbm") -# path = mktemp(suffix=".ndbm") # pragma: no cover -# atexit.register(atexit_rmglob, path + "*") # pragma: no cover -# store = DBMStoreV3(path, flag="n", open=ndbm.open, **kwargs) # pragma: no cover -# return store # pragma: no cover - - -# class TestDBMStoreV3BerkeleyDB(_TestDBMStoreBerkeleyDB, StoreV3Tests): -# def create_store(self, **kwargs): -# bsddb3 = pytest.importorskip("bsddb3") -# path = mktemp(suffix=".dbm") -# atexit.register(os.remove, path) -# store = DBMStoreV3(path, flag="n", open=bsddb3.btopen, write_lock=False, **kwargs) -# return store - - -# class TestLMDBStoreV3(_TestLMDBStore, StoreV3Tests): -# def create_store(self, **kwargs): -# pytest.importorskip("lmdb") -# path = mktemp(suffix=".lmdb") -# atexit.register(atexit_rmtree, path) -# 
buffers = True -# store = LMDBStoreV3(path, buffers=buffers, **kwargs) -# return store - - -# class TestSQLiteStoreV3(_TestSQLiteStore, StoreV3Tests): -# def create_store(self, **kwargs): -# pytest.importorskip("sqlite3") -# path = mktemp(suffix=".db") -# atexit.register(atexit_rmtree, path) -# store = SQLiteStoreV3(path, **kwargs) -# return store - - -# class TestSQLiteStoreV3InMemory(_TestSQLiteStoreInMemory, StoreV3Tests): -# def create_store(self, **kwargs): -# pytest.importorskip("sqlite3") -# store = SQLiteStoreV3(":memory:", **kwargs) -# return store - - -# @skip_test_env_var("ZARR_TEST_MONGO") -# class TestMongoDBStoreV3(StoreV3Tests): -# def create_store(self, **kwargs): -# pytest.importorskip("pymongo") -# store = MongoDBStoreV3( -# host="127.0.0.1", database="zarr_tests", collection="zarr_tests", **kwargs -# ) -# # start with an empty store -# store.clear() -# return store - - -# @skip_test_env_var("ZARR_TEST_REDIS") -# class TestRedisStoreV3(StoreV3Tests): -# def create_store(self, **kwargs): -# # TODO: this is the default host for Redis on Travis, -# # we probably want to generalize this though -# pytest.importorskip("redis") -# store = RedisStoreV3(host="localhost", port=6379, **kwargs) -# # start with an empty store -# store.clear() -# return store - - # @pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") # class TestStorageTransformerV3(TestMappingStoreV3): # def create_store(self, **kwargs): @@ -566,12 +452,6 @@ # LRUStoreClass = LRUStoreCacheV3 -# @skip_test_env_var("ZARR_TEST_ABS") -# class TestABSStoreV3(_TestABSStore, StoreV3Tests): - -# ABSStoreClass = ABSStoreV3 - - # def test_normalize_store_arg_v3(tmpdir): # fn = tmpdir.join("store.zip") @@ -655,16 +535,10 @@ # def test_top_level_imports(): # for store_name in [ -# "ABSStoreV3", -# "DBMStoreV3", # "KVStoreV3", # "DirectoryStoreV3", -# "LMDBStoreV3", # "LRUStoreCacheV3", # "MemoryStoreV3", -# "MongoDBStoreV3", -# "RedisStoreV3", -# "SQLiteStoreV3", # "ZipStoreV3", # ]: # if v3_api_available:
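
The chunk framing handled by the ``N5ChunkWrapper`` codec removed at the top of this diff is small enough to reproduce externally: a big-endian uint16 mode (always 0 in the removed code), a big-endian uint16 dimension count, and the chunk shape as big-endian uint32 values in reversed axis order, followed by the (optionally compressed) big-endian chunk bytes. A standalone sketch of just the header handling, as an external N5 package would need, is shown below; the function names are illustrative, not part of any existing API::

    import struct

    import numpy as np


    def create_n5_header(chunk: np.ndarray) -> bytes:
        # mode 0 (default), number of dimensions, then the shape in
        # reversed axis order, all big-endian
        mode = struct.pack(">H", 0)
        num_dims = struct.pack(">H", chunk.ndim)
        shape = b"".join(struct.pack(">I", d) for d in chunk.shape[::-1])
        return mode + num_dims + shape


    def read_n5_header(buf: bytes):
        # returns (header length in bytes, chunk shape in zarr axis order)
        num_dims = struct.unpack(">H", buf[2:4])[0]
        shape = tuple(
            struct.unpack(">I", buf[i : i + 4])[0]
            for i in range(4, 4 + num_dims * 4, 4)
        )[::-1]
        return 4 + num_dims * 4, shape


    header = create_n5_header(np.zeros((100, 50), dtype="i4"))
    assert read_n5_header(header) == (12, (100, 50))
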