diff --git a/Cargo.lock b/Cargo.lock
index eed4e452..9114d88b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1768,9 +1768,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3"
-version = "0.22.6"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884"
+checksum = "f54b3d09cbdd1f8c20650b28e7b09e338881482f4aa908a5f61a00c98fba2690"
 dependencies = [
  "cfg-if",
  "chrono",
@@ -1787,9 +1787,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-async-runtimes"
-version = "0.22.0"
+version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2529f0be73ffd2be0cc43c013a640796558aa12d7ca0aab5cc14f375b4733031"
+checksum = "977dc837525cfd22919ba6a831413854beb7c99a256c03bf8624ad707e45810e"
 dependencies = [
  "futures",
  "once_cell",
@@ -1800,9 +1800,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-build-config"
-version = "0.22.6"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38"
+checksum = "3015cf985888fe66cfb63ce0e321c603706cd541b7aec7ddd35c281390af45d8"
 dependencies = [
  "once_cell",
  "target-lexicon",
@@ -1810,9 +1810,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-ffi"
-version = "0.22.6"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636"
+checksum = "6fca7cd8fd809b5ac4eefb89c1f98f7a7651d3739dfb341ca6980090f554c270"
 dependencies = [
  "libc",
  "pyo3-build-config",
@@ -1820,9 +1820,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-macros"
-version = "0.22.6"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453"
+checksum = "34e657fa5379a79151b6ff5328d9216a84f55dc93b17b08e7c3609a969b73aa0"
 dependencies = [
  "proc-macro2",
  "pyo3-macros-backend",
@@ -1832,9 +1832,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-macros-backend"
-version = "0.22.6"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe"
+checksum = "295548d5ffd95fd1981d2d3cf4458831b21d60af046b729b6fd143b0ba7aee2f"
 dependencies = [
  "heck",
  "proc-macro2",
diff --git a/Changelog.python.md b/Changelog.python.md
index bec08e35..a4c67cc7 100644
--- a/Changelog.python.md
+++ b/Changelog.python.md
@@ -1,5 +1,21 @@
 # Changelog
 
+## Python Icechunk Library 0.1.0a5
+
+### Features
+
+- Sync with zarr 3.0b2. The biggest change is that the `mode` param on `IcechunkStore` methods has been simplified to `read_only`.
+- Changed `IcechunkStore::distributed_commit` to `IcechunkStore::merge`, which now does *not* commit, but attempts to merge the changes from another store back into the current store.
+- Added a new `icechunk.dask.store_dask` method to write a dask array to an icechunk store. This is required for safely writing dask arrays to an icechunk store.
+- Added a new `icechunk.xarray.to_icechunk` method to write an xarray dataset to an icechunk store. This is *required* for safely writing xarray datasets with dask arrays to an icechunk store in a distributed or multi-processing context.
+
+### Fixes
+
+- The `StorageConfig` methods have been correctly typed.
+- `IcechunkStore` instances are now set to `read_only` by default after pickling.
+- When checking out a snapshot or tag, the `IcechunkStore` will be set to read-only. If you want to write to the store, you must call `IcechunkStore::set_writeable()`.
+- An error will now be raised if you try to check out a snapshot that does not exist.
+
 ## Python Icechunk Library 0.1.0a4
 
 ### Features
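Editor's note: a minimal usage sketch for the new write entry points named in the changelog above. The module paths (`icechunk.xarray.to_icechunk`, `icechunk.dask.store_dask`) and `open_or_create`/`StorageConfig.filesystem` come from this diff, but the call signature of `to_icechunk`, the storage path, and the dataset are illustrative assumptions, not documented API.

# Sketch only -- assumes to_icechunk(dataset, store); store_dask is the analogous
# entry point for bare dask arrays.
import dask.array as da
import icechunk
import xarray as xr
from icechunk.xarray import to_icechunk

store = icechunk.IcechunkStore.open_or_create(
    storage=icechunk.StorageConfig.filesystem("/tmp/icechunk-example"),
)

# An xarray dataset backed by dask arrays; per the changelog this is the required
# path for writing such datasets from distributed or multi-process workers.
ds = xr.Dataset({"temperature": (("x",), da.ones(100, chunks=10))})
to_icechunk(ds, store)
snapshot_id = store.commit("write temperature")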
diff --git a/Changelog.rust.md b/Changelog.rust.md
index 7665b8f4..b1bff2f7 100644
--- a/Changelog.rust.md
+++ b/Changelog.rust.md
@@ -1,5 +1,18 @@
 # Changelog
 
+## Rust Icechunk Library 0.1.0-alpha.5
+
+### Features
+
+- Added new `Store::merge` method to merge changes from another store back into the current store.
+- Added new `garbage_collect` method to remove dangling chunks from the store.
+- Added new `Repository::rebase` method to detect and optionally fix conflicts between the current changes and the tip of a branch, allowing the user to commit the changes to the branch.
+
+### Fixes
+
+- `Store` will now be set to `ReadOnly` after checking out a snapshot or tag.
+- An error will now be raised if you try to check out a snapshot that does not exist.
+
 ## Rust Icechunk Library 0.1.0-alpha.4
 
 ### Features
diff --git a/deny.toml b/deny.toml
index 88fa8855..7a954ad8 100644
--- a/deny.toml
+++ b/deny.toml
@@ -22,6 +22,7 @@ allow = [
     "OpenSSL",
     "Unicode-DFS-2016",
     "CC0-1.0",
+    "Unicode-3.0",
 ]
 # The confidence threshold for detecting a license from license text.
 # The higher the value, the more closely the license text must be to the
diff --git a/icechunk-python/Cargo.toml b/icechunk-python/Cargo.toml
index 04879bcb..cc5401e1 100644
--- a/icechunk-python/Cargo.toml
+++ b/icechunk-python/Cargo.toml
@@ -22,12 +22,12 @@ bytes = "1.8.0"
 chrono = { version = "0.4.38" }
 futures = "0.3.31"
 icechunk = { path = "../icechunk", version = "0.1.0-alpha.4" }
-pyo3 = { version = "0.22", features = [
+pyo3 = { version = "0.23", features = [
     "chrono",
     "extension-module",
     "experimental-async",
 ] }
-pyo3-async-runtimes = { version = "0.22.0", features = ["tokio-runtime"] }
+pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"] }
 async-stream = "0.3.6"
 thiserror = "2.0.3"
 tokio = "1.41"
diff --git a/icechunk-python/examples/smoke-test.py b/icechunk-python/examples/smoke-test.py
index 68b9eb0c..1c6de099 100644
--- a/icechunk-python/examples/smoke-test.py
+++ b/icechunk-python/examples/smoke-test.py
@@ -59,7 +59,7 @@ def create_array(*, group, name, size, dtype, fill_value) -> np.ndarray:
     return array
 
 
-async def run(store: Store) -> None:
+def run(store: Store) -> None:
     write_start = time.time()
     group = zarr.group(store=store, overwrite=True)
     group.attrs["foo"] = "foo"
diff --git a/icechunk-python/src/errors.rs b/icechunk-python/src/errors.rs
index 4a1434f2..75fbf980 100644
--- a/icechunk-python/src/errors.rs
+++ b/icechunk-python/src/errors.rs
@@ -1,3 +1,5 @@
+use std::convert::Infallible;
+
 use icechunk::{
     format::IcechunkFormatError, repository::RepositoryError, zarr::StoreError,
 };
@@ -32,6 +34,12 @@ pub(crate) enum PyIcechunkStoreError {
     UnkownError(String),
 }
 
+impl From<Infallible> for PyIcechunkStoreError {
+    fn from(_: Infallible) -> Self {
+        PyIcechunkStoreError::UnkownError("Infallible".to_string())
+    }
+}
+
 impl From<StoreError> for PyIcechunkStoreError {
     fn from(error: StoreError) -> Self {
         match error {
diff --git a/icechunk-python/src/lib.rs b/icechunk-python/src/lib.rs
index 97c010d9..32b34fca 100644
--- a/icechunk-python/src/lib.rs
+++ b/icechunk-python/src/lib.rs
@@ -23,7 +23,7 @@ use icechunk::{
 use pyo3::{
     exceptions::{PyKeyError, PyValueError},
     prelude::*,
-    types::{PyBytes, PyList, PyNone, PyString},
+    types::{PyNone, PyString},
 };
 use storage::{PyS3Credentials, PyStorageConfig, PyVirtualRefConfig};
 use streams::PyAsyncGenerator;
@@ -378,7 +378,7 @@ impl PyIcechunkStore {
         let store = Arc::clone(&self.store);
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             do_checkout_snapshot(store, snapshot_id).await?;
-            Ok(PyNone::get_bound(py).to_owned())
+            Ok(PyNone::get(py).to_owned())
         })
     }
 
@@ -401,7 +401,7 @@ impl PyIcechunkStore {
         let store = Arc::clone(&self.store);
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             do_checkout_branch(store, branch).await?;
-            Ok(PyNone::get_bound(py).to_owned())
+            Ok(PyNone::get(py).to_owned())
         })
     }
 
@@ -424,7 +424,7 @@ impl PyIcechunkStore {
         let store = Arc::clone(&self.store);
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             do_checkout_tag(store, tag).await?;
-            Ok(PyNone::get_bound(py).to_owned())
+            Ok(PyNone::get(py).to_owned())
         })
     }
 
@@ -457,7 +457,7 @@ impl PyIcechunkStore {
 
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             let res = do_commit(store, message).await?;
-            Ok(PyString::new_bound(py, res.as_str()))
+            Ok(PyString::new(py, res.as_str()))
         })
     }
 
@@ -512,11 +512,11 @@ impl PyIcechunkStore {
         })
     }
 
-    fn reset<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
+    fn reset(&self) -> PyIcechunkStoreResult<Cow<[u8]>> {
         let store = Arc::clone(&self.store);
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             let changes = do_reset(store).await?;
-            Ok(PyBytes::new_bound(py, &changes))
+            Ok(Cow::Owned(changes))
         })
     }
 
@@ -531,15 +531,11 @@ impl PyIcechunkStore {
         })
     }
 
-    fn new_branch<'py>(
-        &'py self,
-        py: Python<'py>,
-        branch_name: String,
-    ) -> PyResult<Bound<'py, PyString>> {
+    fn new_branch(&self, branch_name: String) -> PyIcechunkStoreResult<String> {
         let store = Arc::clone(&self.store);
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             let res = do_new_branch(store, branch_name).await?;
-            Ok(PyString::new_bound(py, res.as_str()))
+            Ok(res)
         })
     }
 
@@ -554,15 +550,11 @@ impl PyIcechunkStore {
         })
     }
 
-    fn reset_branch<'py>(
-        &'py self,
-        py: Python<'py>,
-        to_snapshot: String,
-    ) -> PyResult<Bound<'py, PyNone>> {
+    fn reset_branch(&self, to_snapshot: String) -> PyIcechunkStoreResult<()> {
         let store = Arc::clone(&self.store);
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             do_reset_branch(store, to_snapshot).await?;
-            Ok(PyNone::get_bound(py).to_owned())
+            Ok(())
         })
     }
 
@@ -581,36 +573,25 @@ impl PyIcechunkStore {
         })
     }
 
-    fn tag<'py>(
-        &'py self,
-        py: Python<'py>,
-        tag: String,
-        snapshot_id: String,
-    ) -> PyResult<Bound<'py, PyNone>> {
+    fn tag(&self, tag: String, snapshot_id: String) -> PyIcechunkStoreResult<()> {
         let store = Arc::clone(&self.store);
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             do_tag(store, tag, snapshot_id).await?;
-            Ok(PyNone::get_bound(py).to_owned())
+            Ok(())
         })
     }
 
-    fn ancestry<'py>(
-        &'py self,
-        py: Python<'py>,
-    ) -> PyIcechunkStoreResult<Bound<'py, PyList>> {
+    fn ancestry(&self) -> PyIcechunkStoreResult<Vec<PySnapshotMetadata>> {
         // TODO: this holds everything in memory
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             let store = self.store.read().await;
-            let list = store
+            let ancestry = store
                 .ancestry()
                 .await?
-                .map_ok(|parent| {
-                    let parent = Into::<PySnapshotMetadata>::into(parent);
-                    Python::with_gil(|py| parent.into_py(py))
-                })
+                .map_ok(Into::<PySnapshotMetadata>::into)
                 .try_collect::<Vec<_>>()
                 .await?;
-            Ok(PyList::new_bound(py, list))
+            Ok(ancestry)
         })
     }
 
@@ -622,6 +603,7 @@ impl PyIcechunkStore {
             })?
             .map_ok(|parent| {
                 let parent = Into::<PySnapshotMetadata>::into(parent);
+                #[allow(deprecated)]
                 Python::with_gil(|py| parent.into_py(py))
             });
         let prepared_list = Arc::new(Mutex::new(list.boxed()));
@@ -653,12 +635,12 @@ impl PyIcechunkStore {
         })
     }
 
-    fn sync_clear<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyNone>> {
+    fn sync_clear(&self) -> PyIcechunkStoreResult<()> {
         let store = Arc::clone(&self.store);
 
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             store.write().await.clear().await.map_err(PyIcechunkStoreError::from)?;
-            Ok(PyNone::get_bound(py).to_owned())
+            Ok(())
         })
     }
 
@@ -676,13 +658,7 @@ impl PyIcechunkStore {
         // We need to distinguish the "safe" case of trying to fetch an uninitialized key
        // from other types of errors, we use PyKeyError exception for that
         match data {
-            Ok(data) => {
-                let pybytes = Python::with_gil(|py| {
-                    let bound_bytes = PyBytes::new_bound(py, &data);
-                    bound_bytes.to_object(py)
-                });
-                Ok(pybytes)
-            }
+            Ok(data) => Ok(Vec::from(data)),
             Err(StoreError::NotFound(_)) => Err(PyKeyError::new_err(key)),
             Err(err) => Err(PyIcechunkStoreError::StoreError(err).into()),
         }
@@ -708,15 +684,7 @@ impl PyIcechunkStore {
             .into_iter()
             // If we want to error instead of returning None we can collect into
             // a Result<Vec<_>, _> and short circuit
-            .map(|x| {
-                x.map(|x| {
-                    Python::with_gil(|py| {
-                        let bound_bytes = PyBytes::new_bound(py, &x);
-                        bound_bytes.to_object(py)
-                    })
-                })
-                .ok()
-            })
+            .map(|x| x.map(Vec::from).ok())
             .collect::<Vec<_>>();
 
         Ok(result)
@@ -809,18 +777,17 @@ impl PyIcechunkStore {
         })
     }
 
-    fn set_virtual_ref<'py>(
-        &'py self,
-        py: Python<'py>,
+    fn set_virtual_ref(
+        &self,
         key: String,
         location: String,
         offset: ChunkOffset,
         length: ChunkLength,
-    ) -> PyResult<Bound<'py, PyNone>> {
+    ) -> PyIcechunkStoreResult<()> {
         let store = Arc::clone(&self.store);
         pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
             do_set_virtual_ref(store, key, location, offset, length).await?;
-            Ok(PyNone::get_bound(py).to_owned())
+            Ok(())
         })
     }
 
@@ -890,6 +857,7 @@ impl PyIcechunkStore {
     }
 
     fn list(&self) -> PyIcechunkStoreResult<PyAsyncGenerator> {
+        #[allow(deprecated)]
         let list = pyo3_async_runtimes::tokio::get_runtime()
             .block_on(async move {
                 let store = self.store.read().await;
@@ -902,6 +870,7 @@ impl PyIcechunkStore {
     }
 
     fn list_prefix(&self, prefix: String) -> PyIcechunkStoreResult<PyAsyncGenerator> {
+        #[allow(deprecated)]
         let list = pyo3_async_runtimes::tokio::get_runtime()
             .block_on(async move {
                 let store = self.store.read().await;
@@ -913,6 +882,7 @@ impl PyIcechunkStore {
     }
 
     fn list_dir(&self, prefix: String) -> PyIcechunkStoreResult<PyAsyncGenerator> {
+        #[allow(deprecated)]
         let list = pyo3_async_runtimes::tokio::get_runtime()
             .block_on(async move {
                 let store = self.store.read().await;
diff --git a/icechunk-python/src/streams.rs b/icechunk-python/src/streams.rs
index 13a691fb..8da99b8f 100644
--- a/icechunk-python/src/streams.rs
+++ b/icechunk-python/src/streams.rs
@@ -6,7 +6,7 @@ use pyo3::{exceptions::PyStopAsyncIteration, prelude::*};
 use tokio::sync::Mutex;
 
 type PyObjectStream =
-    Arc> + Send>>>>;
+    Arc, StoreError>> + Send>>>>;
 
 /// An async generator that yields strings from a rust stream of strings
 ///
@@ -41,7 +41,7 @@ impl PyAsyncGenerator {
     fn __anext__<'py>(
         slf: PyRefMut<'py, Self>,
         py: Python<'py>,
-    ) -> PyResult> {
         // Arc::clone is cheap, so we can clone the Arc here because we move into the
         // future block
         let stream = slf.stream.clone();
@@ -62,7 +62,6 @@ impl PyAsyncGenerator {
 
         // TODO: Can we convert this is an async function or a coroutine in the next versions
         // of pyo3?
-        let result = pyo3_async_runtimes::tokio::future_into_py(py, future)?;
-        Ok(Some(result.to_object(py)))
+        pyo3_async_runtimes::tokio::future_into_py(py, future)
     }
 }
diff --git a/icechunk-python/tests/test_config.py b/icechunk-python/tests/test_config.py
index 29c74f91..ef237232 100644
--- a/icechunk-python/tests/test_config.py
+++ b/icechunk-python/tests/test_config.py
@@ -7,7 +7,7 @@
 
 
 @pytest.fixture(scope="function")
-async def tmp_store(tmpdir):
+def tmp_store(tmpdir):
     store_path = f"{tmpdir}"
     store = icechunk.IcechunkStore.open_or_create(
         storage=icechunk.StorageConfig.filesystem(store_path),
@@ -20,7 +20,7 @@
     store.close()
 
 
-async def test_no_inline_chunks(tmp_store):
+def test_no_inline_chunks(tmp_store):
     store = tmp_store[0]
     store_path = tmp_store[1]
     array = zarr.open_array(
@@ -40,7 +40,7 @@
     assert len(os.listdir(f"{store_path}/chunks")) == 10
 
 
-async def test_inline_chunks(tmp_store):
+def test_inline_chunks(tmp_store):
     store = tmp_store[0]
     store_path = tmp_store[1]
 
diff --git a/icechunk-python/tests/test_pickle.py b/icechunk-python/tests/test_pickle.py
index 774576fb..c8f561de 100644
--- a/icechunk-python/tests/test_pickle.py
+++ b/icechunk-python/tests/test_pickle.py
@@ -8,7 +8,7 @@
 
 
 @pytest.fixture(scope="function")
-async def tmp_store(tmpdir):
+def tmp_store(tmpdir):
     store_path = f"{tmpdir}"
     store = icechunk.IcechunkStore.open_or_create(
         storage=icechunk.StorageConfig.filesystem(store_path),
@@ -20,7 +20,7 @@
     store.close()
 
 
-async def test_pickle_read_only(tmp_store):
+def test_pickle_read_only(tmp_store):
     assert tmp_store._read_only is False
 
     roundtripped = pickle.loads(pickle.dumps(tmp_store))
@@ -33,7 +33,7 @@
     assert tmp_store._read_only is False
 
 
-async def test_pickle(tmp_store):
+def test_pickle(tmp_store):
     root = zarr.group(store=tmp_store)
     array = root.ones(name="ones", shape=(10, 10), chunks=(5, 5), dtype="float32")
     array[:] = 20
diff --git a/icechunk-python/tests/test_zarr/test_api.py b/icechunk-python/tests/test_zarr/test_api.py
index 9749768a..b5446bed 100644
--- a/icechunk-python/tests/test_zarr/test_api.py
+++ b/icechunk-python/tests/test_zarr/test_api.py
@@ -24,7 +24,7 @@
 
 
 @pytest.fixture(scope="function")
-async def memory_store() -> IcechunkStore:
+def memory_store() -> IcechunkStore:
     return parse_store("memory", "")
 
 
@@ -63,7 +63,7 @@ def test_open_normalized_path(
     assert node.path == normalize_path(path)
 
 
-async def test_open_array(memory_store: IcechunkStore) -> None:
+def test_open_array(memory_store: IcechunkStore) -> None:
     store = memory_store
 
     # open array, create if doesn't exist
@@ -97,7 +97,7 @@ async def test_open_array(memory_store: IcechunkStore) -> None:
     open(store="doesnotexist", mode="r")
 
 
-async def test_open_group(memory_store: IcechunkStore) -> None:
+def test_open_group(memory_store: IcechunkStore) -> None:
     store = memory_store
 
     # open group, create if doesn't exist
diff --git a/icechunk-python/tests/test_zarr/test_group.py b/icechunk-python/tests/test_zarr/test_group.py
index 15b49648..50bd549a 100644
--- a/icechunk-python/tests/test_zarr/test_group.py
+++ b/icechunk-python/tests/test_zarr/test_group.py
@@ -985,9 +985,7 @@ def test_info(self, store: IcechunkStore) -> None:
         assert result == expected
 
 
-async def test_delitem_removes_children(
-    store: IcechunkStore, zarr_format: ZarrFormat
-) -> None:
+def test_delitem_removes_children(store: IcechunkStore, zarr_format: ZarrFormat) -> None:
     # https://github.com/zarr-developers/zarr-python/issues/2191
     g1 = zarr.group(store=store, zarr_format=zarr_format)
     g1.create_group("0")
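Editor's note: a standalone sketch (not code from this repository) of the pyo3 0.22 -> 0.23 pattern behind most of the lib.rs churn above. In 0.23 the `*_bound` constructors drop their suffix (`PyString::new_bound` -> `PyString::new`, `PyNone::get_bound` -> `PyNone::get`), and methods may return plain Rust values such as `String`, `Vec<u8>` or `()` and let pyo3 convert them, which is why many signatures in the diff drop the explicit `py: Python<'_>` parameter. The function names below are hypothetical.

use pyo3::prelude::*;
use pyo3::types::PyString;

#[pyfunction]
fn snapshot_id_bound(py: Python<'_>) -> PyResult<Bound<'_, PyString>> {
    // pyo3 0.23 spelling; under 0.22 this call would have been PyString::new_bound(py, ...)
    Ok(PyString::new(py, "example-snapshot-id"))
}

#[pyfunction]
fn snapshot_id_plain() -> String {
    // Returning a plain String lets pyo3 perform the conversion, mirroring what
    // the diff does for methods such as new_branch() and reset_branch().
    "example-snapshot-id".to_string()
}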