diff --git a/.github/workflows/windows-check.yml b/.github/workflows/windows-check.yml
new file mode 100644
index 00000000..130a35a9
--- /dev/null
+++ b/.github/workflows/windows-check.yml
@@ -0,0 +1,54 @@
+name: Windows tests
+on:
+  pull_request:
+    types: [opened, reopened, synchronize, labeled]
+  push:
+    branches:
+      - main
+
+env:
+  CARGO_INCREMENTAL: 0
+  CARGO_NET_RETRY: 10
+  CI: 1
+  RUST_BACKTRACE: short
+  RUSTFLAGS: "-D warnings -W unreachable-pub -W bare-trait-objects"
+  RUSTUP_MAX_RETRIES: 10
+
+jobs:
+  rust:
+    name: Run rust tests in windows
+    timeout-minutes: 20
+    runs-on: windows-latest
+    #defaults:
+    #  run:
+    #    working-directory: ./
+    #permissions:
+      #contents: read
+      #actions: read
+      #pull-requests: read
+    env:
+      #CC: deny_c
+      RUST_CHANNEL: 'stable'
+
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Install Rust toolchain
+        run: |
+          rustup update --no-self-update ${{ env.RUST_CHANNEL }}
+          rustup component add --toolchain ${{ env.RUST_CHANNEL }} rustfmt rust-src clippy
+          rustup default ${{ env.RUST_CHANNEL }}
+
+      - name: Cache Dependencies
+        uses: Swatinem/rust-cache@v2
+        with:
+          # workspaces: "rust -> target"
+          key: windows-${{ env.RUST_CHANNEL }}
+
+      - name: Check
+        run: |
+          cargo test --lib
diff --git a/Cargo.lock b/Cargo.lock
index 069cc8e5..d0bc9b3b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1358,7 +1358,7 @@ dependencies = [
 
 [[package]]
 name = "icechunk"
-version = "0.2.3"
+version = "0.2.4"
 dependencies = [
  "async-recursion",
  "async-stream",
@@ -1405,7 +1405,7 @@ dependencies = [
 
 [[package]]
 name = "icechunk-python"
-version = "0.2.3"
+version = "0.2.4"
 dependencies = [
  "async-stream",
  "async-trait",
diff --git a/Changelog.python.md b/Changelog.python.md
index 1211c3ba..27e567b6 100644
--- a/Changelog.python.md
+++ b/Changelog.python.md
@@ -1,5 +1,11 @@
 # Changelog
 
+### Python Icechunk Library 0.2.4
+
+### Fixes
+
+- Fixes a bug where object storage paths were incorrectly formatted when using Windows.
+
 ## Python Icechunk Library 0.2.3
 
 ### Features
diff --git a/icechunk-python/Cargo.toml b/icechunk-python/Cargo.toml
index 3d5c7fa7..df07c4af 100644
--- a/icechunk-python/Cargo.toml
+++ b/icechunk-python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "icechunk-python"
-version = "0.2.3"
+version = "0.2.4"
 description = "Transactional storage engine for Zarr designed for use on cloud object storage"
 readme = "../README.md"
 repository = "https://github.com/earth-mover/icechunk"
@@ -21,7 +21,7 @@ crate-type = ["cdylib"]
 bytes = "1.9.0"
 chrono = { version = "0.4.39" }
 futures = "0.3.31"
-icechunk = { path = "../icechunk", version = "0.2.3", features = ["logs"] }
+icechunk = { path = "../icechunk", version = "0.2.4", features = ["logs"] }
 itertools = "0.14.0"
 pyo3 = { version = "0.23", features = [
   "chrono",
diff --git a/icechunk/Cargo.toml b/icechunk/Cargo.toml
index ffb11ca4..268ddf83 100644
--- a/icechunk/Cargo.toml
+++ b/icechunk/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "icechunk"
-version = "0.2.3"
+version = "0.2.4"
 description = "Transactional storage engine for Zarr designed for use on cloud object storage"
 readme = "../README.md"
 repository = "https://github.com/earth-mover/icechunk"
diff --git a/icechunk/src/storage/mod.rs b/icechunk/src/storage/mod.rs
index fef9d0b4..4130bb3c 100644
--- a/icechunk/src/storage/mod.rs
+++ b/icechunk/src/storage/mod.rs
@@ -712,10 +712,27 @@ pub async fn new_gcs_storage(
 
 #[allow(clippy::unwrap_used, clippy::panic)]
 mod tests {
-    use std::collections::HashSet;
+    use std::{collections::HashSet, fs::File, io::Write, path::PathBuf};
 
     use super::*;
     use proptest::prelude::*;
+    use tempfile::TempDir;
+
+    #[tokio::test]
+    async fn test_is_clean() {
+        let repo_dir = TempDir::new().unwrap();
+        let s = new_local_filesystem_storage(repo_dir.path()).await.unwrap();
+        assert!(s.root_is_clean().await.unwrap());
+
+        let mut file = File::create(repo_dir.path().join("foo.txt")).unwrap();
+        write!(file, "hello").unwrap();
+        assert!(!s.root_is_clean().await.unwrap());
+
+        let inside_existing =
+            PathBuf::from_iter([repo_dir.path().as_os_str().to_str().unwrap(), "foo"]);
+        let s = new_local_filesystem_storage(&inside_existing).await.unwrap();
+        assert!(s.root_is_clean().await.unwrap());
+    }
 
     proptest! {
         #![proptest_config(ProptestConfig {
diff --git a/icechunk/src/storage/object_store.rs b/icechunk/src/storage/object_store.rs
index e389e763..306a82fd 100644
--- a/icechunk/src/storage/object_store.rs
+++ b/icechunk/src/storage/object_store.rs
@@ -83,7 +83,6 @@ impl ObjectStorage {
             Arc::new(LocalFileSystemObjectStoreBackend { path: prefix.to_path_buf() });
         let client = backend.mk_object_store().await?;
         let storage = ObjectStorage { backend, client: OnceCell::new_with(Some(client)) };
-
         Ok(storage)
     }
 
@@ -1022,6 +1021,8 @@ mod tests {
 
     use tempfile::TempDir;
 
+    use crate::format::{ChunkId, ManifestId, SnapshotId};
+
     use super::ObjectStorage;
 
     #[tokio::test]
@@ -1071,4 +1072,34 @@
             ObjectStorage::new_local_filesystem(PathBuf::from(&rel_path).as_path()).await;
         assert!(store.is_ok());
     }
+
+    #[tokio::test]
+    async fn test_object_store_paths() {
+        let store = ObjectStorage::new_local_filesystem(PathBuf::from(".").as_path())
+            .await
+            .unwrap();
+
+        let ref_key = "ref_key";
+        let ref_path = store.ref_key(ref_key);
+        assert_eq!(ref_path.to_string(), format!("refs/{ref_key}"));
+
+        let snapshot_id = SnapshotId::random();
+        let snapshot_path = store.get_snapshot_path(&snapshot_id);
+        assert_eq!(snapshot_path.to_string(), format!("snapshots/{snapshot_id}"));
+
+        let manifest_id = ManifestId::random();
+        let manifest_path = store.get_manifest_path(&manifest_id);
+        assert_eq!(manifest_path.to_string(), format!("manifests/{manifest_id}"));
+
+        let chunk_id = ChunkId::random();
+        let chunk_path = store.get_chunk_path(&chunk_id);
+        assert_eq!(chunk_path.to_string(), format!("chunks/{chunk_id}"));
+
+        let transaction_id = SnapshotId::random();
+        let transaction_path = store.get_transaction_path(&transaction_id);
+        assert_eq!(
+            transaction_path.to_string(),
+            format!("transactions/{transaction_id}")
+        );
+    }
 }
diff --git a/icechunk/src/storage/s3.rs b/icechunk/src/storage/s3.rs
index a40b657b..30dba4e7 100644
--- a/icechunk/src/storage/s3.rs
+++ b/icechunk/src/storage/s3.rs
@@ -30,7 +30,6 @@ use aws_sdk_s3::{
 use aws_smithy_types_convert::{date_time::DateTimeExt, stream::PaginationStreamExt};
 use bytes::{Buf, Bytes};
 use chrono::{DateTime, Utc};
-use err_into::ErrorInto as _;
 use futures::{
     stream::{self, BoxStream},
     StreamExt, TryStreamExt,
@@ -150,10 +149,10 @@ impl S3Storage {
 
     fn get_path_str(&self, file_prefix: &str, id: &str) -> StorageResult<String> {
         let path = PathBuf::from_iter([self.prefix.as_str(), file_prefix, id]);
-        path.into_os_string()
-            .into_string()
-            .map_err(StorageErrorKind::BadPrefix)
-            .err_into()
+        let path_str =
+            path.into_os_string().into_string().map_err(StorageErrorKind::BadPrefix)?;
+
+        Ok(path_str.replace("\\", "/"))
     }
 
     fn get_path(
@@ -187,10 +186,10 @@ impl S3Storage {
 
     fn ref_key(&self, ref_key: &str) -> StorageResult<String> {
         let path = PathBuf::from_iter([self.prefix.as_str(), REF_PREFIX, ref_key]);
-        path.into_os_string()
-            .into_string()
-            .map_err(StorageErrorKind::BadPrefix)
-            .err_into()
+        let path_str =
+            path.into_os_string().into_string().map_err(StorageErrorKind::BadPrefix)?;
+
+        Ok(path_str.replace("\\", "/"))
     }
 
     async fn get_object_reader(
@@ -612,10 +611,7 @@
         _settings: &Settings,
         prefix: &str,
     ) -> StorageResult<BoxStream<'a, StorageResult<ListInfo<String>>>> {
-        let prefix = PathBuf::from_iter([self.prefix.as_str(), prefix])
-            .into_os_string()
-            .into_string()
-            .map_err(StorageErrorKind::BadPrefix)?;
+        let prefix = format!("{}/{}", self.prefix, prefix).replace("//", "/");
         let stream = self
             .get_client()
             .await
@@ -798,4 +794,39 @@ mod tests {
         let deserialized: S3Storage = serde_json::from_str(&serialized).unwrap();
         assert_eq!(storage.config, deserialized.config);
     }
+
+    #[tokio::test]
+    async fn test_s3_paths() {
+        let storage = S3Storage::new(
+            S3Options {
+                region: Some("us-west-2".to_string()),
+                endpoint_url: None,
+                allow_http: true,
+                anonymous: false,
+            },
+            "bucket".to_string(),
+            Some("prefix".to_string()),
+            S3Credentials::FromEnv,
+        )
+        .unwrap();
+
+        let ref_path = storage.ref_key("ref_key").unwrap();
+        assert_eq!(ref_path, "prefix/refs/ref_key");
+
+        let snapshot_id = SnapshotId::random();
+        let snapshot_path = storage.get_snapshot_path(&snapshot_id).unwrap();
+        assert_eq!(snapshot_path, format!("prefix/snapshots/{snapshot_id}"));
+
+        let manifest_id = ManifestId::random();
+        let manifest_path = storage.get_manifest_path(&manifest_id).unwrap();
+        assert_eq!(manifest_path, format!("prefix/manifests/{manifest_id}"));
+
+        let chunk_id = ChunkId::random();
+        let chunk_path = storage.get_chunk_path(&chunk_id).unwrap();
+        assert_eq!(chunk_path, format!("prefix/chunks/{chunk_id}"));
+
+        let transaction_id = SnapshotId::random();
+        let transaction_path = storage.get_transaction_path(&transaction_id).unwrap();
+        assert_eq!(transaction_path, format!("prefix/transactions/{transaction_id}"));
+    }
 }
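
Note on the path changes above: object-store keys must always use '/' separators, but PathBuf joins components with the platform separator, which is '\' on Windows. The s3.rs changes build the key with PathBuf and then normalize the separators, while the list-prefix change formats with '/' directly and collapses any doubled slash. Below is a minimal standalone sketch of that normalization; the helper name object_key and the error handling are illustrative assumptions, not part of the icechunk API.

// Minimal sketch (assumed helper, not icechunk's API) of the key normalization
// applied in s3.rs above: object-store keys must always use '/', even when the
// path is assembled with PathBuf on Windows, where the separator is '\'.
use std::path::PathBuf;

fn object_key(prefix: &str, file_prefix: &str, id: &str) -> Option<String> {
    // Build the key the same way the patch does, via PathBuf::from_iter.
    let path = PathBuf::from_iter([prefix, file_prefix, id]);
    // `into_string` only fails for non-UTF-8 paths; the real code maps that
    // case to StorageErrorKind::BadPrefix instead of returning None.
    let raw = path.into_os_string().into_string().ok()?;
    // No-op on Unix; turns "prefix\refs\main" into "prefix/refs/main" on Windows.
    Some(raw.replace('\\', "/"))
}

fn main() {
    // The resulting key is identical on every platform.
    assert_eq!(
        object_key("prefix", "refs", "main").as_deref(),
        Some("prefix/refs/main")
    );
    println!("{}", object_key("prefix", "chunks", "abc123").unwrap());
}

Running this with cargo run passes the assertion on both Unix and Windows, because the replacement is a no-op when the separator is already '/'.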