From f1dc0bbe231a9f5742d0814fb2d4a0f4c9bc77e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Galkin?= Date: Mon, 17 Feb 2025 23:35:59 -0300 Subject: [PATCH] Flatbuffers for serialization (#733) * flatbuffers manifest * wip * working * testing flatbuffers perf * flatbuffers snapshots * code quality on manifest * Manifest working * All tests pass wit the new flatbuffers snapshot * Working on code qty, tests passing * more code qty * WIP: transaction log" * Diffs and status working * All tests passing * Fix stateful test * code qty * Clean up * Documentation and some name changes * Better ManifestExtents type and serialization * lint and tests --- Cargo.lock | 15 +- .../008-no-copy-serialization-formats.md | 127 + docs/docs/spec.md | 62 +- ...4E0AY7Y4V5081TANG => 0DX66KVWRNQGEEWC8QN0} | Bin ...31EJC9785Q61Y8VHG => 9HZ3J7FEC9CCEB6GEKVG} | Bin ...28FFV4Q7D070R2BJG => MDYP3ZEV630YPNCEENC0} | Bin ...GCRCJ90NJZ58SFA60 => Y9DTSVWNZV9HKT2R17T0} | Bin .../tests/data/test-repo/config.yaml | 25 +- .../test-repo/manifests/3AS90VB6T0GSE4J67XX0 | Bin 166 -> 0 bytes .../test-repo/manifests/3C9WRKTE3PNDSNYBKD60 | Bin 0 -> 165 bytes .../test-repo/manifests/5ZW0V1ZQXQ16804S897G | Bin 119 -> 0 bytes .../test-repo/manifests/G94WC9CN23R53A63CRXG | Bin 0 -> 278 bytes .../test-repo/manifests/MWE7J4Y1V04W0DCXB8Z0 | Bin 0 -> 174 bytes .../test-repo/manifests/R666SBH9YHZMB04ZMARG | Bin 221 -> 0 bytes .../test-repo/manifests/STWYFSPWFCD62MQTDM20 | Bin 117 -> 0 bytes .../test-repo/manifests/T9PRDPYDRCEHC2GAVR8G | Bin 0 -> 241 bytes .../test-repo/refs/branch.main/ZZZZZZZW.json | 2 +- .../test-repo/refs/branch.main/ZZZZZZZX.json | 2 +- .../test-repo/refs/branch.main/ZZZZZZZY.json | 2 +- .../test-repo/refs/branch.main/ZZZZZZZZ.json | 2 +- .../refs/branch.my-branch/ZZZZZZZX.json | 2 +- .../refs/branch.my-branch/ZZZZZZZY.json | 2 +- .../refs/branch.my-branch/ZZZZZZZZ.json | 2 +- .../data/test-repo/refs/tag.deleted/ref.json | 2 +- .../refs/tag.it also works!/ref.json | 2 +- .../test-repo/refs/tag.it works!/ref.json | 2 +- .../test-repo/snapshots/6Q9GDTXKF17BGQVSQZFG | Bin 0 -> 177 bytes .../test-repo/snapshots/949AXZ49X764TMDC6D4G | Bin 0 -> 787 bytes .../test-repo/snapshots/9W0W1DS2BKRV4MK2A2S0 | Bin 490 -> 0 bytes .../test-repo/snapshots/A2RD2Y65PR6D3B6BR1K0 | Bin 0 -> 587 bytes .../test-repo/snapshots/FK0CX5JQH2DVDZ6PD6WG | Bin 493 -> 0 bytes .../test-repo/snapshots/G0BR0G9NKT75ZZS7BWWG | Bin 646 -> 0 bytes .../test-repo/snapshots/K1BMYVG1HNVTNV1FSBH0 | Bin 0 -> 577 bytes .../test-repo/snapshots/KCR7ES7JPCBY23X6MY3G | Bin 481 -> 0 bytes .../test-repo/snapshots/QY5JG2BWG2VPPDJR4JE0 | Bin 429 -> 0 bytes .../test-repo/snapshots/RPA0WQCNM2N9HBBRHJQ0 | Bin 0 -> 513 bytes .../test-repo/snapshots/SNF98D1SK7NWD5KQJM20 | Bin 0 -> 586 bytes .../test-repo/snapshots/VNPWJSZWB9G990XV1V8G | Bin 132 -> 0 bytes .../transactions/949AXZ49X764TMDC6D4G | Bin 0 -> 172 bytes .../transactions/9W0W1DS2BKRV4MK2A2S0 | Bin 71 -> 0 bytes .../transactions/A2RD2Y65PR6D3B6BR1K0 | Bin 0 -> 148 bytes .../transactions/FK0CX5JQH2DVDZ6PD6WG | Bin 72 -> 0 bytes .../transactions/G0BR0G9NKT75ZZS7BWWG | Bin 141 -> 0 bytes .../transactions/K1BMYVG1HNVTNV1FSBH0 | Bin 0 -> 235 bytes .../transactions/KCR7ES7JPCBY23X6MY3G | Bin 104 -> 0 bytes .../transactions/QY5JG2BWG2VPPDJR4JE0 | Bin 116 -> 0 bytes .../transactions/RPA0WQCNM2N9HBBRHJQ0 | Bin 0 -> 167 bytes .../transactions/SNF98D1SK7NWD5KQJM20 | Bin 0 -> 148 bytes .../tests/test_zarr/test_stateful.py | 14 +- icechunk/Cargo.toml | 1 + icechunk/examples/low_level_dataset.rs | 1 + 
.../examples/multithreaded_get_chunk_refs.rs | 195 + icechunk/flatbuffers/all.fbs | 14 + icechunk/flatbuffers/manifest.fbs | 56 + icechunk/flatbuffers/object_ids.fbs | 11 + icechunk/flatbuffers/snapshot.fbs | 137 + icechunk/flatbuffers/transaction_log.fbs | 52 + icechunk/src/asset_manager.rs | 81 +- icechunk/src/change_set.rs | 24 +- icechunk/src/config.rs | 2 +- icechunk/src/conflicts/detector.rs | 35 +- .../src/format/flatbuffers/all_generated.rs | 3385 +++++++++++++++++ icechunk/src/format/manifest.rs | 402 +- icechunk/src/format/mod.rs | 55 +- icechunk/src/format/serializers/current.rs | 141 - icechunk/src/format/serializers/mod.rs | 63 +- icechunk/src/format/snapshot.rs | 657 +++- icechunk/src/format/transaction_log.rs | 312 +- icechunk/src/ops/gc.rs | 38 +- icechunk/src/repository.rs | 129 +- icechunk/src/session.rs | 173 +- icechunk/src/store.rs | 2 +- icechunk/tests/test_concurrency.rs | 2 +- 73 files changed, 5375 insertions(+), 856 deletions(-) create mode 100644 design-docs/008-no-copy-serialization-formats.md rename icechunk-python/tests/data/test-repo/chunks/{2TF4E0AY7Y4V5081TANG => 0DX66KVWRNQGEEWC8QN0} (100%) rename icechunk-python/tests/data/test-repo/chunks/{DV031EJC9785Q61Y8VHG => 9HZ3J7FEC9CCEB6GEKVG} (100%) rename icechunk-python/tests/data/test-repo/chunks/{HKZ28FFV4Q7D070R2BJG => MDYP3ZEV630YPNCEENC0} (100%) rename icechunk-python/tests/data/test-repo/chunks/{ZEEGCRCJ90NJZ58SFA60 => Y9DTSVWNZV9HKT2R17T0} (100%) delete mode 100644 icechunk-python/tests/data/test-repo/manifests/3AS90VB6T0GSE4J67XX0 create mode 100644 icechunk-python/tests/data/test-repo/manifests/3C9WRKTE3PNDSNYBKD60 delete mode 100644 icechunk-python/tests/data/test-repo/manifests/5ZW0V1ZQXQ16804S897G create mode 100644 icechunk-python/tests/data/test-repo/manifests/G94WC9CN23R53A63CRXG create mode 100644 icechunk-python/tests/data/test-repo/manifests/MWE7J4Y1V04W0DCXB8Z0 delete mode 100644 icechunk-python/tests/data/test-repo/manifests/R666SBH9YHZMB04ZMARG delete mode 100644 icechunk-python/tests/data/test-repo/manifests/STWYFSPWFCD62MQTDM20 create mode 100644 icechunk-python/tests/data/test-repo/manifests/T9PRDPYDRCEHC2GAVR8G create mode 100644 icechunk-python/tests/data/test-repo/snapshots/6Q9GDTXKF17BGQVSQZFG create mode 100644 icechunk-python/tests/data/test-repo/snapshots/949AXZ49X764TMDC6D4G delete mode 100644 icechunk-python/tests/data/test-repo/snapshots/9W0W1DS2BKRV4MK2A2S0 create mode 100644 icechunk-python/tests/data/test-repo/snapshots/A2RD2Y65PR6D3B6BR1K0 delete mode 100644 icechunk-python/tests/data/test-repo/snapshots/FK0CX5JQH2DVDZ6PD6WG delete mode 100644 icechunk-python/tests/data/test-repo/snapshots/G0BR0G9NKT75ZZS7BWWG create mode 100644 icechunk-python/tests/data/test-repo/snapshots/K1BMYVG1HNVTNV1FSBH0 delete mode 100644 icechunk-python/tests/data/test-repo/snapshots/KCR7ES7JPCBY23X6MY3G delete mode 100644 icechunk-python/tests/data/test-repo/snapshots/QY5JG2BWG2VPPDJR4JE0 create mode 100644 icechunk-python/tests/data/test-repo/snapshots/RPA0WQCNM2N9HBBRHJQ0 create mode 100644 icechunk-python/tests/data/test-repo/snapshots/SNF98D1SK7NWD5KQJM20 delete mode 100644 icechunk-python/tests/data/test-repo/snapshots/VNPWJSZWB9G990XV1V8G create mode 100644 icechunk-python/tests/data/test-repo/transactions/949AXZ49X764TMDC6D4G delete mode 100644 icechunk-python/tests/data/test-repo/transactions/9W0W1DS2BKRV4MK2A2S0 create mode 100644 icechunk-python/tests/data/test-repo/transactions/A2RD2Y65PR6D3B6BR1K0 delete mode 100644 
icechunk-python/tests/data/test-repo/transactions/FK0CX5JQH2DVDZ6PD6WG delete mode 100644 icechunk-python/tests/data/test-repo/transactions/G0BR0G9NKT75ZZS7BWWG create mode 100644 icechunk-python/tests/data/test-repo/transactions/K1BMYVG1HNVTNV1FSBH0 delete mode 100644 icechunk-python/tests/data/test-repo/transactions/KCR7ES7JPCBY23X6MY3G delete mode 100644 icechunk-python/tests/data/test-repo/transactions/QY5JG2BWG2VPPDJR4JE0 create mode 100644 icechunk-python/tests/data/test-repo/transactions/RPA0WQCNM2N9HBBRHJQ0 create mode 100644 icechunk-python/tests/data/test-repo/transactions/SNF98D1SK7NWD5KQJM20 create mode 100644 icechunk/examples/multithreaded_get_chunk_refs.rs create mode 100644 icechunk/flatbuffers/all.fbs create mode 100644 icechunk/flatbuffers/manifest.fbs create mode 100644 icechunk/flatbuffers/object_ids.fbs create mode 100644 icechunk/flatbuffers/snapshot.fbs create mode 100644 icechunk/flatbuffers/transaction_log.fbs create mode 100644 icechunk/src/format/flatbuffers/all_generated.rs delete mode 100644 icechunk/src/format/serializers/current.rs diff --git a/Cargo.lock b/Cargo.lock index 7f749c30..f681826f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -592,9 +592,9 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "block-buffer" @@ -917,6 +917,16 @@ dependencies = [ "subtle", ] +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +dependencies = [ + "bitflags", + "rustc_version", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1362,6 +1372,7 @@ dependencies = [ "bytes", "chrono", "err-into", + "flatbuffers", "futures", "itertools 0.14.0", "object_store", diff --git a/design-docs/008-no-copy-serialization-formats.md b/design-docs/008-no-copy-serialization-formats.md new file mode 100644 index 00000000..f83f3a15 --- /dev/null +++ b/design-docs/008-no-copy-serialization-formats.md @@ -0,0 +1,127 @@ +# Evaluation of different serialization formats + +We want to move away from msgpack serialization for Icechunk metadata files. + +## Why + +* Msgpack requires an expensive parsing process upfront. If the user only wants +to pull a few chunk refs from a manifest, they still need to parse the whole manifest. +* Msgpack deserializes to Rust data structures. This is good for simplicity of code, but +probably not good for memory consumption (more pointers everywhere). +* Msgpack gives too many options on how to serialize things, there is no canonical way, +so it's not easy to predict how `serde` is going to serialize our data structures, and +this could even change from version to version. +* It's hard to explain in the spec what goes into the metadata files; we would need to go +into the `rmp_serde` implementation, see what it does, and document that in the spec. + +## Other options + +There is a never-ending menu, from a custom binary format to Parquet and everything in between. +We focused mostly on no-copy formats, because of some of the issues enumerated above. Also, +there is a preference for formats that have a tight schema and can be documented with +some form of IDL.
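
To make the no-copy motivation concrete, here is a minimal, self-contained Rust sketch contrasting eager deserialization with lazy access over a byte buffer. The fixed 16-byte record layout is invented purely for illustration and is not the Icechunk manifest format; it only shows why lazy access avoids the upfront parsing cost that msgpack imposes.

```rust
// Hypothetical "manifest": N records, each 16 bytes = (offset: u64 LE, length: u64 LE).
// This layout is only for illustration; it is not the Icechunk on-disk format.

// msgpack-style: parse the whole buffer into owned values upfront,
// even if the caller only needs a single record.
fn read_all_eager(buf: &[u8]) -> Vec<(u64, u64)> {
    buf.chunks_exact(16)
        .map(|rec| {
            let offset = u64::from_le_bytes(rec[0..8].try_into().unwrap());
            let length = u64::from_le_bytes(rec[8..16].try_into().unwrap());
            (offset, length)
        })
        .collect()
}

// flatbuffers-style: jump to the one record needed and decode only those bytes;
// the rest of the buffer is never touched or copied.
fn read_one_lazy(buf: &[u8], i: usize) -> (u64, u64) {
    let rec = &buf[i * 16..(i + 1) * 16];
    (
        u64::from_le_bytes(rec[0..8].try_into().unwrap()),
        u64::from_le_bytes(rec[8..16].try_into().unwrap()),
    )
}

fn main() {
    // build a small fake manifest with 1,000 records
    let mut buf = Vec::new();
    for i in 0..1_000u64 {
        buf.extend_from_slice(&(i * 100).to_le_bytes()); // offset
        buf.extend_from_slice(&100u64.to_le_bytes()); // length
    }
    assert_eq!(read_all_eager(&buf)[42], read_one_lazy(&buf, 42));
    println!("record 42: {:?}", read_one_lazy(&buf, 42));
}
```

A real flatbuffer layers a vtable-based schema on top of this idea; the `.fbs` IDL files added later in this patch describe that schema for Icechunk's metadata files.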
+ +## Performance evaluation + +We evaluated the performance of msgpack, flatbuffers and capnproto. The evaluation looks at: + +* Manifest file size, for a big manifest with 1M native chunk refs. +* Speed of writing. +* Speed of reading. + +We wrote an example program in `examples/multithreaded_get_chunk_refs.rs`. +This program writes a big repo to local file storage; it doesn't actually write the chunks, since +we are not interested in benchmarking that. It executes purely in Rust, not using the Python interface. + +It writes a manifest with 1M native chunk refs, using zstd compression level 3. The writes are done +from 1M concurrent async tasks. + +It then executes 1M chunk ref reads (note: only the refs are read; the chunks themselves are not there). +Reads are executed from 4 threads with 250k concurrent async tasks each. + +Notice: + +* We use the local file system on purpose, so network times are not included. +* We are comparing pulling refs only, not chunks, which is a worst case. In the real + world, read operations are dominated by the time taken to fetch the chunks. +* The evaluation was done in an early state of the code, when many parts were still unsafe, + but we have verified there are no huge differences. + +### Results for writes + +```sh +nix run nixpkgs#hyperfine -- \ + --prepare 'rm -rf /tmp/test-perf' \ + --warmup 1 \ + 'cargo run --release --example multithreaded_get_chunk_refs -- --write /tmp/test-perf' +``` + +#### Flatbuffers + +Compressed manifest size: 27_527_680 bytes + +``` +Time (mean ± σ): 5.698 s ± 0.163 s [User: 4.764 s, System: 0.910 s] +Range (min … max): 5.562 s … 6.103 s 10 runs +``` + +#### Capnproto + +Compressed manifest size: 26_630_927 bytes + +``` +Time (mean ± σ): 6.276 s ± 0.163 s [User: 5.225 s, System: 1.017 s] +Range (min … max): 6.126 s … 6.630 s 10 runs +``` + +#### Msgpack + +Compressed manifest size: 22_250_152 bytes + +``` +Time (mean ± σ): 6.224 s ± 0.155 s [User: 5.488 s, System: 0.712 s] +Range (min … max): 6.033 s … 6.532 s 10 runs +``` + +### Results for reads + +```sh +nix run nixpkgs#hyperfine -- \ + --warmup 1 \ + 'cargo run --release --example multithreaded_get_chunk_refs -- --read /tmp/test-perf' +``` + +#### Flatbuffers + +``` +Time (mean ± σ): 3.676 s ± 0.257 s [User: 7.385 s, System: 1.819 s] +Range (min … max): 3.171 s … 4.038 s 10 runs +``` + +#### Capnproto + +``` +Time (mean ± σ): 5.254 s ± 0.234 s [User: 11.370 s, System: 1.962 s] +Range (min … max): 4.992 s … 5.799 s 10 runs +``` + +#### Msgpack + +``` +Time (mean ± σ): 3.310 s ± 0.606 s [User: 5.975 s, System: 1.762 s] +Range (min … max): 2.392 s … 4.102 s 10 runs +``` + +## Conclusions + +* The compressed manifest is about 25% larger with flatbuffers than with msgpack. +* Flatbuffers is slightly faster for commits. +* Flatbuffers is slightly slower for reads. +* Timing differences are not significant for real-world scenarios, where performance +is dominated by the time taken to download or upload chunks. +* Manifest fetch time differences could be somewhat significant for workloads where +latency to first byte is important. This is not the use case Icechunk optimizes for. + +## Decision + +We are going to use flatbuffers for our metadata on-disk format. diff --git a/docs/docs/spec.md b/docs/docs/spec.md index 3a582341..58292dbb 100644 --- a/docs/docs/spec.md +++ b/docs/docs/spec.md @@ -72,7 +72,6 @@ Finally, in an atomic put-if-not-exists operation, to commit the transaction, it This operation may fail if a different client has already committed the next snapshot.
In this case, the client may attempt to resolve the conflicts and retry the commit. - ```mermaid flowchart TD subgraph metadata[Metadata] @@ -121,6 +120,7 @@ All data and metadata files are stored within a root directory (typically a pref - `$ROOT/snapshots/` snapshot files - `$ROOT/attributes/` attribute files - `$ROOT/manifests/` chunk manifests +- `$ROOT/transactions/` transaction log files - `$ROOT/chunks/` chunks ### File Formats @@ -128,7 +128,6 @@ All data and metadata files are stored within a root directory (typically a pref !!! warning The actual file formats used for each type of metadata file are in flux. The spec currently describes the data structures encoded in these files, rather than a specific file format. - ### Reference Files Similar to Git, Icechunk supports the concept of _branches_ and _tags_. @@ -149,9 +148,8 @@ Different client sessions may simultaneously create two inconsistent snapshots; References (both branches and tags) are stored as JSON files, the content is a JSON object with: -* keys: a single key `"snapshot"`, -* value: a string representation of the snapshot id, using [Base 32 Crockford](https://www.crockford.com/base32.html) encoding. The snapshot id is 12 byte random binary, so the encoded string has 20 characters. - +- keys: a single key `"snapshot"`, +- value: a string representation of the snapshot id, using [Base 32 Crockford](https://www.crockford.com/base32.html) encoding. The snapshot id is 12 byte random binary, so the encoded string has 20 characters. Here is an example of a JSON file corresponding to a tag or branch: @@ -186,6 +184,7 @@ Branch references are stored in the `refs/` directory within a subdirectory corr Branch names may not contain the `/` character. To facilitate easy lookups of the latest branch reference, we use the following encoding for the sequence number: + - subtract the sequence number from the integer `1099511627775` - encode the resulting integer as a string using [Base 32 Crockford](https://www.crockford.com/base32.html) - left-padding the string with 0s to a length of 8 characters @@ -216,30 +215,8 @@ Tags cannot be deleted once created. The snapshot file fully describes the schema of the repository, including all arrays and groups. -The snapshot file is currently encoded using [MessagePack](https://msgpack.org/), but this may change before Icechunk version 1.0. Given the alpha status of this spec, the best way to understand the information stored -in the snapshot file is through the data structure used internally by the Icechunk library for serialization. This data structure will most certainly change before the spec stabilization: - -```rust -pub struct Snapshot { - pub icechunk_snapshot_format_version: IcechunkFormatVersion, - pub icechunk_snapshot_format_flags: BTreeMap, - - pub manifest_files: Vec, - pub attribute_files: Vec, - - pub total_parents: u32, - pub short_term_parents: u16, - pub short_term_history: VecDeque, - - pub metadata: SnapshotMetadata, - pub started_at: DateTime, - pub properties: SnapshotProperties, - nodes: BTreeMap, -} -``` - -To get full details on what each field contains, please refer to the [Icechunk library code](https://github.com/earth-mover/icechunk/blob/f460a56577ec560c4debfd89e401a98153cd3560/icechunk/src/format/snapshot.rs#L97). - +The snapshot file is encoded using [flatbuffers](https://github.com/google/flatbuffers). 
The IDL for the +on-disk format can be found in [the repository file](https://github.com/earth-mover/icechunk/tree/main/icechunk/flatbuffers/snapshot.fbs). ### Attributes Files @@ -248,8 +225,7 @@ Attribute files hold user-defined attributes separately from the snapshot file. !!! warning Attribute files have not been implemented. -The on-disk format for attribute files has not been defined yet, but it will probably be a -MessagePack serialization of the attributes map. +The on-disk format for attribute files has not been defined in full yet. ### Chunk Manifest Files @@ -257,28 +233,14 @@ A chunk manifest file stores chunk references. Chunk references from multiple arrays can be stored in the same chunk manifest. The chunks from a single array can also be spread across multiple manifests. -Manifest files are currently encoded using [MessagePack](https://msgpack.org/), but this may change before Icechunk version 1.0. Given the alpha status of this spec, the best way to understand the information stored -in the snapshot file is through the data structure used internally by the Icechunk library. This data structure will most certainly change before the spec stabilization: - -```rust -pub struct Manifest { - pub icechunk_manifest_format_version: IcechunkFormatVersion, - pub icechunk_manifest_format_flags: BTreeMap, - chunks: BTreeMap<(NodeId, ChunkIndices), ChunkPayload>, -} - -pub enum ChunkPayload { - Inline(Bytes), - Virtual(VirtualChunkRef), - Ref(ChunkRef), -} -``` +Manifest files are encoded using [flatbuffers](https://github.com/google/flatbuffers). The IDL for the +on-disk format can be found in [the repository file](https://github.com/earth-mover/icechunk/tree/main/icechunk/flatbuffers/manifest.fbs). The most important part to understand from the data structure is the fact that manifests can hold three types of references: -* Native (`Ref`), pointing to the id of a chunk within the Icechunk repository. -* Inline (`Inline`), an optimization for very small chunks that can be embedded directly in the manifest. Mostly used for coordinate arrays. -* Virtual (`Virtual`), pointing to a region of a file outside of the Icechunk repository, for example, +- Native (`Ref`), pointing to the id of a chunk within the Icechunk repository. +- Inline (`Inline`), an optimization for very small chunks that can be embedded directly in the manifest. Mostly used for coordinate arrays. +- Virtual (`Virtual`), pointing to a region of a file outside of the Icechunk repository, for example, a chunk that is inside a NetCDF file in object store To get full details on what each field contains, please refer to the [Icechunk library code](https://github.com/earth-mover/icechunk/blob/f460a56577ec560c4debfd89e401a98153cd3560/icechunk/src/format/manifest.rs#L106).
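
As a reading aid for the spec section above, here is a small, self-contained sketch of the three reference kinds. The variant names follow the spec text (`Ref`, `Inline`, `Virtual`) and the field names follow the `ChunkRef` table in `manifest.fbs`, but the Rust types are illustrative stand-ins, not the icechunk crate's actual API.

```rust
// Illustrative only: the three kinds of chunk references a manifest can hold.
struct ChunkId([u8; 12]);

enum ChunkPayload {
    // chunk data embedded directly in the manifest (used for very small chunks)
    Inline(Vec<u8>),
    // "native" reference: chunk stored as an object inside the Icechunk repository
    Ref { id: ChunkId, offset: u64, length: u64 },
    // chunk stored in a file outside the repository, e.g. inside a NetCDF file
    Virtual { location: String, offset: u64, length: u64 },
}

fn describe(payload: &ChunkPayload) -> String {
    match payload {
        ChunkPayload::Inline(data) => format!("{} bytes stored inline", data.len()),
        ChunkPayload::Ref { id, offset, length } => {
            format!("bytes {}..{} of repo chunk object {:02x?}", offset, offset + length, id.0)
        }
        ChunkPayload::Virtual { location, offset, length } => {
            format!("bytes {}..{} of external object {}", offset, offset + length, location)
        }
    }
}

fn main() {
    // hypothetical external location, for illustration only
    let chunk = ChunkPayload::Virtual {
        location: "s3://example-bucket/data.nc".to_string(),
        offset: 4096,
        length: 1024,
    };
    println!("{}", describe(&chunk));
}
```

In the flatbuffer encoding these three cases share a single `ChunkRef` table rather than a union; as the comments in `manifest.fbs` explain, a reader decides which kind it has by looking at which optional fields are populated.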
diff --git a/icechunk-python/tests/data/test-repo/chunks/2TF4E0AY7Y4V5081TANG b/icechunk-python/tests/data/test-repo/chunks/0DX66KVWRNQGEEWC8QN0 similarity index 100% rename from icechunk-python/tests/data/test-repo/chunks/2TF4E0AY7Y4V5081TANG rename to icechunk-python/tests/data/test-repo/chunks/0DX66KVWRNQGEEWC8QN0 diff --git a/icechunk-python/tests/data/test-repo/chunks/DV031EJC9785Q61Y8VHG b/icechunk-python/tests/data/test-repo/chunks/9HZ3J7FEC9CCEB6GEKVG similarity index 100% rename from icechunk-python/tests/data/test-repo/chunks/DV031EJC9785Q61Y8VHG rename to icechunk-python/tests/data/test-repo/chunks/9HZ3J7FEC9CCEB6GEKVG diff --git a/icechunk-python/tests/data/test-repo/chunks/HKZ28FFV4Q7D070R2BJG b/icechunk-python/tests/data/test-repo/chunks/MDYP3ZEV630YPNCEENC0 similarity index 100% rename from icechunk-python/tests/data/test-repo/chunks/HKZ28FFV4Q7D070R2BJG rename to icechunk-python/tests/data/test-repo/chunks/MDYP3ZEV630YPNCEENC0 diff --git a/icechunk-python/tests/data/test-repo/chunks/ZEEGCRCJ90NJZ58SFA60 b/icechunk-python/tests/data/test-repo/chunks/Y9DTSVWNZV9HKT2R17T0 similarity index 100% rename from icechunk-python/tests/data/test-repo/chunks/ZEEGCRCJ90NJZ58SFA60 rename to icechunk-python/tests/data/test-repo/chunks/Y9DTSVWNZV9HKT2R17T0 diff --git a/icechunk-python/tests/data/test-repo/config.yaml b/icechunk-python/tests/data/test-repo/config.yaml index 71b6f838..e3d9b500 100644 --- a/icechunk-python/tests/data/test-repo/config.yaml +++ b/icechunk-python/tests/data/test-repo/config.yaml @@ -5,14 +5,10 @@ compression: null caching: null storage: null virtual_chunk_containers: - s3: - name: s3 - url_prefix: s3:// - store: !s3_compatible - region: us-east-1 - endpoint_url: http://localhost:9000 - anonymous: false - allow_http: true + gcs: + name: gcs + url_prefix: gcs + store: !gcs {} az: name: az url_prefix: az @@ -25,11 +21,16 @@ virtual_chunk_containers: endpoint_url: https://fly.storage.tigris.dev anonymous: false allow_http: false - gcs: - name: gcs - url_prefix: gcs - store: !gcs {} + s3: + name: s3 + url_prefix: s3:// + store: !s3_compatible + region: us-east-1 + endpoint_url: http://localhost:9000 + anonymous: false + allow_http: true file: name: file url_prefix: file store: !local_file_system '' +manifest: null diff --git a/icechunk-python/tests/data/test-repo/manifests/3AS90VB6T0GSE4J67XX0 b/icechunk-python/tests/data/test-repo/manifests/3AS90VB6T0GSE4J67XX0 deleted file mode 100644 index 5f709f25a2b6d05190d7d737b86e74a544ede68b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 166 zcmeZtcKtAad6%MUp1j|(h&~fWch|)hHncdv`k`PXj~kWnl>4zEboB9E+5Ye(KGDa z9#3Epp2WcTdoEkzBoA%^hfr?of Q7O?E#6k$``DlW_g0Q)>TkpKVy diff --git a/icechunk-python/tests/data/test-repo/manifests/3C9WRKTE3PNDSNYBKD60 b/icechunk-python/tests/data/test-repo/manifests/3C9WRKTE3PNDSNYBKD60 new file mode 100644 index 0000000000000000000000000000000000000000..a7da5f1cff2276e5701a1168fb0e3470d6f70d30 GIT binary patch literal 165 zcmeZtcKtAad6%}6EDl)`m>o$pM(A2tQ50bRj|H~)C>;ofNjJ#(pdN`t?GKmzp1wkf5kQ`rWG&Vs%81$8s^0Con9RzHD@1&*CVh(&yfF#=%UB Ji=(!(0|1RxK=c3r literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/manifests/5ZW0V1ZQXQ16804S897G b/icechunk-python/tests/data/test-repo/manifests/5ZW0V1ZQXQ16804S897G deleted file mode 100644 index b2f7c0ab9f513be58f477d1e802c649ad89b780b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 119 
zcmeZtcKtAad6%U)_0O=>+zbyF7LFW}UM79pxRtj0N>#$wf^Cl@y%c4v{vYIRb^r7EW$M=&*F%rF9`s}cT;XZh|7+#a z=i)wlIa0J()D7juC%AGv?Kk2-5Fy+;sVCd8;>L_0tE=-l4zK^|SidDg$mzB6(qt>W z<@UcGmnt#^37+C-;ym`z`}E`Dm-2tKzMU6ju#gJcrnFOS{f}E`Px0=!Zdg&W(2{R6 Y!_uXZ>nBLw5ikyBTD)Mdoa0MI0A=)ec>n+a literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/manifests/MWE7J4Y1V04W0DCXB8Z0 b/icechunk-python/tests/data/test-repo/manifests/MWE7J4Y1V04W0DCXB8Z0 new file mode 100644 index 0000000000000000000000000000000000000000..fdfbb82dff590d2c8cc1fa697f9787b316db4893 GIT binary patch literal 174 zcmeZtcKtAad6%wAE1RlRxrd93LHaO*U;DDu1(gdb T-A!-K6kqx5^P^KTj&B(ONj^!| literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/manifests/R666SBH9YHZMB04ZMARG b/icechunk-python/tests/data/test-repo/manifests/R666SBH9YHZMB04ZMARG deleted file mode 100644 index c1bf69730dff0446e647cfff386422741ddfad6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 221 zcmeZtcKtAad6%omXAyEe>Uv~fb?q$3={$?^@q4Br+^Xqm*o(6~G-v#6vrF=xt6m11Knef^Tu z;*zA&OCBFG7nQ57+Dd+xw`oJ)W zfw6IMP-@y_pmo3JvL#OP(AJ+SS=BRvL3k1)gV;QG#yI=VO^u9~Rx|eeQ?dQ6dhUXZ UgwRub%7 diff --git a/icechunk-python/tests/data/test-repo/manifests/STWYFSPWFCD62MQTDM20 b/icechunk-python/tests/data/test-repo/manifests/STWYFSPWFCD62MQTDM20 deleted file mode 100644 index 0abad3e14504ab5ece574d7abaf184621fa69a1e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 117 zcmeZtcKtAad6%ke|D!@DlzC+F3W_*Nk=%6w!V(pz*bT_p=BaN<1){@oXot`BU~U|3XGf#3?UO4 RnV6Xw8pJ-_J)Q5w2mo>mDfj>Y diff --git a/icechunk-python/tests/data/test-repo/manifests/T9PRDPYDRCEHC2GAVR8G b/icechunk-python/tests/data/test-repo/manifests/T9PRDPYDRCEHC2GAVR8G new file mode 100644 index 0000000000000000000000000000000000000000..9962c58957ca3224574c224f3edcff9673d26ce0 GIT binary patch literal 241 zcmeZtcKtAad6%_=3u6@tWBIHc4|?}VYjnf(>xBZbzVN{yKLLt+jZj3QJaGQ&uQau(R6irH1UOk z<%~o@7~GQ?E*lpW3r!UM{Bm-F`eD_^*4Z04H7hJ) zUhm>_E$N>9Cs&na&RQ3?BfpZaE#KcbbCnjy$}JP@^y2)E?u&e-t>MOdo{679-kHH+ l&Dvc~$7Grd#b-S>;0}nI^_6SpL0&^+RRgYDuLY;b0RZ*PV1fVu literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json index affc0587..5b2aafdc 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json @@ -1 +1 @@ -{"snapshot":"FK0CX5JQH2DVDZ6PD6WG"} \ No newline at end of file +{"snapshot":"A2RD2Y65PR6D3B6BR1K0"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json index 563cc044..75370f32 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json @@ -1 +1 @@ -{"snapshot":"KCR7ES7JPCBY23X6MY3G"} \ No newline at end of file +{"snapshot":"K1BMYVG1HNVTNV1FSBH0"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json index fe6793c8..d26031d3 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json @@ -1 +1 @@ -{"snapshot":"QY5JG2BWG2VPPDJR4JE0"} \ No newline at end of file +{"snapshot":"RPA0WQCNM2N9HBBRHJQ0"} \ No newline at end of file diff 
--git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json index 0ba516a6..66c78fb1 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json @@ -1 +1 @@ -{"snapshot":"VNPWJSZWB9G990XV1V8G"} \ No newline at end of file +{"snapshot":"6Q9GDTXKF17BGQVSQZFG"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json index 340ac458..c31e956b 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json @@ -1 +1 @@ -{"snapshot":"G0BR0G9NKT75ZZS7BWWG"} \ No newline at end of file +{"snapshot":"949AXZ49X764TMDC6D4G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json index 13219d1c..b3ffccc0 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json @@ -1 +1 @@ -{"snapshot":"9W0W1DS2BKRV4MK2A2S0"} \ No newline at end of file +{"snapshot":"SNF98D1SK7NWD5KQJM20"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json index affc0587..5b2aafdc 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json @@ -1 +1 @@ -{"snapshot":"FK0CX5JQH2DVDZ6PD6WG"} \ No newline at end of file +{"snapshot":"A2RD2Y65PR6D3B6BR1K0"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json b/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json index 13219d1c..b3ffccc0 100644 --- a/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json +++ b/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json @@ -1 +1 @@ -{"snapshot":"9W0W1DS2BKRV4MK2A2S0"} \ No newline at end of file +{"snapshot":"SNF98D1SK7NWD5KQJM20"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json b/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json index 340ac458..c31e956b 100644 --- a/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json +++ b/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json @@ -1 +1 @@ -{"snapshot":"G0BR0G9NKT75ZZS7BWWG"} \ No newline at end of file +{"snapshot":"949AXZ49X764TMDC6D4G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json b/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json index 13219d1c..b3ffccc0 100644 --- a/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json +++ b/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json @@ -1 +1 @@ -{"snapshot":"9W0W1DS2BKRV4MK2A2S0"} \ No newline at end of file +{"snapshot":"SNF98D1SK7NWD5KQJM20"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/snapshots/6Q9GDTXKF17BGQVSQZFG b/icechunk-python/tests/data/test-repo/snapshots/6Q9GDTXKF17BGQVSQZFG new file mode 100644 index 
0000000000000000000000000000000000000000..b049d7ac7d76e8e4b3e47ddeb72baacc7bfc16c6 GIT binary patch literal 177 zcmeZtcKtAad6%iX7*(s|6 zCW-c)jOMA=562u--k{}QBO~OUa`40H)%N%9$%q(fH6}HD_+a9oH+NfCklT^Dy+I5P VX`jSc7(Q~9&v_xeOF_GX2>{IPM3?{o literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/snapshots/949AXZ49X764TMDC6D4G b/icechunk-python/tests/data/test-repo/snapshots/949AXZ49X764TMDC6D4G new file mode 100644 index 0000000000000000000000000000000000000000..391bc09fe71ec7954d598ca79876b7bf35971a58 GIT binary patch literal 787 zcmV+u1MK`sLq+hPr;0;JRZdH3V=XW)F)lM8ARr(hARr(hARr(hARr(C0RbqrFZ}>m z9Txx=>5NFglGYr$aiI)~&St-x;H?ca{iJ3O8 zuFCT7^}+Rung!@+>Ne2Y#&~&G+s19Y-60FK=i9@*2N|RKUSQf28-Y(s#Mh*YV zZ{7>Gaulxc1=*rq%G(+5r5Dm(kx8g!lZeB#*S2bJYSpPiYdW>WrY%xXaX9>Ah8H+g zOFUNB>nWb+P-dpp{dz-GApx|iU&l#~1IUwNS5x^z>7NDa^>jWK7jxTj6w+$iHAgI; zYT8vLo=;ML&cBWatu8MSDiS7ak)Ygr%@XF`TWk8WK+FgbgrY(Wt8&A$Fp+!1gfIhg zRB*MEKp4x5e7H9Zt@^^D2#*g!ocIMK4cwbF-k4bd=|)6>ByIjve(K(2OYH;yzgd?2 z7ytj*DKOaXg20wVQ{c~sroJEhll(X(+2*umFEjaFT=3}egM)xK5-Z>2*|%lerp3&d z-hA7?Q$nP`MTi5aB6gvO)F52U$PJ18W`iLXMjoUsssL7w>8IVM{k5B)Hf{6T0Yj*f zG^CJIrX-j^Do{|;!U4bnnF*-q0TSTAakN4yM+Fm2?8ME)%ZmXpubcU>gIy#i(ELZ< zxP*}H|DYAt(B=r=wVXn3+x#Kk@1 zJpPOawkySa+!T})uxSF2UvuExwD6AE+x}e3+&N*Nk%5t^u7RPhk!6UHp_QqHm5I5Y zxsidfnVE&9Mbx^K)ST3kRE0!^BbRl9}d`PYKRw-cIIWG1JAZDL$foLQB6+QF3p43?B+WEQW_ECxCx52!=|tQTU? zCa6LBNtx;KV7%iSO%r|J(dFie`XG^w&AwYYIvYFW2+37{_O5KMIAj-T6cqw?3*wn2mmwU$oc>P diff --git a/icechunk-python/tests/data/test-repo/snapshots/A2RD2Y65PR6D3B6BR1K0 b/icechunk-python/tests/data/test-repo/snapshots/A2RD2Y65PR6D3B6BR1K0 new file mode 100644 index 0000000000000000000000000000000000000000..b8eebc1cbdd7bd5a125c5445ecc4b1270e41ec98 GIT binary patch literal 587 zcmV-R0<`@}Lq+hPr;0;JRZdH3V=XW)F)lM8ARr(hARr(hARr(hARr(C0RbqrFZ}>m z-4FoAUTaA3usCaOC)51%aPi_Pnv+48Z9I@;0AjI$*NKbm3gYp|_PwS_ITk-Gn#0lsE^<`;VPk2-;UD?yy+1Z`vrX$_&D#nV80s1=IoilpQuM_}vu2u>X3Z&< zE2piAL2Vja-Dos0m7XeF42gh@2?M7ENmwDkU$h@p^}xlk{=wrJ{tT-|IObcQsaP+$SsD<4w!%R zf@z8@C@KL4Sj(%0IwrneVra+m48;&FOQ0p0+Ky1NV_!+Yn)gSday zT6G@H7ACAwm~yKa$e@8ok@V+OUh5;Fh* literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/snapshots/FK0CX5JQH2DVDZ6PD6WG b/icechunk-python/tests/data/test-repo/snapshots/FK0CX5JQH2DVDZ6PD6WG deleted file mode 100644 index 9454635d8c45e023896e762da057ab1d007356d6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 493 zcmeZtcKtAad6%k@1 zJT(W-O$+asz3tDn%$*Z}{MiQODW5irKjdGwynVtxBLgE-T?0d1Bg+sYLn~7YD-&}) zb3;>O6B7$#lc(FDElEwPC4yQGRK` za{Y7=We8GLyM^WX*Mi2k6Pj0KCZ~dJVq8+3S(SR)!Ic3FmXu^<7O&4NR!CIH11eDf z>xCG!32KmjQf7KQSZ^`NOyOkthF^wn3nnBM6(v?SE_DP^lV(if;#!`Tlb={(Y&40L zb!l);VsS>}ayN*`srmy943j1;O{y$OEpA+vnwOH9n71q^v!o;^b<&cm;*yl670DT? 
z$=SuFxrdhKq?V=TFibcwanhtoKy6G+TZ(fNb8?clzK+?zR#H1*%S6^CnRz7^6B(Cy z0x6D36B#D5@-jF+VK{wDGoiRYRoO96hQTq}*eZjiKcah1QAdxI*4^MErzXq@0s#8a B%3lBg diff --git a/icechunk-python/tests/data/test-repo/snapshots/G0BR0G9NKT75ZZS7BWWG b/icechunk-python/tests/data/test-repo/snapshots/G0BR0G9NKT75ZZS7BWWG deleted file mode 100644 index 8d4c7da9ff52d96dd631d14e81df38c4f2194292..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 646 zcmeZtcKtAad6%2s&%B(>ywrxJ?nU{f z10}Kq4CM`{>EJ-bHT$Y-bl9`ydEGM(1Bqw#!lB(j8l%^HQ z8L7$H#ihB2mgc0ErRFe9I52V2q)9++OiWvfa}#rNlD58%*}zs(J7LR2)+L#FB^DDI zmv{mxj!6?4CbAfT+$MVeAj>w#sP+l#j3C}O28q@$`|)y7{H?zewn79=K!O_&3#8|M zD7iXe>mG=xDM)mM-~yqvTP7@m8jq@S6Wms$i8W_4OAsvRx9e8L13g#xFfZPNdC za4u0`3O;$^WC_o-YttT2Z+QIdK&|*4qcf6H0uz|k<3pw&TEDr!c!J1~veTND+Ae diff --git a/icechunk-python/tests/data/test-repo/snapshots/K1BMYVG1HNVTNV1FSBH0 b/icechunk-python/tests/data/test-repo/snapshots/K1BMYVG1HNVTNV1FSBH0 new file mode 100644 index 0000000000000000000000000000000000000000..bfcd527f6b50ebac99cd5bc30782bc066aca903d GIT binary patch literal 577 zcmV-H0>1r8Lq+hPr;0;JRZdH3V=XW)F)lM8ARr(hARr(hARr(hARr(C0RbqrFZ}>m zjSv9V9&1S8vN&r}fkX%j%JW!2+7H1f1%`T{EXedH_gtCE6ILX1(n&$D%%Im?*dwES zF(O3)a1M&lG*OyEfkZxhL|ovV>bKjL*gFMP0AB!I05*S%5i$U%P`to6!Et{tked(s zuJqmPuX`vgE)LS~yPyxLAM$rSepAoQh7vR$5g;lAnEBlJK>_h-_`sPFDA4}Wrwv?S z@RUuHuBR^jw@9(&2Ls|L+MNzR%3tpV!sCW>Ic6Bk+6ou!Y>6W&?$oT%Qzn&-C5qZo z#|-Vt9Cc{U5sj&8GTE0x&6bpgJj2qZ#^t0?)25*u+RyTb{vW)AU~n6up<+PHWV`oP zF6G`k3JcIMri8JHL^9exq5G=<8zNB&0TwT6cyPriaVr1!cYXBtkA4zA&`FL1I!)2*q(70b`TC?^LJ6zE2?xh3 zMR#W`t0K9gzj63kv4ui11py}@aDt%54GeXSEJKV8txPSfOw9Gn z4GoPf3@j`SqE@D5=Hw`(B$gyLOqvYTtN(+y{rlTPDGW26{3n#Nf`p_tO~_5Vpg3K~ z&Aw{FnPn^!S{CY0nRJ9hS4nEdd;e<_CoI%&T;`dVlbM&=u++UMzqDYvemaOU1gWas z!t(rULF3yA%_}mKQe6u^2R25r($ z%1n<3YbpjACY&ta@XPRR!Gy%3qQuI^rH&wK(u_%5T+7pP@)JvpjV7_OE)C8}EY4_L z?gkM#ReykiVbY|fNtGq3#f{5S^HMSs^OoggmXzeAPFhk`T$0kXA~_>9IlH(t_t4Uu z)UwnZh6x8IPMS0csEvtfOL1;uPEOL+*D)K|N@^!;naH{%Gq1#ABI6QIAjL6hBEv*h vJ_g4p45x2tCKUIlDmwd-_1Rl{wFG7O7m{Mixc)485o)B8W`#tS%w%HTA5l{nV9RD zSz4Hy8XKCLMy*fHEhwo}C@v`~O)e=dN^O`hp=F`|lu1W8bd{uLy!XF0al%6V#$}#) zIhlE>4NKjN@=FVr>!*V#Ly)T4EiBK!7Bs${(7YluIW;+>G%uTRNpWUX>S+g81~6Ds zl95@wKC@UMQ6UegL;=VZzDs4ZjTE7EDMiDoU(uTo*k}?f>(bzy#Nv#`Q}qWJ7$!|xnp9bmTHLrSH7_MIF>hH;W=Tm- z>ZB!A#U&|CE0Qx(le3FUa}O=eNi9pwVVH1W!j|IP#GIU@t*>J?u$9zK*fNoINoHP& x#YDy>oyS{tD*n^ diff --git a/icechunk-python/tests/data/test-repo/snapshots/RPA0WQCNM2N9HBBRHJQ0 b/icechunk-python/tests/data/test-repo/snapshots/RPA0WQCNM2N9HBBRHJQ0 new file mode 100644 index 0000000000000000000000000000000000000000..574db73faaa944dbb300439c2f6eec11f28ed4c0 GIT binary patch literal 513 zcmV+c0{;C;Lq+hPr;0;JRZdH3V=XW)F)lM8ARr(hARr(hARr(hARr(C0RbqrFZ}>m zjSc{In_xu%u{LY+)_t0Wg37f&0d2Z0)o>Wb@z3h}9yirqJ}^m%ev9#St!PGAwmkmg&`MaUxbU zDOtekg(olUMZ}7<{JgmSlK;@XH{|q}A$9tcq&>0{#+*ANy@>?(H{^pQKa61_qJ@U`K|%#~hE|7R_;d z%WE(CCk#M2Ai2yKak2}8qNa`u`kW0cNM=5t9RT3?SmmCWb|CIQYOUeCkF@Z$aHduO z*~1yR3|~7nwuZ|_4>0cG#GXymH3I{G<}{4kOm@>B1Bg=(w)Hy)VP5k4Gv71wF$m z)er!7-fBqTvN&rZ9iWLFVl>9G_%Uf5*ywSD!0@Tz&?Eb**N3$_l@?^L%piDQHk0nR zH4sZIaUNDH>CoSu;&b z6Xz7m)oF2NP@BT2Y_l17lAfvB42g;+i><3UNzbw-s=4`9{wtKo`Pev7z`_L?0tVcB zYiDxrB?S#q@x^52j7GDYKZEyI39J`43_vBD13amisYCg}!Rw^KpY-eeIY@pI0Fl^y zXnx52qZ>?9bU{%IIKWm`PEEYDC3(Gmq(l4)%UC^;KbqP5O-2LFHUME1uNK~= zwMtdU9pPk;0}&%7M54GeXSEJKV8txPSfOw9GnEKH4!3{1>TqqYU57UUOa hmgE;zDrDwmmSiU8WLBl7G%kyeFUrp^IXq!P0|0AzE`k66 diff --git a/icechunk-python/tests/data/test-repo/transactions/949AXZ49X764TMDC6D4G 
b/icechunk-python/tests/data/test-repo/transactions/949AXZ49X764TMDC6D4G new file mode 100644 index 0000000000000000000000000000000000000000..31d4665cf9fcf4c41bc584a0dfdb3fb060f0ed3b GIT binary patch literal 172 zcmeZtcKtAad6%>z?mU{O8{g<7v2rAypi}>|Hm>+6^eY$df5CQf7l07$DDIRF3v diff --git a/icechunk-python/tests/data/test-repo/transactions/A2RD2Y65PR6D3B6BR1K0 b/icechunk-python/tests/data/test-repo/transactions/A2RD2Y65PR6D3B6BR1K0 new file mode 100644 index 0000000000000000000000000000000000000000..55b103341af849b89fa6f25c5190b8b022e4b4ba GIT binary patch literal 148 zcmeZtcKtAad6%}S^BvotmOaa^{`>lib2Ec@rqd;p{R#?!+x7bxq=5hIl)2`|) znittTRX<|ga8`UM;CPv_Hgtb@`cm_9zFMo&W}yj8 zB7EFEt{X2*_^4j`AcU*dYW-7VMaG2%nj#F6*+;}bHP%eO74nep216wKw42gDWp(%K d2kl$4cd{Up1aA+UTe?(P_P?D@VLZ|X$ktVon(^NM+QbPUiKMNsV>Ymr)Pf|0ljR$J8NMx;Fk!-!24;pZ MQHds=99b?d0B8{_8~^|S diff --git a/icechunk-python/tests/data/test-repo/transactions/RPA0WQCNM2N9HBBRHJQ0 b/icechunk-python/tests/data/test-repo/transactions/RPA0WQCNM2N9HBBRHJQ0 new file mode 100644 index 0000000000000000000000000000000000000000..29bcdf8cc8e638f4007bd65f0eb288ebe26734ec GIT binary patch literal 167 zcmeZtcKtAad6%5ekjsI9+BlS8d4BDV-^TJ#qiF)Rg4J>h|6}w*K%%pE?d!hG0vcgFD<6 LgC^XUXpjT|e>Om@ literal 0 HcmV?d00001 diff --git a/icechunk-python/tests/data/test-repo/transactions/SNF98D1SK7NWD5KQJM20 b/icechunk-python/tests/data/test-repo/transactions/SNF98D1SK7NWD5KQJM20 new file mode 100644 index 0000000000000000000000000000000000000000..271c8d02de150762851ae5032e5301a040213812 GIT binary patch literal 148 zcmeZtcKtAad6% None: f"listing changed before ({len(lsbefore)} items) and after ({len(lsafter)} items) committing." f" \n\n Before : {lsbefore!r} \n\n After: {lsafter!r}, \n\n Expected: {lsexpect!r}" ) + + # if it's metadata, we need to compare the data parsed, not raw (because of map ordering) + if path.endswith(".json"): + get_after = json.loads(get_after.to_bytes()) + get_before = json.loads(get_before.to_bytes()) + else: + get_after = get_after.to_bytes() + get_before = get_before.to_bytes() + if get_before != get_after: get_expect = self._sync(self.model.get(path, prototype=PROTOTYPE)) assert get_expect raise ValueError( f"Value changed before and after commit for path {path}" - f" \n\n Before : {get_before.to_bytes()!r} \n\n " - f"After: {get_after.to_bytes()!r}, \n\n " + f" \n\n Before : {get_before!r} \n\n " + f"After: {get_after!r}, \n\n " f"Expected: {get_expect.to_bytes()!r}" ) diff --git a/icechunk/Cargo.toml b/icechunk/Cargo.toml index 928cf819..e595c498 100644 --- a/icechunk/Cargo.toml +++ b/icechunk/Cargo.toml @@ -55,6 +55,7 @@ tracing-subscriber = { version = "0.3.19", features = [ tracing = "0.1.41" err-into = "1.0.1" serde_yaml_ng = "0.10.0" +flatbuffers = "25.2.10" [dev-dependencies] pretty_assertions = "1.4.1" diff --git a/icechunk/examples/low_level_dataset.rs b/icechunk/examples/low_level_dataset.rs index 12739113..8088f82e 100644 --- a/icechunk/examples/low_level_dataset.rs +++ b/icechunk/examples/low_level_dataset.rs @@ -286,6 +286,7 @@ async fn print_nodes(ds: &Session) -> Result<(), SessionError> { let rows = ds .list_nodes() .await? + .map(|n| n.unwrap()) .sorted_by_key(|n| n.path.clone()) .map(|node| { format!( diff --git a/icechunk/examples/multithreaded_get_chunk_refs.rs b/icechunk/examples/multithreaded_get_chunk_refs.rs new file mode 100644 index 00000000..b0a94d85 --- /dev/null +++ b/icechunk/examples/multithreaded_get_chunk_refs.rs @@ -0,0 +1,195 @@ +//! 
This example is used to benchmark multithreaded reads and writes of manifest files +//! +//! It launches hundreds of thousands of tasks to write and then read refs. +//! It generates a manifest with 1M refs and executes 1M random reads. +//! Local filesystem storage is used to try to measure times without depending +//! on bandwidth. +//! +//! Run the example passing --write /path/to/repo +//! and then passing --read /path/to/repo + +#![allow(clippy::unwrap_used)] + +use std::{ + collections::HashMap, + env::{self}, + num::NonZeroU64, + sync::Arc, + time::Instant, +}; + +use futures::{stream::FuturesUnordered, StreamExt}; +use icechunk::{ + config::CompressionConfig, + format::{ + manifest::{ChunkPayload, ChunkRef}, + snapshot::ZarrArrayMetadata, + ChunkId, ChunkIndices, Path, + }, + metadata::{ + ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, + }, + new_local_filesystem_storage, + repository::VersionInfo, + session::Session, + Repository, RepositoryConfig, +}; + use itertools::iproduct; +use rand::random_range; +use tokio::sync::RwLock; + +const MAX_I: u64 = 10; +const MAX_J: u64 = 10; +const MAX_L: u64 = 100; +const MAX_K: u64 = 100; +const READS: u64 = 1_000_000; + +async fn mk_repo( + path: &std::path::Path, +) -> Result<Repository, Box<dyn std::error::Error>> { + let storage = new_local_filesystem_storage(path).await?; + let config = RepositoryConfig { + compression: Some(CompressionConfig { + level: Some(3), + ..CompressionConfig::default() + }), + ..RepositoryConfig::default() + }; + let repo = Repository::open_or_create(Some(config), storage, HashMap::new()).await?; + Ok(repo) +} + +async fn do_writes(path: &std::path::Path) -> Result<(), Box<dyn std::error::Error>> { + let repo = mk_repo(path).await?; + let mut session = repo.writable_session("main").await?; + let meta = ZarrArrayMetadata { + shape: vec![MAX_I, MAX_J, MAX_L, MAX_K], + data_type: DataType::Int32, + chunk_shape: ChunkShape(vec![ + NonZeroU64::new(1).unwrap(), + NonZeroU64::new(1).unwrap(), + NonZeroU64::new(1).unwrap(), + NonZeroU64::new(1).unwrap(), + ]), + chunk_key_encoding: ChunkKeyEncoding::Slash, + fill_value: FillValue::Int32(0), + codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], + storage_transformers: Some(vec![StorageTransformer { + name: "mytransformer".to_string(), + configuration: None, + }]), + dimension_names: Some(vec![ + Some("x".to_string()), + Some("y".to_string()), + Some("t".to_string()), + ]), + }; + let path: Path = "/array".try_into().unwrap(); + session.add_array(path.clone(), meta).await?; + session.commit("array created", None).await?; + + let session = Arc::new(RwLock::new(repo.writable_session("main").await?)); + println!("Doing {} writes, wait...", MAX_I * MAX_J * MAX_K * MAX_L); + let before = Instant::now(); + let futures: FuturesUnordered<_> = iproduct!(0..MAX_I, 0..MAX_J, 0..MAX_L, 0..MAX_K) + .map(|(i, j, k, l)| { + let path = path.clone(); + let session = session.clone(); + async move { + let mut session = session.write().await; + let payload = ChunkPayload::Ref(ChunkRef { + id: ChunkId::random(), + offset: i * j * k * l, + length: random_range(1_000_000..2_000_000), + }); + session + .set_chunk_ref( + path.clone(), + ChunkIndices(vec![i as u32, j as u32, k as u32, l as u32]), + Some(payload), + ) + .await + .unwrap(); + } + }) + .collect(); + + futures.collect::<()>().await; + println!("Time to execute writes: {:?}", before.elapsed()); + let before = Instant::now(); + println!("Committing"); + session.write().await.commit("array created", None).await?; + println!("Time to execute
commit: {:?}", before.elapsed()); + Ok(()) +} + +async fn do_reads(path: &std::path::Path) -> Result<(), Box<dyn std::error::Error>> { + let repo = mk_repo(path).await?; + let session = Arc::new(RwLock::new( + repo.readonly_session(&VersionInfo::BranchTipRef("main".to_string())).await?, + )); + + let path: Path = "/array".try_into().unwrap(); + println!("Doing {} reads, wait...", 4 * (READS / 4)); + let before = Instant::now(); + let join1 = tokio::spawn(thread_reads(session.clone(), path.clone(), READS / 4)); + let join2 = tokio::spawn(thread_reads(session.clone(), path.clone(), READS / 4)); + let join3 = tokio::spawn(thread_reads(session.clone(), path.clone(), READS / 4)); + let join4 = tokio::spawn(thread_reads(session.clone(), path.clone(), READS / 4)); + + let total = join1.await? + join2.await? + join3.await? + join4.await?; + assert_eq!(total, 4 * (READS / 4)); + println!("Time to execute reads: {:?}", before.elapsed()); + + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + let args: Vec<_> = env::args().collect(); + if args.len() != 3 { + println!("Error: Pass either\n --write path/to/repo\n or\n --read path/to/repo\n as command line argument."); + return Err("Invalid arguments".into()); + } + + let path = std::path::PathBuf::from(args[2].as_str()); + + match &args[1] { + s if s == "--write" => do_writes(path.as_path()).await?, + s if s == "--read" => do_reads(path.as_path()).await?, + _ => { + println!("Error: Pass either --write or --read as command line argument."); + let err: Box<dyn std::error::Error> = "Invalid arguments".into(); + return Err(err); + } + } + + Ok(()) +} + +async fn thread_reads(session: Arc<RwLock<Session>>, path: Path, reads: u64) -> u64 { + let futures: FuturesUnordered<_> = (0..reads) + .map(|_| { + let i = random_range(0..MAX_I); + let j = random_range(0..MAX_J); + let k = random_range(0..MAX_K); + let l = random_range(0..MAX_L); + let path = path.clone(); + let session = session.clone(); + async move { + let session = session.read().await; + let the_ref = session + .get_chunk_ref( + &path, + &ChunkIndices(vec![i as u32, j as u32, k as u32, l as u32]), + ) + .await + .unwrap(); + assert!(matches!(the_ref, Some(ChunkPayload::Ref(ChunkRef{ offset, .. })) if offset == i*j*k*l)); + 1 + } + }) + .collect(); + + futures.collect::<Vec<u64>>().await.iter().sum() +} diff --git a/icechunk/flatbuffers/all.fbs b/icechunk/flatbuffers/all.fbs new file mode 100644 index 00000000..0e62a267 --- /dev/null +++ b/icechunk/flatbuffers/all.fbs @@ -0,0 +1,14 @@ +// run this command in the directory icechunk/flatbuffers +// +// flatc --rust -o ../src/format/flatbuffers/ --gen-all all.fbs +// +// This will generate the file all_generated.rs + +include "object_ids.fbs"; +include "manifest.fbs"; +include "snapshot.fbs"; +include "transaction_log.fbs"; + +// This is the way we have found to make it easy to generate code for all files +// flatbuffers rust generation seems to have some issues trying to generate +// separate files diff --git a/icechunk/flatbuffers/manifest.fbs b/icechunk/flatbuffers/manifest.fbs new file mode 100644 index 00000000..8d32b79c --- /dev/null +++ b/icechunk/flatbuffers/manifest.fbs @@ -0,0 +1,56 @@ +include "object_ids.fbs"; + +namespace gen; + +// We don't use unions and data structures for the different types of refs +// If we do that, the manifest grows in size a lot, because of the extra +// offsets needed.
This makes the code more complex because we need to +// interpret the different fields to know what type of ref we have + +table ChunkRef { + // the coordinates of this chunk ref, in the same order as in the array definition + index: [uint32] (required); + + // if this is an inline chunk ref, the data for the chunk will be put here unmodified + inline: [uint8]; + + // if this is a virtual or native chunk ref, offset and length allow to fetch + // the chunk from inside a larger object + offset: uint64 = 0; + length: uint64 = 0; + + // only native chunk refs will have this field, and it points to a file + // in the repository's object store + chunk_id: ObjectId12; + + // only virtual chunk refs will have the following fields + // location is the absolute url to the object where the chunk is stored + location: string; + + // only 0 or 1 of the following fields will be present and only for virtual chunk refs + // the etag assigned by the object store + checksum_etag: string; + // time, in seconds since the unix epoch, when the object containing the chunk + // was last modified + checksum_last_modified: uint32 = 0; +} + +table ArrayManifest { + // the id of the node the chunk refs belong to + node_id: ObjectId8 (required); + + // one element per chunk reference in the array + // this array is sorted in ascending order of the index in the ChunkRef + refs: [ChunkRef] (required); +} + +table Manifest { + // the manifest id + id: ObjectId12 (required); + + // one element for each array that has chunk refs in this manifest + // this array is sorted in ascending order of the node_id of the ArrayManifest + arrays: [ArrayManifest] (required); +} + +root_type Manifest; diff --git a/icechunk/flatbuffers/object_ids.fbs b/icechunk/flatbuffers/object_ids.fbs new file mode 100644 index 00000000..7bd26e80 --- /dev/null +++ b/icechunk/flatbuffers/object_ids.fbs @@ -0,0 +1,11 @@ +namespace gen; + +// used for SnapshotIds, ChunkIds, etc +struct ObjectId12 { + bytes:[uint8:12]; +} + +// used for NodeIds +struct ObjectId8 { + bytes:[uint8:8]; +} diff --git a/icechunk/flatbuffers/snapshot.fbs b/icechunk/flatbuffers/snapshot.fbs new file mode 100644 index 00000000..7f36da33 --- /dev/null +++ b/icechunk/flatbuffers/snapshot.fbs @@ -0,0 +1,137 @@ +include "object_ids.fbs"; + +namespace gen; + +// a single key-value of snapshot metadata +table MetadataItem { + // the name of the attribute + name: string (required); + + // the value, serialized as rmp_serde of the json value + // TODO: better serialization format + value: [uint8] (required); +} + +// a pointer to a manifest file +struct ManifestFileInfo { + // id of the object in the repo's object store + id: ObjectId12; + + // size in bytes of the whole manifest + size_bytes: uint64; + + // number of chunk refs in the manifest + num_chunk_refs: uint32; +} + +// a pointer to a user attributes file +struct AttributeFileInfo { + // id of the object in the repo's object store + id: ObjectId12; +} + +// a pointer to a user attributes file +table UserAttributesRef { + // id of the object in the repo's object store + object_id: ObjectId12 (required); + + // index where the user attributes for the array start + location: uint32; +} + +// user attributes written inline +table InlineUserAttributes { + // user attributes data, serialized as rmp_serde of the json value + // TODO: better serialization format + data :[uint8] (required); +} + +union UserAttributesSnapshot { + Inline :InlineUserAttributes, + Reference :UserAttributesRef, +} + +// A range of chunk indexes +struct 
ChunkIndexRange { + // inclusive + from: uint32; + + // exclusive + to: uint32; +} + +// a pointer to a manifest +table ManifestRef { + // id of the object in the repo's object store + object_id: ObjectId12 (required); + + // one element per dimension of the array, same order as in metadata + extents: [ChunkIndexRange] (required); +} + +// a marker for a group node +table GroupNodeData {} + +// data for an array node +table ArrayNodeData { + // the zarr metadata for the array + // serialized as rmp_serde of the json value + // TODO: better serialization format + zarr_metadata: [uint8] (required); + + // pointers to all the manifests where this array has chunk references + manifests: [ManifestRef] (required); +} + +// the node contents, that can be either a group or an array +union NodeData { + Array :ArrayNodeData, + Group :GroupNodeData, +} + +// a node +table NodeSnapshot { + // id of the object in the repo's object store + id: ObjectId8 (required); + + // absolute path of the node within the repo + path: string (required); + + // pointer to the user attributes for the node + user_attributes: UserAttributesSnapshot; + + // node's data + node_data: NodeData (required); +} + + +table Snapshot { + // the id of this snapshot + id: ObjectId12 (required); + + // the id of the parent snapshot, can be null for a root snapshot + parent_id: ObjectId12; + + nodes: [NodeSnapshot] (required); + + // time at which this snapshot was generated + // non-leap microseconds since Jan 1, 1970 UTC + flushed_at: uint64; + + // commit message + message: string (required); + + // metadata for the snapshot + // sorted in ascending order of MetadataItem.name + metadata: [MetadataItem] (required); + + // the list of all manifest files this snapshot points to + // sorted in ascending order of ManifestFileInfo.id + manifest_files: [ManifestFileInfo] (required); + + // the list of all attribute files this snapshot points to + // sorted in ascending order of AttributeFileInfo.id + attribute_files: [AttributeFileInfo] (required); +} + +root_type Snapshot; diff --git a/icechunk/flatbuffers/transaction_log.fbs b/icechunk/flatbuffers/transaction_log.fbs new file mode 100644 index 00000000..3afbe564 --- /dev/null +++ b/icechunk/flatbuffers/transaction_log.fbs @@ -0,0 +1,52 @@ +include "object_ids.fbs"; + +namespace gen; + +table ChunkIndices { + coords: [uint32] (required); +} + +table ArrayUpdatedChunks { + // the node id of the array to which the chunks belong to + node_id: ObjectId8 (required); + + // the coordinates of all the chunks modified in this transaction for this array + // sorted in ascending lexicographical order + chunks: [ChunkIndices] (required); +} + +table TransactionLog { + // id of the transaction log file, + // it will be the same as the corresponding snapshot + id: ObjectId12 (required); + + // node ids of the groups created in this transaction + // sorted in ascending order + new_groups: [ObjectId8] (required); + + // node ids of the arrays created in this transaction + // sorted in ascending order + new_arrays: [ObjectId8] (required); + + // node ids of the groups deleted in this transaction + // sorted in ascending order + deleted_groups: [ObjectId8] (required); + + // node ids of the arrays deleted in this transaction + // sorted in ascending order + deleted_arrays: [ObjectId8] (required); + + // node ids of the nodes that had user attributes modified in this transaction + // sorted in ascending order + updated_user_attributes: [ObjectId8] (required); + + // node ids of the nodes that had zarr 
metadata modified in this transaction + // sorted in ascending order + updated_zarr_metadata: [ObjectId8] (required); + + // chunk ref changes made in this transaction + // sorted in ascending order of the node_id of the ArrayUpdatedChunks + updated_chunks: [ArrayUpdatedChunks] (required); +} + +root_type TransactionLog; diff --git a/icechunk/src/asset_manager.rs b/icechunk/src/asset_manager.rs index 1d6d9078..30db9fa3 100644 --- a/icechunk/src/asset_manager.rs +++ b/icechunk/src/asset_manager.rs @@ -148,7 +148,7 @@ impl AssetManager { &self.storage_settings, ) .await?; - self.manifest_cache.insert(manifest.id.clone(), manifest); + self.manifest_cache.insert(manifest.id().clone(), manifest); Ok(res) } @@ -193,6 +193,8 @@ impl AssetManager { ) .await?; let snapshot_id = snapshot.id().clone(); + // This line is critical for expiration: + // When we edit snapshots in place, we need the cache to return the new version self.snapshot_cache.insert(snapshot_id, snapshot); Ok(()) } @@ -297,10 +299,11 @@ impl AssetManager { ) -> RepositoryResult>> { let mut this = self.fetch_snapshot(snapshot_id).await?; let stream = try_stream! { - yield this.as_ref().into(); + let info: SnapshotInfo = this.as_ref().try_into()?; + yield info; while let Some(parent) = this.parent_id() { - let snap = self.fetch_snapshot(parent).await?; - let info: SnapshotInfo = snap.as_ref().into(); + let snap = self.fetch_snapshot(&parent).await?; + let info: SnapshotInfo = snap.as_ref().try_into()?; yield info; this = snap; } @@ -423,7 +426,7 @@ async fn write_new_manifest( ), ]; - let id = new_manifest.id.clone(); + let id = new_manifest.id().clone(); let span = Span::current(); // TODO: we should compress only when the manifest reaches a certain size @@ -477,9 +480,7 @@ async fn fetch_manifest( let _entered = span.entered(); let (spec_version, decompressor) = check_and_get_decompressor(reader, FileTypeBin::Manifest)?; - deserialize_manifest(spec_version, decompressor).map_err(|err| { - RepositoryError::from(RepositoryErrorKind::DeserializationError(err)) - }) + deserialize_manifest(spec_version, decompressor).map_err(RepositoryError::from) }) .await? .map(Arc::new) @@ -564,9 +565,7 @@ async fn fetch_snapshot( Reader::Asynchronous(read), FileTypeBin::Snapshot, )?; - deserialize_snapshot(spec_version, decompressor).map_err(|err| { - RepositoryError::from(RepositoryErrorKind::DeserializationError(err)) - }) + deserialize_snapshot(spec_version, decompressor).map_err(RepositoryError::from) }) .await? .map(Arc::new) @@ -638,9 +637,8 @@ async fn fetch_transaction_log( Reader::Asynchronous(read), FileTypeBin::TransactionLog, )?; - deserialize_transaction_log(spec_version, decompressor).map_err(|err| { - RepositoryError::from(RepositoryErrorKind::DeserializationError(err)) - }) + deserialize_transaction_log(spec_version, decompressor) + .map_err(RepositoryError::from) }) .await? 
.map(Arc::new) @@ -677,7 +675,7 @@ impl Weighter> for FileWeighter { #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] mod test { - use itertools::Itertools; + use itertools::{assert_equal, Itertools}; use super::*; use crate::{ @@ -694,20 +692,22 @@ mod test { let settings = storage::Settings::default(); let manager = AssetManager::new_no_cache(backend.clone(), settings.clone(), 1); + let node1 = NodeId::random(); + let node2 = NodeId::random(); let ci1 = ChunkInfo { - node: NodeId::random(), - coord: ChunkIndices(vec![]), + node: node1.clone(), + coord: ChunkIndices(vec![0]), payload: ChunkPayload::Inline(Bytes::copy_from_slice(b"a")), }; let ci2 = ChunkInfo { - node: NodeId::random(), - coord: ChunkIndices(vec![]), + node: node2.clone(), + coord: ChunkIndices(vec![1]), payload: ChunkPayload::Inline(Bytes::copy_from_slice(b"b")), }; let pre_existing_manifest = Manifest::from_iter(vec![ci1].into_iter()).await?.unwrap(); let pre_existing_manifest = Arc::new(pre_existing_manifest); - let pre_existing_id = &pre_existing_manifest.id; + let pre_existing_id = pre_existing_manifest.id(); let pre_size = manager.write_manifest(Arc::clone(&pre_existing_manifest)).await?; let logging = Arc::new(LoggingStorage::new(Arc::clone(&backend))); @@ -720,37 +720,38 @@ mod test { ); let manifest = - Arc::new(Manifest::from_iter(vec![ci2].into_iter()).await?.unwrap()); - let id = &manifest.id; + Arc::new(Manifest::from_iter(vec![ci2.clone()].into_iter()).await?.unwrap()); + let id = manifest.id(); let size = caching.write_manifest(Arc::clone(&manifest)).await?; - assert_eq!(caching.fetch_manifest(id, size).await?, manifest); - assert_eq!(caching.fetch_manifest(id, size).await?, manifest); + let fetched = caching.fetch_manifest(&id, size).await?; + assert_eq!(fetched.len(), 1); + assert_equal( + fetched.iter(node2.clone()).map(|x| x.unwrap()), + [(ci2.coord.clone(), ci2.payload.clone())], + ); + + // fetch again + caching.fetch_manifest(&id, size).await?; // when we insert we cache, so no fetches assert_eq!(logging.fetch_operations(), vec![]); // first time it sees an ID it calls the backend - assert_eq!( - caching.fetch_manifest(pre_existing_id, pre_size).await?, - pre_existing_manifest - ); + caching.fetch_manifest(&pre_existing_id, pre_size).await?; assert_eq!( logging.fetch_operations(), vec![("fetch_manifest_splitting".to_string(), pre_existing_id.to_string())] ); // only calls backend once - assert_eq!( - caching.fetch_manifest(pre_existing_id, pre_size).await?, - pre_existing_manifest - ); + caching.fetch_manifest(&pre_existing_id, pre_size).await?; assert_eq!( logging.fetch_operations(), vec![("fetch_manifest_splitting".to_string(), pre_existing_id.to_string())] ); // other walues still cached - assert_eq!(caching.fetch_manifest(id, size).await?, manifest); + caching.fetch_manifest(&id, size).await?; assert_eq!( logging.fetch_operations(), vec![("fetch_manifest_splitting".to_string(), pre_existing_id.to_string())] @@ -780,15 +781,15 @@ mod test { let manifest1 = Arc::new(Manifest::from_iter(vec![ci1, ci2, ci3]).await?.unwrap()); - let id1 = &manifest1.id; + let id1 = manifest1.id(); let size1 = manager.write_manifest(Arc::clone(&manifest1)).await?; let manifest2 = Arc::new(Manifest::from_iter(vec![ci4, ci5, ci6]).await?.unwrap()); - let id2 = &manifest2.id; + let id2 = manifest2.id(); let size2 = manager.write_manifest(Arc::clone(&manifest2)).await?; let manifest3 = Arc::new(Manifest::from_iter(vec![ci7, ci8, ci9]).await?.unwrap()); - let id3 = &manifest3.id; + let id3 = manifest3.id(); 
let size3 = manager.write_manifest(Arc::clone(&manifest3)).await?; let logging = Arc::new(LoggingStorage::new(Arc::clone(&backend))); @@ -809,9 +810,9 @@ mod test { // we keep asking for all 3 items, but the cache can only fit 2 for _ in 0..20 { - assert_eq!(caching.fetch_manifest(id1, size1).await?, manifest1); - assert_eq!(caching.fetch_manifest(id2, size2).await?, manifest2); - assert_eq!(caching.fetch_manifest(id3, size3).await?, manifest3); + caching.fetch_manifest(&id1, size1).await?; + caching.fetch_manifest(&id2, size2).await?; + caching.fetch_manifest(&id3, size3).await?; } // after the initial warming requests, we only request the file that doesn't fit in the cache assert_eq!(logging.fetch_operations()[10..].iter().unique().count(), 1); @@ -837,7 +838,7 @@ mod test { .await .unwrap() .unwrap(); - let manifest_id = manifest.id.clone(); + let manifest_id = manifest.id().clone(); let size = manager.write_manifest(Arc::new(manifest)).await?; let logging = Arc::new(LoggingStorage::new(Arc::clone(&storage))); diff --git a/icechunk/src/change_set.rs b/icechunk/src/change_set.rs index ec5f4d33..1bca04ed 100644 --- a/icechunk/src/change_set.rs +++ b/icechunk/src/change_set.rs @@ -1,10 +1,10 @@ use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeMap, HashMap, HashSet}, iter, mem::take, }; -use itertools::Either; +use itertools::{Either, Itertools as _}; use serde::{Deserialize, Serialize}; use crate::{ @@ -25,8 +25,8 @@ pub struct ChangeSet { // These paths may point to Arrays or Groups, // since both Groups and Arrays support UserAttributes updated_attributes: HashMap>, - // FIXME: issue with too many inline chunks kept in mem - set_chunks: HashMap>>, + // It's important we keep these sorted, we use this fact in TransactionLog creation + set_chunks: BTreeMap>>, deleted_groups: HashSet<(Path, NodeId)>, deleted_arrays: HashSet<(Path, NodeId)>, } @@ -54,7 +54,7 @@ impl ChangeSet { pub fn chunk_changes( &self, - ) -> impl Iterator>)> + ) -> impl Iterator>)> { self.set_chunks.iter() } @@ -167,7 +167,7 @@ impl ChangeSet { .and_modify(|h| { h.insert(coord.clone(), data.clone()); }) - .or_insert(HashMap::from([(coord, data)])); + .or_insert(BTreeMap::from([(coord, data)])); } pub fn get_chunk_ref( @@ -242,13 +242,13 @@ impl ChangeSet { pub fn take_chunks( &mut self, - ) -> HashMap>> { + ) -> BTreeMap>> { take(&mut self.set_chunks) } pub fn set_chunks( &mut self, - chunks: HashMap>>, + chunks: BTreeMap>>, ) { self.set_chunks = chunks } @@ -302,12 +302,12 @@ impl ChangeSet { Ok(rmp_serde::from_slice(bytes)?) 
} - pub fn update_existing_chunks<'a>( + pub fn update_existing_chunks<'a, E>( &'a self, node: NodeId, - chunks: impl Iterator + 'a, - ) -> impl Iterator + 'a { - chunks.filter_map(move |chunk| match self.get_chunk_ref(&node, &chunk.coord) { + chunks: impl Iterator> + 'a, + ) -> impl Iterator> + 'a { + chunks.filter_map_ok(move |chunk| match self.get_chunk_ref(&node, &chunk.coord) { None => Some(chunk), Some(new_payload) => { new_payload.clone().map(|pl| ChunkInfo { payload: pl, ..chunk }) diff --git a/icechunk/src/config.rs b/icechunk/src/config.rs index 9430d87c..bbb9b391 100644 --- a/icechunk/src/config.rs +++ b/icechunk/src/config.rs @@ -55,7 +55,7 @@ impl CompressionConfig { } pub fn level(&self) -> u8 { - self.level.unwrap_or(1) + self.level.unwrap_or(3) } pub fn merge(&self, other: Self) -> Self { diff --git a/icechunk/src/conflicts/detector.rs b/icechunk/src/conflicts/detector.rs index 2c8d94d7..2569b560 100644 --- a/icechunk/src/conflicts/detector.rs +++ b/icechunk/src/conflicts/detector.rs @@ -67,7 +67,7 @@ impl ConflictSolver for ConflictDetector { let updated_arrays_already_updated = current_changes .zarr_updated_arrays() - .filter(|node_id| previous_change.updated_zarr_metadata.contains(node_id)) + .filter(|node_id| previous_change.zarr_metadata_updated(node_id)) .map(Ok); let updated_arrays_already_updated = stream::iter(updated_arrays_already_updated) @@ -78,7 +78,7 @@ impl ConflictSolver for ConflictDetector { let updated_arrays_were_deleted = current_changes .zarr_updated_arrays() - .filter(|node_id| previous_change.deleted_arrays.contains(node_id)) + .filter(|node_id| previous_change.array_deleted(node_id)) .map(Ok); let updated_arrays_were_deleted = stream::iter(updated_arrays_were_deleted) @@ -89,7 +89,7 @@ impl ConflictSolver for ConflictDetector { let updated_attributes_already_updated = current_changes .user_attributes_updated_nodes() - .filter(|node_id| previous_change.updated_user_attributes.contains(node_id)) + .filter(|node_id| previous_change.user_attributes_updated(node_id)) .map(Ok); let updated_attributes_already_updated = @@ -104,8 +104,8 @@ impl ConflictSolver for ConflictDetector { let updated_attributes_on_deleted_node = current_changes .user_attributes_updated_nodes() .filter(|node_id| { - previous_change.deleted_arrays.contains(node_id) - || previous_change.deleted_groups.contains(node_id) + previous_change.array_deleted(node_id) + || previous_change.group_deleted(node_id) }) .map(Ok); @@ -117,7 +117,7 @@ impl ConflictSolver for ConflictDetector { let chunks_updated_in_deleted_array = current_changes .arrays_with_chunk_changes() - .filter(|node_id| previous_change.deleted_arrays.contains(node_id)) + .filter(|node_id| previous_change.array_deleted(node_id)) .map(Ok); let chunks_updated_in_deleted_array = @@ -131,7 +131,7 @@ impl ConflictSolver for ConflictDetector { let chunks_updated_in_updated_array = current_changes .arrays_with_chunk_changes() - .filter(|node_id| previous_change.updated_zarr_metadata.contains(node_id)) + .filter(|node_id| previous_change.zarr_metadata_updated(node_id)) .map(Ok); let chunks_updated_in_updated_array = @@ -145,9 +145,11 @@ impl ConflictSolver for ConflictDetector { let chunks_double_updated = current_changes.chunk_changes().filter_map(|(node_id, changes)| { - if let Some(previous_changes) = - previous_change.updated_chunks.get(node_id) - { + let previous_changes: HashSet<_> = + previous_change.updated_chunks_for(node_id).collect(); + if previous_changes.is_empty() { + None + } else { let conflicting: HashSet<_> = 
changes .keys() .filter(|coord| previous_changes.contains(coord)) @@ -158,8 +160,6 @@ impl ConflictSolver for ConflictDetector { } else { Some(Ok((node_id, conflicting))) } - } else { - None } }); @@ -187,9 +187,9 @@ impl ConflictSolver for ConflictDetector { }; if let Some(node_id) = id { - if previous_change.updated_zarr_metadata.contains(&node_id) - || previous_change.updated_user_attributes.contains(&node_id) - || previous_change.updated_chunks.contains_key(&node_id) + if previous_change.zarr_metadata_updated(&node_id) + || previous_change.user_attributes_updated(&node_id) + || previous_change.chunks_updated(&node_id) { Ok(Some(Conflict::DeleteOfUpdatedArray { path: path.clone(), @@ -216,7 +216,7 @@ impl ConflictSolver for ConflictDetector { }; if let Some(node_id) = id { - if previous_change.updated_user_attributes.contains(&node_id) { + if previous_change.user_attributes_updated(&node_id) { Ok(Some(Conflict::DeleteOfUpdatedGroup { path: path.clone(), node_id: node_id.clone(), @@ -256,7 +256,7 @@ impl ConflictSolver for ConflictDetector { struct PathFinder(Mutex<(HashMap, Option)>); -impl> PathFinder { +impl>> PathFinder { fn new(iter: It) -> Self { Self(Mutex::new((HashMap::new(), Some(iter)))) } @@ -272,6 +272,7 @@ impl> PathFinder { Ok(cached.clone()) } else if let Some(iterator) = iter { for node in iterator { + let node = node?; if &node.id == node_id { cache.insert(node.id, node.path.clone()); return Ok(node.path); diff --git a/icechunk/src/format/flatbuffers/all_generated.rs b/icechunk/src/format/flatbuffers/all_generated.rs new file mode 100644 index 00000000..6b67d66e --- /dev/null +++ b/icechunk/src/format/flatbuffers/all_generated.rs @@ -0,0 +1,3385 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +// @generated + +use core::cmp::Ordering; +use core::mem; + +extern crate flatbuffers; +use self::flatbuffers::{EndianScalar, Follow}; + +#[allow(unused_imports, dead_code)] +pub mod gen { + + use core::cmp::Ordering; + use core::mem; + + extern crate flatbuffers; + use self::flatbuffers::{EndianScalar, Follow}; + + #[deprecated( + since = "2.0.0", + note = "Use associated constants instead. This will no longer be generated in 2021." + )] + pub const ENUM_MIN_USER_ATTRIBUTES_SNAPSHOT: u8 = 0; + #[deprecated( + since = "2.0.0", + note = "Use associated constants instead. This will no longer be generated in 2021." + )] + pub const ENUM_MAX_USER_ATTRIBUTES_SNAPSHOT: u8 = 2; + #[deprecated( + since = "2.0.0", + note = "Use associated constants instead. This will no longer be generated in 2021." + )] + #[allow(non_camel_case_types)] + pub const ENUM_VALUES_USER_ATTRIBUTES_SNAPSHOT: [UserAttributesSnapshot; 3] = [ + UserAttributesSnapshot::NONE, + UserAttributesSnapshot::Inline, + UserAttributesSnapshot::Reference, + ]; + + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] + #[repr(transparent)] + pub struct UserAttributesSnapshot(pub u8); + #[allow(non_upper_case_globals)] + impl UserAttributesSnapshot { + pub const NONE: Self = Self(0); + pub const Inline: Self = Self(1); + pub const Reference: Self = Self(2); + + pub const ENUM_MIN: u8 = 0; + pub const ENUM_MAX: u8 = 2; + pub const ENUM_VALUES: &'static [Self] = + &[Self::NONE, Self::Inline, Self::Reference]; + /// Returns the variant's name or "" if unknown. 
+ pub fn variant_name(self) -> Option<&'static str> { + match self { + Self::NONE => Some("NONE"), + Self::Inline => Some("Inline"), + Self::Reference => Some("Reference"), + _ => None, + } + } + } + impl core::fmt::Debug for UserAttributesSnapshot { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if let Some(name) = self.variant_name() { + f.write_str(name) + } else { + f.write_fmt(format_args!("", self.0)) + } + } + } + impl<'a> flatbuffers::Follow<'a> for UserAttributesSnapshot { + type Inner = Self; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + let b = flatbuffers::read_scalar_at::(buf, loc); + Self(b) + } + } + + impl flatbuffers::Push for UserAttributesSnapshot { + type Output = UserAttributesSnapshot; + #[inline] + unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { + flatbuffers::emplace_scalar::(dst, self.0); + } + } + + impl flatbuffers::EndianScalar for UserAttributesSnapshot { + type Scalar = u8; + #[inline] + fn to_little_endian(self) -> u8 { + self.0.to_le() + } + #[inline] + #[allow(clippy::wrong_self_convention)] + fn from_little_endian(v: u8) -> Self { + let b = u8::from_le(v); + Self(b) + } + } + + impl<'a> flatbuffers::Verifiable for UserAttributesSnapshot { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + u8::run_verifier(v, pos) + } + } + + impl flatbuffers::SimpleToVerifyInSlice for UserAttributesSnapshot {} + pub struct UserAttributesSnapshotUnionTableOffset {} + + #[deprecated( + since = "2.0.0", + note = "Use associated constants instead. This will no longer be generated in 2021." + )] + pub const ENUM_MIN_NODE_DATA: u8 = 0; + #[deprecated( + since = "2.0.0", + note = "Use associated constants instead. This will no longer be generated in 2021." + )] + pub const ENUM_MAX_NODE_DATA: u8 = 2; + #[deprecated( + since = "2.0.0", + note = "Use associated constants instead. This will no longer be generated in 2021." + )] + #[allow(non_camel_case_types)] + pub const ENUM_VALUES_NODE_DATA: [NodeData; 3] = + [NodeData::NONE, NodeData::Array, NodeData::Group]; + + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] + #[repr(transparent)] + pub struct NodeData(pub u8); + #[allow(non_upper_case_globals)] + impl NodeData { + pub const NONE: Self = Self(0); + pub const Array: Self = Self(1); + pub const Group: Self = Self(2); + + pub const ENUM_MIN: u8 = 0; + pub const ENUM_MAX: u8 = 2; + pub const ENUM_VALUES: &'static [Self] = &[Self::NONE, Self::Array, Self::Group]; + /// Returns the variant's name or "" if unknown. 
+ pub fn variant_name(self) -> Option<&'static str> { + match self { + Self::NONE => Some("NONE"), + Self::Array => Some("Array"), + Self::Group => Some("Group"), + _ => None, + } + } + } + impl core::fmt::Debug for NodeData { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if let Some(name) = self.variant_name() { + f.write_str(name) + } else { + f.write_fmt(format_args!("", self.0)) + } + } + } + impl<'a> flatbuffers::Follow<'a> for NodeData { + type Inner = Self; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + let b = flatbuffers::read_scalar_at::(buf, loc); + Self(b) + } + } + + impl flatbuffers::Push for NodeData { + type Output = NodeData; + #[inline] + unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { + flatbuffers::emplace_scalar::(dst, self.0); + } + } + + impl flatbuffers::EndianScalar for NodeData { + type Scalar = u8; + #[inline] + fn to_little_endian(self) -> u8 { + self.0.to_le() + } + #[inline] + #[allow(clippy::wrong_self_convention)] + fn from_little_endian(v: u8) -> Self { + let b = u8::from_le(v); + Self(b) + } + } + + impl<'a> flatbuffers::Verifiable for NodeData { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + u8::run_verifier(v, pos) + } + } + + impl flatbuffers::SimpleToVerifyInSlice for NodeData {} + pub struct NodeDataUnionTableOffset {} + + // struct ObjectId12, aligned to 1 + #[repr(transparent)] + #[derive(Clone, Copy, PartialEq)] + pub struct ObjectId12(pub [u8; 12]); + impl Default for ObjectId12 { + fn default() -> Self { + Self([0; 12]) + } + } + impl core::fmt::Debug for ObjectId12 { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("ObjectId12").field("bytes", &self.bytes()).finish() + } + } + + impl flatbuffers::SimpleToVerifyInSlice for ObjectId12 {} + impl<'a> flatbuffers::Follow<'a> for ObjectId12 { + type Inner = &'a ObjectId12; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + <&'a ObjectId12>::follow(buf, loc) + } + } + impl<'a> flatbuffers::Follow<'a> for &'a ObjectId12 { + type Inner = &'a ObjectId12; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + flatbuffers::follow_cast_ref::(buf, loc) + } + } + impl<'b> flatbuffers::Push for ObjectId12 { + type Output = ObjectId12; + #[inline] + unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { + let src = ::core::slice::from_raw_parts( + self as *const ObjectId12 as *const u8, + ::size(), + ); + dst.copy_from_slice(src); + } + #[inline] + fn alignment() -> flatbuffers::PushAlignment { + flatbuffers::PushAlignment::new(1) + } + } + + impl<'a> flatbuffers::Verifiable for ObjectId12 { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.in_buffer::(pos) + } + } + + impl<'a> ObjectId12 { + #[allow(clippy::too_many_arguments)] + pub fn new(bytes: &[u8; 12]) -> Self { + let mut s = Self([0; 12]); + s.set_bytes(bytes); + s + } + + pub fn bytes(&'a self) -> flatbuffers::Array<'a, u8, 12> { + // Safety: + // Created from a valid Table for this object + // Which contains a valid array in this slot + unsafe { flatbuffers::Array::follow(&self.0, 0) } + } + + pub fn set_bytes(&mut self, items: &[u8; 12]) { + // Safety: + // Created from a valid Table for this object + // Which contains a valid array in this slot + unsafe { 
flatbuffers::emplace_scalar_array(&mut self.0, 0, items) }; + } + } + + // struct ObjectId8, aligned to 1 + #[repr(transparent)] + #[derive(Clone, Copy, PartialEq)] + pub struct ObjectId8(pub [u8; 8]); + impl Default for ObjectId8 { + fn default() -> Self { + Self([0; 8]) + } + } + impl core::fmt::Debug for ObjectId8 { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("ObjectId8").field("bytes", &self.bytes()).finish() + } + } + + impl flatbuffers::SimpleToVerifyInSlice for ObjectId8 {} + impl<'a> flatbuffers::Follow<'a> for ObjectId8 { + type Inner = &'a ObjectId8; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + <&'a ObjectId8>::follow(buf, loc) + } + } + impl<'a> flatbuffers::Follow<'a> for &'a ObjectId8 { + type Inner = &'a ObjectId8; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + flatbuffers::follow_cast_ref::(buf, loc) + } + } + impl<'b> flatbuffers::Push for ObjectId8 { + type Output = ObjectId8; + #[inline] + unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { + let src = ::core::slice::from_raw_parts( + self as *const ObjectId8 as *const u8, + ::size(), + ); + dst.copy_from_slice(src); + } + #[inline] + fn alignment() -> flatbuffers::PushAlignment { + flatbuffers::PushAlignment::new(1) + } + } + + impl<'a> flatbuffers::Verifiable for ObjectId8 { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.in_buffer::(pos) + } + } + + impl<'a> ObjectId8 { + #[allow(clippy::too_many_arguments)] + pub fn new(bytes: &[u8; 8]) -> Self { + let mut s = Self([0; 8]); + s.set_bytes(bytes); + s + } + + pub fn bytes(&'a self) -> flatbuffers::Array<'a, u8, 8> { + // Safety: + // Created from a valid Table for this object + // Which contains a valid array in this slot + unsafe { flatbuffers::Array::follow(&self.0, 0) } + } + + pub fn set_bytes(&mut self, items: &[u8; 8]) { + // Safety: + // Created from a valid Table for this object + // Which contains a valid array in this slot + unsafe { flatbuffers::emplace_scalar_array(&mut self.0, 0, items) }; + } + } + + // struct ManifestFileInfo, aligned to 8 + #[repr(transparent)] + #[derive(Clone, Copy, PartialEq)] + pub struct ManifestFileInfo(pub [u8; 32]); + impl Default for ManifestFileInfo { + fn default() -> Self { + Self([0; 32]) + } + } + impl core::fmt::Debug for ManifestFileInfo { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("ManifestFileInfo") + .field("id", &self.id()) + .field("size_bytes", &self.size_bytes()) + .field("num_chunk_refs", &self.num_chunk_refs()) + .finish() + } + } + + impl flatbuffers::SimpleToVerifyInSlice for ManifestFileInfo {} + impl<'a> flatbuffers::Follow<'a> for ManifestFileInfo { + type Inner = &'a ManifestFileInfo; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + <&'a ManifestFileInfo>::follow(buf, loc) + } + } + impl<'a> flatbuffers::Follow<'a> for &'a ManifestFileInfo { + type Inner = &'a ManifestFileInfo; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + flatbuffers::follow_cast_ref::(buf, loc) + } + } + impl<'b> flatbuffers::Push for ManifestFileInfo { + type Output = ManifestFileInfo; + #[inline] + unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { + let src = ::core::slice::from_raw_parts( + self as *const ManifestFileInfo as *const u8, + ::size(), + ); + dst.copy_from_slice(src); + } + 
#[inline] + fn alignment() -> flatbuffers::PushAlignment { + flatbuffers::PushAlignment::new(8) + } + } + + impl<'a> flatbuffers::Verifiable for ManifestFileInfo { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.in_buffer::(pos) + } + } + + impl<'a> ManifestFileInfo { + #[allow(clippy::too_many_arguments)] + pub fn new(id: &ObjectId12, size_bytes: u64, num_chunk_refs: u32) -> Self { + let mut s = Self([0; 32]); + s.set_id(id); + s.set_size_bytes(size_bytes); + s.set_num_chunk_refs(num_chunk_refs); + s + } + + pub fn id(&self) -> &ObjectId12 { + // Safety: + // Created from a valid Table for this object + // Which contains a valid struct in this slot + unsafe { &*(self.0[0..].as_ptr() as *const ObjectId12) } + } + + #[allow(clippy::identity_op)] + pub fn set_id(&mut self, x: &ObjectId12) { + self.0[0..0 + 12].copy_from_slice(&x.0) + } + + pub fn size_bytes(&self) -> u64 { + let mut mem = + core::mem::MaybeUninit::<::Scalar>::uninit(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + EndianScalar::from_little_endian(unsafe { + core::ptr::copy_nonoverlapping( + self.0[16..].as_ptr(), + mem.as_mut_ptr() as *mut u8, + core::mem::size_of::<::Scalar>(), + ); + mem.assume_init() + }) + } + + pub fn set_size_bytes(&mut self, x: u64) { + let x_le = x.to_little_endian(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + unsafe { + core::ptr::copy_nonoverlapping( + &x_le as *const _ as *const u8, + self.0[16..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), + ); + } + } + + pub fn num_chunk_refs(&self) -> u32 { + let mut mem = + core::mem::MaybeUninit::<::Scalar>::uninit(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + EndianScalar::from_little_endian(unsafe { + core::ptr::copy_nonoverlapping( + self.0[24..].as_ptr(), + mem.as_mut_ptr() as *mut u8, + core::mem::size_of::<::Scalar>(), + ); + mem.assume_init() + }) + } + + pub fn set_num_chunk_refs(&mut self, x: u32) { + let x_le = x.to_little_endian(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + unsafe { + core::ptr::copy_nonoverlapping( + &x_le as *const _ as *const u8, + self.0[24..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), + ); + } + } + } + + // struct AttributeFileInfo, aligned to 1 + #[repr(transparent)] + #[derive(Clone, Copy, PartialEq)] + pub struct AttributeFileInfo(pub [u8; 12]); + impl Default for AttributeFileInfo { + fn default() -> Self { + Self([0; 12]) + } + } + impl core::fmt::Debug for AttributeFileInfo { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("AttributeFileInfo").field("id", &self.id()).finish() + } + } + + impl flatbuffers::SimpleToVerifyInSlice for AttributeFileInfo {} + impl<'a> flatbuffers::Follow<'a> for AttributeFileInfo { + type Inner = &'a AttributeFileInfo; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + <&'a AttributeFileInfo>::follow(buf, loc) + } + } + impl<'a> flatbuffers::Follow<'a> for &'a AttributeFileInfo { + type Inner = &'a AttributeFileInfo; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + flatbuffers::follow_cast_ref::(buf, loc) + } + } + impl<'b> flatbuffers::Push for AttributeFileInfo { + type Output = AttributeFileInfo; + 
#[inline] + unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { + let src = ::core::slice::from_raw_parts( + self as *const AttributeFileInfo as *const u8, + ::size(), + ); + dst.copy_from_slice(src); + } + #[inline] + fn alignment() -> flatbuffers::PushAlignment { + flatbuffers::PushAlignment::new(1) + } + } + + impl<'a> flatbuffers::Verifiable for AttributeFileInfo { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.in_buffer::(pos) + } + } + + impl<'a> AttributeFileInfo { + #[allow(clippy::too_many_arguments)] + pub fn new(id: &ObjectId12) -> Self { + let mut s = Self([0; 12]); + s.set_id(id); + s + } + + pub fn id(&self) -> &ObjectId12 { + // Safety: + // Created from a valid Table for this object + // Which contains a valid struct in this slot + unsafe { &*(self.0[0..].as_ptr() as *const ObjectId12) } + } + + #[allow(clippy::identity_op)] + pub fn set_id(&mut self, x: &ObjectId12) { + self.0[0..0 + 12].copy_from_slice(&x.0) + } + } + + // struct ChunkIndexRange, aligned to 4 + #[repr(transparent)] + #[derive(Clone, Copy, PartialEq)] + pub struct ChunkIndexRange(pub [u8; 8]); + impl Default for ChunkIndexRange { + fn default() -> Self { + Self([0; 8]) + } + } + impl core::fmt::Debug for ChunkIndexRange { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("ChunkIndexRange") + .field("from", &self.from()) + .field("to", &self.to()) + .finish() + } + } + + impl flatbuffers::SimpleToVerifyInSlice for ChunkIndexRange {} + impl<'a> flatbuffers::Follow<'a> for ChunkIndexRange { + type Inner = &'a ChunkIndexRange; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + <&'a ChunkIndexRange>::follow(buf, loc) + } + } + impl<'a> flatbuffers::Follow<'a> for &'a ChunkIndexRange { + type Inner = &'a ChunkIndexRange; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + flatbuffers::follow_cast_ref::(buf, loc) + } + } + impl<'b> flatbuffers::Push for ChunkIndexRange { + type Output = ChunkIndexRange; + #[inline] + unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { + let src = ::core::slice::from_raw_parts( + self as *const ChunkIndexRange as *const u8, + ::size(), + ); + dst.copy_from_slice(src); + } + #[inline] + fn alignment() -> flatbuffers::PushAlignment { + flatbuffers::PushAlignment::new(4) + } + } + + impl<'a> flatbuffers::Verifiable for ChunkIndexRange { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.in_buffer::(pos) + } + } + + impl<'a> ChunkIndexRange { + #[allow(clippy::too_many_arguments)] + pub fn new(from: u32, to: u32) -> Self { + let mut s = Self([0; 8]); + s.set_from(from); + s.set_to(to); + s + } + + pub fn from(&self) -> u32 { + let mut mem = + core::mem::MaybeUninit::<::Scalar>::uninit(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + EndianScalar::from_little_endian(unsafe { + core::ptr::copy_nonoverlapping( + self.0[0..].as_ptr(), + mem.as_mut_ptr() as *mut u8, + core::mem::size_of::<::Scalar>(), + ); + mem.assume_init() + }) + } + + pub fn set_from(&mut self, x: u32) { + let x_le = x.to_little_endian(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + unsafe { + core::ptr::copy_nonoverlapping( + &x_le as *const _ as 
*const u8, + self.0[0..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), + ); + } + } + + pub fn to(&self) -> u32 { + let mut mem = + core::mem::MaybeUninit::<::Scalar>::uninit(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + EndianScalar::from_little_endian(unsafe { + core::ptr::copy_nonoverlapping( + self.0[4..].as_ptr(), + mem.as_mut_ptr() as *mut u8, + core::mem::size_of::<::Scalar>(), + ); + mem.assume_init() + }) + } + + pub fn set_to(&mut self, x: u32) { + let x_le = x.to_little_endian(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + unsafe { + core::ptr::copy_nonoverlapping( + &x_le as *const _ as *const u8, + self.0[4..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), + ); + } + } + } + + pub enum ChunkRefOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct ChunkRef<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for ChunkRef<'a> { + type Inner = ChunkRef<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> ChunkRef<'a> { + pub const VT_INDEX: flatbuffers::VOffsetT = 4; + pub const VT_INLINE: flatbuffers::VOffsetT = 6; + pub const VT_OFFSET: flatbuffers::VOffsetT = 8; + pub const VT_LENGTH: flatbuffers::VOffsetT = 10; + pub const VT_CHUNK_ID: flatbuffers::VOffsetT = 12; + pub const VT_LOCATION: flatbuffers::VOffsetT = 14; + pub const VT_CHECKSUM_ETAG: flatbuffers::VOffsetT = 16; + pub const VT_CHECKSUM_LAST_MODIFIED: flatbuffers::VOffsetT = 18; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + ChunkRef { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args ChunkRefArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = ChunkRefBuilder::new(_fbb); + builder.add_length(args.length); + builder.add_offset(args.offset); + builder.add_checksum_last_modified(args.checksum_last_modified); + if let Some(x) = args.checksum_etag { + builder.add_checksum_etag(x); + } + if let Some(x) = args.location { + builder.add_location(x); + } + if let Some(x) = args.chunk_id { + builder.add_chunk_id(x); + } + if let Some(x) = args.inline { + builder.add_inline(x); + } + if let Some(x) = args.index { + builder.add_index(x); + } + builder.finish() + } + + #[inline] + pub fn index(&self) -> flatbuffers::Vector<'a, u32> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + ChunkRef::VT_INDEX, + None, + ) + .unwrap() + } + } + #[inline] + pub fn inline(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + ChunkRef::VT_INLINE, + None, + ) + } + } + #[inline] + pub fn offset(&self) -> u64 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(ChunkRef::VT_OFFSET, Some(0)).unwrap() } + } + #[inline] + pub fn length(&self) -> u64 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(ChunkRef::VT_LENGTH, Some(0)).unwrap() } + } + #[inline] + pub fn chunk_id(&self) -> 
Option<&'a ObjectId12> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(ChunkRef::VT_CHUNK_ID, None) } + } + #[inline] + pub fn location(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>( + ChunkRef::VT_LOCATION, + None, + ) + } + } + #[inline] + pub fn checksum_etag(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>( + ChunkRef::VT_CHECKSUM_ETAG, + None, + ) + } + } + #[inline] + pub fn checksum_last_modified(&self) -> u32 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::(ChunkRef::VT_CHECKSUM_LAST_MODIFIED, Some(0)) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for ChunkRef<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>>("index", Self::VT_INDEX, true)? + .visit_field::>>("inline", Self::VT_INLINE, false)? + .visit_field::("offset", Self::VT_OFFSET, false)? + .visit_field::("length", Self::VT_LENGTH, false)? + .visit_field::("chunk_id", Self::VT_CHUNK_ID, false)? + .visit_field::>("location", Self::VT_LOCATION, false)? + .visit_field::>("checksum_etag", Self::VT_CHECKSUM_ETAG, false)? + .visit_field::("checksum_last_modified", Self::VT_CHECKSUM_LAST_MODIFIED, false)? + .finish(); + Ok(()) + } + } + pub struct ChunkRefArgs<'a> { + pub index: Option>>, + pub inline: Option>>, + pub offset: u64, + pub length: u64, + pub chunk_id: Option<&'a ObjectId12>, + pub location: Option>, + pub checksum_etag: Option>, + pub checksum_last_modified: u32, + } + impl<'a> Default for ChunkRefArgs<'a> { + #[inline] + fn default() -> Self { + ChunkRefArgs { + index: None, // required field + inline: None, + offset: 0, + length: 0, + chunk_id: None, + location: None, + checksum_etag: None, + checksum_last_modified: 0, + } + } + } + + pub struct ChunkRefBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ChunkRefBuilder<'a, 'b, A> { + #[inline] + pub fn add_index( + &mut self, + index: flatbuffers::WIPOffset>, + ) { + self.fbb_ + .push_slot_always::>(ChunkRef::VT_INDEX, index); + } + #[inline] + pub fn add_inline( + &mut self, + inline: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + ChunkRef::VT_INLINE, + inline, + ); + } + #[inline] + pub fn add_offset(&mut self, offset: u64) { + self.fbb_.push_slot::(ChunkRef::VT_OFFSET, offset, 0); + } + #[inline] + pub fn add_length(&mut self, length: u64) { + self.fbb_.push_slot::(ChunkRef::VT_LENGTH, length, 0); + } + #[inline] + pub fn add_chunk_id(&mut self, chunk_id: &ObjectId12) { + self.fbb_.push_slot_always::<&ObjectId12>(ChunkRef::VT_CHUNK_ID, chunk_id); + } + #[inline] + pub fn add_location(&mut self, location: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + ChunkRef::VT_LOCATION, + location, + ); + } + #[inline] + pub fn add_checksum_etag( + &mut self, + checksum_etag: flatbuffers::WIPOffset<&'b str>, + ) { + self.fbb_.push_slot_always::>( + ChunkRef::VT_CHECKSUM_ETAG, + checksum_etag, + ); + } + #[inline] + 
pub fn add_checksum_last_modified(&mut self, checksum_last_modified: u32) { + self.fbb_.push_slot::( + ChunkRef::VT_CHECKSUM_LAST_MODIFIED, + checksum_last_modified, + 0, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> ChunkRefBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + ChunkRefBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, ChunkRef::VT_INDEX, "index"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for ChunkRef<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("ChunkRef"); + ds.field("index", &self.index()); + ds.field("inline", &self.inline()); + ds.field("offset", &self.offset()); + ds.field("length", &self.length()); + ds.field("chunk_id", &self.chunk_id()); + ds.field("location", &self.location()); + ds.field("checksum_etag", &self.checksum_etag()); + ds.field("checksum_last_modified", &self.checksum_last_modified()); + ds.finish() + } + } + pub enum ArrayManifestOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct ArrayManifest<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for ArrayManifest<'a> { + type Inner = ArrayManifest<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> ArrayManifest<'a> { + pub const VT_NODE_ID: flatbuffers::VOffsetT = 4; + pub const VT_REFS: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + ArrayManifest { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args ArrayManifestArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = ArrayManifestBuilder::new(_fbb); + if let Some(x) = args.refs { + builder.add_refs(x); + } + if let Some(x) = args.node_id { + builder.add_node_id(x); + } + builder.finish() + } + + #[inline] + pub fn node_id(&self) -> &'a ObjectId8 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::(ArrayManifest::VT_NODE_ID, None).unwrap() + } + } + #[inline] + pub fn refs( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>, + >>(ArrayManifest::VT_REFS, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for ArrayManifest<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::("node_id", Self::VT_NODE_ID, true)? + .visit_field::>, + >>("refs", Self::VT_REFS, true)? 
+ .finish(); + Ok(()) + } + } + pub struct ArrayManifestArgs<'a> { + pub node_id: Option<&'a ObjectId8>, + pub refs: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + } + impl<'a> Default for ArrayManifestArgs<'a> { + #[inline] + fn default() -> Self { + ArrayManifestArgs { + node_id: None, // required field + refs: None, // required field + } + } + } + + pub struct ArrayManifestBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayManifestBuilder<'a, 'b, A> { + #[inline] + pub fn add_node_id(&mut self, node_id: &ObjectId8) { + self.fbb_.push_slot_always::<&ObjectId8>(ArrayManifest::VT_NODE_ID, node_id); + } + #[inline] + pub fn add_refs( + &mut self, + refs: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + ArrayManifest::VT_REFS, + refs, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> ArrayManifestBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + ArrayManifestBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, ArrayManifest::VT_NODE_ID, "node_id"); + self.fbb_.required(o, ArrayManifest::VT_REFS, "refs"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for ArrayManifest<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("ArrayManifest"); + ds.field("node_id", &self.node_id()); + ds.field("refs", &self.refs()); + ds.finish() + } + } + pub enum ManifestOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct Manifest<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for Manifest<'a> { + type Inner = Manifest<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> Manifest<'a> { + pub const VT_ID: flatbuffers::VOffsetT = 4; + pub const VT_ARRAYS: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + Manifest { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args ManifestArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = ManifestBuilder::new(_fbb); + if let Some(x) = args.arrays { + builder.add_arrays(x); + } + if let Some(x) = args.id { + builder.add_id(x); + } + builder.finish() + } + + #[inline] + pub fn id(&self) -> &'a ObjectId12 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Manifest::VT_ID, None).unwrap() } + } + #[inline] + pub fn arrays( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> + { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >, + >>(Manifest::VT_ARRAYS, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for Manifest<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { 
+ use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::("id", Self::VT_ID, true)? + .visit_field::>, + >>("arrays", Self::VT_ARRAYS, true)? + .finish(); + Ok(()) + } + } + pub struct ManifestArgs<'a> { + pub id: Option<&'a ObjectId12>, + pub arrays: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + } + impl<'a> Default for ManifestArgs<'a> { + #[inline] + fn default() -> Self { + ManifestArgs { + id: None, // required field + arrays: None, // required field + } + } + } + + pub struct ManifestBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ManifestBuilder<'a, 'b, A> { + #[inline] + pub fn add_id(&mut self, id: &ObjectId12) { + self.fbb_.push_slot_always::<&ObjectId12>(Manifest::VT_ID, id); + } + #[inline] + pub fn add_arrays( + &mut self, + arrays: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + Manifest::VT_ARRAYS, + arrays, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> ManifestBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + ManifestBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, Manifest::VT_ID, "id"); + self.fbb_.required(o, Manifest::VT_ARRAYS, "arrays"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for Manifest<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("Manifest"); + ds.field("id", &self.id()); + ds.field("arrays", &self.arrays()); + ds.finish() + } + } + pub enum MetadataItemOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct MetadataItem<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for MetadataItem<'a> { + type Inner = MetadataItem<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> MetadataItem<'a> { + pub const VT_NAME: flatbuffers::VOffsetT = 4; + pub const VT_VALUE: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + MetadataItem { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args MetadataItemArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = MetadataItemBuilder::new(_fbb); + if let Some(x) = args.value { + builder.add_value(x); + } + if let Some(x) = args.name { + builder.add_name(x); + } + builder.finish() + } + + #[inline] + pub fn name(&self) -> &'a str { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>( + MetadataItem::VT_NAME, + None, + ) + .unwrap() + } + } + #[inline] + pub fn value(&self) -> flatbuffers::Vector<'a, u8> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + MetadataItem::VT_VALUE, + None, + ) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for MetadataItem<'_> { + #[inline] + fn run_verifier( + v: 
&mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>( + "name", + Self::VT_NAME, + true, + )? + .visit_field::>>( + "value", + Self::VT_VALUE, + true, + )? + .finish(); + Ok(()) + } + } + pub struct MetadataItemArgs<'a> { + pub name: Option>, + pub value: Option>>, + } + impl<'a> Default for MetadataItemArgs<'a> { + #[inline] + fn default() -> Self { + MetadataItemArgs { + name: None, // required field + value: None, // required field + } + } + } + + pub struct MetadataItemBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> MetadataItemBuilder<'a, 'b, A> { + #[inline] + pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + MetadataItem::VT_NAME, + name, + ); + } + #[inline] + pub fn add_value( + &mut self, + value: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + MetadataItem::VT_VALUE, + value, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> MetadataItemBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + MetadataItemBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, MetadataItem::VT_NAME, "name"); + self.fbb_.required(o, MetadataItem::VT_VALUE, "value"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for MetadataItem<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("MetadataItem"); + ds.field("name", &self.name()); + ds.field("value", &self.value()); + ds.finish() + } + } + pub enum UserAttributesRefOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct UserAttributesRef<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for UserAttributesRef<'a> { + type Inner = UserAttributesRef<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> UserAttributesRef<'a> { + pub const VT_OBJECT_ID: flatbuffers::VOffsetT = 4; + pub const VT_LOCATION: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + UserAttributesRef { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args UserAttributesRefArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = UserAttributesRefBuilder::new(_fbb); + builder.add_location(args.location); + if let Some(x) = args.object_id { + builder.add_object_id(x); + } + builder.finish() + } + + #[inline] + pub fn object_id(&self) -> &'a ObjectId12 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::(UserAttributesRef::VT_OBJECT_ID, None) + .unwrap() + } + } + #[inline] + pub fn location(&self) -> u32 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::(UserAttributesRef::VT_LOCATION, Some(0)).unwrap() + } + } + } + + impl flatbuffers::Verifiable for 
UserAttributesRef<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::("object_id", Self::VT_OBJECT_ID, true)? + .visit_field::("location", Self::VT_LOCATION, false)? + .finish(); + Ok(()) + } + } + pub struct UserAttributesRefArgs<'a> { + pub object_id: Option<&'a ObjectId12>, + pub location: u32, + } + impl<'a> Default for UserAttributesRefArgs<'a> { + #[inline] + fn default() -> Self { + UserAttributesRefArgs { + object_id: None, // required field + location: 0, + } + } + } + + pub struct UserAttributesRefBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> UserAttributesRefBuilder<'a, 'b, A> { + #[inline] + pub fn add_object_id(&mut self, object_id: &ObjectId12) { + self.fbb_.push_slot_always::<&ObjectId12>( + UserAttributesRef::VT_OBJECT_ID, + object_id, + ); + } + #[inline] + pub fn add_location(&mut self, location: u32) { + self.fbb_.push_slot::(UserAttributesRef::VT_LOCATION, location, 0); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> UserAttributesRefBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + UserAttributesRefBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, UserAttributesRef::VT_OBJECT_ID, "object_id"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for UserAttributesRef<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("UserAttributesRef"); + ds.field("object_id", &self.object_id()); + ds.field("location", &self.location()); + ds.finish() + } + } + pub enum InlineUserAttributesOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct InlineUserAttributes<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for InlineUserAttributes<'a> { + type Inner = InlineUserAttributes<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> InlineUserAttributes<'a> { + pub const VT_DATA: flatbuffers::VOffsetT = 4; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + InlineUserAttributes { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args InlineUserAttributesArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = InlineUserAttributesBuilder::new(_fbb); + if let Some(x) = args.data { + builder.add_data(x); + } + builder.finish() + } + + #[inline] + pub fn data(&self) -> flatbuffers::Vector<'a, u8> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + InlineUserAttributes::VT_DATA, + None, + ) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for InlineUserAttributes<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? 
+ .visit_field::>>( + "data", + Self::VT_DATA, + true, + )? + .finish(); + Ok(()) + } + } + pub struct InlineUserAttributesArgs<'a> { + pub data: Option>>, + } + impl<'a> Default for InlineUserAttributesArgs<'a> { + #[inline] + fn default() -> Self { + InlineUserAttributesArgs { + data: None, // required field + } + } + } + + pub struct InlineUserAttributesBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> InlineUserAttributesBuilder<'a, 'b, A> { + #[inline] + pub fn add_data( + &mut self, + data: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + InlineUserAttributes::VT_DATA, + data, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> InlineUserAttributesBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + InlineUserAttributesBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, InlineUserAttributes::VT_DATA, "data"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for InlineUserAttributes<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("InlineUserAttributes"); + ds.field("data", &self.data()); + ds.finish() + } + } + pub enum ManifestRefOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct ManifestRef<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for ManifestRef<'a> { + type Inner = ManifestRef<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> ManifestRef<'a> { + pub const VT_OBJECT_ID: flatbuffers::VOffsetT = 4; + pub const VT_EXTENTS: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + ManifestRef { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args ManifestRefArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = ManifestRefBuilder::new(_fbb); + if let Some(x) = args.extents { + builder.add_extents(x); + } + if let Some(x) = args.object_id { + builder.add_object_id(x); + } + builder.finish() + } + + #[inline] + pub fn object_id(&self) -> &'a ObjectId12 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::(ManifestRef::VT_OBJECT_ID, None).unwrap() + } + } + #[inline] + pub fn extents(&self) -> flatbuffers::Vector<'a, ChunkIndexRange> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>>(ManifestRef::VT_EXTENTS, None).unwrap() + } + } + } + + impl flatbuffers::Verifiable for ManifestRef<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::("object_id", Self::VT_OBJECT_ID, true)? + .visit_field::>>("extents", Self::VT_EXTENTS, true)? 
+ .finish(); + Ok(()) + } + } + pub struct ManifestRefArgs<'a> { + pub object_id: Option<&'a ObjectId12>, + pub extents: + Option>>, + } + impl<'a> Default for ManifestRefArgs<'a> { + #[inline] + fn default() -> Self { + ManifestRefArgs { + object_id: None, // required field + extents: None, // required field + } + } + } + + pub struct ManifestRefBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ManifestRefBuilder<'a, 'b, A> { + #[inline] + pub fn add_object_id(&mut self, object_id: &ObjectId12) { + self.fbb_ + .push_slot_always::<&ObjectId12>(ManifestRef::VT_OBJECT_ID, object_id); + } + #[inline] + pub fn add_extents( + &mut self, + extents: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + ManifestRef::VT_EXTENTS, + extents, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> ManifestRefBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + ManifestRefBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, ManifestRef::VT_OBJECT_ID, "object_id"); + self.fbb_.required(o, ManifestRef::VT_EXTENTS, "extents"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for ManifestRef<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("ManifestRef"); + ds.field("object_id", &self.object_id()); + ds.field("extents", &self.extents()); + ds.finish() + } + } + pub enum GroupNodeDataOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct GroupNodeData<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for GroupNodeData<'a> { + type Inner = GroupNodeData<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> GroupNodeData<'a> { + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + GroupNodeData { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + _args: &'args GroupNodeDataArgs, + ) -> flatbuffers::WIPOffset> { + let mut builder = GroupNodeDataBuilder::new(_fbb); + builder.finish() + } + } + + impl flatbuffers::Verifiable for GroupNodeData<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)?.finish(); + Ok(()) + } + } + pub struct GroupNodeDataArgs {} + impl<'a> Default for GroupNodeDataArgs { + #[inline] + fn default() -> Self { + GroupNodeDataArgs {} + } + } + + pub struct GroupNodeDataBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> GroupNodeDataBuilder<'a, 'b, A> { + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> GroupNodeDataBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + GroupNodeDataBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + 
flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for GroupNodeData<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("GroupNodeData"); + ds.finish() + } + } + pub enum ArrayNodeDataOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct ArrayNodeData<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for ArrayNodeData<'a> { + type Inner = ArrayNodeData<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> ArrayNodeData<'a> { + pub const VT_ZARR_METADATA: flatbuffers::VOffsetT = 4; + pub const VT_MANIFESTS: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + ArrayNodeData { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args ArrayNodeDataArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = ArrayNodeDataBuilder::new(_fbb); + if let Some(x) = args.manifests { + builder.add_manifests(x); + } + if let Some(x) = args.zarr_metadata { + builder.add_zarr_metadata(x); + } + builder.finish() + } + + #[inline] + pub fn zarr_metadata(&self) -> flatbuffers::Vector<'a, u8> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + ArrayNodeData::VT_ZARR_METADATA, + None, + ) + .unwrap() + } + } + #[inline] + pub fn manifests( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> + { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >, + >>(ArrayNodeData::VT_MANIFESTS, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for ArrayNodeData<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>>( + "zarr_metadata", + Self::VT_ZARR_METADATA, + true, + )? + .visit_field::>, + >>("manifests", Self::VT_MANIFESTS, true)? 
+ .finish(); + Ok(()) + } + } + pub struct ArrayNodeDataArgs<'a> { + pub zarr_metadata: Option>>, + pub manifests: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + } + impl<'a> Default for ArrayNodeDataArgs<'a> { + #[inline] + fn default() -> Self { + ArrayNodeDataArgs { + zarr_metadata: None, // required field + manifests: None, // required field + } + } + } + + pub struct ArrayNodeDataBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayNodeDataBuilder<'a, 'b, A> { + #[inline] + pub fn add_zarr_metadata( + &mut self, + zarr_metadata: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + ArrayNodeData::VT_ZARR_METADATA, + zarr_metadata, + ); + } + #[inline] + pub fn add_manifests( + &mut self, + manifests: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + ArrayNodeData::VT_MANIFESTS, + manifests, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> ArrayNodeDataBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + ArrayNodeDataBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, ArrayNodeData::VT_ZARR_METADATA, "zarr_metadata"); + self.fbb_.required(o, ArrayNodeData::VT_MANIFESTS, "manifests"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for ArrayNodeData<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("ArrayNodeData"); + ds.field("zarr_metadata", &self.zarr_metadata()); + ds.field("manifests", &self.manifests()); + ds.finish() + } + } + pub enum NodeSnapshotOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct NodeSnapshot<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for NodeSnapshot<'a> { + type Inner = NodeSnapshot<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> NodeSnapshot<'a> { + pub const VT_ID: flatbuffers::VOffsetT = 4; + pub const VT_PATH: flatbuffers::VOffsetT = 6; + pub const VT_USER_ATTRIBUTES_TYPE: flatbuffers::VOffsetT = 8; + pub const VT_USER_ATTRIBUTES: flatbuffers::VOffsetT = 10; + pub const VT_NODE_DATA_TYPE: flatbuffers::VOffsetT = 12; + pub const VT_NODE_DATA: flatbuffers::VOffsetT = 14; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + NodeSnapshot { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args NodeSnapshotArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = NodeSnapshotBuilder::new(_fbb); + if let Some(x) = args.node_data { + builder.add_node_data(x); + } + if let Some(x) = args.user_attributes { + builder.add_user_attributes(x); + } + if let Some(x) = args.path { + builder.add_path(x); + } + if let Some(x) = args.id { + builder.add_id(x); + } + builder.add_node_data_type(args.node_data_type); + builder.add_user_attributes_type(args.user_attributes_type); + builder.finish() + } + + #[inline] + pub fn id(&self) -> &'a ObjectId8 { + // Safety: + 
// Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(NodeSnapshot::VT_ID, None).unwrap() } + } + #[inline] + pub fn path(&self) -> &'a str { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>( + NodeSnapshot::VT_PATH, + None, + ) + .unwrap() + } + } + #[inline] + pub fn user_attributes_type(&self) -> UserAttributesSnapshot { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::( + NodeSnapshot::VT_USER_ATTRIBUTES_TYPE, + Some(UserAttributesSnapshot::NONE), + ) + .unwrap() + } + } + #[inline] + pub fn user_attributes(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>>( + NodeSnapshot::VT_USER_ATTRIBUTES, + None, + ) + } + } + #[inline] + pub fn node_data_type(&self) -> NodeData { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::( + NodeSnapshot::VT_NODE_DATA_TYPE, + Some(NodeData::NONE), + ) + .unwrap() + } + } + #[inline] + pub fn node_data(&self) -> flatbuffers::Table<'a> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + NodeSnapshot::VT_NODE_DATA, + None, + ) + .unwrap() + } + } + #[inline] + #[allow(non_snake_case)] + pub fn user_attributes_as_inline(&self) -> Option> { + if self.user_attributes_type() == UserAttributesSnapshot::Inline { + self.user_attributes().map(|t| { + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + unsafe { InlineUserAttributes::init_from_table(t) } + }) + } else { + None + } + } + + #[inline] + #[allow(non_snake_case)] + pub fn user_attributes_as_reference(&self) -> Option> { + if self.user_attributes_type() == UserAttributesSnapshot::Reference { + self.user_attributes().map(|t| { + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + unsafe { UserAttributesRef::init_from_table(t) } + }) + } else { + None + } + } + + #[inline] + #[allow(non_snake_case)] + pub fn node_data_as_array(&self) -> Option> { + if self.node_data_type() == NodeData::Array { + let u = self.node_data(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + Some(unsafe { ArrayNodeData::init_from_table(u) }) + } else { + None + } + } + + #[inline] + #[allow(non_snake_case)] + pub fn node_data_as_group(&self) -> Option> { + if self.node_data_type() == NodeData::Group { + let u = self.node_data(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + Some(unsafe { GroupNodeData::init_from_table(u) }) + } else { + None + } + } + } + + impl flatbuffers::Verifiable for NodeSnapshot<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::("id", Self::VT_ID, true)? + .visit_field::>("path", Self::VT_PATH, true)? 
+ .visit_union::("user_attributes_type", Self::VT_USER_ATTRIBUTES_TYPE, "user_attributes", Self::VT_USER_ATTRIBUTES, false, |key, v, pos| { + match key { + UserAttributesSnapshot::Inline => v.verify_union_variant::>("UserAttributesSnapshot::Inline", pos), + UserAttributesSnapshot::Reference => v.verify_union_variant::>("UserAttributesSnapshot::Reference", pos), + _ => Ok(()), + } + })? + .visit_union::("node_data_type", Self::VT_NODE_DATA_TYPE, "node_data", Self::VT_NODE_DATA, true, |key, v, pos| { + match key { + NodeData::Array => v.verify_union_variant::>("NodeData::Array", pos), + NodeData::Group => v.verify_union_variant::>("NodeData::Group", pos), + _ => Ok(()), + } + })? + .finish(); + Ok(()) + } + } + pub struct NodeSnapshotArgs<'a> { + pub id: Option<&'a ObjectId8>, + pub path: Option>, + pub user_attributes_type: UserAttributesSnapshot, + pub user_attributes: Option>, + pub node_data_type: NodeData, + pub node_data: Option>, + } + impl<'a> Default for NodeSnapshotArgs<'a> { + #[inline] + fn default() -> Self { + NodeSnapshotArgs { + id: None, // required field + path: None, // required field + user_attributes_type: UserAttributesSnapshot::NONE, + user_attributes: None, + node_data_type: NodeData::NONE, + node_data: None, // required field + } + } + } + + pub struct NodeSnapshotBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> NodeSnapshotBuilder<'a, 'b, A> { + #[inline] + pub fn add_id(&mut self, id: &ObjectId8) { + self.fbb_.push_slot_always::<&ObjectId8>(NodeSnapshot::VT_ID, id); + } + #[inline] + pub fn add_path(&mut self, path: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + NodeSnapshot::VT_PATH, + path, + ); + } + #[inline] + pub fn add_user_attributes_type( + &mut self, + user_attributes_type: UserAttributesSnapshot, + ) { + self.fbb_.push_slot::( + NodeSnapshot::VT_USER_ATTRIBUTES_TYPE, + user_attributes_type, + UserAttributesSnapshot::NONE, + ); + } + #[inline] + pub fn add_user_attributes( + &mut self, + user_attributes: flatbuffers::WIPOffset, + ) { + self.fbb_.push_slot_always::>( + NodeSnapshot::VT_USER_ATTRIBUTES, + user_attributes, + ); + } + #[inline] + pub fn add_node_data_type(&mut self, node_data_type: NodeData) { + self.fbb_.push_slot::( + NodeSnapshot::VT_NODE_DATA_TYPE, + node_data_type, + NodeData::NONE, + ); + } + #[inline] + pub fn add_node_data( + &mut self, + node_data: flatbuffers::WIPOffset, + ) { + self.fbb_.push_slot_always::>( + NodeSnapshot::VT_NODE_DATA, + node_data, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> NodeSnapshotBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + NodeSnapshotBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, NodeSnapshot::VT_ID, "id"); + self.fbb_.required(o, NodeSnapshot::VT_PATH, "path"); + self.fbb_.required(o, NodeSnapshot::VT_NODE_DATA, "node_data"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for NodeSnapshot<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("NodeSnapshot"); + ds.field("id", &self.id()); + ds.field("path", &self.path()); + ds.field("user_attributes_type", &self.user_attributes_type()); + match self.user_attributes_type() { + UserAttributesSnapshot::Inline => { + if 
let Some(x) = self.user_attributes_as_inline() { + ds.field("user_attributes", &x) + } else { + ds.field("user_attributes", &"InvalidFlatbuffer: Union discriminant does not match value.") + } + } + UserAttributesSnapshot::Reference => { + if let Some(x) = self.user_attributes_as_reference() { + ds.field("user_attributes", &x) + } else { + ds.field("user_attributes", &"InvalidFlatbuffer: Union discriminant does not match value.") + } + } + _ => { + let x: Option<()> = None; + ds.field("user_attributes", &x) + } + }; + ds.field("node_data_type", &self.node_data_type()); + match self.node_data_type() { + NodeData::Array => { + if let Some(x) = self.node_data_as_array() { + ds.field("node_data", &x) + } else { + ds.field("node_data", &"InvalidFlatbuffer: Union discriminant does not match value.") + } + } + NodeData::Group => { + if let Some(x) = self.node_data_as_group() { + ds.field("node_data", &x) + } else { + ds.field("node_data", &"InvalidFlatbuffer: Union discriminant does not match value.") + } + } + _ => { + let x: Option<()> = None; + ds.field("node_data", &x) + } + }; + ds.finish() + } + } + pub enum SnapshotOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct Snapshot<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for Snapshot<'a> { + type Inner = Snapshot<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> Snapshot<'a> { + pub const VT_ID: flatbuffers::VOffsetT = 4; + pub const VT_PARENT_ID: flatbuffers::VOffsetT = 6; + pub const VT_NODES: flatbuffers::VOffsetT = 8; + pub const VT_FLUSHED_AT: flatbuffers::VOffsetT = 10; + pub const VT_MESSAGE: flatbuffers::VOffsetT = 12; + pub const VT_METADATA: flatbuffers::VOffsetT = 14; + pub const VT_MANIFEST_FILES: flatbuffers::VOffsetT = 16; + pub const VT_ATTRIBUTE_FILES: flatbuffers::VOffsetT = 18; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + Snapshot { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args SnapshotArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = SnapshotBuilder::new(_fbb); + builder.add_flushed_at(args.flushed_at); + if let Some(x) = args.attribute_files { + builder.add_attribute_files(x); + } + if let Some(x) = args.manifest_files { + builder.add_manifest_files(x); + } + if let Some(x) = args.metadata { + builder.add_metadata(x); + } + if let Some(x) = args.message { + builder.add_message(x); + } + if let Some(x) = args.nodes { + builder.add_nodes(x); + } + if let Some(x) = args.parent_id { + builder.add_parent_id(x); + } + if let Some(x) = args.id { + builder.add_id(x); + } + builder.finish() + } + + #[inline] + pub fn id(&self) -> &'a ObjectId12 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Snapshot::VT_ID, None).unwrap() } + } + #[inline] + pub fn parent_id(&self) -> Option<&'a ObjectId12> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Snapshot::VT_PARENT_ID, None) } + } + #[inline] + pub fn nodes( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> + { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this 
slot + unsafe { + self._tab + .get::, + >, + >>(Snapshot::VT_NODES, None) + .unwrap() + } + } + #[inline] + pub fn flushed_at(&self) -> u64 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Snapshot::VT_FLUSHED_AT, Some(0)).unwrap() } + } + #[inline] + pub fn message(&self) -> &'a str { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>(Snapshot::VT_MESSAGE, None) + .unwrap() + } + } + #[inline] + pub fn metadata( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> + { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >, + >>(Snapshot::VT_METADATA, None) + .unwrap() + } + } + #[inline] + pub fn manifest_files(&self) -> flatbuffers::Vector<'a, ManifestFileInfo> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >>(Snapshot::VT_MANIFEST_FILES, None) + .unwrap() + } + } + #[inline] + pub fn attribute_files(&self) -> flatbuffers::Vector<'a, AttributeFileInfo> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >>(Snapshot::VT_ATTRIBUTE_FILES, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for Snapshot<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::("id", Self::VT_ID, true)? + .visit_field::("parent_id", Self::VT_PARENT_ID, false)? + .visit_field::>>>("nodes", Self::VT_NODES, true)? + .visit_field::("flushed_at", Self::VT_FLUSHED_AT, false)? + .visit_field::>("message", Self::VT_MESSAGE, true)? + .visit_field::>>>("metadata", Self::VT_METADATA, true)? + .visit_field::>>("manifest_files", Self::VT_MANIFEST_FILES, true)? + .visit_field::>>("attribute_files", Self::VT_ATTRIBUTE_FILES, true)? 
+ .finish(); + Ok(()) + } + } + pub struct SnapshotArgs<'a> { + pub id: Option<&'a ObjectId12>, + pub parent_id: Option<&'a ObjectId12>, + pub nodes: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + pub flushed_at: u64, + pub message: Option>, + pub metadata: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + pub manifest_files: + Option>>, + pub attribute_files: + Option>>, + } + impl<'a> Default for SnapshotArgs<'a> { + #[inline] + fn default() -> Self { + SnapshotArgs { + id: None, // required field + parent_id: None, + nodes: None, // required field + flushed_at: 0, + message: None, // required field + metadata: None, // required field + manifest_files: None, // required field + attribute_files: None, // required field + } + } + } + + pub struct SnapshotBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> SnapshotBuilder<'a, 'b, A> { + #[inline] + pub fn add_id(&mut self, id: &ObjectId12) { + self.fbb_.push_slot_always::<&ObjectId12>(Snapshot::VT_ID, id); + } + #[inline] + pub fn add_parent_id(&mut self, parent_id: &ObjectId12) { + self.fbb_.push_slot_always::<&ObjectId12>(Snapshot::VT_PARENT_ID, parent_id); + } + #[inline] + pub fn add_nodes( + &mut self, + nodes: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_ + .push_slot_always::>(Snapshot::VT_NODES, nodes); + } + #[inline] + pub fn add_flushed_at(&mut self, flushed_at: u64) { + self.fbb_.push_slot::(Snapshot::VT_FLUSHED_AT, flushed_at, 0); + } + #[inline] + pub fn add_message(&mut self, message: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + Snapshot::VT_MESSAGE, + message, + ); + } + #[inline] + pub fn add_metadata( + &mut self, + metadata: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + Snapshot::VT_METADATA, + metadata, + ); + } + #[inline] + pub fn add_manifest_files( + &mut self, + manifest_files: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, ManifestFileInfo>, + >, + ) { + self.fbb_.push_slot_always::>( + Snapshot::VT_MANIFEST_FILES, + manifest_files, + ); + } + #[inline] + pub fn add_attribute_files( + &mut self, + attribute_files: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, AttributeFileInfo>, + >, + ) { + self.fbb_.push_slot_always::>( + Snapshot::VT_ATTRIBUTE_FILES, + attribute_files, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> SnapshotBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + SnapshotBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, Snapshot::VT_ID, "id"); + self.fbb_.required(o, Snapshot::VT_NODES, "nodes"); + self.fbb_.required(o, Snapshot::VT_MESSAGE, "message"); + self.fbb_.required(o, Snapshot::VT_METADATA, "metadata"); + self.fbb_.required(o, Snapshot::VT_MANIFEST_FILES, "manifest_files"); + self.fbb_.required(o, Snapshot::VT_ATTRIBUTE_FILES, "attribute_files"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for Snapshot<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("Snapshot"); + ds.field("id", &self.id()); + 
ds.field("parent_id", &self.parent_id()); + ds.field("nodes", &self.nodes()); + ds.field("flushed_at", &self.flushed_at()); + ds.field("message", &self.message()); + ds.field("metadata", &self.metadata()); + ds.field("manifest_files", &self.manifest_files()); + ds.field("attribute_files", &self.attribute_files()); + ds.finish() + } + } + pub enum ChunkIndicesOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct ChunkIndices<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for ChunkIndices<'a> { + type Inner = ChunkIndices<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> ChunkIndices<'a> { + pub const VT_COORDS: flatbuffers::VOffsetT = 4; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + ChunkIndices { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args ChunkIndicesArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = ChunkIndicesBuilder::new(_fbb); + if let Some(x) = args.coords { + builder.add_coords(x); + } + builder.finish() + } + + #[inline] + pub fn coords(&self) -> flatbuffers::Vector<'a, u32> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>>( + ChunkIndices::VT_COORDS, + None, + ) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for ChunkIndices<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>>("coords", Self::VT_COORDS, true)? 
+ .finish(); + Ok(()) + } + } + pub struct ChunkIndicesArgs<'a> { + pub coords: Option>>, + } + impl<'a> Default for ChunkIndicesArgs<'a> { + #[inline] + fn default() -> Self { + ChunkIndicesArgs { + coords: None, // required field + } + } + } + + pub struct ChunkIndicesBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ChunkIndicesBuilder<'a, 'b, A> { + #[inline] + pub fn add_coords( + &mut self, + coords: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + ChunkIndices::VT_COORDS, + coords, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> ChunkIndicesBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + ChunkIndicesBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, ChunkIndices::VT_COORDS, "coords"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for ChunkIndices<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("ChunkIndices"); + ds.field("coords", &self.coords()); + ds.finish() + } + } + pub enum ArrayUpdatedChunksOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct ArrayUpdatedChunks<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for ArrayUpdatedChunks<'a> { + type Inner = ArrayUpdatedChunks<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> ArrayUpdatedChunks<'a> { + pub const VT_NODE_ID: flatbuffers::VOffsetT = 4; + pub const VT_CHUNKS: flatbuffers::VOffsetT = 6; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + ArrayUpdatedChunks { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args ArrayUpdatedChunksArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = ArrayUpdatedChunksBuilder::new(_fbb); + if let Some(x) = args.chunks { + builder.add_chunks(x); + } + if let Some(x) = args.node_id { + builder.add_node_id(x); + } + builder.finish() + } + + #[inline] + pub fn node_id(&self) -> &'a ObjectId8 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::(ArrayUpdatedChunks::VT_NODE_ID, None).unwrap() + } + } + #[inline] + pub fn chunks( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> + { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >, + >>(ArrayUpdatedChunks::VT_CHUNKS, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for ArrayUpdatedChunks<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::("node_id", Self::VT_NODE_ID, true)? + .visit_field::>, + >>("chunks", Self::VT_CHUNKS, true)? 
+ .finish(); + Ok(()) + } + } + pub struct ArrayUpdatedChunksArgs<'a> { + pub node_id: Option<&'a ObjectId8>, + pub chunks: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, + } + impl<'a> Default for ArrayUpdatedChunksArgs<'a> { + #[inline] + fn default() -> Self { + ArrayUpdatedChunksArgs { + node_id: None, // required field + chunks: None, // required field + } + } + } + + pub struct ArrayUpdatedChunksBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayUpdatedChunksBuilder<'a, 'b, A> { + #[inline] + pub fn add_node_id(&mut self, node_id: &ObjectId8) { + self.fbb_ + .push_slot_always::<&ObjectId8>(ArrayUpdatedChunks::VT_NODE_ID, node_id); + } + #[inline] + pub fn add_chunks( + &mut self, + chunks: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, + ) { + self.fbb_.push_slot_always::>( + ArrayUpdatedChunks::VT_CHUNKS, + chunks, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> ArrayUpdatedChunksBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + ArrayUpdatedChunksBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, ArrayUpdatedChunks::VT_NODE_ID, "node_id"); + self.fbb_.required(o, ArrayUpdatedChunks::VT_CHUNKS, "chunks"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for ArrayUpdatedChunks<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("ArrayUpdatedChunks"); + ds.field("node_id", &self.node_id()); + ds.field("chunks", &self.chunks()); + ds.finish() + } + } + pub enum TransactionLogOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct TransactionLog<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for TransactionLog<'a> { + type Inner = TransactionLog<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> TransactionLog<'a> { + pub const VT_ID: flatbuffers::VOffsetT = 4; + pub const VT_NEW_GROUPS: flatbuffers::VOffsetT = 6; + pub const VT_NEW_ARRAYS: flatbuffers::VOffsetT = 8; + pub const VT_DELETED_GROUPS: flatbuffers::VOffsetT = 10; + pub const VT_DELETED_ARRAYS: flatbuffers::VOffsetT = 12; + pub const VT_UPDATED_USER_ATTRIBUTES: flatbuffers::VOffsetT = 14; + pub const VT_UPDATED_ZARR_METADATA: flatbuffers::VOffsetT = 16; + pub const VT_UPDATED_CHUNKS: flatbuffers::VOffsetT = 18; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + TransactionLog { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args TransactionLogArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = TransactionLogBuilder::new(_fbb); + if let Some(x) = args.updated_chunks { + builder.add_updated_chunks(x); + } + if let Some(x) = args.updated_zarr_metadata { + builder.add_updated_zarr_metadata(x); + } + if let Some(x) = args.updated_user_attributes { + builder.add_updated_user_attributes(x); + } + if let Some(x) = args.deleted_arrays { + builder.add_deleted_arrays(x); + } 
+ if let Some(x) = args.deleted_groups { + builder.add_deleted_groups(x); + } + if let Some(x) = args.new_arrays { + builder.add_new_arrays(x); + } + if let Some(x) = args.new_groups { + builder.add_new_groups(x); + } + if let Some(x) = args.id { + builder.add_id(x); + } + builder.finish() + } + + #[inline] + pub fn id(&self) -> &'a ObjectId12 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(TransactionLog::VT_ID, None).unwrap() } + } + #[inline] + pub fn new_groups(&self) -> flatbuffers::Vector<'a, ObjectId8> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>>(TransactionLog::VT_NEW_GROUPS, None).unwrap() + } + } + #[inline] + pub fn new_arrays(&self) -> flatbuffers::Vector<'a, ObjectId8> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>>(TransactionLog::VT_NEW_ARRAYS, None).unwrap() + } + } + #[inline] + pub fn deleted_groups(&self) -> flatbuffers::Vector<'a, ObjectId8> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>>(TransactionLog::VT_DELETED_GROUPS, None).unwrap() + } + } + #[inline] + pub fn deleted_arrays(&self) -> flatbuffers::Vector<'a, ObjectId8> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>>(TransactionLog::VT_DELETED_ARRAYS, None).unwrap() + } + } + #[inline] + pub fn updated_user_attributes(&self) -> flatbuffers::Vector<'a, ObjectId8> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>>(TransactionLog::VT_UPDATED_USER_ATTRIBUTES, None).unwrap() + } + } + #[inline] + pub fn updated_zarr_metadata(&self) -> flatbuffers::Vector<'a, ObjectId8> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>>(TransactionLog::VT_UPDATED_ZARR_METADATA, None).unwrap() + } + } + #[inline] + pub fn updated_chunks( + &self, + ) -> flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>> + { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::, + >, + >>(TransactionLog::VT_UPDATED_CHUNKS, None) + .unwrap() + } + } + } + + impl flatbuffers::Verifiable for TransactionLog<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::("id", Self::VT_ID, true)? + .visit_field::>>("new_groups", Self::VT_NEW_GROUPS, true)? + .visit_field::>>("new_arrays", Self::VT_NEW_ARRAYS, true)? + .visit_field::>>("deleted_groups", Self::VT_DELETED_GROUPS, true)? + .visit_field::>>("deleted_arrays", Self::VT_DELETED_ARRAYS, true)? + .visit_field::>>("updated_user_attributes", Self::VT_UPDATED_USER_ATTRIBUTES, true)? + .visit_field::>>("updated_zarr_metadata", Self::VT_UPDATED_ZARR_METADATA, true)? + .visit_field::>>>("updated_chunks", Self::VT_UPDATED_CHUNKS, true)? 
+ .finish(); + Ok(()) + } + } + pub struct TransactionLogArgs<'a> { + pub id: Option<&'a ObjectId12>, + pub new_groups: + Option>>, + pub new_arrays: + Option>>, + pub deleted_groups: + Option>>, + pub deleted_arrays: + Option>>, + pub updated_user_attributes: + Option>>, + pub updated_zarr_metadata: + Option>>, + pub updated_chunks: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector< + 'a, + flatbuffers::ForwardsUOffset>, + >, + >, + >, + } + impl<'a> Default for TransactionLogArgs<'a> { + #[inline] + fn default() -> Self { + TransactionLogArgs { + id: None, // required field + new_groups: None, // required field + new_arrays: None, // required field + deleted_groups: None, // required field + deleted_arrays: None, // required field + updated_user_attributes: None, // required field + updated_zarr_metadata: None, // required field + updated_chunks: None, // required field + } + } + } + + pub struct TransactionLogBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> TransactionLogBuilder<'a, 'b, A> { + #[inline] + pub fn add_id(&mut self, id: &ObjectId12) { + self.fbb_.push_slot_always::<&ObjectId12>(TransactionLog::VT_ID, id); + } + #[inline] + pub fn add_new_groups( + &mut self, + new_groups: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + TransactionLog::VT_NEW_GROUPS, + new_groups, + ); + } + #[inline] + pub fn add_new_arrays( + &mut self, + new_arrays: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + TransactionLog::VT_NEW_ARRAYS, + new_arrays, + ); + } + #[inline] + pub fn add_deleted_groups( + &mut self, + deleted_groups: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + TransactionLog::VT_DELETED_GROUPS, + deleted_groups, + ); + } + #[inline] + pub fn add_deleted_arrays( + &mut self, + deleted_arrays: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + TransactionLog::VT_DELETED_ARRAYS, + deleted_arrays, + ); + } + #[inline] + pub fn add_updated_user_attributes( + &mut self, + updated_user_attributes: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, ObjectId8>, + >, + ) { + self.fbb_.push_slot_always::>( + TransactionLog::VT_UPDATED_USER_ATTRIBUTES, + updated_user_attributes, + ); + } + #[inline] + pub fn add_updated_zarr_metadata( + &mut self, + updated_zarr_metadata: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, ObjectId8>, + >, + ) { + self.fbb_.push_slot_always::>( + TransactionLog::VT_UPDATED_ZARR_METADATA, + updated_zarr_metadata, + ); + } + #[inline] + pub fn add_updated_chunks( + &mut self, + updated_chunks: flatbuffers::WIPOffset< + flatbuffers::Vector< + 'b, + flatbuffers::ForwardsUOffset>, + >, + >, + ) { + self.fbb_.push_slot_always::>( + TransactionLog::VT_UPDATED_CHUNKS, + updated_chunks, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> TransactionLogBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + TransactionLogBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + self.fbb_.required(o, TransactionLog::VT_ID, "id"); + self.fbb_.required(o, TransactionLog::VT_NEW_GROUPS, "new_groups"); + self.fbb_.required(o, TransactionLog::VT_NEW_ARRAYS, "new_arrays"); + self.fbb_.required(o, TransactionLog::VT_DELETED_GROUPS, "deleted_groups"); + self.fbb_.required(o, TransactionLog::VT_DELETED_ARRAYS, 
"deleted_arrays"); + self.fbb_.required( + o, + TransactionLog::VT_UPDATED_USER_ATTRIBUTES, + "updated_user_attributes", + ); + self.fbb_.required( + o, + TransactionLog::VT_UPDATED_ZARR_METADATA, + "updated_zarr_metadata", + ); + self.fbb_.required(o, TransactionLog::VT_UPDATED_CHUNKS, "updated_chunks"); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for TransactionLog<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("TransactionLog"); + ds.field("id", &self.id()); + ds.field("new_groups", &self.new_groups()); + ds.field("new_arrays", &self.new_arrays()); + ds.field("deleted_groups", &self.deleted_groups()); + ds.field("deleted_arrays", &self.deleted_arrays()); + ds.field("updated_user_attributes", &self.updated_user_attributes()); + ds.field("updated_zarr_metadata", &self.updated_zarr_metadata()); + ds.field("updated_chunks", &self.updated_chunks()); + ds.finish() + } + } +} // pub mod gen diff --git a/icechunk/src/format/manifest.rs b/icechunk/src/format/manifest.rs index fba27443..8e92cd34 100644 --- a/icechunk/src/format/manifest.rs +++ b/icechunk/src/format/manifest.rs @@ -1,24 +1,25 @@ -use ::futures::{pin_mut, Stream, TryStreamExt}; -use itertools::Itertools; -use std::{ - collections::BTreeMap, - convert::Infallible, - ops::{Bound, Range}, - sync::Arc, -}; -use thiserror::Error; +use std::{borrow::Cow, convert::Infallible, ops::Range, sync::Arc}; +use crate::format::flatbuffers::gen; use bytes::Bytes; +use flatbuffers::VerifierOptions; +use futures::{Stream, TryStreamExt}; +use itertools::Itertools; use serde::{Deserialize, Serialize}; +use thiserror::Error; -use crate::{error::ICError, storage::ETag}; +use crate::{ + error::ICError, + format::{IcechunkFormatError, IcechunkFormatErrorKind}, + storage::ETag, +}; use super::{ - ChunkId, ChunkIndices, ChunkLength, ChunkOffset, IcechunkFormatErrorKind, - IcechunkResult, ManifestId, NodeId, + ChunkId, ChunkIndices, ChunkLength, ChunkOffset, IcechunkResult, ManifestId, NodeId, }; -type ManifestExtents = Range; +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ManifestExtents(Vec>); #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ManifestRef { @@ -26,6 +27,21 @@ pub struct ManifestRef { pub extents: ManifestExtents, } +impl ManifestExtents { + pub fn new(from: &[u32], to: &[u32]) -> Self { + let v = from + .iter() + .zip(to.iter()) + .map(|(a, b)| Range { start: *a, end: *b }) + .collect(); + Self(v) + } + + pub fn iter(&self) -> impl Iterator> { + self.0.iter() + } +} + #[derive(Debug, Error)] #[non_exhaustive] pub enum VirtualReferenceErrorKind { @@ -137,66 +153,75 @@ pub struct ChunkInfo { pub payload: ChunkPayload, } -#[derive(Debug, PartialEq)] +#[derive(Debug)] pub struct Manifest { - pub id: ManifestId, - pub(crate) chunks: BTreeMap>, -} - -impl Default for Manifest { - fn default() -> Self { - Self { id: ManifestId::random(), chunks: Default::default() } - } + buffer: Vec, } impl Manifest { - pub fn get_chunk_payload( - &self, - node: &NodeId, - coord: &ChunkIndices, - ) -> IcechunkResult<&ChunkPayload> { - self.chunks.get(node).and_then(|m| m.get(coord)).ok_or_else(|| { - IcechunkFormatErrorKind::ChunkCoordinatesNotFound { coords: coord.clone() } - .into() - }) + pub fn id(&self) -> ManifestId { + ManifestId::new(self.root().id().0) } - pub fn iter( - self: Arc, - node: NodeId, - ) -> impl Iterator { - PayloadIterator { manifest: self, for_node: node, last_key: None } + pub fn bytes(&self) 
-> &[u8] {
+        self.buffer.as_slice()
+    }
 
-    pub fn new(chunks: BTreeMap<NodeId, BTreeMap<ChunkIndices, ChunkPayload>>) -> Self {
-        Self { chunks, id: ManifestId::random() }
+    pub fn from_buffer(buffer: Vec<u8>) -> Result<Self, IcechunkFormatError> {
+        let _ = flatbuffers::root_with_opts::<gen::Manifest>(
+            &ROOT_OPTIONS,
+            buffer.as_slice(),
+        )?;
+        Ok(Manifest { buffer })
     }
 
     pub async fn from_stream<E>(
         stream: impl Stream<Item = Result<ChunkInfo, E>>,
     ) -> Result<Option<Self>, E> {
-        pin_mut!(stream);
-        let mut chunk_map: BTreeMap<NodeId, BTreeMap<ChunkIndices, ChunkPayload>> =
-            BTreeMap::new();
-        while let Some(chunk) = stream.try_next().await? {
-            // This could be done with BTreeMap.entry instead, but would require cloning both keys
-            match chunk_map.get_mut(&chunk.node) {
-                Some(m) => {
-                    m.insert(chunk.coord, chunk.payload);
-                }
-                None => {
-                    chunk_map.insert(
-                        chunk.node,
-                        BTreeMap::from([(chunk.coord, chunk.payload)]),
-                    );
-                }
-            };
+        // TODO: what's a good capacity?
+        let mut builder = flatbuffers::FlatBufferBuilder::with_capacity(1024 * 1024);
+        let mut all = stream.try_collect::<Vec<_>>().await?;
+        // FIXME: should we sort here or can we sort outside?
+        all.sort_by(|a, b| (&a.node, &a.coord).cmp(&(&b.node, &b.coord)));
+
+        let mut all = all.iter().peekable();
+
+        let mut array_manifests = Vec::with_capacity(1);
+        while let Some(current_node) = all.peek().map(|chunk| &chunk.node).cloned() {
+            // TODO: what is a good capacity
+            let mut refs = Vec::with_capacity(8_192);
+            while let Some(chunk) = all.next_if(|chunk| chunk.node == current_node) {
+                refs.push(mk_chunk_ref(&mut builder, chunk));
+            }
+
+            let node_id = Some(gen::ObjectId8::new(&current_node.0));
+            let refs = Some(builder.create_vector(refs.as_slice()));
+            let array_manifest = gen::ArrayManifest::create(
+                &mut builder,
+                &gen::ArrayManifestArgs { node_id: node_id.as_ref(), refs },
+            );
+            array_manifests.push(array_manifest);
         }
-        if chunk_map.is_empty() {
-            Ok(None)
-        } else {
-            Ok(Some(Self::new(chunk_map)))
+
+        if array_manifests.is_empty() {
+            // empty manifest
+            return Ok(None);
         }
+
+        let arrays = builder.create_vector(array_manifests.as_slice());
+        let manifest_id = ManifestId::random();
+        let bin_manifest_id = gen::ObjectId12::new(&manifest_id.0);
+
+        let manifest = gen::Manifest::create(
+            &mut builder,
+            &gen::ManifestArgs { id: Some(&bin_manifest_id), arrays: Some(arrays) },
+        );
+
+        builder.finish(manifest, Some("Ichk"));
+        let (mut buffer, offset) = builder.collapse();
+        buffer.drain(0..offset);
+        buffer.shrink_to_fit();
+        Ok(Some(Manifest { buffer }))
     }
 
     /// Used for tests
@@ -206,111 +231,206 @@ impl Manifest {
         Self::from_stream(futures::stream::iter(iter.into_iter().map(Ok))).await
     }
 
-    pub fn chunk_payloads(&self) -> impl Iterator<Item = &ChunkPayload> {
-        self.chunks.values().flat_map(|m| m.values())
-    }
-
     pub fn len(&self) -> usize {
-        self.chunks.values().map(|m| m.len()).sum()
+        self.root().arrays().iter().map(|am| am.refs().len()).sum()
     }
 
     #[must_use]
     pub fn is_empty(&self) -> bool {
         self.len() == 0
     }
+
+    fn root(&self) -> gen::Manifest {
+        // without the unsafe version this is too slow
+        // if we try to keep the root in the Manifest struct, we would need a lifetime
+        unsafe { flatbuffers::root_unchecked::<gen::Manifest>(&self.buffer) }
+    }
+
+    pub fn get_chunk_payload(
+        &self,
+        node: &NodeId,
+        coord: &ChunkIndices,
+    ) -> IcechunkResult<ChunkPayload> {
+        let manifest = self.root();
+        let chunk_ref = lookup_node(manifest, node)
+            .and_then(|array_manifest| lookup_ref(array_manifest, coord))
+            .ok_or_else(|| {
+                IcechunkFormatError::from(
+                    IcechunkFormatErrorKind::ChunkCoordinatesNotFound {
+                        coords: coord.clone(),
+                    },
+                )
+            })?;
+        ref_to_payload(chunk_ref)
+    }
+
+    pub fn iter(
+        self: Arc<Self>,
+        node: NodeId,
+    ) -> impl Iterator<Item = Result<(ChunkIndices, ChunkPayload), IcechunkFormatError>>
+    {
PayloadIterator::new(self, node) + } + + pub fn chunk_payloads( + &self, + ) -> impl Iterator> + '_ { + self.root().arrays().iter().flat_map(move |array_manifest| { + array_manifest.refs().iter().map(|r| ref_to_payload(r)) + }) + } +} + +fn lookup_node<'a>( + manifest: gen::Manifest<'a>, + node: &NodeId, +) -> Option> { + manifest.arrays().lookup_by_key(node.0, |am, id| am.node_id().0.cmp(id)) +} + +fn lookup_ref<'a>( + array_manifest: gen::ArrayManifest<'a>, + coord: &ChunkIndices, +) -> Option> { + let res = + array_manifest.refs().lookup_by_key(coord.0.as_slice(), |chunk_ref, coords| { + chunk_ref.index().iter().cmp(coords.iter().copied()) + }); + res } struct PayloadIterator { manifest: Arc, - for_node: NodeId, - last_key: Option, + node_id: NodeId, + last_ref_index: usize, +} + +impl PayloadIterator { + fn new(manifest: Arc, node_id: NodeId) -> Self { + Self { manifest, node_id, last_ref_index: 0 } + } } impl Iterator for PayloadIterator { - type Item = (ChunkIndices, ChunkPayload); + type Item = Result<(ChunkIndices, ChunkPayload), IcechunkFormatError>; fn next(&mut self) -> Option { - if let Some(map) = self.manifest.chunks.get(&self.for_node) { - match &self.last_key { - None => { - if let Some((coord, payload)) = map.iter().next() { - self.last_key = Some(coord.clone()); - Some((coord.clone(), payload.clone())) - } else { - None - } - } - Some(last_key) => { - if let Some((coord, payload)) = - map.range((Bound::Excluded(last_key), Bound::Unbounded)).next() - { - self.last_key = Some(coord.clone()); - Some((coord.clone(), payload.clone())) - } else { - None - } - } + let manifest = self.manifest.root(); + lookup_node(manifest, &self.node_id).and_then(|array_manifest| { + let refs = array_manifest.refs(); + if self.last_ref_index >= refs.len() { + return None; } - } else { - None - } + + let chunk_ref = refs.get(self.last_ref_index); + self.last_ref_index += 1; + Some( + ref_to_payload(chunk_ref) + .map(|payl| (ChunkIndices(chunk_ref.index().iter().collect()), payl)), + ) + }) } } -#[cfg(test)] -#[allow(clippy::expect_used, clippy::unwrap_used)] -mod tests { - - use std::error::Error; - - use crate::{format::manifest::ChunkInfo, format::ObjectId}; - - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn test_virtual_chunk_location_bad() { - // errors relative chunk location - assert!(matches!( - VirtualChunkLocation::from_absolute_path("abcdef"), - Err(VirtualReferenceError { - kind: VirtualReferenceErrorKind::CannotParseUrl(_), - .. - }), - )); - // extra / prevents bucket name detection - assert!(matches!( - VirtualChunkLocation::from_absolute_path("s3:///foo/path"), - Err(VirtualReferenceError { - kind: VirtualReferenceErrorKind::CannotParseBucketName(_), - .. 
- }), - )); +fn ref_to_payload( + chunk_ref: gen::ChunkRef<'_>, +) -> Result { + if let Some(chunk_id) = chunk_ref.chunk_id() { + let id = ChunkId::new(chunk_id.0); + Ok(ChunkPayload::Ref(ChunkRef { + id, + offset: chunk_ref.offset(), + length: chunk_ref.length(), + })) + } else if let Some(location) = chunk_ref.location() { + let location = VirtualChunkLocation::from_absolute_path(location)?; + Ok(ChunkPayload::Virtual(VirtualChunkRef { + location, + checksum: checksum(&chunk_ref), + offset: chunk_ref.offset(), + length: chunk_ref.length(), + })) + } else if let Some(data) = chunk_ref.inline() { + Ok(ChunkPayload::Inline(Bytes::copy_from_slice(data.bytes()))) + } else { + Err(IcechunkFormatErrorKind::InvalidFlatBuffer( + flatbuffers::InvalidFlatbuffer::InconsistentUnion { + field: Cow::Borrowed("chunk_id+location+inline"), + field_type: Cow::Borrowed("invalid"), + error_trace: Default::default(), + }, + ) + .into()) } +} - #[tokio::test] - async fn test_manifest_chunk_iterator_yields_requested_nodes_only( - ) -> Result<(), Box> { - // This is a regression test for a bug found by hypothesis. - // Because we use a `.range` query on the HashMap, we have to be careful - // to not yield chunks from a node that was not requested. - let mut array_ids = [NodeId::random(), NodeId::random()]; - array_ids.sort(); - - // insert with a chunk in the manifest for the array with the larger NodeId - let chunk1 = ChunkInfo { - node: array_ids[1].clone(), - coord: ChunkIndices(vec![0, 0, 0]), - payload: ChunkPayload::Ref(ChunkRef { - id: ObjectId::random(), - offset: 0, - length: 4, - }), - }; - let manifest = Manifest::from_iter(vec![chunk1]).await?.unwrap(); - let chunks = Arc::new(manifest).iter(array_ids[0].clone()).collect::>(); - assert_eq!(chunks, vec![]); +fn checksum(payload: &gen::ChunkRef<'_>) -> Option { + if let Some(etag) = payload.checksum_etag() { + Some(Checksum::ETag(etag.to_string())) + } else if payload.checksum_last_modified() > 0 { + Some(Checksum::LastModified(SecondsSinceEpoch(payload.checksum_last_modified()))) + } else { + None + } +} - Ok(()) +fn mk_chunk_ref<'bldr>( + builder: &mut flatbuffers::FlatBufferBuilder<'bldr>, + chunk: &ChunkInfo, +) -> flatbuffers::WIPOffset> { + let index = Some(builder.create_vector(chunk.coord.0.as_slice())); + match &chunk.payload { + ChunkPayload::Inline(bytes) => { + let bytes = builder.create_vector(bytes.as_ref()); + let args = + gen::ChunkRefArgs { inline: Some(bytes), index, ..Default::default() }; + gen::ChunkRef::create(builder, &args) + } + ChunkPayload::Virtual(virtual_chunk_ref) => { + let args = gen::ChunkRefArgs { + index, + location: Some( + builder.create_string(virtual_chunk_ref.location.0.as_str()), + ), + offset: virtual_chunk_ref.offset, + length: virtual_chunk_ref.length, + checksum_etag: match &virtual_chunk_ref.checksum { + Some(cs) => match cs { + Checksum::LastModified(_) => None, + Checksum::ETag(etag) => { + Some(builder.create_string(etag.as_str())) + } + }, + None => None, + }, + checksum_last_modified: match &virtual_chunk_ref.checksum { + Some(cs) => match cs { + Checksum::LastModified(seconds) => seconds.0, + Checksum::ETag(_) => 0, + }, + None => 0, + }, + ..Default::default() + }; + gen::ChunkRef::create(builder, &args) + } + ChunkPayload::Ref(chunk_ref) => { + let id = gen::ObjectId12::new(&chunk_ref.id.0); + let args = gen::ChunkRefArgs { + index, + offset: chunk_ref.offset, + length: chunk_ref.length, + chunk_id: Some(&id), + ..Default::default() + }; + gen::ChunkRef::create(builder, &args) + } } } + +static 
ROOT_OPTIONS: VerifierOptions = VerifierOptions {
+    max_depth: 64,
+    max_tables: 50_000_000,
+    max_apparent_size: 1 << 31, // taken from the default
+    ignore_missing_null_terminator: true,
+};
diff --git a/icechunk/src/format/mod.rs b/icechunk/src/format/mod.rs
index 44f103ea..202820e7 100644
--- a/icechunk/src/format/mod.rs
+++ b/icechunk/src/format/mod.rs
@@ -7,9 +7,12 @@ use std::{
     ops::Range,
 };
 
+use ::flatbuffers::InvalidFlatbuffer;
 use bytes::Bytes;
+use flatbuffers::gen;
 use format_constants::FileTypeBin;
 use itertools::Itertools;
+use manifest::{VirtualReferenceError, VirtualReferenceErrorKind};
 use rand::{rng, Rng};
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, TryFromInto};
@@ -20,6 +23,20 @@ use crate::{error::ICError, metadata::DataType, private};
 
 pub mod attributes;
 pub mod manifest;
+
+#[allow(
+    dead_code,
+    unused_imports,
+    clippy::unwrap_used,
+    clippy::expect_used,
+    clippy::needless_lifetimes,
+    clippy::extra_unused_lifetimes,
+    clippy::missing_safety_doc,
+    clippy::derivable_impls
+)]
+#[path = "./flatbuffers/all_generated.rs"]
+pub mod flatbuffers;
+
 pub mod serializers;
 pub mod snapshot;
 pub mod transaction_log;
@@ -150,6 +167,12 @@ pub struct ChunkIndices(pub Vec<u32>);
 pub type ChunkOffset = u64;
 pub type ChunkLength = u64;
 
+impl<'a> From<gen::ChunkIndices<'a>> for ChunkIndices {
+    fn from(value: gen::ChunkIndices<'a>) -> Self {
+        ChunkIndices(value.coords().iter().collect())
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum ByteRange {
     /// The fixed length range represented by the given `Range`
@@ -213,9 +236,12 @@ impl From<(Option<ChunkOffset>, Option<ChunkOffset>)> for ByteRange {
 
 pub type TableOffset = u32;
 
-#[derive(Debug, Clone, Error, PartialEq, Eq)]
+#[derive(Debug, Error)]
 #[non_exhaustive]
 pub enum IcechunkFormatErrorKind {
+    #[error(transparent)]
+    VirtualReferenceError(VirtualReferenceErrorKind),
+
     #[error("error decoding fill_value from array. Found size: {found_size}, target size: {target_size}, type: {target_type}")]
     FillValueDecodeError { found_size: usize, target_size: usize, target_type: DataType },
     #[error("error decoding fill_value from json. 
Type: {data_type}, value: {value}")] @@ -234,6 +260,18 @@ pub enum IcechunkFormatErrorKind { InvalidFileType { expected: FileTypeBin, got: u8 }, // TODO: add more info #[error("Icechunk cannot read file, invalid compression algorithm")] InvalidCompressionAlgorithm, // TODO: add more info + #[error("Invalid Icechunk metadata file")] + InvalidFlatBuffer(#[from] InvalidFlatbuffer), + #[error("error during metadata file deserialization")] + DeserializationError(#[from] rmp_serde::decode::Error), + #[error("error during metadata file serialization")] + SerializationError(#[from] rmp_serde::encode::Error), + #[error("I/O error")] + IO(#[from] std::io::Error), + #[error("path error")] + Path(#[from] PathError), + #[error("invalid timestamp in file")] + InvalidTimestamp, } pub type IcechunkFormatError = ICError; @@ -249,6 +287,21 @@ where } } +impl From for IcechunkFormatError { + fn from(value: VirtualReferenceError) -> Self { + Self::with_context( + IcechunkFormatErrorKind::VirtualReferenceError(value.kind), + value.context, + ) + } +} + +impl From for IcechunkFormatErrorKind { + fn from(value: Infallible) -> Self { + match value {} + } +} + pub type IcechunkResult = Result; pub mod format_constants { diff --git a/icechunk/src/format/serializers/current.rs b/icechunk/src/format/serializers/current.rs deleted file mode 100644 index 77fd2def..00000000 --- a/icechunk/src/format/serializers/current.rs +++ /dev/null @@ -1,141 +0,0 @@ -use std::collections::{BTreeMap, HashMap, HashSet}; - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; - -use crate::format::{ - manifest::{ChunkPayload, Manifest}, - snapshot::{ - AttributeFileInfo, ManifestFileInfo, NodeSnapshot, Snapshot, SnapshotProperties, - }, - transaction_log::TransactionLog, - ChunkIndices, ManifestId, NodeId, Path, SnapshotId, -}; - -#[derive(Debug, Deserialize)] -pub struct SnapshotDeserializer { - id: SnapshotId, - parent_id: Option, - flushed_at: DateTime, - message: String, - metadata: SnapshotProperties, - manifest_files: Vec, - attribute_files: Vec, - nodes: BTreeMap, -} - -#[derive(Debug, Serialize)] -pub struct SnapshotSerializer<'a> { - id: &'a SnapshotId, - parent_id: &'a Option, - flushed_at: &'a DateTime, - message: &'a String, - metadata: &'a SnapshotProperties, - manifest_files: Vec, - attribute_files: &'a Vec, - nodes: &'a BTreeMap, -} - -impl From for Snapshot { - fn from(value: SnapshotDeserializer) -> Self { - Self::from_fields( - value.id, - value.parent_id, - value.flushed_at, - value.message, - value.metadata, - value.manifest_files.into_iter().map(|fi| (fi.id.clone(), fi)).collect(), - value.attribute_files, - value.nodes, - ) - } -} - -impl<'a> From<&'a Snapshot> for SnapshotSerializer<'a> { - fn from(value: &'a Snapshot) -> Self { - Self { - id: value.id(), - parent_id: value.parent_id(), - flushed_at: value.flushed_at(), - message: value.message(), - metadata: value.metadata(), - manifest_files: value.manifest_files().values().cloned().collect(), - attribute_files: value.attribute_files(), - nodes: value.nodes(), - } - } -} - -#[derive(Debug, Deserialize)] -pub struct ManifestDeserializer { - id: ManifestId, - chunks: BTreeMap>, -} - -#[derive(Debug, Serialize)] -pub struct ManifestSerializer<'a> { - id: &'a ManifestId, - chunks: &'a BTreeMap>, -} - -impl From for Manifest { - fn from(value: ManifestDeserializer) -> Self { - Self { id: value.id, chunks: value.chunks } - } -} - -impl<'a> From<&'a Manifest> for ManifestSerializer<'a> { - fn from(value: &'a Manifest) -> Self { - Self { id: &value.id, 
chunks: &value.chunks } - } -} - -#[derive(Debug, Deserialize)] -pub struct TransactionLogDeserializer { - new_groups: HashSet, - new_arrays: HashSet, - deleted_groups: HashSet, - deleted_arrays: HashSet, - updated_user_attributes: HashSet, - updated_zarr_metadata: HashSet, - updated_chunks: HashMap>, -} - -#[derive(Debug, Serialize)] -pub struct TransactionLogSerializer<'a> { - new_groups: &'a HashSet, - new_arrays: &'a HashSet, - deleted_groups: &'a HashSet, - deleted_arrays: &'a HashSet, - updated_user_attributes: &'a HashSet, - updated_zarr_metadata: &'a HashSet, - updated_chunks: &'a HashMap>, -} - -impl From for TransactionLog { - fn from(value: TransactionLogDeserializer) -> Self { - Self { - new_groups: value.new_groups, - new_arrays: value.new_arrays, - deleted_groups: value.deleted_groups, - deleted_arrays: value.deleted_arrays, - updated_user_attributes: value.updated_user_attributes, - updated_zarr_metadata: value.updated_zarr_metadata, - updated_chunks: value.updated_chunks, - } - } -} - -impl<'a> From<&'a TransactionLog> for TransactionLogSerializer<'a> { - fn from(value: &'a TransactionLog) -> Self { - Self { - new_groups: &value.new_groups, - new_arrays: &value.new_arrays, - deleted_groups: &value.deleted_groups, - deleted_arrays: &value.deleted_arrays, - updated_user_attributes: &value.updated_user_attributes, - updated_zarr_metadata: &value.updated_zarr_metadata, - updated_chunks: &value.updated_chunks, - } - } -} diff --git a/icechunk/src/format/serializers/mod.rs b/icechunk/src/format/serializers/mod.rs index 1d94ec07..81801b16 100644 --- a/icechunk/src/format/serializers/mod.rs +++ b/icechunk/src/format/serializers/mod.rs @@ -42,28 +42,18 @@ //! spec version number and use the right (de)-serializer to do the job. use std::io::{Read, Write}; -use current::{ - ManifestDeserializer, ManifestSerializer, SnapshotDeserializer, SnapshotSerializer, - TransactionLogDeserializer, TransactionLogSerializer, -}; - use super::{ format_constants::SpecVersionBin, manifest::Manifest, snapshot::Snapshot, - transaction_log::TransactionLog, + transaction_log::TransactionLog, IcechunkFormatError, }; -pub mod current; - pub fn serialize_snapshot( snapshot: &Snapshot, version: SpecVersionBin, write: &mut impl Write, -) -> Result<(), rmp_serde::encode::Error> { +) -> Result<(), std::io::Error> { match version { - SpecVersionBin::V0dot1 => { - let serializer = SnapshotSerializer::from(snapshot); - rmp_serde::encode::write(write, &serializer) - } + SpecVersionBin::V0dot1 => write.write_all(snapshot.bytes()), } } @@ -71,12 +61,9 @@ pub fn serialize_manifest( manifest: &Manifest, version: SpecVersionBin, write: &mut impl Write, -) -> Result<(), rmp_serde::encode::Error> { +) -> Result<(), std::io::Error> { match version { - SpecVersionBin::V0dot1 => { - let serializer = ManifestSerializer::from(manifest); - rmp_serde::encode::write(write, &serializer) - } + SpecVersionBin::V0dot1 => write.write_all(manifest.bytes()), } } @@ -84,47 +71,53 @@ pub fn serialize_transaction_log( transaction_log: &TransactionLog, version: SpecVersionBin, write: &mut impl Write, -) -> Result<(), rmp_serde::encode::Error> { +) -> Result<(), std::io::Error> { match version { - SpecVersionBin::V0dot1 => { - let serializer = TransactionLogSerializer::from(transaction_log); - rmp_serde::encode::write(write, &serializer) - } + SpecVersionBin::V0dot1 => write.write_all(transaction_log.bytes()), } } pub fn deserialize_snapshot( version: SpecVersionBin, - read: Box, -) -> Result { + mut read: Box, +) -> Result { match version { 
SpecVersionBin::V0dot1 => { - let deserializer: SnapshotDeserializer = rmp_serde::from_read(read)?; - Ok(deserializer.into()) + // TODO: what's a good capacity? + let mut buffer = Vec::with_capacity(8_192); + read.read_to_end(&mut buffer)?; + buffer.shrink_to_fit(); + Snapshot::from_buffer(buffer) } } } pub fn deserialize_manifest( version: SpecVersionBin, - read: Box, -) -> Result { + mut read: Box, +) -> Result { match version { SpecVersionBin::V0dot1 => { - let deserializer: ManifestDeserializer = rmp_serde::from_read(read)?; - Ok(deserializer.into()) + // TODO: what's a good capacity? + let mut buffer = Vec::with_capacity(1024 * 1024); + read.read_to_end(&mut buffer)?; + buffer.shrink_to_fit(); + Manifest::from_buffer(buffer) } } } pub fn deserialize_transaction_log( version: SpecVersionBin, - read: Box, -) -> Result { + mut read: Box, +) -> Result { match version { SpecVersionBin::V0dot1 => { - let deserializer: TransactionLogDeserializer = rmp_serde::from_read(read)?; - Ok(deserializer.into()) + // TODO: what's a good capacity? + let mut buffer = Vec::with_capacity(1024 * 1024); + read.read_to_end(&mut buffer)?; + buffer.shrink_to_fit(); + TransactionLog::from_buffer(buffer) } } } diff --git a/icechunk/src/format/snapshot.rs b/icechunk/src/format/snapshot.rs index 5f0b33e3..13aeaac6 100644 --- a/icechunk/src/format/snapshot.rs +++ b/icechunk/src/format/snapshot.rs @@ -1,10 +1,9 @@ -use std::{ - collections::{BTreeMap, HashMap}, - ops::Bound, - sync::Arc, -}; +use std::{collections::BTreeMap, convert::Infallible, sync::Arc}; use chrono::{DateTime, Utc}; +use err_into::ErrorInto; +use flatbuffers::{FlatBufferBuilder, VerifierOptions}; +use itertools::Itertools as _; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -14,9 +13,10 @@ use crate::metadata::{ }; use super::{ - manifest::{Manifest, ManifestRef}, - AttributesId, ChunkIndices, IcechunkFormatErrorKind, IcechunkResult, ManifestId, - NodeId, Path, SnapshotId, TableOffset, + flatbuffers::gen, + manifest::{Manifest, ManifestExtents, ManifestRef}, + AttributesId, ChunkIndices, IcechunkFormatError, IcechunkFormatErrorKind, + IcechunkResult, ManifestId, NodeId, Path, SnapshotId, TableOffset, }; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -120,18 +120,143 @@ impl NodeSnapshot { } } -pub type SnapshotProperties = HashMap; +impl From<&gen::ObjectId8> for NodeId { + fn from(value: &gen::ObjectId8) -> Self { + NodeId::new(value.0) + } +} + +impl From<&gen::ObjectId12> for ManifestId { + fn from(value: &gen::ObjectId12) -> Self { + ManifestId::new(value.0) + } +} + +impl From<&gen::ObjectId12> for AttributesId { + fn from(value: &gen::ObjectId12) -> Self { + AttributesId::new(value.0) + } +} + +impl<'a> From> for ManifestRef { + fn from(value: gen::ManifestRef<'a>) -> Self { + let from = value.extents().iter().map(|range| range.from()).collect::>(); + let to = value.extents().iter().map(|range| range.to()).collect::>(); + let extents = ManifestExtents::new(from.as_slice(), to.as_slice()); + ManifestRef { object_id: value.object_id().into(), extents } + } +} + +impl<'a> TryFrom> for NodeData { + type Error = rmp_serde::decode::Error; + + fn try_from(value: gen::ArrayNodeData<'a>) -> Result { + // TODO: is it ok to call `bytes` here? 
Or do we need to collect an iterator + let meta = rmp_serde::from_slice(value.zarr_metadata().bytes())?; + let manifest_refs = value.manifests().iter().map(|m| m.into()).collect(); + Ok(Self::Array(meta, manifest_refs)) + } +} + +impl<'a> From> for NodeData { + fn from(_: gen::GroupNodeData<'a>) -> Self { + Self::Group + } +} + +impl<'a> TryFrom> for UserAttributesSnapshot { + type Error = rmp_serde::decode::Error; + + fn try_from(value: gen::InlineUserAttributes<'a>) -> Result { + let parsed = rmp_serde::from_slice(value.data().bytes())?; + Ok(Self::Inline(UserAttributes { parsed })) + } +} + +impl<'a> From> for UserAttributesSnapshot { + fn from(value: gen::UserAttributesRef<'a>) -> Self { + Self::Ref(UserAttributesRef { + object_id: value.object_id().into(), + location: value.location(), + }) + } +} + +impl<'a> TryFrom> for NodeSnapshot { + type Error = IcechunkFormatError; + + fn try_from(value: gen::NodeSnapshot<'a>) -> Result { + #[allow(clippy::expect_used, clippy::panic)] + let node_data: NodeData = match value.node_data_type() { + gen::NodeData::Array => value + .node_data_as_array() + .expect("Bug in flatbuffers library") + .try_into()?, + gen::NodeData::Group => { + value.node_data_as_group().expect("Bug in flatbuffers library").into() + } + x => panic!("Invalid node data type in flatbuffers file {:?}", x), + }; + #[allow(clippy::expect_used, clippy::panic)] + let user_attributes: Option = + match value.user_attributes_type() { + gen::UserAttributesSnapshot::Inline => Some( + value + .user_attributes_as_inline() + .expect("Bug in flatbuffers library") + .try_into()?, + ), + gen::UserAttributesSnapshot::Reference => Some( + value + .user_attributes_as_reference() + .expect("Bug in flatbuffers library") + .into(), + ), + gen::UserAttributesSnapshot::NONE => None, + x => panic!("Invalid user attributes type in flatbuffers file {:?}", x), + }; + let res = NodeSnapshot { + id: value.id().into(), + path: value.path().to_string().try_into()?, + user_attributes, + node_data, + }; + Ok(res) + } +} + +impl From<&gen::ManifestFileInfo> for ManifestFileInfo { + fn from(value: &gen::ManifestFileInfo) -> Self { + Self { + id: value.id().into(), + size_bytes: value.size_bytes(), + num_chunk_refs: value.num_chunk_refs(), + } + } +} + +impl From<&gen::AttributeFileInfo> for AttributeFileInfo { + fn from(value: &gen::AttributeFileInfo) -> Self { + Self { id: value.id().into() } + } +} + +pub type SnapshotProperties = BTreeMap; #[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Eq, Hash)] pub struct ManifestFileInfo { pub id: ManifestId, pub size_bytes: u64, - pub num_rows: u32, + pub num_chunk_refs: u32, } impl ManifestFileInfo { pub fn new(manifest: &Manifest, size_bytes: u64) -> Self { - Self { id: manifest.id.clone(), num_rows: manifest.len() as u32, size_bytes } + Self { + id: manifest.id().clone(), + num_chunk_refs: manifest.len() as u32, + size_bytes, + } } } @@ -142,14 +267,7 @@ pub struct AttributeFileInfo { #[derive(Debug, PartialEq)] pub struct Snapshot { - id: SnapshotId, - parent_id: Option, - flushed_at: DateTime, - message: String, - metadata: SnapshotProperties, - manifest_files: HashMap, - attribute_files: Vec, - nodes: BTreeMap, + buffer: Vec, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -161,173 +279,245 @@ pub struct SnapshotInfo { pub metadata: SnapshotProperties, } -impl From<&Snapshot> for SnapshotInfo { - fn from(value: &Snapshot) -> Self { - Self { +impl TryFrom<&Snapshot> for SnapshotInfo { + type Error = IcechunkFormatError; + + fn try_from(value: &Snapshot) -> 
Result { + Ok(Self { id: value.id().clone(), parent_id: value.parent_id().clone(), - flushed_at: *value.flushed_at(), + flushed_at: value.flushed_at()?, message: value.message().to_string(), - metadata: value.metadata().clone(), - } + metadata: value.metadata()?.clone(), + }) } } impl SnapshotInfo { pub fn is_initial(&self) -> bool { - // FIXME: add check for known initial id self.parent_id.is_none() } } +static ROOT_OPTIONS: VerifierOptions = VerifierOptions { + max_depth: 64, + max_tables: 500_000, + max_apparent_size: 1 << 31, // taken from the default + ignore_missing_null_terminator: true, +}; + impl Snapshot { pub const INITIAL_COMMIT_MESSAGE: &'static str = "Repository initialized"; - fn new( - parent_id: Option, - message: String, - metadata: Option, - nodes: BTreeMap, - manifest_files: Vec, - attribute_files: Vec, - ) -> Self { - let metadata = metadata.unwrap_or_default(); - let flushed_at = Utc::now(); - Self { - id: SnapshotId::random(), - parent_id, - flushed_at, - message, - manifest_files: manifest_files - .into_iter() - .map(|fi| (fi.id.clone(), fi)) - .collect(), - attribute_files, - metadata, - nodes, - } + pub fn from_buffer(buffer: Vec) -> IcechunkResult { + let _ = flatbuffers::root_with_opts::( + &ROOT_OPTIONS, + buffer.as_slice(), + )?; + Ok(Snapshot { buffer }) } - #[allow(clippy::too_many_arguments)] - pub fn from_fields( - id: SnapshotId, - parent_id: Option, - flushed_at: DateTime, - message: String, - metadata: SnapshotProperties, - manifest_files: HashMap, - attribute_files: Vec, - nodes: BTreeMap, - ) -> Self { - Self { - id, - parent_id, - flushed_at, - message, - metadata, - manifest_files, - attribute_files, - nodes, - } + pub fn bytes(&self) -> &[u8] { + self.buffer.as_slice() } - pub fn from_iter>( - parent_id: SnapshotId, + pub fn from_iter( + id: Option, + parent_id: Option, message: String, properties: Option, - manifest_files: Vec, - attribute_files: Vec, - iter: T, - ) -> Self { - let nodes = iter.into_iter().map(|node| (node.path.clone(), node)).collect(); - - Self::new( - Some(parent_id), - message, - properties, - nodes, - manifest_files, - attribute_files, - ) + mut manifest_files: Vec, + mut attribute_files: Vec, + sorted_iter: I, + ) -> IcechunkResult + where + IcechunkFormatError: From, + I: IntoIterator>, + { + // TODO: what's a good capacity? 
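+        // (the builder grows on demand, so this capacity only sets the initial allocation)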
+ let mut builder = flatbuffers::FlatBufferBuilder::with_capacity(4_096); + + manifest_files.sort_by(|a, b| a.id.cmp(&b.id)); + let manifest_files = manifest_files + .iter() + .map(|mfi| { + let id = gen::ObjectId12::new(&mfi.id.0); + gen::ManifestFileInfo::new(&id, mfi.size_bytes, mfi.num_chunk_refs) + }) + .collect::>(); + let manifest_files = builder.create_vector(&manifest_files); + + attribute_files.sort_by(|a, b| a.id.cmp(&b.id)); + let attribute_files = attribute_files + .iter() + .map(|att| { + let id = gen::ObjectId12::new(&att.id.0); + gen::AttributeFileInfo::new(&id) + }) + .collect::>(); + let attribute_files = builder.create_vector(&attribute_files); + + let metadata_items: Vec<_> = properties + .unwrap_or_default() + .iter() + .map(|(k, v)| { + let name = builder.create_shared_string(k.as_str()); + let serialized = rmp_serde::to_vec(v)?; + let value = builder.create_vector(serialized.as_slice()); + Ok::<_, IcechunkFormatError>(gen::MetadataItem::create( + &mut builder, + &gen::MetadataItemArgs { name: Some(name), value: Some(value) }, + )) + }) + .try_collect()?; + let metadata_items = builder.create_vector(metadata_items.as_slice()); + + let message = builder.create_string(&message); + let parent_id = parent_id.map(|oid| gen::ObjectId12::new(&oid.0)); + let flushed_at = Utc::now().timestamp_micros() as u64; + let id = gen::ObjectId12::new(&id.unwrap_or_else(SnapshotId::random).0); + + let nodes: Vec<_> = sorted_iter + .into_iter() + .map(|node| node.err_into().and_then(|node| mk_node(&mut builder, &node))) + .try_collect()?; + let nodes = builder.create_vector(&nodes); + + let snap = gen::Snapshot::create( + &mut builder, + &gen::SnapshotArgs { + id: Some(&id), + parent_id: parent_id.as_ref(), + nodes: Some(nodes), + flushed_at, + message: Some(message), + metadata: Some(metadata_items), + manifest_files: Some(manifest_files), + attribute_files: Some(attribute_files), + }, + ); + + builder.finish(snap, Some("Ichk")); + let (mut buffer, offset) = builder.collapse(); + buffer.drain(0..offset); + buffer.shrink_to_fit(); + Ok(Snapshot { buffer }) } - pub fn initial() -> Self { + pub fn initial() -> IcechunkResult { let properties = [("__root".to_string(), serde_json::Value::from(true))].into(); - Self::new( + let nodes: Vec> = Vec::new(); + Self::from_iter( + None, None, Self::INITIAL_COMMIT_MESSAGE.to_string(), Some(properties), Default::default(), Default::default(), - Default::default(), + nodes, ) } - pub fn id(&self) -> &SnapshotId { - &self.id + fn root(&self) -> gen::Snapshot { + // without the unsafe version this is too slow + // if we try to keep the root in the Manifest struct, we would need a lifetime + unsafe { flatbuffers::root_unchecked::(&self.buffer) } } - pub fn parent_id(&self) -> &Option { - &self.parent_id + pub fn id(&self) -> SnapshotId { + SnapshotId::new(self.root().id().0) } - pub fn metadata(&self) -> &SnapshotProperties { - &self.metadata + pub fn parent_id(&self) -> Option { + self.root().parent_id().map(|pid| SnapshotId::new(pid.0)) } - pub fn flushed_at(&self) -> &DateTime { - &self.flushed_at + pub fn metadata(&self) -> IcechunkResult { + self.root() + .metadata() + .iter() + .map(|item| { + let key = item.name().to_string(); + let value = rmp_serde::from_slice(item.value().bytes())?; + Ok((key, value)) + }) + .try_collect() } - pub fn message(&self) -> &String { - &self.message + pub fn flushed_at(&self) -> IcechunkResult> { + let ts = self.root().flushed_at(); + let ts: i64 = ts.try_into().map_err(|_| { + 
IcechunkFormatError::from(IcechunkFormatErrorKind::InvalidTimestamp) + })?; + DateTime::from_timestamp_micros(ts) + .ok_or_else(|| IcechunkFormatErrorKind::InvalidTimestamp.into()) } - pub fn nodes(&self) -> &BTreeMap { - &self.nodes + pub fn message(&self) -> String { + self.root().message().to_string() } - pub fn get_manifest_file(&self, id: &ManifestId) -> Option<&ManifestFileInfo> { - self.manifest_files.get(id) + // pub fn nodes(&self) -> &BTreeMap { + // &self.nodes + // } + + pub fn get_manifest_file(&self, id: &ManifestId) -> Option { + self.root().manifest_files().iter().find(|mf| mf.id().0 == id.0.as_slice()).map( + |mf| ManifestFileInfo { + id: ManifestId::new(mf.id().0), + size_bytes: mf.size_bytes(), + num_chunk_refs: mf.num_chunk_refs(), + }, + ) } - pub fn manifest_files(&self) -> &HashMap { - &self.manifest_files + pub fn manifest_files(&self) -> impl Iterator + '_ { + self.root().manifest_files().iter().map(|mf| mf.into()) } - pub fn attribute_files(&self) -> &Vec { - &self.attribute_files + + pub fn attribute_files(&self) -> impl Iterator + '_ { + self.root().attribute_files().iter().map(|f| f.into()) } - /// Cretase a new `Snapshot` with all the same data as `self` but a different parent - pub fn adopt(&self, parent: &Snapshot) -> Self { - Self { - id: self.id.clone(), - parent_id: Some(parent.id().clone()), - flushed_at: *self.flushed_at(), - message: self.message().clone(), - metadata: self.metadata().clone(), - manifest_files: self.manifest_files().clone(), - attribute_files: self.attribute_files().clone(), - nodes: self.nodes.clone(), - } + /// Cretase a new `Snapshot` with all the same data as `new_child` but `self` as parent + pub fn adopt(&self, new_child: &Snapshot) -> IcechunkResult { + // Rust flatbuffers implementation doesn't allow mutation of scalars, so we need to + // create a whole new buffer and write to it in full + + Snapshot::from_iter( + Some(new_child.id()), + Some(self.id()), + new_child.message().clone(), + Some(new_child.metadata()?.clone()), + new_child.manifest_files().collect(), + new_child.attribute_files().collect(), + new_child.iter(), + ) } - pub fn get_node(&self, path: &Path) -> IcechunkResult<&NodeSnapshot> { - self.nodes - .get(path) - .ok_or(IcechunkFormatErrorKind::NodeNotFound { path: path.clone() }.into()) + pub fn get_node(&self, path: &Path) -> IcechunkResult { + let res = self + .root() + .nodes() + .lookup_by_key(path.to_string().as_str(), |node, path| node.path().cmp(path)) + .ok_or(IcechunkFormatError::from(IcechunkFormatErrorKind::NodeNotFound { + path: path.clone(), + }))?; + res.try_into() } - pub fn iter(&self) -> impl Iterator + '_ { - self.nodes.values() + pub fn iter(&self) -> impl Iterator> + '_ { + self.root().nodes().iter().map(|node| node.try_into().err_into()) } - pub fn iter_arc(self: Arc) -> impl Iterator { - NodeIterator { table: self, last_key: None } + pub fn iter_arc( + self: Arc, + ) -> impl Iterator> { + NodeIterator { snapshot: self, last_index: 0 } } pub fn len(&self) -> usize { - self.nodes.len() + self.root().nodes().len() } #[must_use] @@ -335,45 +525,141 @@ impl Snapshot { self.len() == 0 } - pub fn manifest_info(&self, id: &ManifestId) -> Option<&ManifestFileInfo> { - self.manifest_files.get(id) + pub fn manifest_info(&self, id: &ManifestId) -> Option { + self.root() + .manifest_files() + .iter() + .find(|mi| mi.id().0 == id.0) + .map(|man| man.into()) } } -// We need this complex dance because Rust makes it really hard to put together an object and a -// reference to it (in the iterator) in a 
single self-referential struct struct NodeIterator { - table: Arc, - last_key: Option, + snapshot: Arc, + last_index: usize, } impl Iterator for NodeIterator { - type Item = NodeSnapshot; + type Item = IcechunkResult; fn next(&mut self) -> Option { - match &self.last_key { - None => { - if let Some((k, v)) = self.table.nodes.first_key_value() { - self.last_key = Some(k.clone()); - Some(v.clone()) - } else { - None - } - } - Some(last_key) => { - if let Some((k, v)) = self - .table - .nodes - .range::((Bound::Excluded(last_key), Bound::Unbounded)) - .next() - { - self.last_key = Some(k.clone()); - Some(v.clone()) - } else { - None - } - } + let nodes = self.snapshot.root().nodes(); + if self.last_index < nodes.len() { + let res = Some(nodes.get(self.last_index).try_into().err_into()); + self.last_index += 1; + res + } else { + None + } + } +} + +fn mk_node<'bldr>( + builder: &mut flatbuffers::FlatBufferBuilder<'bldr>, + node: &NodeSnapshot, +) -> IcechunkResult>> { + let id = gen::ObjectId8::new(&node.id.0); + let path = builder.create_string(node.path.to_string().as_str()); + let (user_attributes_type, user_attributes) = + mk_user_attributes(builder, node.user_attributes.as_ref())?; + let (node_data_type, node_data) = mk_node_data(builder, &node.node_data)?; + Ok(gen::NodeSnapshot::create( + builder, + &gen::NodeSnapshotArgs { + id: Some(&id), + path: Some(path), + user_attributes_type, + user_attributes, + node_data_type, + node_data, + }, + )) +} + +fn mk_user_attributes( + builder: &mut flatbuffers::FlatBufferBuilder<'_>, + atts: Option<&UserAttributesSnapshot>, +) -> IcechunkResult<( + gen::UserAttributesSnapshot, + Option>, +)> { + match atts { + Some(UserAttributesSnapshot::Inline(user_attributes)) => { + let data = builder + .create_vector(rmp_serde::to_vec(&user_attributes.parsed)?.as_slice()); + let inl = gen::InlineUserAttributes::create( + builder, + &gen::InlineUserAttributesArgs { data: Some(data) }, + ); + Ok((gen::UserAttributesSnapshot::Inline, Some(inl.as_union_value()))) } + Some(UserAttributesSnapshot::Ref(uatts)) => { + let id = gen::ObjectId12::new(&uatts.object_id.0); + let reference = gen::UserAttributesRef::create( + builder, + &gen::UserAttributesRefArgs { + object_id: Some(&id), + location: uatts.location, + }, + ); + Ok((gen::UserAttributesSnapshot::Reference, Some(reference.as_union_value()))) + } + None => Ok((gen::UserAttributesSnapshot::NONE, None)), + } +} + +fn mk_node_data( + builder: &mut FlatBufferBuilder<'_>, + node_data: &NodeData, +) -> IcechunkResult<( + gen::NodeData, + Option>, +)> { + match node_data { + NodeData::Array(zarr, manifests) => { + let zarr_metadata = + Some(builder.create_vector(rmp_serde::to_vec(zarr)?.as_slice())); + let manifests = manifests + .iter() + .map(|manref| { + let object_id = gen::ObjectId12::new(&manref.object_id.0); + let extents = manref + .extents + .iter() + .map(|range| gen::ChunkIndexRange::new(range.start, range.end)) + .collect::>(); + let extents = builder.create_vector(&extents); + gen::ManifestRef::create( + builder, + &gen::ManifestRefArgs { + object_id: Some(&object_id), + extents: Some(extents), + }, + ) + }) + .collect::>(); + let manifests = builder.create_vector(manifests.as_slice()); + Ok(( + gen::NodeData::Array, + Some( + gen::ArrayNodeData::create( + builder, + &gen::ArrayNodeDataArgs { + zarr_metadata, + manifests: Some(manifests), + }, + ) + .as_union_value(), + ), + )) + } + NodeData::Group => Ok(( + gen::NodeData::Group, + Some( + gen::GroupNodeData::create(builder, &gen::GroupNodeDataArgs {}) + 
.as_union_value(), + ), + )), } } @@ -434,15 +720,16 @@ mod tests { ZarrArrayMetadata { dimension_names: None, ..zarr_meta2.clone() }; let man_ref1 = ManifestRef { object_id: ObjectId::random(), - extents: ChunkIndices(vec![0, 0, 0])..ChunkIndices(vec![100, 100, 100]), + extents: ManifestExtents::new(&[0, 0, 0], &[100, 100, 100]), }; let man_ref2 = ManifestRef { object_id: ObjectId::random(), - extents: ChunkIndices(vec![0, 0, 0])..ChunkIndices(vec![100, 100, 100]), + extents: ManifestExtents::new(&[0, 0, 0], &[100, 100, 100]), }; let oid = ObjectId::random(); let node_ids = iter::repeat_with(NodeId::random).take(7).collect::>(); + // nodes must be sorted by path let nodes = vec![ NodeSnapshot { path: Path::root(), @@ -457,17 +744,15 @@ mod tests { node_data: NodeData::Group, }, NodeSnapshot { - path: "/b".try_into().unwrap(), - id: node_ids[2].clone(), + path: "/array2".try_into().unwrap(), + id: node_ids[5].clone(), user_attributes: None, - node_data: NodeData::Group, + node_data: NodeData::Array(zarr_meta2.clone(), vec![]), }, NodeSnapshot { - path: "/b/c".try_into().unwrap(), - id: node_ids[3].clone(), - user_attributes: Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"foo": "some inline"}"#).unwrap(), - )), + path: "/b".try_into().unwrap(), + id: node_ids[2].clone(), + user_attributes: None, node_data: NodeData::Group, }, NodeSnapshot { @@ -482,40 +767,44 @@ mod tests { vec![man_ref1.clone(), man_ref2.clone()], ), }, - NodeSnapshot { - path: "/array2".try_into().unwrap(), - id: node_ids[5].clone(), - user_attributes: None, - node_data: NodeData::Array(zarr_meta2.clone(), vec![]), - }, NodeSnapshot { path: "/b/array3".try_into().unwrap(), id: node_ids[6].clone(), user_attributes: None, node_data: NodeData::Array(zarr_meta3.clone(), vec![]), }, + NodeSnapshot { + path: "/b/c".try_into().unwrap(), + id: node_ids[3].clone(), + user_attributes: Some(UserAttributesSnapshot::Inline( + UserAttributes::try_new(br#"{"foo": "some inline"}"#).unwrap(), + )), + node_data: NodeData::Group, + }, ]; - let initial = Snapshot::initial(); + let initial = Snapshot::initial().unwrap(); let manifests = vec![ ManifestFileInfo { id: man_ref1.object_id.clone(), size_bytes: 1_000_000, - num_rows: 100_000, + num_chunk_refs: 100_000, }, ManifestFileInfo { id: man_ref2.object_id.clone(), size_bytes: 1_000_000, - num_rows: 100_000, + num_chunk_refs: 100_000, }, ]; let st = Snapshot::from_iter( - initial.id.clone(), + None, + Some(initial.id().clone()), String::default(), Default::default(), manifests, vec![], - nodes, - ); + nodes.into_iter().map(Ok::), + ) + .unwrap(); assert!(matches!( st.get_node(&"/nonexistent".try_into().unwrap()), @@ -530,7 +819,7 @@ mod tests { let node = st.get_node(&"/b/c".try_into().unwrap()).unwrap(); assert_eq!( node, - &NodeSnapshot { + NodeSnapshot { path: "/b/c".try_into().unwrap(), id: node_ids[3].clone(), user_attributes: Some(UserAttributesSnapshot::Inline( @@ -542,7 +831,7 @@ mod tests { let node = st.get_node(&Path::root()).unwrap(); assert_eq!( node, - &NodeSnapshot { + NodeSnapshot { path: Path::root(), id: node_ids[0].clone(), user_attributes: None, @@ -552,7 +841,7 @@ mod tests { let node = st.get_node(&"/b/array1".try_into().unwrap()).unwrap(); assert_eq!( node, - &NodeSnapshot { + NodeSnapshot { path: "/b/array1".try_into().unwrap(), id: node_ids[4].clone(), user_attributes: Some(UserAttributesSnapshot::Ref(UserAttributesRef { @@ -565,7 +854,7 @@ mod tests { let node = st.get_node(&"/array2".try_into().unwrap()).unwrap(); assert_eq!( node, - 
&NodeSnapshot { + NodeSnapshot { path: "/array2".try_into().unwrap(), id: node_ids[5].clone(), user_attributes: None, @@ -575,7 +864,7 @@ mod tests { let node = st.get_node(&"/b/array3".try_into().unwrap()).unwrap(); assert_eq!( node, - &NodeSnapshot { + NodeSnapshot { path: "/b/array3".try_into().unwrap(), id: node_ids[6].clone(), user_attributes: None, diff --git a/icechunk/src/format/transaction_log.rs b/icechunk/src/format/transaction_log.rs index d69a7c5f..9c995d46 100644 --- a/icechunk/src/format/transaction_log.rs +++ b/icechunk/src/format/transaction_log.rs @@ -1,77 +1,281 @@ -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::{ + collections::{BTreeMap, BTreeSet, HashMap, HashSet}, + iter, +}; -use crate::change_set::ChangeSet; +use flatbuffers::VerifierOptions; +use itertools::{Either, Itertools as _}; -use super::{ChunkIndices, NodeId, Path}; +use crate::{ + change_set::ChangeSet, + format::flatbuffers::gen::ObjectId12, + session::{Session, SessionResult}, +}; + +use super::{flatbuffers::gen, ChunkIndices, IcechunkResult, NodeId, Path, SnapshotId}; #[derive(Clone, Debug, PartialEq, Default)] pub struct TransactionLog { - // FIXME: better, more stable on-disk format - pub new_groups: HashSet, - pub new_arrays: HashSet, - pub deleted_groups: HashSet, - pub deleted_arrays: HashSet, - pub updated_user_attributes: HashSet, - pub updated_zarr_metadata: HashSet, - pub updated_chunks: HashMap>, + buffer: Vec, } impl TransactionLog { - pub fn new(cs: &ChangeSet) -> Self { - let new_groups = cs.new_groups().map(|(_, node_id)| node_id).cloned().collect(); - let new_arrays = cs.new_arrays().map(|(_, node_id)| node_id).cloned().collect(); - let deleted_groups = cs.deleted_groups().map(|(_, id)| id.clone()).collect(); - let deleted_arrays = cs.deleted_arrays().map(|(_, id)| id.clone()).collect(); + pub fn new(id: &SnapshotId, cs: &ChangeSet) -> Self { + let mut new_groups: Vec<_> = + cs.new_groups().map(|(_, id)| gen::ObjectId8::new(&id.0)).collect(); + let mut new_arrays: Vec<_> = + cs.new_arrays().map(|(_, id)| gen::ObjectId8::new(&id.0)).collect(); + let mut deleted_groups: Vec<_> = + cs.deleted_groups().map(|(_, id)| gen::ObjectId8::new(&id.0)).collect(); + let mut deleted_arrays: Vec<_> = + cs.deleted_arrays().map(|(_, id)| gen::ObjectId8::new(&id.0)).collect(); - let updated_user_attributes = - cs.user_attributes_updated_nodes().cloned().collect(); - let updated_zarr_metadata = cs.zarr_updated_arrays().cloned().collect(); + let mut updated_user_attributes: Vec<_> = cs + .user_attributes_updated_nodes() + .map(|id| gen::ObjectId8::new(&id.0)) + .collect(); + let mut updated_zarr_metadata: Vec<_> = + cs.zarr_updated_arrays().map(|id| gen::ObjectId8::new(&id.0)).collect(); + + // TODO: what's a good capacity? 
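+        // (starting guess; a transaction log records an id per created/deleted node plus the
+        // indices of every updated chunk, so it can outgrow this quickly)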
+ let mut builder = flatbuffers::FlatBufferBuilder::with_capacity(1_024 * 1_024); + + // these come sorted from the change set let updated_chunks = cs .chunk_changes() - .map(|(k, v)| (k.clone(), v.keys().cloned().collect())) - .collect(); + .map(|(node_id, chunks)| { + let node_id = gen::ObjectId8::new(&node_id.0); + let node_id = Some(&node_id); + let chunks = chunks + .keys() + .map(|indices| { + let coords = Some(builder.create_vector(indices.0.as_slice())); + gen::ChunkIndices::create( + &mut builder, + &gen::ChunkIndicesArgs { coords }, + ) + }) + .collect::>(); + let chunks = Some(builder.create_vector(chunks.as_slice())); + gen::ArrayUpdatedChunks::create( + &mut builder, + &gen::ArrayUpdatedChunksArgs { node_id, chunks }, + ) + }) + .collect::>(); + let updated_chunks = builder.create_vector(updated_chunks.as_slice()); + let updated_chunks = Some(updated_chunks); - Self { - new_groups, - new_arrays, - deleted_groups, - deleted_arrays, - updated_user_attributes, - updated_zarr_metadata, - updated_chunks, + new_groups.sort_by(|a, b| a.0.cmp(&b.0)); + new_arrays.sort_by(|a, b| a.0.cmp(&b.0)); + deleted_groups.sort_by(|a, b| a.0.cmp(&b.0)); + deleted_arrays.sort_by(|a, b| a.0.cmp(&b.0)); + updated_user_attributes.sort_by(|a, b| a.0.cmp(&b.0)); + updated_zarr_metadata.sort_by(|a, b| a.0.cmp(&b.0)); + + let new_groups = Some(builder.create_vector(new_groups.as_slice())); + let new_arrays = Some(builder.create_vector(new_arrays.as_slice())); + let deleted_groups = Some(builder.create_vector(deleted_groups.as_slice())); + let deleted_arrays = Some(builder.create_vector(deleted_arrays.as_slice())); + let updated_user_attributes = + Some(builder.create_vector(updated_user_attributes.as_slice())); + let updated_zarr_metadata = + Some(builder.create_vector(updated_zarr_metadata.as_slice())); + + let id = ObjectId12::new(&id.0); + let id = Some(&id); + let tx = gen::TransactionLog::create( + &mut builder, + &gen::TransactionLogArgs { + id, + new_groups, + new_arrays, + deleted_groups, + deleted_arrays, + updated_user_attributes, + updated_zarr_metadata, + updated_chunks, + }, + ); + + builder.finish(tx, Some("Ichk")); + let (mut buffer, offset) = builder.collapse(); + buffer.drain(0..offset); + buffer.shrink_to_fit(); + Self { buffer } + } + + pub fn from_buffer(buffer: Vec) -> IcechunkResult { + let _ = flatbuffers::root_with_opts::( + &ROOT_OPTIONS, + buffer.as_slice(), + )?; + Ok(TransactionLog { buffer }) + } + + pub fn new_groups(&self) -> impl Iterator + '_ { + self.root().new_groups().iter().map(From::from) + } + + pub fn new_arrays(&self) -> impl Iterator + '_ { + self.root().new_arrays().iter().map(From::from) + } + + pub fn deleted_groups(&self) -> impl Iterator + '_ { + self.root().deleted_groups().iter().map(From::from) + } + + pub fn deleted_arrays(&self) -> impl Iterator + '_ { + self.root().deleted_arrays().iter().map(From::from) + } + + pub fn updated_user_attributes(&self) -> impl Iterator + '_ { + self.root().updated_user_attributes().iter().map(From::from) + } + + pub fn updated_zarr_metadata(&self) -> impl Iterator + '_ { + self.root().updated_zarr_metadata().iter().map(From::from) + } + + pub fn updated_chunks( + &self, + ) -> impl Iterator + '_)> + '_ + { + self.root().updated_chunks().iter().map(|arr_chunks| { + let id: NodeId = arr_chunks.node_id().into(); + let chunks = arr_chunks.chunks().iter().map(|idx| idx.into()); + (id, chunks) + }) + } + + pub fn updated_chunks_for( + &self, + node: &NodeId, + ) -> impl Iterator + '_ { + let arr = self + .root() + 
.updated_chunks() + .lookup_by_key(node.0, |a, b| a.node_id().0.cmp(b)); + + match arr { + Some(arr) => Either::Left(arr.chunks().iter().map(From::from)), + None => Either::Right(iter::empty()), } } + pub fn group_created(&self, id: &NodeId) -> bool { + self.root().new_groups().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() + } + + pub fn array_created(&self, id: &NodeId) -> bool { + self.root().new_arrays().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() + } + + pub fn group_deleted(&self, id: &NodeId) -> bool { + self.root().deleted_groups().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() + } + + pub fn array_deleted(&self, id: &NodeId) -> bool { + self.root().deleted_arrays().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() + } + + pub fn user_attributes_updated(&self, id: &NodeId) -> bool { + self.root() + .updated_user_attributes() + .lookup_by_key(id.0, |a, b| a.0.cmp(b)) + .is_some() + } + + pub fn chunks_updated(&self, id: &NodeId) -> bool { + self.root() + .updated_chunks() + .lookup_by_key(id.0, |a, b| a.node_id().0.cmp(b)) + .is_some() + } + + pub fn zarr_metadata_updated(&self, id: &NodeId) -> bool { + self.root() + .updated_zarr_metadata() + .lookup_by_key(id.0, |a, b| a.0.cmp(b)) + .is_some() + } + + fn root(&self) -> gen::TransactionLog { + // without the unsafe version this is too slow + // if we try to keep the root in the TransactionLog struct, we would need a lifetime + unsafe { flatbuffers::root_unchecked::(&self.buffer) } + } + + pub fn bytes(&self) -> &[u8] { + self.buffer.as_slice() + } + pub fn len(&self) -> usize { - self.new_groups.len() - + self.new_arrays.len() - + self.deleted_groups.len() - + self.deleted_arrays.len() - + self.updated_user_attributes.len() - + self.updated_zarr_metadata.len() - + self.updated_chunks.values().map(|s| s.len()).sum::() + let root = self.root(); + root.new_groups().len() + + root.new_arrays().len() + + root.deleted_groups().len() + + root.deleted_arrays().len() + + root.updated_user_attributes().len() + + root.updated_zarr_metadata().len() + + root.updated_chunks().iter().map(|s| s.chunks().len()).sum::() } #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } +} + +static ROOT_OPTIONS: VerifierOptions = VerifierOptions { + max_depth: 64, + max_tables: 50_000_000, + max_apparent_size: 1 << 31, // taken from the default + ignore_missing_null_terminator: true, +}; + +#[derive(Debug, Default)] +pub struct DiffBuilder { + new_groups: HashSet, + new_arrays: HashSet, + deleted_groups: HashSet, + deleted_arrays: HashSet, + updated_user_attributes: HashSet, + updated_zarr_metadata: HashSet, + // we use sorted set here to simply move it to a diff without having to rebuild + updated_chunks: HashMap>, +} - pub fn merge(&mut self, other: &TransactionLog) { - self.new_groups.extend(other.new_groups.iter().cloned()); - self.new_arrays.extend(other.new_arrays.iter().cloned()); - self.deleted_groups.extend(other.deleted_groups.iter().cloned()); - self.deleted_arrays.extend(other.deleted_arrays.iter().cloned()); - self.updated_user_attributes - .extend(other.updated_user_attributes.iter().cloned()); - self.updated_zarr_metadata.extend(other.updated_zarr_metadata.iter().cloned()); - for (node, chunks) in other.updated_chunks.iter() { - self.updated_chunks - .entry(node.clone()) - .and_modify(|set| set.extend(chunks.iter().cloned())) - .or_insert_with(|| chunks.clone()); +impl DiffBuilder { + pub fn add_changes(&mut self, tx: &TransactionLog) { + self.new_groups.extend(tx.new_groups()); + self.new_arrays.extend(tx.new_arrays()); + 
self.deleted_groups.extend(tx.deleted_groups()); + self.deleted_arrays.extend(tx.deleted_arrays()); + self.updated_user_attributes.extend(tx.updated_user_attributes()); + self.updated_zarr_metadata.extend(tx.updated_zarr_metadata()); + + for (node, chunks) in tx.updated_chunks() { + match self.updated_chunks.get_mut(&node) { + Some(all_chunks) => { + all_chunks.extend(chunks); + } + None => { + self.updated_chunks.insert(node, BTreeSet::from_iter(chunks)); + } + } } } + + pub async fn to_diff(self, from: &Session, to: &Session) -> SessionResult { + let nodes: HashMap = from + .list_nodes() + .await? + .chain(to.list_nodes().await?) + .map_ok(|n| (n.id, n.path)) + .try_collect()?; + Ok(Diff::from_diff_builder(self, nodes)) + } } #[derive(Clone, Debug, PartialEq)] @@ -86,10 +290,7 @@ pub struct Diff { } impl Diff { - pub fn from_transaction_log( - tx: &TransactionLog, - nodes: HashMap, - ) -> Self { + fn from_diff_builder(tx: DiffBuilder, nodes: HashMap) -> Self { let new_groups = tx .new_groups .iter() @@ -128,9 +329,10 @@ impl Diff { .collect(); let updated_chunks = tx .updated_chunks - .iter() + .into_iter() .flat_map(|(node_id, chunks)| { - nodes.get(node_id).map(|n| (n.clone(), chunks.iter().cloned().collect())) + let path = nodes.get(&node_id).cloned()?; + Some((path, chunks)) }) .collect(); Self { diff --git a/icechunk/src/ops/gc.rs b/icechunk/src/ops/gc.rs index a0802b58..5af763ee 100644 --- a/icechunk/src/ops/gc.rs +++ b/icechunk/src/ops/gc.rs @@ -182,25 +182,33 @@ pub async fn garbage_collect( } if config.deletes_manifests() { - keep_manifests.extend(snap.manifest_files().keys().cloned()); + keep_manifests.extend(snap.manifest_files().map(|mf| mf.id)); } if config.deletes_chunks() { - for manifest_id in snap.manifest_files().keys() { - let manifest_info = snap.manifest_info(manifest_id).ok_or_else(|| { - IcechunkFormatError::from( - IcechunkFormatErrorKind::ManifestInfoNotFound { - manifest_id: manifest_id.clone(), - }, - ) - })?; + for manifest_id in snap.manifest_files().map(|mf| mf.id) { + let manifest_info = + snap.manifest_info(&manifest_id).ok_or_else(|| { + IcechunkFormatError::from( + IcechunkFormatErrorKind::ManifestInfoNotFound { + manifest_id: manifest_id.clone(), + }, + ) + })?; let manifest = asset_manager - .fetch_manifest(manifest_id, manifest_info.size_bytes) + .fetch_manifest(&manifest_id, manifest_info.size_bytes) .await?; let chunk_ids = manifest.chunk_payloads().filter_map(|payload| match payload { - ChunkPayload::Ref(chunk_ref) => Some(chunk_ref.id.clone()), - _ => None, + Ok(ChunkPayload::Ref(chunk_ref)) => Some(chunk_ref.id.clone()), + Ok(_) => None, + Err(err) => { + tracing::error!( + error = %err, + "Error in chunk payload iterator" + ); + None + } }); keep_chunks.extend(chunk_ids); } @@ -260,7 +268,7 @@ async fn pointed_snapshots<'a>( async move { let snap = asset_manager.fetch_snapshot(&snap_id).await?; let parents = Arc::clone(&asset_manager) - .snapshot_ancestry(snap.id()) + .snapshot_ancestry(&snap.id()) .await? 
.map_ok(|parent| parent.id) .err_into(); @@ -440,7 +448,7 @@ pub async fn expire_ref( let editable_snap = asset_manager.fetch_snapshot(&editable_snap).await?; let parent_id = editable_snap.parent_id(); - if editable_snap.id() == &root || Some(&root) == parent_id.as_ref() { + if editable_snap.id() == root || Some(&root) == parent_id.as_ref() { // Either the reference is the root, or it is pointing to the root as first parent // Nothing to do return Ok(ExpireRefResult::NothingToDo); @@ -448,7 +456,7 @@ pub async fn expire_ref( let root = asset_manager.fetch_snapshot(&root).await?; // TODO: add properties to the snapshot that tell us it was history edited - let new_snapshot = Arc::new(editable_snap.adopt(root.as_ref())); + let new_snapshot = Arc::new(root.adopt(&editable_snap)?); asset_manager.write_snapshot(new_snapshot).await?; Ok(ExpireRefResult::Done { diff --git a/icechunk/src/repository.rs b/icechunk/src/repository.rs index 3430c222..bd4f3657 100644 --- a/icechunk/src/repository.rs +++ b/icechunk/src/repository.rs @@ -22,7 +22,7 @@ use crate::{ error::ICError, format::{ snapshot::{ManifestFileInfo, NodeData, Snapshot, SnapshotInfo}, - transaction_log::{Diff, TransactionLog}, + transaction_log::{Diff, DiffBuilder}, IcechunkFormatError, IcechunkFormatErrorKind, ManifestId, NodeId, Path, SnapshotId, }, @@ -165,7 +165,7 @@ impl Repository { compression, ); // On create we need to create the default branch - let new_snapshot = Arc::new(Snapshot::initial()); + let new_snapshot = Arc::new(Snapshot::initial()?); asset_manager.write_snapshot(Arc::clone(&new_snapshot)).await?; update_branch( @@ -629,9 +629,9 @@ impl Repository { }) .collect(); - let full_log = fut - .try_fold(TransactionLog::default(), |mut res, log| { - res.merge(log.as_ref()); + let builder = fut + .try_fold(DiffBuilder::default(), |mut res, log| { + res.add_changes(log.as_ref()); ready(Ok(res)) }) .await?; @@ -641,7 +641,7 @@ impl Repository { self.readonly_session(&VersionInfo::SnapshotId(from)).await?; let to_session = self.readonly_session(&VersionInfo::SnapshotId(to_snap)).await?; - tx_to_diff(&full_log, &from_session, &to_session).await + builder.to_diff(&from_session, &to_session).await } else { Err(SessionErrorKind::BadSnapshotChainForDiff.into()) } @@ -704,53 +704,60 @@ impl Repository { if let Ok(snap) = asset_manager.fetch_snapshot(&snapshot_id).await { let snap_c = Arc::clone(&snap); for node in snap.iter_arc() { - match node.node_data { - NodeData::Group => {} - NodeData::Array(_, manifests) => { - for manifest in manifests { - if !loaded_manifests.contains(&manifest.object_id) { - let manifest_id = manifest.object_id; - if let Some(manifest_info) = - snap_c.manifest_info(&manifest_id) - { - if loaded_refs + manifest_info.num_rows - <= preload_config.max_total_refs() - && preload_config - .preload_if() - .matches(&node.path, manifest_info) + match node { + Err(err) => { + error!(error=%err, "Error retrieving snapshot nodes"); + } + Ok(node) => match node.node_data { + NodeData::Group => {} + NodeData::Array(_, manifests) => { + for manifest in manifests { + if !loaded_manifests.contains(&manifest.object_id) { + let manifest_id = manifest.object_id; + if let Some(manifest_info) = + snap_c.manifest_info(&manifest_id) { - let size_bytes = manifest_info.size_bytes; - let asset_manager = - Arc::clone(&asset_manager); - let manifest_id_c = manifest_id.clone(); - let path = node.path.clone(); - futures.push(async move { - trace!("Preloading manifest {} for array {}", &manifest_id_c, path); - if let Err(err) = 
asset_manager - .fetch_manifest( - &manifest_id_c, - size_bytes, - ) - .await - { - error!( - "Failure pre-loading manifest {}: {}", - &manifest_id_c, err - ); - } - }); - loaded_manifests.insert(manifest_id); - loaded_refs += manifest_info.num_rows; + if loaded_refs + manifest_info.num_chunk_refs + <= preload_config.max_total_refs() + && preload_config + .preload_if() + .matches(&node.path, &manifest_info) + { + let size_bytes = manifest_info.size_bytes; + let asset_manager = + Arc::clone(&asset_manager); + let manifest_id_c = manifest_id.clone(); + let path = node.path.clone(); + futures.push(async move { + trace!("Preloading manifest {} for array {}", &manifest_id_c, path); + if let Err(err) = asset_manager + .fetch_manifest( + &manifest_id_c, + size_bytes, + ) + .await + { + error!( + "Failure pre-loading manifest {}: {}", + &manifest_id_c, err + ); + } + }); + loaded_manifests.insert(manifest_id); + loaded_refs += + manifest_info.num_chunk_refs; + } } } } } - } + }, } } - } - futures.collect::<()>().await; - }.in_current_span()); + futures.collect::<()>().await; + }; + ().in_current_span() + }); } } @@ -776,7 +783,7 @@ impl ManifestPreloadCondition { }) .unwrap_or(false), ManifestPreloadCondition::NumRefs { from, to } => { - (*from, *to).contains(&info.num_rows) + (*from, *to).contains(&info.num_chunk_refs) } ManifestPreloadCondition::True => true, ManifestPreloadCondition::False => false, @@ -813,20 +820,6 @@ pub async fn raise_if_invalid_snapshot_id( Ok(()) } -pub async fn tx_to_diff( - tx: &TransactionLog, - from: &Session, - to: &Session, -) -> SessionResult { - let nodes: HashMap = from - .list_nodes() - .await? - .chain(to.list_nodes().await?) - .map(|n| (n.id, n.path)) - .collect(); - Ok(Diff::from_transaction_log(tx, nodes)) -} - #[cfg(test)] #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] mod tests { @@ -996,12 +989,20 @@ mod tests { // no name match assert!(!condition.matches( &"/array".try_into().unwrap(), - &ManifestFileInfo { id: ManifestId::random(), size_bytes: 1, num_rows: 1 } + &ManifestFileInfo { + id: ManifestId::random(), + size_bytes: 1, + num_chunk_refs: 1 + } )); // partial match only assert!(!condition.matches( &"/nottime".try_into().unwrap(), - &ManifestFileInfo { id: ManifestId::random(), size_bytes: 1, num_rows: 1 } + &ManifestFileInfo { + id: ManifestId::random(), + size_bytes: 1, + num_chunk_refs: 1 + } )); // too large to match assert!(!condition.matches( @@ -1009,7 +1010,7 @@ mod tests { &ManifestFileInfo { id: ManifestId::random(), size_bytes: 1, - num_rows: 1_000_000 + num_chunk_refs: 1_000_000 } )); } diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index fd0f144e..f610bc29 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -13,6 +13,7 @@ use bytes::Bytes; use chrono::{DateTime, Utc}; use err_into::ErrorInto; use futures::{future::Either, stream, FutureExt, Stream, StreamExt, TryStreamExt}; +use itertools::Itertools as _; use serde::{Deserialize, Serialize}; use thiserror::Error; use tokio::task::JoinError; @@ -25,7 +26,7 @@ use crate::{ error::ICError, format::{ manifest::{ - ChunkInfo, ChunkPayload, ChunkRef, Manifest, ManifestRef, + ChunkInfo, ChunkPayload, ChunkRef, Manifest, ManifestExtents, ManifestRef, VirtualChunkLocation, VirtualChunkRef, VirtualReferenceError, VirtualReferenceErrorKind, }, @@ -33,13 +34,13 @@ use crate::{ ManifestFileInfo, NodeData, NodeSnapshot, NodeType, Snapshot, SnapshotProperties, UserAttributesSnapshot, ZarrArrayMetadata, }, - transaction_log::{Diff, TransactionLog}, + 
transaction_log::{Diff, DiffBuilder, TransactionLog}, ByteRange, ChunkIndices, ChunkOffset, IcechunkFormatError, IcechunkFormatErrorKind, ManifestId, NodeId, ObjectId, Path, SnapshotId, }, metadata::UserAttributes, refs::{fetch_branch_tip, update_branch, RefError, RefErrorKind}, - repository::{tx_to_diff, RepositoryError, RepositoryErrorKind}, + repository::{RepositoryError, RepositoryErrorKind}, storage::{self, StorageErrorKind}, virtual_chunks::{VirtualChunkContainer, VirtualChunkResolver}, RepositoryConfig, Storage, StorageError, @@ -253,7 +254,8 @@ impl Session { /// Compute an overview of the current session changes pub async fn status(&self) -> SessionResult { - let tx_log = TransactionLog::new(&self.change_set); + // it doesn't really matter what Id we give to the tx log, it's not going to be persisted + let tx_log = TransactionLog::new(&SnapshotId::random(), &self.change_set); let from_session = Self::create_readonly_session( self.config().clone(), self.storage_settings.as_ref().clone(), @@ -262,7 +264,9 @@ impl Session { Arc::clone(&self.virtual_resolver), self.snapshot_id.clone(), ); - tx_to_diff(&tx_log, &from_session, self).await + let mut builder = DiffBuilder::default(); + builder.add_changes(&tx_log); + builder.to_diff(&from_session, self).await } /// Add a group to the store. @@ -306,8 +310,8 @@ impl Session { let nodes_iter: Vec = self .list_nodes() .await? - .filter(|node| node.path.starts_with(&parent.path)) - .collect(); + .filter_ok(|node| node.path.starts_with(&parent.path)) + .try_collect()?; for node in nodes_iter { match node.node_type() { NodeType::Group => { @@ -655,8 +659,11 @@ impl Session { #[instrument(skip(self))] pub async fn clear(&mut self) -> SessionResult<()> { // TODO: can this be a delete_group("/") instead? - let to_delete: Vec<(NodeType, Path)> = - self.list_nodes().await?.map(|node| (node.node_type(), node.path)).collect(); + let to_delete: Vec<(NodeType, Path)> = self + .list_nodes() + .await? + .map_ok(|node| (node.node_type(), node.path)) + .try_collect()?; for (t, p) in to_delete { match t { @@ -697,7 +704,7 @@ impl Session { #[instrument(skip(self))] pub async fn list_nodes( &self, - ) -> SessionResult + '_> { + ) -> SessionResult> + '_> { updated_nodes(&self.asset_manager, &self.change_set, &self.snapshot_id).await } @@ -925,7 +932,7 @@ impl Session { let current_snapshot = self.asset_manager.fetch_snapshot(&ref_data.snapshot).await?; let ancestry = Arc::clone(&self.asset_manager) - .snapshot_ancestry(current_snapshot.id()) + .snapshot_ancestry(¤t_snapshot.id()) .await? 
.map_ok(|meta| meta.id); let new_commits = @@ -989,10 +996,11 @@ async fn updated_chunk_iterator<'a>( ) -> SessionResult> + 'a> { let snapshot = asset_manager.fetch_snapshot(snapshot_id).await?; let nodes = futures::stream::iter(snapshot.iter_arc()); - let res = nodes.then(move |node| async move { - updated_node_chunks_iterator(asset_manager, change_set, snapshot_id, node).await + let res = nodes.and_then(move |node| async move { + Ok(updated_node_chunks_iterator(asset_manager, change_set, snapshot_id, node) + .await) }); - Ok(res.flatten()) + Ok(res.try_flatten()) } async fn updated_node_chunks_iterator<'a>( @@ -1082,10 +1090,10 @@ async fn verified_node_chunk_iterator<'a>( Ok(manifest) => { let old_chunks = manifest .iter(node_id_c.clone()) - .filter(move |(coord, _)| { + .filter_ok(move |(coord, _)| { !new_chunk_indices.contains(coord) }) - .map(move |(coord, payload)| ChunkInfo { + .map_ok(move |(coord, payload)| ChunkInfo { node: node_id_c2.clone(), coord, payload, @@ -1096,7 +1104,8 @@ async fn verified_node_chunk_iterator<'a>( node_id_c3, old_chunks, ); futures::future::Either::Left( - futures::stream::iter(old_chunks.map(Ok)), + futures::stream::iter(old_chunks) + .map_err(|e| e.into()), ) } // if we cannot even fetch the manifest, we generate a @@ -1163,12 +1172,16 @@ async fn updated_existing_nodes<'a>( asset_manager: &AssetManager, change_set: &'a ChangeSet, parent_id: &SnapshotId, -) -> SessionResult + 'a> { +) -> SessionResult> + 'a> { let updated_nodes = asset_manager .fetch_snapshot(parent_id) .await? .iter_arc() - .filter_map(move |node| change_set.update_existing_node(node)); + .filter_map_ok(move |node| change_set.update_existing_node(node)) + .map(|n| match n { + Ok(n) => Ok(n), + Err(err) => Err(SessionError::from(err)), + }); Ok(updated_nodes) } @@ -1179,10 +1192,10 @@ async fn updated_nodes<'a>( asset_manager: &AssetManager, change_set: &'a ChangeSet, parent_id: &SnapshotId, -) -> SessionResult + 'a> { +) -> SessionResult> + 'a> { Ok(updated_existing_nodes(asset_manager, change_set, parent_id) .await? 
- .chain(change_set.new_nodes_iterator())) + .chain(change_set.new_nodes_iterator().map(Ok))) } async fn get_node( @@ -1340,8 +1353,8 @@ impl<'a> FlushProcess<'a> { node_id: &NodeId, node_path: &Path, ) -> SessionResult<()> { - let mut from = ChunkIndices(vec![]); - let mut to = ChunkIndices(vec![]); + let mut from = vec![]; + let mut to = vec![]; let chunks = stream::iter( self.change_set .new_array_chunk_iterator(node_id, node_path) @@ -1358,8 +1371,10 @@ impl<'a> FlushProcess<'a> { ManifestFileInfo::new(new_manifest.as_ref(), new_manifest_size); self.manifest_files.insert(file_info); - let new_ref = - ManifestRef { object_id: new_manifest.id.clone(), extents: from..to }; + let new_ref = ManifestRef { + object_id: new_manifest.id().clone(), + extents: ManifestExtents::new(&from, &to), + }; self.manifest_refs .entry(node_id.clone()) @@ -1384,8 +1399,8 @@ impl<'a> FlushProcess<'a> { ) .await .map_ok(|(_path, chunk_info)| chunk_info); - let mut from = ChunkIndices(vec![]); - let mut to = ChunkIndices(vec![]); + let mut from = vec![]; + let mut to = vec![]; let updated_chunks = aggregate_extents(&mut from, &mut to, updated_chunks, |ci| &ci.coord); @@ -1398,8 +1413,10 @@ impl<'a> FlushProcess<'a> { ManifestFileInfo::new(new_manifest.as_ref(), new_manifest_size); self.manifest_files.insert(file_info); - let new_ref = - ManifestRef { object_id: new_manifest.id.clone(), extents: from..to }; + let new_ref = ManifestRef { + object_id: new_manifest.id().clone(), + extents: ManifestExtents::new(&from, &to), + }; self.manifest_refs .entry(node.id.clone()) .and_modify(|v| v.push(new_ref.clone())) @@ -1450,7 +1467,9 @@ async fn flush( // We first go through all existing nodes to see if we need to rewrite any manifests - for node in old_snapshot.iter().filter(|node| node.node_type() == NodeType::Array) { + for node in old_snapshot.iter().filter_ok(|node| node.node_type() == NodeType::Array) + { + let node = node?; trace!(path=%node.path, "Flushing node"); let node_id = &node.id; @@ -1462,13 +1481,13 @@ async fn flush( if flush_data.change_set.has_chunk_changes(node_id) { trace!(path=%node.path, "Node has changes, writing a new manifest"); // Array wasn't deleted and has changes in this session - flush_data.write_manifest_for_existing_node(node).await?; + flush_data.write_manifest_for_existing_node(&node).await?; } else { trace!(path=%node.path, "Node has no changes, keeping the previous manifest"); // Array wasn't deleted but has no changes in this session // FIXME: deal with the case of metadata shrinking an existing array, we should clear // extra chunks that no longer fit in the array - flush_data.copy_previous_manifest(node, old_snapshot.as_ref()); + flush_data.copy_previous_manifest(&node, old_snapshot.as_ref()); } } @@ -1480,13 +1499,15 @@ async fn flush( } trace!("Building new snapshot"); - let all_nodes = updated_nodes( + // gather and sort nodes: + // this is a requirement for Snapshot::from_iter + let mut all_nodes: Vec<_> = updated_nodes( flush_data.asset_manager.as_ref(), flush_data.change_set, flush_data.parent_id, ) .await? 
- .map(|node| { + .map_ok(|node| { let id = &node.id; // TODO: many clones if let NodeData::Array(meta, _) = node.node_data { @@ -1500,26 +1521,32 @@ async fn flush( } else { node } - }); + }) + .try_collect()?; + + all_nodes.sort_by(|a, b| a.path.cmp(&b.path)); let new_snapshot = Snapshot::from_iter( - old_snapshot.id().clone(), + None, + Some(old_snapshot.id().clone()), message.to_string(), Some(properties), flush_data.manifest_files.into_iter().collect(), vec![], - all_nodes, - ); + all_nodes.into_iter().map(Ok::<_, Infallible>), + )?; - if new_snapshot.flushed_at() <= old_snapshot.flushed_at() { + let new_ts = new_snapshot.flushed_at()?; + let old_ts = old_snapshot.flushed_at()?; + if new_ts <= old_ts { tracing::error!( - new_timestamp = %new_snapshot.flushed_at(), - old_timestamp = %old_snapshot.flushed_at(), + new_timestamp = %new_ts, + old_timestamp = %old_ts, "Snapshot timestamp older than parent, aborting commit" ); return Err(SessionErrorKind::InvalidSnapshotTimestampOrdering { - parent: *old_snapshot.flushed_at(), - child: *new_snapshot.flushed_at(), + parent: old_ts, + child: new_ts, } .into()); } @@ -1530,15 +1557,15 @@ async fn flush( let snapshot_timestamp = tokio::spawn( async move { asset_manager.write_snapshot(Arc::clone(&new_snapshot_c)).await?; - asset_manager.get_snapshot_last_modified(new_snapshot_c.id()).await + asset_manager.get_snapshot_last_modified(&new_snapshot_c.id()).await } .in_current_span(), ); trace!(transaction_log_id = %new_snapshot.id(), "Creating transaction log"); - // FIXME: this should execute in a non-blocking context - let tx_log = TransactionLog::new(flush_data.change_set); let new_snapshot_id = new_snapshot.id(); + // FIXME: this should execute in a non-blocking context + let tx_log = TransactionLog::new(&new_snapshot_id, flush_data.change_set); flush_data .asset_manager @@ -1552,15 +1579,15 @@ async fn flush( // Fail if there is too much clock difference with the object store // This is to prevent issues with snapshot ordering and expiration - if (snapshot_timestamp - new_snapshot.flushed_at()).num_seconds().abs() > 600 { + if (snapshot_timestamp - new_ts).num_seconds().abs() > 600 { tracing::error!( - snapshot_timestamp = %new_snapshot.flushed_at(), + snapshot_timestamp = %new_ts, object_store_timestamp = %snapshot_timestamp, "Snapshot timestamp drifted from object store clock, aborting commit" ); return Err(SessionErrorKind::InvalidSnapshotTimestamp { object_store_time: snapshot_timestamp, - snapshot_time: *new_snapshot.flushed_at(), + snapshot_time: new_ts, } .into()); } @@ -1640,15 +1667,15 @@ async fn fetch_manifest( /// /// Yes, this is horrible. fn aggregate_extents<'a, T: std::fmt::Debug, E>( - from: &'a mut ChunkIndices, - to: &'a mut ChunkIndices, + from: &'a mut Vec, + to: &'a mut Vec, it: impl Stream> + 'a, extract_index: impl for<'b> Fn(&'b T) -> &'b ChunkIndices + 'a, ) -> impl Stream> + 'a { // we initialize the destination with an empty array, because we don't know // the dimensions of the array yet. 
     // the dimensions of the array yet. On the first element we will re-initialize
-    from.0 = Vec::new();
-    to.0 = Vec::new();
+    *from = Vec::new();
+    *to = Vec::new();
     it.map_ok(move |t| {
         // these are the coordinates for the chunk
         let idx = extract_index(&t);
@@ -1657,20 +1684,20 @@ fn aggregate_extents<'a, T: std::fmt::Debug, E>(
         // we initialize with the value of the first element
         // this obviously doesn't work for empty streams
         // but we never generate manifests for them
-        if from.0.is_empty() {
-            from.0 = idx.0.clone();
+        if from.is_empty() {
+            *from = idx.0.clone();
             // important to remember that `to` is not inclusive, so we need +1
-            to.0 = idx.0.iter().map(|n| n + 1).collect();
+            *to = idx.0.iter().map(|n| n + 1).collect();
         } else {
             // We need to iterate over coordinates, and update the
             // minimum and maximum for each if needed
             for (coord_idx, value) in idx.0.iter().enumerate() {
-                if let Some(from_current) = from.0.get_mut(coord_idx) {
+                if let Some(from_current) = from.get_mut(coord_idx) {
                     if value < from_current {
                         *from_current = *value
                     }
                 }
-                if let Some(to_current) = to.0.get_mut(coord_idx) {
+                if let Some(to_current) = to.get_mut(coord_idx) {
                     let range_value = value + 1;
                     if range_value > *to_current {
                         *to_current = range_value
@@ -1692,6 +1719,7 @@ mod tests {
             basic_solver::{BasicConflictSolver, VersionSelection},
             detector::ConflictDetector,
         },
+        format::manifest::ManifestExtents,
         metadata::{
             ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer,
         },
@@ -1824,19 +1852,19 @@ mod tests {
         #[strategy(proptest::collection::vec(chunk_indices(3, 0..1_000_000), 1..50))]
         indices: Vec<ChunkIndices>,
     ) {
-        let mut from = ChunkIndices(vec![]);
-        let mut to = ChunkIndices(vec![]);
+        let mut from = vec![];
+        let mut to = vec![];
 
-        let expected_from = ChunkIndices(vec![
+        let expected_from = vec![
             indices.iter().map(|i| i.0[0]).min().unwrap(),
             indices.iter().map(|i| i.0[1]).min().unwrap(),
             indices.iter().map(|i| i.0[2]).min().unwrap(),
-        ]);
-        let expected_to = ChunkIndices(vec![
+        ];
+        let expected_to = vec![
             indices.iter().map(|i| i.0[0]).max().unwrap() + 1,
             indices.iter().map(|i| i.0[1]).max().unwrap() + 1,
             indices.iter().map(|i| i.0[2]).max().unwrap() + 1,
-        ]);
+        ];
         let _ = aggregate_extents(
             &mut from,
             &mut to,
@@ -1878,7 +1906,7 @@ mod tests {
         let manifest =
             Manifest::from_iter(vec![chunk1.clone(), chunk2.clone()]).await?.unwrap();
         let manifest = Arc::new(manifest);
-        let manifest_id = &manifest.id;
+        let manifest_id = manifest.id();
         let manifest_size = asset_manager.write_manifest(Arc::clone(&manifest)).await?;
 
         let zarr_meta1 = ZarrArrayMetadata {
@@ -1904,7 +1932,7 @@ mod tests {
         };
         let manifest_ref = ManifestRef {
            object_id: manifest_id.clone(),
-            extents: ChunkIndices(vec![0, 0, 0])..ChunkIndices(vec![1, 1, 2]),
+            extents: ManifestExtents::new(&[0, 0, 0], &[1, 1, 2]),
         };
         let array1_path: Path = "/array1".try_into().unwrap();
         let node_id = NodeId::random();
@@ -1925,16 +1953,17 @@ mod tests {
             },
         ];
 
-        let initial = Snapshot::initial();
+        let initial = Snapshot::initial().unwrap();
         let manifests = vec![ManifestFileInfo::new(manifest.as_ref(), manifest_size)];
         let snapshot = Arc::new(Snapshot::from_iter(
-            initial.id().clone(),
+            None,
+            Some(initial.id().clone()),
            "message".to_string(),
             None,
             manifests,
             vec![],
-            nodes.iter().cloned(),
-        ));
+            nodes.iter().cloned().map(Ok::<NodeSnapshot, Infallible>),
+        )?);
         asset_manager.write_snapshot(Arc::clone(&snapshot)).await?;
         update_branch(
             storage.as_ref(),
@@ -2241,7 +2270,7 @@ mod tests {
             NodeData::Array(_, manifests) => {
                 assert_eq!(
                     manifests.first().unwrap().extents,
-                    ChunkIndices(vec![0, 0, 0])..ChunkIndices(vec![1, 1, 2])
+                    ManifestExtents::new(&[0, 0, 0], &[1, 1, 2])
                 );
             }
             NodeData::Group => {
@@ -2280,7 +2309,7 @@ mod tests {
             NodeData::Array(_, manifests) => {
                 assert_eq!(
                     manifests.first().unwrap().extents,
-                    ChunkIndices(vec![0, 0, 0])..ChunkIndices(vec![1, 1, 1])
+                    ManifestExtents::new(&[0, 0, 0], &[1, 1, 1])
                 );
             }
             NodeData::Group => {
diff --git a/icechunk/src/store.rs b/icechunk/src/store.rs
index 5518a66e..12ec9134 100644
--- a/icechunk/src/store.rs
+++ b/icechunk/src/store.rs
@@ -700,7 +700,7 @@ impl Store {
         let repository = Arc::clone(&self.session).read_owned().await;
         for node in repository.list_nodes().await? {
             // TODO: handle non-utf8?
-            let meta_key = Key::Metadata { node_path: node.path }.to_string();
+            let meta_key = Key::Metadata { node_path: node?.path }.to_string();
             if is_prefix_match(&meta_key, prefix) {
                 if strip_prefix {
                     yield meta_key.trim_start_matches(prefix).trim_start_matches("/").to_string();
diff --git a/icechunk/tests/test_concurrency.rs b/icechunk/tests/test_concurrency.rs
index 880f5eaf..41418fd4 100644
--- a/icechunk/tests/test_concurrency.rs
+++ b/icechunk/tests/test_concurrency.rs
@@ -165,7 +165,7 @@ async fn list_task(ds: Arc<RwLock<Session>>, barrier: Arc<Barrier>) {
             .list_nodes()
             .await
             .expect("list_nodes failed")
-            .map(|n| n.path.to_string())
+            .map(|n| n.unwrap().path.to_string())
             .collect::<HashSet<_>>();
 
         assert_eq!(expected_nodes, nodes);
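
For reference, the extent aggregation exercised above reduces to an elementwise minimum over all chunk coordinates for `from` and an elementwise exclusive maximum (max + 1) for `to`. A minimal standalone sketch of that reduction, independent of the icechunk types (the `extents` helper below is illustrative only, not part of this patch):

    // Compute the bounding box of a set of chunk coordinates:
    // `from` is the elementwise minimum, `to` the exclusive elementwise maximum.
    fn extents(coords: &[Vec<u32>]) -> Option<(Vec<u32>, Vec<u32>)> {
        let first = coords.first()?;
        let mut from = first.clone();
        let mut to: Vec<u32> = first.iter().map(|n| n + 1).collect();
        for idx in &coords[1..] {
            for (axis, value) in idx.iter().enumerate() {
                if *value < from[axis] {
                    from[axis] = *value;
                }
                if value + 1 > to[axis] {
                    to[axis] = value + 1;
                }
            }
        }
        Some((from, to))
    }

    // e.g. extents(&[vec![0, 3], vec![2, 1]]) == Some((vec![0, 1], vec![3, 4]))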