Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to conditional PUT for branch updates #755

Merged
merged 2 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions docs/docs/icechunk-python/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ It allows you to configure the following parameters:

The threshold for when to inline a chunk into a manifest instead of storing it as a separate object in the storage backend.

### [`unsafe_overwrite_refs`](./reference.md#icechunk.RepositoryConfig.unsafe_overwrite_refs)

Whether to allow overwriting references in the repository.

### [`get_partial_values_concurrency`](./reference.md#icechunk.RepositoryConfig.get_partial_values_concurrency)

The number of concurrent requests to make when getting partial values from storage.
Expand Down
25 changes: 0 additions & 25 deletions icechunk-python/python/icechunk/_icechunk_python.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,6 @@ class RepositoryConfig:
def __init__(
self,
inline_chunk_threshold_bytes: int | None = None,
unsafe_overwrite_refs: bool | None = None,
get_partial_values_concurrency: int | None = None,
compression: CompressionConfig | None = None,
caching: CachingConfig | None = None,
Expand All @@ -585,8 +584,6 @@ class RepositoryConfig:
----------
inline_chunk_threshold_bytes: int | None
The maximum size of a chunk that will be stored inline in the repository.
unsafe_overwrite_refs: bool | None
Whether to allow overwriting references in the repository.
get_partial_values_concurrency: int | None
The number of concurrent requests to make when getting partial values from storage.
compression: CompressionConfig | None
Expand Down Expand Up @@ -618,28 +615,6 @@ class RepositoryConfig:
"""
...
@property
def unsafe_overwrite_refs(self) -> bool | None:
"""
Whether to allow overwriting references in the repository.

Returns
-------
bool | None
Whether to allow overwriting references in the repository.
"""
...
@unsafe_overwrite_refs.setter
def unsafe_overwrite_refs(self, value: bool | None) -> None:
"""
Set whether to allow overwriting references in the repository.

Parameters
----------
value: bool | None
Whether to allow overwriting references in the repository.
"""
...
@property
def get_partial_values_concurrency(self) -> int | None:
"""
The number of concurrent requests to make when getting partial values from storage.
Expand Down
11 changes: 2 additions & 9 deletions icechunk-python/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -969,8 +969,6 @@ pub struct PyRepositoryConfig {
#[pyo3(get, set)]
pub inline_chunk_threshold_bytes: Option<u16>,
#[pyo3(get, set)]
pub unsafe_overwrite_refs: Option<bool>,
#[pyo3(get, set)]
pub get_partial_values_concurrency: Option<u16>,
#[pyo3(get, set)]
pub compression: Option<Py<PyCompressionConfig>>,
Expand All @@ -996,7 +994,6 @@ impl From<&PyRepositoryConfig> for RepositoryConfig {
fn from(value: &PyRepositoryConfig) -> Self {
Python::with_gil(|py| Self {
inline_chunk_threshold_bytes: value.inline_chunk_threshold_bytes,
unsafe_overwrite_refs: value.unsafe_overwrite_refs,
get_partial_values_concurrency: value.get_partial_values_concurrency,
compression: value.compression.as_ref().map(|c| (&*c.borrow(py)).into()),
caching: value.caching.as_ref().map(|c| (&*c.borrow(py)).into()),
Expand All @@ -1014,7 +1011,6 @@ impl From<RepositoryConfig> for PyRepositoryConfig {
#[allow(clippy::expect_used)]
Python::with_gil(|py| Self {
inline_chunk_threshold_bytes: value.inline_chunk_threshold_bytes,
unsafe_overwrite_refs: value.unsafe_overwrite_refs,
get_partial_values_concurrency: value.get_partial_values_concurrency,
compression: value.compression.map(|c| {
Py::new(py, Into::<PyCompressionConfig>::into(c))
Expand Down Expand Up @@ -1049,11 +1045,10 @@ impl PyRepositoryConfig {
}

#[new]
#[pyo3(signature = (inline_chunk_threshold_bytes = None, unsafe_overwrite_refs = None, get_partial_values_concurrency = None, compression = None, caching = None, storage = None, virtual_chunk_containers = None, manifest = None))]
#[pyo3(signature = (inline_chunk_threshold_bytes = None, get_partial_values_concurrency = None, compression = None, caching = None, storage = None, virtual_chunk_containers = None, manifest = None))]
#[allow(clippy::too_many_arguments)]
pub fn new(
inline_chunk_threshold_bytes: Option<u16>,
unsafe_overwrite_refs: Option<bool>,
get_partial_values_concurrency: Option<u16>,
compression: Option<Py<PyCompressionConfig>>,
caching: Option<Py<PyCachingConfig>>,
Expand All @@ -1063,7 +1058,6 @@ impl PyRepositoryConfig {
) -> Self {
Self {
inline_chunk_threshold_bytes,
unsafe_overwrite_refs,
get_partial_values_concurrency,
compression,
caching,
Expand Down Expand Up @@ -1129,9 +1123,8 @@ impl PyRepositoryConfig {
}));
// TODO: virtual chunk containers
format!(
r#"RepositoryConfig(inline_chunk_threshold_bytes={inl}, unsafe_overwrite_refs={uns}, get_partial_values_concurrency={partial}, compression={comp}, caching={caching}, storage={storage}, manifest={manifest})"#,
r#"RepositoryConfig(inline_chunk_threshold_bytes={inl}, get_partial_values_concurrency={partial}, compression={comp}, caching={caching}, storage={storage}, manifest={manifest})"#,
inl = format_option_to_string(self.inline_chunk_threshold_bytes),
uns = format_option(self.unsafe_overwrite_refs.map(format_bool)),
partial = format_option_to_string(self.get_partial_values_concurrency),
comp = comp,
caching = caching,
Expand Down
3 changes: 2 additions & 1 deletion icechunk-python/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use icechunk::{
manifest::{Checksum, SecondsSinceEpoch, VirtualChunkLocation, VirtualChunkRef},
ChunkLength, ChunkOffset,
},
storage::ETag,
store::{StoreError, StoreErrorKind},
Store,
};
Expand Down Expand Up @@ -37,7 +38,7 @@ enum ChecksumArgument {
impl From<ChecksumArgument> for Checksum {
fn from(value: ChecksumArgument) -> Self {
match value {
ChecksumArgument::String(etag) => Checksum::ETag(etag),
ChecksumArgument::String(etag) => Checksum::ETag(ETag(etag)),
ChecksumArgument::Datetime(date_time) => {
Checksum::LastModified(SecondsSinceEpoch(date_time.timestamp() as u32))
}
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
9 changes: 4 additions & 5 deletions icechunk-python/tests/data/test-repo/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
inline_chunk_threshold_bytes: 12
unsafe_overwrite_refs: null
get_partial_values_concurrency: null
compression: null
caching: null
Expand All @@ -13,6 +12,10 @@ virtual_chunk_containers:
endpoint_url: https://fly.storage.tigris.dev
anonymous: false
allow_http: false
az:
name: az
url_prefix: az
store: !azure {}
file:
name: file
url_prefix: file
Expand All @@ -21,10 +24,6 @@ virtual_chunk_containers:
name: gcs
url_prefix: gcs
store: !gcs {}
az:
name: az
url_prefix: az
store: !azure {}
s3:
name: s3
url_prefix: s3://
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"snapshot":"NXH3M0HJ7EEJ0699DPP0"}

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"snapshot":"XDZ162T1TYBEJMK99NPG"}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"snapshot":"394QWZDXAY74HP6Q8P3G"}
{"snapshot":"4QF8JA0YPDN51MHSSYVG"}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"snapshot":"TNE0TX645A2G7VTXFA1G"}
{"snapshot":"XDZ162T1TYBEJMK99NPG"}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"snapshot":"394QWZDXAY74HP6Q8P3G"}
{"snapshot":"4QF8JA0YPDN51MHSSYVG"}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
6 changes: 2 additions & 4 deletions icechunk-python/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def test_virtual_chunk_containers() -> None:
container = icechunk.VirtualChunkContainer("custom", "s3://", store_config)
config.set_virtual_chunk_container(container)
assert re.match(
r"RepositoryConfig\(inline_chunk_threshold_bytes=None, unsafe_overwrite_refs=None, get_partial_values_concurrency=None, compression=None, caching=None, storage=None, manifest=.*\)",
r"RepositoryConfig\(inline_chunk_threshold_bytes=None, get_partial_values_concurrency=None, compression=None, caching=None, storage=None, manifest=.*\)",
repr(config),
)
assert config.virtual_chunk_containers
Expand Down Expand Up @@ -158,11 +158,9 @@ def test_can_change_deep_config_values() -> None:
)
config = icechunk.RepositoryConfig(
inline_chunk_threshold_bytes=11,
unsafe_overwrite_refs=False,
compression=icechunk.CompressionConfig(level=0),
)
config.inline_chunk_threshold_bytes = 5
config.unsafe_overwrite_refs = True
config.get_partial_values_concurrency = 42
config.compression = icechunk.CompressionConfig(level=8)
config.compression.level = 2
Expand All @@ -180,7 +178,7 @@ def test_can_change_deep_config_values() -> None:
)

assert re.match(
r"RepositoryConfig\(inline_chunk_threshold_bytes=5, unsafe_overwrite_refs=True, get_partial_values_concurrency=42, compression=CompressionConfig\(algorithm=None, level=2\), caching=CachingConfig\(num_snapshot_nodes=None, num_chunk_refs=8, num_transaction_changes=None, num_bytes_attributes=None, num_bytes_chunks=None\), storage=StorageSettings\(concurrency=StorageConcurrencySettings\(max_concurrent_requests_for_object=5, ideal_concurrent_request_size=1000000\)\), manifest=.*\)",
r"RepositoryConfig\(inline_chunk_threshold_bytes=5, get_partial_values_concurrency=42, compression=CompressionConfig\(algorithm=None, level=2\), caching=CachingConfig\(num_snapshot_nodes=None, num_chunk_refs=8, num_transaction_changes=None, num_bytes_attributes=None, num_bytes_chunks=None\), storage=StorageSettings\(concurrency=StorageConcurrencySettings\(max_concurrent_requests_for_object=5, ideal_concurrent_request_size=1000000\)\), manifest=.*\)",
repr(config),
)
repo = icechunk.Repository.open(
Expand Down
1 change: 0 additions & 1 deletion icechunk/examples/multithreaded_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let storage = new_in_memory_storage().await?;
let config = RepositoryConfig {
inline_chunk_threshold_bytes: Some(128),
unsafe_overwrite_refs: Some(true),
..Default::default()
};
let repo = Repository::create(Some(config), storage, HashMap::new()).await?;
Expand Down
11 changes: 0 additions & 11 deletions icechunk/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,6 @@ impl ManifestConfig {
pub struct RepositoryConfig {
/// Chunks smaller than this will be stored inline in the manifst
pub inline_chunk_threshold_bytes: Option<u16>,
/// Unsafely overwrite refs on write. This is not recommended, users should only use it at their
/// own risk in object stores for which we don't support write-object-if-not-exists. There is
/// the possibility of race conditions if this variable is set to true and there are concurrent
/// commit attempts.
pub unsafe_overwrite_refs: Option<bool>,

/// Concurrency used by the get_partial_values operation to fetch different keys in parallel
pub get_partial_values_concurrency: Option<u16>,
Expand All @@ -236,9 +231,6 @@ impl RepositoryConfig {
pub fn inline_chunk_threshold_bytes(&self) -> u16 {
self.inline_chunk_threshold_bytes.unwrap_or(512)
}
pub fn unsafe_overwrite_refs(&self) -> bool {
self.unsafe_overwrite_refs.unwrap_or(false)
}
pub fn get_partial_values_concurrency(&self) -> u16 {
self.get_partial_values_concurrency.unwrap_or(10)
}
Expand Down Expand Up @@ -268,9 +260,6 @@ impl RepositoryConfig {
inline_chunk_threshold_bytes: other
.inline_chunk_threshold_bytes
.or(self.inline_chunk_threshold_bytes),
unsafe_overwrite_refs: other
.unsafe_overwrite_refs
.or(self.unsafe_overwrite_refs),
get_partial_values_concurrency: other
.get_partial_values_concurrency
.or(self.get_partial_values_concurrency),
Expand Down
4 changes: 2 additions & 2 deletions icechunk/src/format/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ fn ref_to_payload(

fn checksum(payload: &gen::ChunkRef<'_>) -> Option<Checksum> {
if let Some(etag) = payload.checksum_etag() {
Some(Checksum::ETag(etag.to_string()))
Some(Checksum::ETag(ETag(etag.to_string())))
} else if payload.checksum_last_modified() > 0 {
Some(Checksum::LastModified(SecondsSinceEpoch(payload.checksum_last_modified())))
} else {
Expand Down Expand Up @@ -398,7 +398,7 @@ fn mk_chunk_ref<'bldr>(
Some(cs) => match cs {
Checksum::LastModified(_) => None,
Checksum::ETag(etag) => {
Some(builder.create_string(etag.as_str()))
Some(builder.create_string(etag.0.as_str()))
}
},
None => None,
Expand Down
2 changes: 1 addition & 1 deletion icechunk/src/ops/gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ pub async fn expire(
ExpireRefResult::RefIsExpired => match &r {
Ref::Tag(name) => {
if expired_tags == ExpiredRefAction::Delete {
delete_tag(storage, storage_settings, name.as_str(), false)
delete_tag(storage, storage_settings, name.as_str())
.await
.map_err(GCError::Ref)?;
result.deleted_refs.insert(r);
Expand Down
Loading