Skip to content

Commit

Permalink
perf: Adjust coalesce for [<tiny range>, <massive range>] (#19730)
Browse files: browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored Nov 11, 2024
1 parent c4f0cc2 commit 62ef918
Showing 1 changed file with 28 additions and 2 deletions.
30 changes: 28 additions & 2 deletions crates/polars-io/src/cloud/polars_object_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,13 @@ fn merge_ranges(ranges: &[Range<usize>]) -> impl Iterator<Item = (Range<usize>,
let should_merge =
is_overlapping // Always merge if overlapping
|| (
// Don't merge if the result size is not closer to the `chunk_size`
new_merged.len().abs_diff(chunk_size) < current_merged_range.len().abs_diff(chunk_size)
(
// Either one range is extremely small compared to the other, with a limit of 8MiB..
range.len().min(current_merged_range.len())
< (range.len().max(current_merged_range.len()) / 128).min(8 * 1024 * 1024)
// ..or the new size is closer to the chunk_size
|| new_merged.len().abs_diff(chunk_size) < current_merged_range.len().abs_diff(chunk_size)
)
&& (
// Either the gap is less than 1MiB..
distance <= 1024 * 1024
Expand Down Expand Up @@ -439,6 +444,27 @@ mod tests {
[(0..66584576, 0), (66584576..133169152, 2)]
);

assert_eq!(
merge_ranges(&[
0..1,
1..128 * 1024 * 1024,
1 + 128 * 1024 * 1024..2 + 128 * 1024 * 1024,
2 + 128 * 1024 * 1024..256 * 1024 * 1024
])
.collect::<Vec<_>>(),
[
(0..67108865, 0),
(67108865..134217730, 3),
(134217730..201326593, 0),
(201326593..268435456, 4)
]
);

assert_eq!(
merge_ranges(&[0..1, 1..128 * 1024 * 1024]).collect::<Vec<_>>(),
[(0..67108864, 0), (67108864..134217728, 2)]
);

// <= 1MiB gap, merge
assert_eq!(
merge_ranges(&[0..1, 1024 * 1024 + 1..1024 * 1024 + 2]).collect::<Vec<_>>(),
Expand Down

0 comments on commit 62ef918

Please sign in to comment.