Commit

Merge remote-tracking branch 'origin/main' into hoytak/250122-move-xet-runtime
hoytak committed Jan 25, 2025
2 parents 36a6bba + ebb9ff5 commit c95980a
Showing 23 changed files with 912 additions and 179 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/release.yml
@@ -27,9 +27,11 @@ jobs:
platform:
- runner: ubuntu-22.04
target: x86_64
manylinux: auto
features: "--features openssl_vendored"
- runner: ubuntu-22.04
target: aarch64
manylinux: manylinux_2_28
features: "--features openssl_vendored"
steps:
- uses: actions/checkout@v4
@@ -42,7 +44,7 @@ jobs:
target: ${{ matrix.platform.target }}
args: --release ${{ matrix.platform.features }} --out dist
sccache: 'true'
manylinux: auto
manylinux: ${{ matrix.platform.manylinux }}
working-directory: hf_xet
before-script-linux: |
if command -v apt-get &> /dev/null; then
17 changes: 14 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default.

12 changes: 11 additions & 1 deletion cas_object/Cargo.toml
@@ -3,6 +3,16 @@ name = "cas_object"
version = "0.1.0"
edition = "2021"

[[bench]]
name = "compression_bench"
harness = false
bench = true

[[bench]]
name = "bg_split_regroup_bench"
harness = false
bench = true

[dependencies]
thiserror = "2.0"
error_printer = { path = "../error_printer" }
@@ -17,4 +27,4 @@ blake3 = "1.5.4"
futures = { version = "0.3.31" }
tokio-util = {version = "0.7.12", features = ["io"]}
tokio = {version = "1.41.1" }

half = "2.4.1"
151 changes: 151 additions & 0 deletions cas_object/benches/bg_split_regroup_bench.rs
@@ -0,0 +1,151 @@
use std::time::Instant;

use byte_grouping::bg4::bg4_split_separate;
use cas_object::*;
use rand::Rng;

use crate::byte_grouping::bg4;

// Benchmark results on Apple M2 Max

// split_separate speed: 2738.11 MB/s
// regroup_separate speed: 2708.04 MB/s
// split_together speed: 24439.23 MB/s
// regroup_together speed: 32791.84 MB/s
// regroup_together_cw_4 speed: 33268.32 MB/s
// regroup_together_cw_8 speed: 13925.35 MB/s
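
// A note on naming (inferred from the signatures used below, not stated in this commit): BG4
// ("byte grouping 4") deinterleaves a byte stream into four groups by byte position modulo 4, and
// regrouping is the inverse. The `_separate` variants handle the groups as four separate Vec<u8>s,
// the `_together` variants keep them in one contiguous buffer, and the `_cw_4`/`_cw_8` variants
// appear to differ in how many bytes they write per store.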

fn main() {
    let mut rng = rand::thread_rng();

    let n = 64 * 1024 + 123; // 64 KiB of data, plus a few extra bytes so the length is not a multiple of 4.
    let random_u8s: Vec<_> = (0..n).map(|_| rng.gen_range(0..255)).collect();

    bench_split_separate(random_u8s.clone());

    let groups = bg4_split_separate(&random_u8s);
    bench_regroup_separate(groups);

    bench_split_together(random_u8s.clone());
    bench_regroup_together(random_u8s.clone());
    bench_regroup_together_combined_write_4(random_u8s.clone());
    bench_regroup_together_combined_write_8(random_u8s.clone());
}

fn bench_speed_1(mut data: [Vec<u8>; 4], num_bytes: usize, f: fn(&[Vec<u8>]) -> u8, description: &str) {
    const ITER: usize = 100000;

    let mut sum = 0u64;

    let s = Instant::now();
    for _ in 0..ITER {
        sum += f(&data) as u64;
        // Prevent compilers from optimizing away iterations.
        data[0][0] = data[0][0].wrapping_mul(5).wrapping_add(13);
    }
    let runtime = s.elapsed().as_secs_f64();

    println!("{description} speed: {:.2} MB/s", num_bytes as f64 / 1e6 / runtime * ITER as f64);

    // Consume `sum` in a branch that is almost never taken, so the accumulated result stays
    // observable and the calls to `f` cannot be optimized out.
    if sum == 0x5c26a6e {
        eprintln!("{sum}");
    }
}

fn bench_speed_2(mut data: Vec<u8>, num_bytes: usize, f: fn(&[u8]) -> u8, description: &str) {
    const ITER: usize = 100000;

    let mut sum = 0u64;

    let s = Instant::now();
    for _ in 0..ITER {
        sum += f(&data) as u64;
        // Prevent compilers from optimizing away iterations.
        data[0] = data[0].wrapping_mul(5).wrapping_add(13);
    }
    let runtime = s.elapsed().as_secs_f64();

    println!("{description} speed: {:.2} MB/s", num_bytes as f64 / 1e6 / runtime * ITER as f64);

    // Consume `sum` in a branch that is almost never taken, so the accumulated result stays
    // observable and the calls to `f` cannot be optimized out.
    if sum == 0x5c26a6e {
        eprintln!("{sum}");
    }
}

fn bench_split_separate(data: Vec<u8>) {
    let n = data.len();
    bench_speed_2(
        data,
        n,
        |data| {
            let ret = bg4::bg4_split_separate(data);
            ret[0][0]
        },
        "split_separate",
    )
}

fn bench_split_together(data: Vec<u8>) {
    let n = data.len();
    bench_speed_2(
        data,
        n,
        |data| {
            let ret = bg4::bg4_split_together(data);
            ret[0]
        },
        "split_together",
    )
}

fn bench_regroup_separate(g: [Vec<u8>; 4]) {
    let n = g.iter().map(|g| g.len()).sum();
    bench_speed_1(
        g,
        n,
        |g| {
            let ret = bg4::bg4_regroup_separate(g);
            ret[0]
        },
        "regroup_separate",
    )
}

fn bench_regroup_together(g: Vec<u8>) {
    let n = g.len();
    bench_speed_2(
        g,
        n,
        |g| {
            let ret = bg4::bg4_regroup_together(g);
            ret[0]
        },
        "regroup_together",
    )
}

fn bench_regroup_together_combined_write_4(g: Vec<u8>) {
    let n = g.len();
    bench_speed_2(
        g,
        n,
        |g| {
            let ret = bg4::bg4_regroup_together_combined_write_4(g);
            ret[0]
        },
        "regroup_together_cw_4",
    )
}

fn bench_regroup_together_combined_write_8(g: Vec<u8>) {
    let n = g.len();
    bench_speed_2(
        g,
        n,
        |g| {
            let ret = bg4::bg4_regroup_together_combined_write_8(g);
            ret[0]
        },
        "regroup_together_cw_8",
    )
}
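
Since these bench targets declare harness = false, cargo bench builds each one as a standalone release-mode binary and runs its main directly; this particular benchmark can be selected by package and bench name:

cargo bench -p cas_object --bench bg_split_regroup_bench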