Skip to content

Commit

Permalink
Added benchmarking and fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jun 20, 2024
1 parent 0f7382e commit b1a5c0a
Show file tree
Hide file tree
Showing 12 changed files with 627 additions and 72 deletions.
364 changes: 362 additions & 2 deletions Cargo.lock

Large diffs are not rendered by default.

17 changes: 15 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["app"] }

[package]
name = "pic-scale"
version = "0.1.14"
version = "0.1.15"
edition = "2021"
description = "High performance image scaling"
readme = "README.md"
Expand All @@ -13,9 +13,22 @@ documentation = "https://github.com/awxkee/pic-scale"
categories = ["multimedia::images", "multimedia::video"]
homepage = "https://github.com/awxkee/pic-scale"
repository = "https://github.com/awxkee/pic-scale"
exclude = ["*.jpg", "/assets"]
exclude = ["*.jpg", "/assets", "*.png"]

[dependencies]
colorutils-rs = "0.4.8"
num-traits = "0.2.19"
rayon = "1.10.0"

[dev-dependencies]
criterion = "0.5.1"
image = "0.25.1"
fast_image_resize = "4.0.0"

[[bench]]
name = "resize_rgb"
harness = false

[[bench]]
name = "resize_rgba"
harness = false
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ M3 Pro. NEON

| | Lanczos3 |
|-----------|:--------:|
| pic-scale | 14.54 |
| pic-scale | 27.58 |
| fir sse | 36.69 |

Example comparison of the time taken to downscale an RGBA 4928x3279 image by a factor of two on x86_64 SSE with alpha premultiplication.
Expand All @@ -59,8 +59,8 @@ M3 Pro. NEON

| | Lanczos3 |
|-----------|:--------:|
| pic-scale | 28.70 |
| fir sse | 50.43 |
| pic-scale | 26.24 |
| fir sse | 31.43 |

Example comparison of the time taken to downscale an RGBA 4928x3279 image by a factor of two on x86_64 SSE without alpha premultiplication.

Expand All @@ -73,8 +73,8 @@ M3 Pro. NEON

| | Lanczos3 |
|-----------|:--------:|
| pic-scale | 16.46 |
| fir sse | 39.84 |
| pic-scale | 18.89 |
| fir sse | 25.82 |

#### Example in sRGB

Expand Down
20 changes: 8 additions & 12 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,19 @@ use fast_image_resize::{
use image::io::Reader as ImageReader;
use image::{EncodableLayout, GenericImageView};

use pic_scale::{
ImageSize, ImageStore, LinearScaler, ResamplingFunction, Scaler, Scaling, ThreadingPolicy,
};
use pic_scale::{ImageSize, ImageStore, ResamplingFunction, Scaler, Scaling, ThreadingPolicy};

fn main() {
// test_fast_image();

let img = ImageReader::open("./assets/nasa-4928x3279.png")
let img = ImageReader::open("./assets/asset_5.png")
.unwrap()
.decode()
.unwrap();
let dimensions = img.dimensions();
let mut bytes = Vec::from(img.as_bytes());

let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
let mut scaler = Scaler::new(ResamplingFunction::Hann);
scaler.set_threading_policy(ThreadingPolicy::Single);
// let store =
// ImageStore::<u8, 4>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize);
Expand All @@ -36,14 +34,12 @@ fn main() {
//
let start_time = Instant::now();

let store = ImageStore::<u8, 3>::from_slice(
&mut bytes,
dimensions.0 as usize,
dimensions.1 as usize,
);
let resized = scaler.resize_rgb(
ImageSize::new(dimensions.0 as usize / 2, dimensions.1 as usize / 2),
let store =
ImageStore::<u8, 4>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize);
let resized = scaler.resize_rgba(
ImageSize::new(dimensions.0 as usize / 4, dimensions.1 as usize / 4),
store,
true,
);

let elapsed_time = start_time.elapsed();
Expand Down
Binary file added assets/asset_alpha_rgba.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
64 changes: 64 additions & 0 deletions benches/resize_rgb/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use criterion::{criterion_group, criterion_main, Criterion};
use fast_image_resize::images::Image;
use fast_image_resize::FilterType::Lanczos3;
use fast_image_resize::{CpuExtensions, PixelType, ResizeAlg, ResizeOptions, Resizer};
use image::io::Reader as ImageReader;
use image::GenericImageView;
use pic_scale::{ImageSize, ImageStore, ResamplingFunction, Scaler, Scaling, ThreadingPolicy};

/// Benchmarks a 2x Lanczos3 downscale of an 8-bit RGB image, comparing
/// pic-scale (single-threaded) against fast_image_resize.
///
/// Each timed iteration deliberately includes a fresh copy of the decoded
/// pixels for both libraries, so the two measurements carry the same setup
/// cost and stay directly comparable.
///
/// # Panics
///
/// Panics if `assets/asset.jpg` cannot be opened or decoded, or if
/// fast_image_resize rejects the source buffer or the resize request.
pub fn criterion_benchmark(c: &mut Criterion) {
    let img = ImageReader::open("assets/asset.jpg")
        .expect("failed to open assets/asset.jpg (path is relative to the working directory)")
        .decode()
        .expect("failed to decode assets/asset.jpg");
    let dimensions = img.dimensions();
    let src_bytes = img.as_bytes();
    let (width, height) = (dimensions.0 as usize, dimensions.1 as usize);

    c.bench_function("Pic scale RGB: Lanczos 3", |b| {
        b.iter(|| {
            let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
            scaler.set_threading_policy(ThreadingPolicy::Single);
            // Plain slice copy, identical to the copy made in the
            // fast_image_resize benchmark below.
            let mut copied = src_bytes.to_vec();
            let store = ImageStore::<u8, 3>::from_slice(&mut copied, width, height);
            // `black_box` keeps the optimizer from discarding the resize result.
            let _ = std::hint::black_box(
                scaler.resize_rgb(ImageSize::new(width / 2, height / 2), store),
            );
        })
    });

    c.bench_function("Fast image resize RGB: Lanczos 3", |b| {
        b.iter(|| {
            let mut vc = src_bytes.to_vec();
            let pixel_type: PixelType = PixelType::U8x3;
            let src_image =
                Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
            let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);

            let mut resizer = Resizer::new();
            #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
            // SAFETY: this block is only compiled when the `neon` target feature
            // is enabled, so the instructions are available.
            unsafe {
                resizer.set_cpu_extensions(CpuExtensions::Neon);
            }
            #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
            {
                // Only force SSE4.1 when the running CPU reports it; otherwise
                // keep whatever the resizer selected by default.
                if is_x86_feature_detected!("sse4.1") {
                    // SAFETY: SSE4.1 availability was verified at runtime just above.
                    unsafe {
                        resizer.set_cpu_extensions(CpuExtensions::Sse4_1);
                    }
                }
            }
            resizer
                .resize(
                    &src_image,
                    &mut dst_image,
                    &ResizeOptions::new()
                        .resize_alg(ResizeAlg::Convolution(Lanczos3))
                        .use_alpha(false),
                )
                .unwrap();
            // Keep the written destination observable to the optimizer.
            std::hint::black_box(&dst_image);
        })
    });
}

// Register `criterion_benchmark` in the `benches` group and let Criterion
// generate the entry point (this bench target is declared with `harness = false`).
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
112 changes: 112 additions & 0 deletions benches/resize_rgba/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
use criterion::{criterion_group, criterion_main, Criterion};
use fast_image_resize::images::Image;
use fast_image_resize::FilterType::Lanczos3;
use fast_image_resize::{CpuExtensions, PixelType, ResizeAlg, ResizeOptions, Resizer};
use image::io::Reader as ImageReader;
use image::GenericImageView;
use pic_scale::{ImageSize, ImageStore, ResamplingFunction, Scaler, Scaling, ThreadingPolicy};

/// Times a single-threaded 2x Lanczos3 downscale of an 8-bit RGBA buffer with pic-scale.
///
/// `alpha_flag` is forwarded unchanged as the last argument of `resize_rgba`
/// (presumably toggling alpha premultiplication — confirm against the pic-scale docs).
fn bench_pic_scale_rgba(
    c: &mut Criterion,
    name: &str,
    src_bytes: &[u8],
    dimensions: (u32, u32),
    alpha_flag: bool,
) {
    let (width, height) = (dimensions.0 as usize, dimensions.1 as usize);
    c.bench_function(name, |b| {
        b.iter(|| {
            let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
            scaler.set_threading_policy(ThreadingPolicy::Single);
            // Plain slice copy, identical to the copy made in the
            // fast_image_resize benchmarks.
            let mut copied = src_bytes.to_vec();
            let store = ImageStore::<u8, 4>::from_slice(&mut copied, width, height);
            // `black_box` keeps the optimizer from discarding the resize result.
            let _ = std::hint::black_box(scaler.resize_rgba(
                ImageSize::new(width / 2, height / 2),
                store,
                alpha_flag,
            ));
        })
    });
}

/// Times a 2x Lanczos3 downscale of an 8-bit RGBA buffer with fast_image_resize.
///
/// `use_alpha` is forwarded unchanged to `ResizeOptions::use_alpha`.
fn bench_fast_image_resize_rgba(
    c: &mut Criterion,
    name: &str,
    src_bytes: &[u8],
    dimensions: (u32, u32),
    use_alpha: bool,
) {
    c.bench_function(name, |b| {
        b.iter(|| {
            let mut vc = src_bytes.to_vec();
            let pixel_type: PixelType = PixelType::U8x4;
            let src_image =
                Image::from_slice_u8(dimensions.0, dimensions.1, &mut vc, pixel_type).unwrap();
            let mut dst_image = Image::new(dimensions.0 / 2, dimensions.1 / 2, pixel_type);

            let mut resizer = Resizer::new();
            #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
            // SAFETY: this block is only compiled when the `neon` target feature
            // is enabled, so the instructions are available.
            unsafe {
                resizer.set_cpu_extensions(CpuExtensions::Neon);
            }
            #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
            {
                // Only force SSE4.1 when the running CPU reports it; otherwise
                // keep whatever the resizer selected by default.
                if is_x86_feature_detected!("sse4.1") {
                    // SAFETY: SSE4.1 availability was verified at runtime just above.
                    unsafe {
                        resizer.set_cpu_extensions(CpuExtensions::Sse4_1);
                    }
                }
            }
            resizer
                .resize(
                    &src_image,
                    &mut dst_image,
                    &ResizeOptions::new()
                        .resize_alg(ResizeAlg::Convolution(Lanczos3))
                        .use_alpha(use_alpha),
                )
                .unwrap();
            // Keep the written destination observable to the optimizer.
            std::hint::black_box(&dst_image);
        })
    });
}

/// Benchmarks a 2x Lanczos3 downscale of an 8-bit RGBA image, comparing
/// pic-scale (single-threaded) against fast_image_resize, once with the
/// alpha flag enabled and once with it disabled.
///
/// Each timed iteration deliberately includes a fresh copy of the decoded
/// pixels for both libraries, so the measurements carry the same setup cost.
///
/// # Panics
///
/// Panics if `assets/asset_alpha_rgba.png` cannot be opened or decoded, or if
/// fast_image_resize rejects the source buffer or the resize request.
pub fn criterion_benchmark(c: &mut Criterion) {
    let img = ImageReader::open("assets/asset_alpha_rgba.png")
        .expect(
            "failed to open assets/asset_alpha_rgba.png (path is relative to the working directory)",
        )
        .decode()
        .expect("failed to decode assets/asset_alpha_rgba.png");
    let dimensions = img.dimensions();
    let src_bytes = img.as_bytes();

    // Registration order and benchmark names match the original file so that
    // previously recorded Criterion baselines remain comparable.
    bench_pic_scale_rgba(
        c,
        "Pic scale RGBA with alpha: Lanczos 3",
        src_bytes,
        dimensions,
        true,
    );
    bench_fast_image_resize_rgba(
        c,
        "Fast image resize RGBA with alpha: Lanczos 3",
        src_bytes,
        dimensions,
        true,
    );
    bench_pic_scale_rgba(
        c,
        "Pic scale RGBA without alpha: Lanczos 3",
        src_bytes,
        dimensions,
        false,
    );
    bench_fast_image_resize_rgba(
        c,
        "Fast image resize RGBA without alpha: Lanczos 3",
        src_bytes,
        dimensions,
        false,
    );
}

// Register `criterion_benchmark` in the `benches` group and let Criterion
// generate the entry point (this bench target is declared with `harness = false`).
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
2 changes: 1 addition & 1 deletion src/convolution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
* // license that can be found in the LICENSE file.
*/

use std::fmt::Debug;
use num_traits::FromPrimitive;
use rayon::ThreadPool;
use std::fmt::Debug;

use crate::filter_weights::FilterWeights;
use crate::ImageStore;
Expand Down
12 changes: 6 additions & 6 deletions src/neon/rgb_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
pub mod neon_rgb {
use crate::filter_weights::{FilterBounds, FilterWeights};
use crate::neon::utils::neon_convolve_u8;
use crate::support::ROUNDING_APPROX;
use crate::support::{PRECISION, ROUNDING_APPROX};
use std::arch::aarch64::*;

pub fn convolve_horizontal_rgb_neon_rows_4(
Expand Down Expand Up @@ -162,7 +162,7 @@ pub mod neon_rgb {
jx += 1;
}

let store_16 = vqshrun_n_s32::<12>(vmaxq_s32(store_0, zeros));
let store_16 = vqshrun_n_s32::<PRECISION>(vmaxq_s32(store_0, zeros));
let store_16_8 = vqmovn_u16(vcombine_u16(store_16, store_16));

let px = x * CHANNELS;
Expand All @@ -173,7 +173,7 @@ pub mod neon_rgb {
dest_ptr.add(1).write_unaligned(bytes[1]);
dest_ptr.add(2).write_unaligned(bytes[2]);

let store_16 = vqshrun_n_s32::<12>(vmaxq_s32(store_1, zeros));
let store_16 = vqshrun_n_s32::<PRECISION>(vmaxq_s32(store_1, zeros));
let store_16_8 = vqmovn_u16(vcombine_u16(store_16, store_16));

let px = x * CHANNELS;
Expand All @@ -185,7 +185,7 @@ pub mod neon_rgb {
dest_ptr.add(1).write_unaligned(bytes[1]);
dest_ptr.add(2).write_unaligned(bytes[2]);

let store_16 = vqshrun_n_s32::<12>(vmaxq_s32(store_2, zeros));
let store_16 = vqshrun_n_s32::<PRECISION>(vmaxq_s32(store_2, zeros));
let store_16_8 = vqmovn_u16(vcombine_u16(store_16, store_16));

let px = x * CHANNELS;
Expand All @@ -196,7 +196,7 @@ pub mod neon_rgb {
dest_ptr.add(1).write_unaligned(bytes[1]);
dest_ptr.add(2).write_unaligned(bytes[2]);

let store_16 = vqshrun_n_s32::<12>(vmaxq_s32(store_3, zeros));
let store_16 = vqshrun_n_s32::<PRECISION>(vmaxq_s32(store_3, zeros));
let store_16_8 = vqmovn_u16(vcombine_u16(store_16, store_16));

let px = x * CHANNELS;
Expand Down Expand Up @@ -284,7 +284,7 @@ pub mod neon_rgb {
jx += 1;
}

let store_16 = vqshrun_n_s32::<12>(vmaxq_s32(store, zeros));
let store_16 = vqshrun_n_s32::<PRECISION>(vmaxq_s32(store, zeros));
let store_16_8 = vqmovn_u16(vcombine_u16(store_16, store_16));

let px = x * CHANNELS;
Expand Down
Loading

0 comments on commit b1a5c0a

Please sign in to comment.