Skip to content

Commit

Permalink
Relaxed AR30 Requirements, fuzz fix
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jan 29, 2025
1 parent 06c54ee commit d9b7122
Show file tree
Hide file tree
Showing 14 changed files with 316 additions and 113 deletions.
2 changes: 1 addition & 1 deletion app/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ libc = "0.2.169"
criterion = "0.5.1"
#image = { version = "0.25.2", features = ["default"] }
fast_image_resize = { version = "5.0.0", features = [] }
pic-scale = { path = "..", features = ["nightly_f16"], default-features = false }
pic-scale = { path = "..", features = ["nightly_f16", "rdm"], default-features = false }

[[bench]]
name = "resize_rgb"
Expand Down
9 changes: 9 additions & 0 deletions app/accelerate/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ mod accelerate {
flags: libc::c_uint,
) -> libc::c_int;

/// FFI declaration for the Accelerate (vImage) routine `vImageScale_XRGB2101010W`.
///
/// * `src` - descriptor of the source image buffer (read-only).
/// * `dest` - descriptor of the destination image buffer that receives the scaled image.
/// * `temp_buffer` - optional scratch memory; callers in this repository pass a null pointer.
/// * `flags` - vImage processing flags (for example `kvImageDoNotTile`).
///
/// Returns a C `int` status code.
/// NOTE(review): presumably `0` means success (`kvImageNoError`) and the pixel layout is
/// 2-bit X + 3x10-bit RGB packed in 32 bits; confirm against Apple's vImage documentation.
// The name mirrors the C symbol exactly, so Rust naming lints are silenced.
#[allow(non_camel_case_types)]
#[allow(non_snake_case)]
pub fn vImageScale_XRGB2101010W(
src: *const vImage_Buffer,
dest: *mut vImage_Buffer,
temp_buffer: *mut libc::c_void,
flags: libc::c_uint,
) -> libc::c_int;

#[allow(non_camel_case_types)]
#[allow(non_snake_case)]
pub fn vImageScale_ARGBFFFF(
Expand Down
95 changes: 90 additions & 5 deletions app/benches/resize_rgba/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use fast_image_resize::FilterType::Lanczos3;
use fast_image_resize::{CpuExtensions, PixelType, ResizeAlg, ResizeOptions, Resizer};
use image::{GenericImageView, ImageReader};
use pic_scale::{
ImageStore, ImageStoreMut, ResamplingFunction, Scaler, Scaling, ScalingF32, ScalingU16,
ThreadingPolicy, WorkloadStrategy,
Ar30ByteOrder, ImageSize, ImageStore, ImageStoreMut, ResamplingFunction, Scaler, Scaling,
ScalingF32, ScalingU16, ThreadingPolicy, WorkloadStrategy,
};

pub fn criterion_benchmark(c: &mut Criterion) {
Expand All @@ -17,7 +17,7 @@ pub fn criterion_benchmark(c: &mut Criterion) {
let dimensions = img.dimensions();
let src_bytes = img.as_bytes();

c.bench_function("Pic scale RGBA with alpha: Lanczos 3", |b| {
/*c.bench_function("Pic scale RGBA with alpha: Lanczos 3", |b| {
let copied: Vec<u8> = Vec::from(src_bytes);
b.iter(|| {
let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
Expand Down Expand Up @@ -118,7 +118,7 @@ pub fn criterion_benchmark(c: &mut Criterion) {
});
#[cfg(any(target_os = "macos", target_os = "ios"))]
c.bench_function("Apple Accelerate: Lanczos 3", |b| {
c.bench_function("Apple Accelerate RGBA: Lanczos 3", |b| {
let copied: Vec<u8> = Vec::from(src_bytes);
use accelerate::{kvImageDoNotTile, vImageScale_ARGB8888, vImage_Buffer};
b.iter(|| {
Expand Down Expand Up @@ -367,7 +367,7 @@ pub fn criterion_benchmark(c: &mut Criterion) {
.unwrap();
let mut target =
ImageStoreMut::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
_ = scaler.resize_rgba_f16(&store, &mut target, false);
scaler.resize_rgba_f16(&store, &mut target, false).unwrap();
})
});
Expand Down Expand Up @@ -410,6 +410,91 @@ pub fn criterion_benchmark(c: &mut Criterion) {
panic!("Can't resize by accelerate");
}
})
});*/

// Benchmark: single-threaded Lanczos 3 downscale of AR30 (RGBA1010102) data in network
// byte order, using the `PreferSpeed` workload strategy. The output is 1/4 of the source
// size in each dimension.
//
// The label uses the same "(N):" suffix as the sibling Quality benchmark so the two are
// reported as a matching pair (the label used to contain the typo "(N0:").
// NOTE(review): renaming a Criterion benchmark starts a fresh baseline for it.
c.bench_function("Pic scale RGBA1010102(N): Lanczos 3/Speed", |b| {
    // Own a copy of the decoded bytes so the closure does not borrow from the image.
    // NOTE(review): the bytes are treated as 4-byte AR30 pixels (stride = width * 4);
    // confirm the decoded image really is 4 bytes per pixel.
    let copied: Vec<u8> = Vec::from(src_bytes);
    b.iter(|| {
        let mut scaler = Scaler::new(ResamplingFunction::Lanczos3);
        scaler.set_threading_policy(ThreadingPolicy::Single);
        scaler.set_workload_strategy(WorkloadStrategy::PreferSpeed);

        // Destination buffer: (width / 4) * (height / 4) pixels, 4 bytes each.
        let mut dst_data_ar30 =
            vec![1u8; (dimensions.0 as usize / 4) * (dimensions.1 as usize / 4) * 4];
        scaler
            .resize_ar30(
                &copied,
                dimensions.0 as usize * 4,
                ImageSize::new(dimensions.0 as usize, dimensions.1 as usize),
                &mut dst_data_ar30,
                (dimensions.0 as usize / 4) * 4,
                ImageSize::new(dimensions.0 as usize / 4, dimensions.1 as usize / 4),
                Ar30ByteOrder::Network,
            )
            // A failed resize would make the measurement meaningless, so fail loudly.
            .unwrap();
    })
});

// Benchmark: single-threaded Lanczos 3 downscale of AR30 (RGBA1010102) data in network
// byte order, using the `PreferQuality` workload strategy. The output is 1/4 of the source
// size in each dimension.
c.bench_function("Pic scale RGBA1010102(N): Lanczos 3/Quality", |bencher| {
    // Owned copy of the decoded image bytes used as the AR30 source.
    let source: Vec<u8> = Vec::from(src_bytes);
    bencher.iter(|| {
        // Source and destination geometry; every pixel occupies 4 bytes.
        let (src_w, src_h) = (dimensions.0 as usize, dimensions.1 as usize);
        let (dst_w, dst_h) = (src_w / 4, src_h / 4);

        let mut resizer = Scaler::new(ResamplingFunction::Lanczos3);
        resizer.set_threading_policy(ThreadingPolicy::Single);
        resizer.set_workload_strategy(WorkloadStrategy::PreferQuality);

        let mut resized = vec![1u8; dst_w * dst_h * 4];
        let outcome = resizer.resize_ar30(
            &source,
            src_w * 4,
            ImageSize::new(src_w, src_h),
            &mut resized,
            dst_w * 4,
            ImageSize::new(dst_w, dst_h),
            Ar30ByteOrder::Network,
        );
        // Panic on failure so a broken resize never produces a bogus timing.
        outcome.unwrap();
    })
});

// Reference benchmark: the same 1/4 downscale performed by Apple's Accelerate framework
// through `vImageScale_XRGB2101010W`. Only compiled on Apple targets.
// NOTE(review): the label says "Lanczos 3", but no filter is selected here; the kernel is
// whatever vImage uses by default. Confirm against Apple's documentation.
#[cfg(any(target_os = "macos", target_os = "ios"))]
c.bench_function("Apple Accelerate RGBX1010102(N): Lanczos 3", |b| {
// Owned copy of the decoded image bytes used as the source pixels.
let copied: Vec<u8> = Vec::from(src_bytes);
use accelerate::{kvImageDoNotTile, vImageScale_XRGB2101010W, vImage_Buffer};
b.iter(|| {
// Destination store: 4 channels of u8 per pixel, i.e. one 32-bit packed pixel.
let mut target =
ImageStoreMut::<u8, 4>::alloc(dimensions.0 as usize / 4, dimensions.1 as usize / 4);

// Source descriptor. The pointer is cast to `*mut` only because `vImage_Buffer::data`
// is mutable; the buffer is passed to vImage as `*const vImage_Buffer`.
let src_buffer = vImage_Buffer {
data: copied.as_ptr() as *mut libc::c_void,
width: dimensions.0 as usize,
height: dimensions.1 as usize,
row_bytes: dimensions.0 as usize * 4,
};

// Read the stride before taking the mutable borrow of the underlying buffer.
let target_stride = target.stride();
let target_ptr = target.buffer.borrow_mut().as_mut_ptr() as *mut libc::c_void;

// Destination descriptor pointing into `target`'s storage.
let mut dst_buffer = vImage_Buffer {
data: target_ptr,
width: target.width,
height: target.height,
row_bytes: target_stride,
};

// SAFETY: `src_buffer.data` points into `copied` and `dst_buffer.data` points into
// `target`'s buffer; both stay alive and are not otherwise accessed during the call.
// NOTE(review): assumes each buffer holds at least `row_bytes * height` bytes and that a
// null temp buffer lets vImage manage its own scratch memory; confirm.
let result = unsafe {
vImageScale_XRGB2101010W(
&src_buffer,
&mut dst_buffer,
std::ptr::null_mut(),
kvImageDoNotTile,
)
};
// Any non-zero status is treated as a failure and aborts the benchmark.
if result != 0 {
panic!("Can't resize by accelerate");
}
})
});
}

Expand Down
24 changes: 23 additions & 1 deletion app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ use fast_image_resize::{
CpuExtensions, FilterType, IntoImageView, PixelType, ResizeAlg, ResizeOptions, Resizer,
};
use image::{EncodableLayout, GenericImageView, ImageReader};
use pic_scale::{Ar30ByteOrder, ImageSize, ImageStore, ImageStoreMut, ImageStoreScaling, ResamplingFunction, RgbF16ImageStore, RgbF16ImageStoreMut, Rgba16ImageStoreMut, RgbaF16ImageStore, RgbaF16ImageStoreMut, Scaler, Scaling, ScalingU16, ThreadingPolicy, WorkloadStrategy};
use pic_scale::{
Ar30ByteOrder, ImageSize, ImageStore, ImageStoreMut, ImageStoreScaling, ResamplingFunction,
RgbF16ImageStore, RgbF16ImageStoreMut, Rgba16ImageStoreMut, RgbaF16ImageStore,
RgbaF16ImageStoreMut, Scaler, Scaling, ScalingU16, ThreadingPolicy, WorkloadStrategy,
};

fn resize_plane(
src_width: usize,
Expand Down Expand Up @@ -58,6 +62,24 @@ fn main() {

// let mut choke: Vec<u16> = bytes.iter().map(|&x| (x as u16) << 2).collect();

// AR30 resize run with odd dimensions that are not multiples of each other: the width
// shrinks (289 -> 257) while the height grows (257 -> 511).
// NOTE(review): presumably a reproduction of a fuzz-found case (the commit title mentions a
// fuzz fix); confirm.
let src_width = 289;
let src_height = 257;
let dst_width = 257;
let dst_height = 511;
// Both buffers use 4 bytes per pixel and are pre-filled with 1u8.
let src_data_ar30 = vec![1u8; src_width * src_height * 4];
let mut dst_data_ar30 = vec![1u8; dst_width * dst_height * 4];
scaler
.resize_ar30(
&src_data_ar30,
// Source stride in bytes (tightly packed rows).
src_width * 4,
ImageSize::new(src_width, src_height),
&mut dst_data_ar30,
// Destination stride in bytes (tightly packed rows).
dst_width * 4,
ImageSize::new(dst_width, dst_height),
Ar30ByteOrder::Host,
)
// Panics if the resize reports an error.
.unwrap();

let rgb_feature16 = transient
.iter()
.map(|&x| (x as f32 / 255f32) as f16)
Expand Down
27 changes: 20 additions & 7 deletions src/ar30.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,24 @@ const fn ntohl(netlong: u32) -> u32 {
}

impl Rgb30 {
// #[inline]
// pub(crate) const fn pack_w_a<const STORE: usize>(self, r: i32, g: i32, b: i32, a: i32) -> u32 {
// let value: u32 = match self {
// Rgb30::Ar30 => (((a << 30) | (b << 20)) | ((g << 10) | r)) as u32,
// Rgb30::Ra30 => (((r << 22) | (g << 12)) | ((b << 2) | a)) as u32,
// };
// if STORE == 0 {
// value
// } else {
// htonl(value)
// }
// }

#[inline]
pub(crate) const fn pack_w_a<const STORE: usize>(self, r: i32, g: i32, b: i32, a: i32) -> u32 {
pub(crate) const fn pack_w_a<const STORE: usize>(self, r: i32, g: i32, b: i32, _: i32) -> u32 {
let value: u32 = match self {
Rgb30::Ar30 => (((a << 30) | (b << 20)) | ((g << 10) | r)) as u32,
Rgb30::Ra30 => (((r << 22) | (g << 12)) | ((b << 2) | a)) as u32,
Rgb30::Ar30 => (((3 << 30) | (b << 20)) | ((g << 10) | r)) as u32,
Rgb30::Ra30 => (((r << 22) | (g << 12)) | ((b << 2) | 3)) as u32,
};
if STORE == 0 {
value
Expand All @@ -79,15 +92,15 @@ impl Rgb30 {
let r10 = pixel & 0x3ff;
let g10 = (pixel >> 10) & 0x3ff;
let b10 = (pixel >> 20) & 0x3ff;
let a10 = pixel >> 30;
(r10, g10, b10, a10)
// let a10 = pixel >> 30;
(r10, g10, b10, 3)
}
Rgb30::Ra30 => {
let a2 = pixel & 0x3;
// let a2 = pixel & 0x3;
let r10 = (pixel >> 22) & 0x3ff;
let g10 = (pixel >> 12) & 0x3ff;
let b10 = (pixel >> 2) & 0x3ff;
(r10, g10, b10, a2)
(r10, g10, b10, 3)
}
}
}
Expand Down
39 changes: 25 additions & 14 deletions src/dispatch_group_ar30.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,12 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

use crate::convolution::ConvolutionOptions;
use crate::filter_weights::{FilterBounds, FilterWeights};
use crate::fixed_point_horizontal_ar30::{
convolve_row_handler_fixed_point_4_ar30, convolve_row_handler_fixed_point_ar30,
};
use crate::fixed_point_vertical_ar30::column_handler_fixed_point_ar30;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use crate::neon::{
neon_column_handler_fixed_point_ar30, neon_convolve_horizontal_rgba_rows_4_ar30,
};
use crate::support::PRECISION;
use rayon::iter::{IndexedParallelIterator, ParallelIterator};
use rayon::prelude::{ParallelSlice, ParallelSliceMut};
Expand All @@ -49,8 +46,9 @@ pub(crate) fn convolve_horizontal_dispatch_ar30<const AR30_TYPE: usize, const AR
dst: &mut [u8],
dst_stride: usize,
pool: &Option<ThreadPool>,
_options: ConvolutionOptions,
) {
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "rdm"))]
let is_rdm_available = std::arch::is_aarch64_feature_detected!("rdm");
if let Some(pool) = pool {
pool.install(|| {
Expand All @@ -60,8 +58,11 @@ pub(crate) fn convolve_horizontal_dispatch_ar30<const AR30_TYPE: usize, const AR
.for_each(|(dst, src)| {
let mut _dispatch: fn(&[u8], usize, &mut [u8], usize, &FilterWeights<i16>) =
convolve_row_handler_fixed_point_4_ar30::<AR30_TYPE, AR30_ORDER>;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
if is_rdm_available {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "rdm"))]
if is_rdm_available
&& _options.workload_strategy == crate::WorkloadStrategy::PreferSpeed
{
use crate::neon::neon_convolve_horizontal_rgba_rows_4_ar30;
_dispatch =
neon_convolve_horizontal_rgba_rows_4_ar30::<AR30_TYPE, AR30_ORDER>;
}
Expand All @@ -87,8 +88,11 @@ pub(crate) fn convolve_horizontal_dispatch_ar30<const AR30_TYPE: usize, const AR
.for_each(|(dst, src)| {
let mut _dispatch: fn(&[u8], usize, &mut [u8], usize, &FilterWeights<i16>) =
convolve_row_handler_fixed_point_4_ar30::<AR30_TYPE, AR30_ORDER>;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
if is_rdm_available {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "rdm"))]
if is_rdm_available
&& _options.workload_strategy == crate::WorkloadStrategy::PreferSpeed
{
use crate::neon::neon_convolve_horizontal_rgba_rows_4_ar30;
_dispatch = neon_convolve_horizontal_rgba_rows_4_ar30::<AR30_TYPE, AR30_ORDER>;
}
_dispatch(src, src_stride, dst, dst_stride, &approx);
Expand All @@ -114,8 +118,9 @@ pub(crate) fn convolve_vertical_dispatch_ar30<const AR30_TYPE: usize, const AR30
dst_stride: usize,
pool: &Option<ThreadPool>,
width: usize,
_options: ConvolutionOptions,
) {
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "rdm"))]
let is_rdm_available = std::arch::is_aarch64_feature_detected!("rdm");
if let Some(pool) = pool {
pool.install(|| {
Expand All @@ -128,8 +133,11 @@ pub(crate) fn convolve_vertical_dispatch_ar30<const AR30_TYPE: usize, const AR30
let weights = &approx.weights[filter_offset..];
let mut _dispatch: fn(&FilterBounds, &[u8], &mut [u8], usize, &[i16]) =
column_handler_fixed_point_ar30::<AR30_TYPE, AR30_ORDER>;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
if is_rdm_available {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "rdm"))]
if is_rdm_available
&& _options.workload_strategy == crate::WorkloadStrategy::PreferSpeed
{
use crate::neon::neon_column_handler_fixed_point_ar30;
_dispatch = neon_column_handler_fixed_point_ar30::<AR30_TYPE, AR30_ORDER>;
}

Expand All @@ -149,8 +157,11 @@ pub(crate) fn convolve_vertical_dispatch_ar30<const AR30_TYPE: usize, const AR30

let mut _dispatch: fn(&FilterBounds, &[u8], &mut [u8], usize, &[i16]) =
column_handler_fixed_point_ar30::<AR30_TYPE, AR30_ORDER>;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
if is_rdm_available {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "rdm"))]
if is_rdm_available
&& _options.workload_strategy == crate::WorkloadStrategy::PreferSpeed
{
use crate::neon::neon_column_handler_fixed_point_ar30;
_dispatch = neon_column_handler_fixed_point_ar30::<AR30_TYPE, AR30_ORDER>;
}

Expand Down
Loading

0 comments on commit d9b7122

Please sign in to comment.