diff --git a/README.md b/README.md index c101397..f748326 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,27 @@ Supported only NEON and SSE. This library provides for you some conveniences to scale in different color spaces. +#### Example integration with `image` crate + +```rust +let img = ImageReader::open("./assets/asset.png") + .unwrap() + .decode() + .unwrap(); +let dimensions = img.dimensions(); +let mut bytes = Vec::from(img.as_bytes()); + +let mut scaler = LinearScaler::new(ResamplingFunction::Lanczos3); +scaler.set_threading_policy(ThreadingPolicy::Adaptive); +let store = + ImageStore::<u8, 4>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize); +let resized = scaler.resize_rgba( + ImageSize::new(dimensions.0 as usize / 2, dimensions.1 as usize / 2), + store, + true +); +``` + ### Performance Faster or comparable to `fast-image-resize`, when implemented equal SIMD and pixel type. @@ -54,27 +75,6 @@ M3 Pro. NEON | pic-scale | 38.75 | | fir sse | 45.79 | -#### Example integration with `image` crate - -```rust -let img = ImageReader::open("./assets/asset.png") - .unwrap() - .decode() - .unwrap(); -let dimensions = img.dimensions(); -let mut bytes = Vec::from(img.as_bytes()); - -let mut scaler = LinearScaler::new(ResamplingFunction::Lanczos3); -scaler.set_threading_policy(ThreadingPolicy::Adaptive); -let store = - ImageStore::<u8, 4>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize); -let resized = scaler.resize_rgba( - ImageSize::new(dimensions.0 as usize / 2, dimensions.1 as usize / 2), - store, - true -); -``` - #### Example in sRGB In common, you should not downsize an image in sRGB colorspace, however if speed is more preferable than more proper scale you may omit linearizing diff --git a/src/rgba_f32.rs b/src/rgba_f32.rs index c7da6aa..f153d03 100644 --- a/src/rgba_f32.rs +++ b/src/rgba_f32.rs @@ -300,6 +300,7 @@ fn convolve_horizontal_rgba_f32_native( } } +#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] 
#[inline(always)] fn convolve_vertical_rgb_native_row( total_width: usize, diff --git a/src/rgba_u8.rs b/src/rgba_u8.rs index 4a23edc..a1308d5 100644 --- a/src/rgba_u8.rs +++ b/src/rgba_u8.rs @@ -1,7 +1,5 @@ #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] use std::arch::aarch64::*; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; use std::sync::Arc; use rayon::ThreadPool; @@ -14,9 +12,9 @@ use crate::neon_simd_u8::*; use crate::rgb_u8::*; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse_rgb_u8::sse_rgb::*; +use crate::support::{PRECISION, ROUNDING_APPROX}; use crate::unsafe_slice::UnsafeSlice; use crate::ImageStore; -use crate::support::ROUNDING_APPROX; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] fn convolve_horizontal_rgba_sse( @@ -280,10 +278,10 @@ fn convolve_horizontal_rgba_native( let dest_ptr = unsafe { unsafe_destination_ptr_0.add(px) }; unsafe { - *dest_ptr = (sum_r >> 12).min(255).max(0) as u8; - *dest_ptr.add(1) = (sum_g >> 12).min(255).max(0) as u8; - *dest_ptr.add(2) = (sum_b >> 12).min(255).max(0) as u8; - *dest_ptr.add(3) = (sum_a >> 12).min(255).max(0) as u8; + *dest_ptr = (sum_r >> PRECISION).min(255).max(0) as u8; + *dest_ptr.add(1) = (sum_g >> PRECISION).min(255).max(0) as u8; + *dest_ptr.add(2) = (sum_b >> PRECISION).min(255).max(0) as u8; + *dest_ptr.add(3) = (sum_a >> PRECISION).min(255).max(0) as u8; } filter_offset += approx_weights.aligned_size;