Improvements, added planar

awxkee · Jul 14, 2024 · a0183d5 · a0183d5
1 parent fb7b4ca
commit a0183d5
Show file tree

Hide file tree

Showing 14 changed files with 503 additions and 59 deletions.
diff --git a/app/benches/resize_rgba/main.rs b/app/benches/resize_rgba/main.rs
@@ -50,7 +50,6 @@ pub fn criterion_benchmark(c: &mut Criterion) {
         })
     });
 
-    //
     c.bench_function("Fast image resize RGBA with alpha: Lanczos 3", |b| {
         b.iter(|| {
             let mut vc = Vec::from(img.as_bytes());

diff --git a/app/src/main.rs b/app/src/main.rs
@@ -37,12 +37,28 @@ fn main() {
     //
     let start_time = Instant::now();
 
-    let store =
-        ImageStore::<u8, 4>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize);
+    let mut f16_slice: Vec<f16> = bytes
+        .iter()
+        .map(|&x| f16::from_f32(x as f32 / 255f32))
+        .collect();
 
-    let resized = scaler.resize_rgba(
-        ImageSize::new(dimensions.0 as usize / 4, dimensions.1 as usize / 4),
-        store, true,
+    // let store =
+    //     ImageStore::<u8, 4>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize);
+    //
+    // let resized = scaler.resize_rgba(
+    //     ImageSize::new(dimensions.0 as usize / 4, dimensions.1 as usize / 4),
+    //     store, true,
+    // );
+
+    let store = ImageStore::<f16, 4>::from_slice(
+        &mut f16_slice,
+        dimensions.0 as usize,
+        dimensions.1 as usize,
+    );
+
+    let resized = scaler.resize_rgba_f16(
+        ImageSize::new(dimensions.0 as usize / 1, dimensions.1 as usize / 1),
+        store,
     );
 
     let elapsed_time = start_time.elapsed();
@@ -54,7 +70,12 @@ fn main() {
     //     .iter()
     //     .map(|&x| (x * 255f32) as u8)
     //     .collect();
-    let dst = resized.as_bytes();
+    // let dst = resized.as_bytes();
+    let dst: Vec<u8> = resized
+        .as_bytes()
+        .iter()
+        .map(|&x| (x.to_f32() * 255f32).min(255f32) as u8)
+        .collect();
 
     if resized.channels == 4 {
         image::save_buffer(

diff --git a/src/convolve_naive_f32.rs b/src/convolve_naive_f32.rs
@@ -29,11 +29,9 @@
 use crate::filter_weights::{FilterBounds, FilterWeights};
 use num_traits::AsPrimitive;
 
-#[inline(always)]
 pub(crate) unsafe fn convolve_vertical_part_f32<
     T: Copy + 'static + AsPrimitive<f32>,
-    const PART: usize,
-    const CHANNELS: usize,
+    const BUFFER_SIZE: usize,
 >(
     start_y: usize,
     start_x: usize,
@@ -45,30 +43,25 @@ pub(crate) unsafe fn convolve_vertical_part_f32<
 ) where
     f32: AsPrimitive<T>,
 {
-    let mut store: [[f32; CHANNELS]; PART] = [[0f32; CHANNELS]; PART];
+    let mut store: [f32; BUFFER_SIZE] = [0f32; BUFFER_SIZE];
 
     for j in 0..bounds.size {
         let py = start_y + j;
         let weight = unsafe { filter.add(j).read_unaligned() };
         let src_ptr = src.add(src_stride * py);
-        for x in 0..PART {
-            let px = (start_x + x) * CHANNELS;
+        for x in 0..BUFFER_SIZE {
+            let px = start_x + x;
             let s_ptr = src_ptr.add(px);
-            for c in 0..CHANNELS {
-                let store_p = store.get_unchecked_mut(x);
-                let store_v = store_p.get_unchecked_mut(c);
-                *store_v += unsafe { s_ptr.add(c).read_unaligned().as_() } * weight;
-            }
+            let store_p = store.get_unchecked_mut(x);
+            *store_p += unsafe { s_ptr.read_unaligned().as_() } * weight;
         }
     }
 
-    for x in 0..PART {
-        let px = (start_x + x) * CHANNELS;
+    for x in 0..BUFFER_SIZE {
+        let px = start_x + x;
         let dst_ptr = dst.add(px);
-        for c in 0..CHANNELS {
-            let vl = *(*store.get_unchecked_mut(x)).get_unchecked_mut(c);
-            dst_ptr.add(c).write_unaligned(vl.as_());
-        }
+        let vl = *store.get_unchecked_mut(x);
+        dst_ptr.write_unaligned(vl.as_());
     }
 }
 

diff --git a/src/convolve_naive_u8.rs b/src/convolve_naive_u8.rs
@@ -31,8 +31,8 @@ use crate::filter_weights::{FilterBounds, FilterWeights};
 use crate::saturate_narrow::SaturateNarrow;
 use crate::support::ROUNDING_APPROX;
 
-#[inline(always)]
-pub(crate) unsafe fn convolve_vertical_part<const PART: usize, const CHANNELS: usize>(
+#[inline]
+pub(crate) unsafe fn convolve_vertical_part<const BUFFER_SIZE: usize>(
     start_y: usize,
     start_x: usize,
     src: *const u8,
@@ -41,30 +41,26 @@ pub(crate) unsafe fn convolve_vertical_part<const PART: usize, const CHANNELS: u
     filter: *const i16,
     bounds: &FilterBounds,
 ) {
-    let mut store: [[i32; CHANNELS]; PART] = [[ROUNDING_APPROX; CHANNELS]; PART];
+    let mut store: [i32; BUFFER_SIZE] = [ROUNDING_APPROX; BUFFER_SIZE];
 
     for j in 0..bounds.size {
         let py = start_y + j;
         let weight = unsafe { filter.add(j).read_unaligned() } as i32;
         let src_ptr = src.add(src_stride * py);
-        for x in 0..PART {
-            let px = (start_x + x) * CHANNELS;
+        for x in 0..BUFFER_SIZE {
+            let px = start_x + x;
             let s_ptr = src_ptr.add(px);
-            for c in 0..CHANNELS {
-                let store_p = store.get_unchecked_mut(x);
-                let store_v = store_p.get_unchecked_mut(c);
-                *store_v += unsafe { s_ptr.add(c).read_unaligned() } as i32 * weight;
-            }
+
+            let store_p = store.get_unchecked_mut(x);
+            *store_p += unsafe { s_ptr.read_unaligned() } as i32 * weight;
         }
     }
 
-    for x in 0..PART {
-        let px = (start_x + x) * CHANNELS;
+    for x in 0..BUFFER_SIZE {
+        let px = start_x + x;
         let dst_ptr = dst.add(px);
-        for c in 0..CHANNELS {
-            let vl = *(*store.get_unchecked_mut(x)).get_unchecked_mut(c);
-            dst_ptr.add(c).write_unaligned(vl.saturate_narrow());
-        }
+        let vl = *store.get_unchecked_mut(x);
+        dst_ptr.write_unaligned(vl.saturate_narrow());
     }
 }
 

diff --git a/src/f16.rs b/src/f16.rs
@@ -42,7 +42,6 @@ use crate::rgb_f32::convolve_vertical_rgb_native_row_f32;
 use crate::ImageStore;
 
 impl<'a> HorizontalConvolutionPass<f16, 4> for ImageStore<'a, f16, 4> {
-    #[inline(always)]
     fn convolve_horizontal(
         &self,
         filter_weights: FilterWeights<f32>,
@@ -79,7 +78,6 @@ impl<'a> VerticalConvolutionPass<f16, 4> for ImageStore<'a, f16, 4> {
 }
 
 impl<'a> HorizontalConvolutionPass<f16, 3> for ImageStore<'a, f16, 3> {
-    #[inline(always)]
     fn convolve_horizontal(
         &self,
         filter_weights: FilterWeights<f32>,
@@ -114,3 +112,39 @@ impl<'a> VerticalConvolutionPass<f16, 3> for ImageStore<'a, f16, 3> {
         convolve_vertical_dispatch_f16(self, filter_weights, destination, pool, _dispatcher);
     }
 }
+
+impl<'a> HorizontalConvolutionPass<f16, 1> for ImageStore<'a, f16, 1> {
+    /// Horizontal convolution pass for an `ImageStore<f16, 1>`.
+    ///
+    /// Picks the `<f16, 1>` instantiations of the two horizontal kernels and forwards
+    /// them, together with the weights, the destination store and the thread pool,
+    /// to `convolve_horizontal_dispatch_f16`.
+    fn convolve_horizontal(
+        &self,
+        filter_weights: FilterWeights<f32>,
+        destination: &mut ImageStore<f16, 1>,
+        pool: &Option<ThreadPool>,
+    ) {
+        // Local aliases keep the two function-pointer signatures readable.
+        type MultiRowKernel =
+            fn(usize, usize, &FilterWeights<f32>, *const f16, usize, *mut f16, usize);
+        type SingleRowKernel = fn(usize, usize, &FilterWeights<f32>, *const f16, *mut f16);
+
+        // Optional kernel (named as a 4-row variant) plus the mandatory per-row kernel.
+        let multi_row: Option<MultiRowKernel> =
+            Some(convolve_horizontal_rgba_4_row_f32::<f16, 1>);
+        let single_row: SingleRowKernel = convolve_horizontal_rgb_native_row::<f16, 1>;
+
+        convolve_horizontal_dispatch_f16(
+            self,
+            filter_weights,
+            destination,
+            pool,
+            multi_row,
+            single_row,
+        );
+    }
+}
+
+impl<'a> VerticalConvolutionPass<f16, 1> for ImageStore<'a, f16, 1> {
+    /// Vertical convolution pass for an `ImageStore<f16, 1>`.
+    ///
+    /// Forwards the `<f16, 1>` instantiation of `convolve_vertical_rgb_native_row_f32`
+    /// to `convolve_vertical_dispatch_f16` along with the weights, destination and pool.
+    fn convolve_vertical(
+        &self,
+        filter_weights: FilterWeights<f32>,
+        destination: &mut ImageStore<f16, 1>,
+        pool: &Option<ThreadPool>,
+    ) {
+        // Signature the dispatcher is handed for its row kernel.
+        type ColumnKernel = fn(usize, &FilterBounds, *const f16, *mut f16, usize, *const f32);
+
+        let kernel: ColumnKernel = convolve_vertical_rgb_native_row_f32::<f16, 1>;
+        convolve_vertical_dispatch_f16(self, filter_weights, destination, pool, kernel);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -48,6 +48,8 @@ mod math;
 mod nearest_sampler;
 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
 mod neon;
+mod plane_f32;
+mod plane_u8;
 mod rgb_f32;
 mod rgb_u8;
 mod rgba_f32;

diff --git a/src/plane_f32.rs b/src/plane_f32.rs
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) Radzivon Bartoshyk. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1.  Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2.  Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3.  Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+use crate::convolution::{HorizontalConvolutionPass, VerticalConvolutionPass};
+use crate::convolve_naive_f32::{
+    convolve_horizontal_rgb_native_row, convolve_horizontal_rgba_4_row_f32,
+};
+use crate::dispatch_group_f32::{convolve_horizontal_dispatch_f32, convolve_vertical_dispatch_f32};
+use crate::filter_weights::{FilterBounds, FilterWeights};
+use crate::rgb_f32::convolve_vertical_rgb_native_row_f32;
+use crate::ImageStore;
+use rayon::ThreadPool;
+
+impl<'a> HorizontalConvolutionPass<f32, 1> for ImageStore<'a, f32, 1> {
+    /// Horizontal convolution pass for an `ImageStore<f32, 1>`.
+    ///
+    /// Selects the `<f32, 1>` instantiations of the two horizontal kernels and passes
+    /// them, with the weights, the destination store and the thread pool, to
+    /// `convolve_horizontal_dispatch_f32`.
+    ///
+    /// No `#[inline(always)]` here: the sibling `f16` implementations had that
+    /// attribute removed in this same change, and the other planar implementations
+    /// do not carry it, so inlining is left to the compiler.
+    fn convolve_horizontal(
+        &self,
+        filter_weights: FilterWeights<f32>,
+        destination: &mut ImageStore<f32, 1>,
+        pool: &Option<ThreadPool>,
+    ) {
+        // Optional kernel (named as a 4-row variant); the dispatcher decides if it is used.
+        let _dispatcher_4_rows: Option<
+            fn(usize, usize, &FilterWeights<f32>, *const f32, usize, *mut f32, usize),
+        > = Some(convolve_horizontal_rgba_4_row_f32::<f32, 1>);
+        // Mandatory per-row kernel.
+        let _dispatcher_row: fn(usize, usize, &FilterWeights<f32>, *const f32, *mut f32) =
+            convolve_horizontal_rgb_native_row::<f32, 1>;
+        convolve_horizontal_dispatch_f32(
+            self,
+            filter_weights,
+            destination,
+            pool,
+            _dispatcher_4_rows,
+            _dispatcher_row,
+        );
+    }
+}
+
+impl<'a> VerticalConvolutionPass<f32, 1> for ImageStore<'a, f32, 1> {
+    /// Vertical convolution pass for an `ImageStore<f32, 1>`.
+    ///
+    /// Forwards the `<f32, 1>` instantiation of `convolve_vertical_rgb_native_row_f32`
+    /// to `convolve_vertical_dispatch_f32` along with the weights, destination and pool.
+    fn convolve_vertical(
+        &self,
+        filter_weights: FilterWeights<f32>,
+        destination: &mut ImageStore<f32, 1>,
+        pool: &Option<ThreadPool>,
+    ) {
+        // Signature the dispatcher is handed for its row kernel.
+        type ColumnKernel = fn(usize, &FilterBounds, *const f32, *mut f32, usize, *const f32);
+
+        let kernel: ColumnKernel = convolve_vertical_rgb_native_row_f32::<f32, 1>;
+        convolve_vertical_dispatch_f32(self, filter_weights, destination, pool, kernel);
+    }
+}
diff --git a/src/plane_u8.rs b/src/plane_u8.rs
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) Radzivon Bartoshyk. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1.  Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2.  Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3.  Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+use rayon::ThreadPool;
+
+use crate::convolution::{HorizontalConvolutionPass, VerticalConvolutionPass};
+use crate::convolve_naive_u8::convolve_horizontal_rgba_native_row;
+use crate::dispatch_group_u8::{convolve_horizontal_dispatch_u8, convolve_vertical_dispatch_u8};
+use crate::filter_weights::{FilterBounds, FilterWeights};
+use crate::rgb_u8::convolve_vertical_rgb_native_row_u8;
+use crate::ImageStore;
+
+impl<'a> HorizontalConvolutionPass<u8, 1> for ImageStore<'a, u8, 1> {
+    /// Horizontal convolution pass for an `ImageStore<u8, 1>`.
+    ///
+    /// Supplies only the single-row kernel `convolve_horizontal_rgba_native_row::<1>`
+    /// (no multi-row kernel is provided) and delegates to
+    /// `convolve_horizontal_dispatch_u8`.
+    fn convolve_horizontal(
+        &self,
+        filter_weights: FilterWeights<f32>,
+        destination: &mut ImageStore<u8, 1>,
+        _pool: &Option<ThreadPool>,
+    ) {
+        // Kernel signatures take `i16` weights while this method receives `f32` weights;
+        // presumably the dispatcher converts them. TODO confirm against the dispatcher.
+        type MultiRowKernel =
+            fn(usize, usize, &FilterWeights<i16>, *const u8, usize, *mut u8, usize);
+        type SingleRowKernel = fn(usize, usize, &FilterWeights<i16>, *const u8, *mut u8);
+
+        let multi_row: Option<MultiRowKernel> = None;
+        let single_row: SingleRowKernel = convolve_horizontal_rgba_native_row::<1>;
+
+        convolve_horizontal_dispatch_u8(
+            self,
+            filter_weights,
+            destination,
+            _pool,
+            multi_row,
+            single_row,
+        );
+    }
+}
+
+impl<'a> VerticalConvolutionPass<u8, 1> for ImageStore<'a, u8, 1> {
+    /// Vertical convolution pass for an `ImageStore<u8, 1>`.
+    ///
+    /// Forwards `convolve_vertical_rgb_native_row_u8::<1>` to
+    /// `convolve_vertical_dispatch_u8` along with the weights, destination and pool.
+    fn convolve_vertical(
+        &self,
+        filter_weights: FilterWeights<f32>,
+        destination: &mut ImageStore<u8, 1>,
+        pool: &Option<ThreadPool>,
+    ) {
+        // Named parameters in the pointer type are documentation only.
+        type ColumnKernel = fn(
+            dst_width: usize,
+            bounds: &FilterBounds,
+            unsafe_source_ptr_0: *const u8,
+            unsafe_destination_ptr_0: *mut u8,
+            src_stride: usize,
+            weight_ptr: *const i16,
+        );
+
+        let kernel: ColumnKernel = convolve_vertical_rgb_native_row_u8::<1>;
+        convolve_vertical_dispatch_u8(self, filter_weights, destination, pool, kernel);
+    }
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -50,7 +50,6 @@ pub fn criterion_benchmark(c: &mut Criterion) { @@
             })
         });
-        //
         c.bench_function("Fast image resize RGBA with alpha: Lanczos 3", |b| {
             b.iter(|| {
                 let mut vc = Vec::from(img.as_bytes());
@@ Expand Down @@