Skip to content

Commit

Permalink
Improvements, added planar
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jul 14, 2024
1 parent fb7b4ca commit a0183d5
Show file tree
Hide file tree
Showing 14 changed files with 503 additions and 59 deletions.
1 change: 0 additions & 1 deletion app/benches/resize_rgba/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ pub fn criterion_benchmark(c: &mut Criterion) {
})
});

//
c.bench_function("Fast image resize RGBA with alpha: Lanczos 3", |b| {
b.iter(|| {
let mut vc = Vec::from(img.as_bytes());
Expand Down
33 changes: 27 additions & 6 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,28 @@ fn main() {
//
let start_time = Instant::now();

let store =
ImageStore::<u8, 4>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize);
let mut f16_slice: Vec<f16> = bytes
.iter()
.map(|&x| f16::from_f32(x as f32 / 255f32))
.collect();

let resized = scaler.resize_rgba(
ImageSize::new(dimensions.0 as usize / 4, dimensions.1 as usize / 4),
store, true,
// let store =
// ImageStore::<u8, 4>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize);
//
// let resized = scaler.resize_rgba(
// ImageSize::new(dimensions.0 as usize / 4, dimensions.1 as usize / 4),
// store, true,
// );

let store = ImageStore::<f16, 4>::from_slice(
&mut f16_slice,
dimensions.0 as usize,
dimensions.1 as usize,
);

let resized = scaler.resize_rgba_f16(
ImageSize::new(dimensions.0 as usize / 1, dimensions.1 as usize / 1),
store,
);

let elapsed_time = start_time.elapsed();
Expand All @@ -54,7 +70,12 @@ fn main() {
// .iter()
// .map(|&x| (x * 255f32) as u8)
// .collect();
let dst = resized.as_bytes();
// let dst = resized.as_bytes();
let dst: Vec<u8> = resized
.as_bytes()
.iter()
.map(|&x| (x.to_f32() * 255f32).min(255f32) as u8)
.collect();

if resized.channels == 4 {
image::save_buffer(
Expand Down
27 changes: 10 additions & 17 deletions src/convolve_naive_f32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,9 @@
use crate::filter_weights::{FilterBounds, FilterWeights};
use num_traits::AsPrimitive;

#[inline(always)]
pub(crate) unsafe fn convolve_vertical_part_f32<
T: Copy + 'static + AsPrimitive<f32>,
const PART: usize,
const CHANNELS: usize,
const BUFFER_SIZE: usize,
>(
start_y: usize,
start_x: usize,
Expand All @@ -45,30 +43,25 @@ pub(crate) unsafe fn convolve_vertical_part_f32<
) where
f32: AsPrimitive<T>,
{
let mut store: [[f32; CHANNELS]; PART] = [[0f32; CHANNELS]; PART];
let mut store: [f32; BUFFER_SIZE] = [0f32; BUFFER_SIZE];

for j in 0..bounds.size {
let py = start_y + j;
let weight = unsafe { filter.add(j).read_unaligned() };
let src_ptr = src.add(src_stride * py);
for x in 0..PART {
let px = (start_x + x) * CHANNELS;
for x in 0..BUFFER_SIZE {
let px = start_x + x;
let s_ptr = src_ptr.add(px);
for c in 0..CHANNELS {
let store_p = store.get_unchecked_mut(x);
let store_v = store_p.get_unchecked_mut(c);
*store_v += unsafe { s_ptr.add(c).read_unaligned().as_() } * weight;
}
let store_p = store.get_unchecked_mut(x);
*store_p += unsafe { s_ptr.read_unaligned().as_() } * weight;
}
}

for x in 0..PART {
let px = (start_x + x) * CHANNELS;
for x in 0..BUFFER_SIZE {
let px = start_x + x;
let dst_ptr = dst.add(px);
for c in 0..CHANNELS {
let vl = *(*store.get_unchecked_mut(x)).get_unchecked_mut(c);
dst_ptr.add(c).write_unaligned(vl.as_());
}
let vl = *store.get_unchecked_mut(x);
dst_ptr.write_unaligned(vl.as_());
}
}

Expand Down
28 changes: 12 additions & 16 deletions src/convolve_naive_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ use crate::filter_weights::{FilterBounds, FilterWeights};
use crate::saturate_narrow::SaturateNarrow;
use crate::support::ROUNDING_APPROX;

#[inline(always)]
pub(crate) unsafe fn convolve_vertical_part<const PART: usize, const CHANNELS: usize>(
#[inline]
pub(crate) unsafe fn convolve_vertical_part<const BUFFER_SIZE: usize>(
start_y: usize,
start_x: usize,
src: *const u8,
Expand All @@ -41,30 +41,26 @@ pub(crate) unsafe fn convolve_vertical_part<const PART: usize, const CHANNELS: u
filter: *const i16,
bounds: &FilterBounds,
) {
let mut store: [[i32; CHANNELS]; PART] = [[ROUNDING_APPROX; CHANNELS]; PART];
let mut store: [i32; BUFFER_SIZE] = [ROUNDING_APPROX; BUFFER_SIZE];

for j in 0..bounds.size {
let py = start_y + j;
let weight = unsafe { filter.add(j).read_unaligned() } as i32;
let src_ptr = src.add(src_stride * py);
for x in 0..PART {
let px = (start_x + x) * CHANNELS;
for x in 0..BUFFER_SIZE {
let px = start_x + x;
let s_ptr = src_ptr.add(px);
for c in 0..CHANNELS {
let store_p = store.get_unchecked_mut(x);
let store_v = store_p.get_unchecked_mut(c);
*store_v += unsafe { s_ptr.add(c).read_unaligned() } as i32 * weight;
}

let store_p = store.get_unchecked_mut(x);
*store_p += unsafe { s_ptr.read_unaligned() } as i32 * weight;
}
}

for x in 0..PART {
let px = (start_x + x) * CHANNELS;
for x in 0..BUFFER_SIZE {
let px = start_x + x;
let dst_ptr = dst.add(px);
for c in 0..CHANNELS {
let vl = *(*store.get_unchecked_mut(x)).get_unchecked_mut(c);
dst_ptr.add(c).write_unaligned(vl.saturate_narrow());
}
let vl = *store.get_unchecked_mut(x);
dst_ptr.write_unaligned(vl.saturate_narrow());
}
}

Expand Down
38 changes: 36 additions & 2 deletions src/f16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ use crate::rgb_f32::convolve_vertical_rgb_native_row_f32;
use crate::ImageStore;

impl<'a> HorizontalConvolutionPass<f16, 4> for ImageStore<'a, f16, 4> {
#[inline(always)]
fn convolve_horizontal(
&self,
filter_weights: FilterWeights<f32>,
Expand Down Expand Up @@ -79,7 +78,6 @@ impl<'a> VerticalConvolutionPass<f16, 4> for ImageStore<'a, f16, 4> {
}

impl<'a> HorizontalConvolutionPass<f16, 3> for ImageStore<'a, f16, 3> {
#[inline(always)]
fn convolve_horizontal(
&self,
filter_weights: FilterWeights<f32>,
Expand Down Expand Up @@ -114,3 +112,39 @@ impl<'a> VerticalConvolutionPass<f16, 3> for ImageStore<'a, f16, 3> {
convolve_vertical_dispatch_f16(self, filter_weights, destination, pool, _dispatcher);
}
}

/// Horizontal convolution pass for `f16` image stores whose const parameter is `1`
/// (presumably the channel count, i.e. a single plane — confirm against `ImageStore`).
impl<'a> HorizontalConvolutionPass<f16, 1> for ImageStore<'a, f16, 1> {
    /// Selects the `f16`, one-component row kernels and hands them, together with the
    /// caller's arguments, to `convolve_horizontal_dispatch_f16`.
    fn convolve_horizontal(
        &self,
        filter_weights: FilterWeights<f32>,
        destination: &mut ImageStore<f16, 1>,
        pool: &Option<ThreadPool>,
    ) {
        // Function-pointer shapes of the two kernels forwarded to the dispatcher.
        type FourRowKernel =
            fn(usize, usize, &FilterWeights<f32>, *const f16, usize, *mut f16, usize);
        type SingleRowKernel = fn(usize, usize, &FilterWeights<f32>, *const f16, *mut f16);

        let single_row: SingleRowKernel = convolve_horizontal_rgb_native_row::<f16, 1>;
        let four_rows: Option<FourRowKernel> =
            Some(convolve_horizontal_rgba_4_row_f32::<f16, 1>);

        convolve_horizontal_dispatch_f16(
            self,
            filter_weights,
            destination,
            pool,
            four_rows,
            single_row,
        );
    }
}

/// Vertical convolution pass for `f16` image stores whose const parameter is `1`
/// (presumably the channel count — confirm against `ImageStore`).
impl<'a> VerticalConvolutionPass<f16, 1> for ImageStore<'a, f16, 1> {
    /// Forwards to `convolve_vertical_dispatch_f16` using the generic native row kernel
    /// instantiated for `f16` and one component.
    fn convolve_vertical(
        &self,
        filter_weights: FilterWeights<f32>,
        destination: &mut ImageStore<f16, 1>,
        pool: &Option<ThreadPool>,
    ) {
        // Function-pointer shape of the row kernel forwarded to the dispatcher.
        type RowKernel = fn(usize, &FilterBounds, *const f16, *mut f16, usize, *const f32);

        let row_kernel: RowKernel = convolve_vertical_rgb_native_row_f32::<f16, 1>;
        convolve_vertical_dispatch_f16(self, filter_weights, destination, pool, row_kernel);
    }
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ mod math;
mod nearest_sampler;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
mod neon;
mod plane_f32;
mod plane_u8;
mod rgb_f32;
mod rgb_u8;
mod rgba_f32;
Expand Down
75 changes: 75 additions & 0 deletions src/plane_f32.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright (c) Radzivon Bartoshyk. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

use crate::convolution::{HorizontalConvolutionPass, VerticalConvolutionPass};
use crate::convolve_naive_f32::{
convolve_horizontal_rgb_native_row, convolve_horizontal_rgba_4_row_f32,
};
use crate::dispatch_group_f32::{convolve_horizontal_dispatch_f32, convolve_vertical_dispatch_f32};
use crate::filter_weights::{FilterBounds, FilterWeights};
use crate::rgb_f32::convolve_vertical_rgb_native_row_f32;
use crate::ImageStore;
use rayon::ThreadPool;

/// Horizontal convolution pass for `f32` image stores whose const parameter is `1`
/// (presumably the channel count, i.e. a single plane — confirm against `ImageStore`).
impl<'a> HorizontalConvolutionPass<f32, 1> for ImageStore<'a, f32, 1> {
    /// Selects the `f32`, one-component row kernels and hands them, together with the
    /// caller's arguments, to `convolve_horizontal_dispatch_f32`.
    ///
    /// No `#[inline(always)]` here: the sibling planar and `f16` implementations of this
    /// trait method do not force inlining, and this method only forwards to a dispatcher.
    fn convolve_horizontal(
        &self,
        filter_weights: FilterWeights<f32>,
        destination: &mut ImageStore<f32, 1>,
        pool: &Option<ThreadPool>,
    ) {
        // Optional kernel passed in the dispatcher's "4 rows" slot.
        let _dispatcher_4_rows: Option<
            fn(usize, usize, &FilterWeights<f32>, *const f32, usize, *mut f32, usize),
        > = Some(convolve_horizontal_rgba_4_row_f32::<f32, 1>);
        // Kernel passed in the dispatcher's single-row slot.
        let _dispatcher_row: fn(usize, usize, &FilterWeights<f32>, *const f32, *mut f32) =
            convolve_horizontal_rgb_native_row::<f32, 1>;
        convolve_horizontal_dispatch_f32(
            self,
            filter_weights,
            destination,
            pool,
            _dispatcher_4_rows,
            _dispatcher_row,
        );
    }
}

/// Vertical convolution pass for `f32` image stores whose const parameter is `1`
/// (presumably the channel count — confirm against `ImageStore`).
impl<'a> VerticalConvolutionPass<f32, 1> for ImageStore<'a, f32, 1> {
    /// Forwards to `convolve_vertical_dispatch_f32` using the generic native row kernel
    /// instantiated for `f32` and one component.
    fn convolve_vertical(
        &self,
        filter_weights: FilterWeights<f32>,
        destination: &mut ImageStore<f32, 1>,
        pool: &Option<ThreadPool>,
    ) {
        // Function-pointer shape of the row kernel forwarded to the dispatcher.
        type RowKernel = fn(usize, &FilterBounds, *const f32, *mut f32, usize, *const f32);

        let row_kernel: RowKernel = convolve_vertical_rgb_native_row_f32::<f32, 1>;
        convolve_vertical_dispatch_f32(self, filter_weights, destination, pool, row_kernel);
    }
}
78 changes: 78 additions & 0 deletions src/plane_u8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (c) Radzivon Bartoshyk. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use rayon::ThreadPool;

use crate::convolution::{HorizontalConvolutionPass, VerticalConvolutionPass};
use crate::convolve_naive_u8::convolve_horizontal_rgba_native_row;
use crate::dispatch_group_u8::{convolve_horizontal_dispatch_u8, convolve_vertical_dispatch_u8};
use crate::filter_weights::{FilterBounds, FilterWeights};
use crate::rgb_u8::convolve_vertical_rgb_native_row_u8;
use crate::ImageStore;

/// Horizontal convolution pass for `u8` image stores whose const parameter is `1`
/// (presumably the channel count, i.e. a single plane — confirm against `ImageStore`).
impl<'a> HorizontalConvolutionPass<u8, 1> for ImageStore<'a, u8, 1> {
    /// Forwards to `convolve_horizontal_dispatch_u8` with only a single-row kernel;
    /// no 4-row kernel is supplied for this layout.
    ///
    /// The kernels take `FilterWeights<i16>` while this method receives
    /// `FilterWeights<f32>`; the conversion presumably happens inside the dispatcher.
    fn convolve_horizontal(
        &self,
        filter_weights: FilterWeights<f32>,
        destination: &mut ImageStore<u8, 1>,
        _pool: &Option<ThreadPool>,
    ) {
        // Function-pointer shapes of the two kernel slots the dispatcher accepts.
        type FourRowKernel =
            fn(usize, usize, &FilterWeights<i16>, *const u8, usize, *mut u8, usize);
        type SingleRowKernel = fn(usize, usize, &FilterWeights<i16>, *const u8, *mut u8);

        let single_row: SingleRowKernel = convolve_horizontal_rgba_native_row::<1>;
        // Explicitly absent: the dispatcher is given no 4-row routine.
        let four_rows: Option<FourRowKernel> = None;

        convolve_horizontal_dispatch_u8(
            self,
            filter_weights,
            destination,
            _pool,
            four_rows,
            single_row,
        );
    }
}

/// Vertical convolution pass for `u8` image stores whose const parameter is `1`
/// (presumably the channel count — confirm against `ImageStore`).
impl<'a> VerticalConvolutionPass<u8, 1> for ImageStore<'a, u8, 1> {
    /// Forwards to `convolve_vertical_dispatch_u8` using the native `u8` row kernel
    /// instantiated with `1`.
    fn convolve_vertical(
        &self,
        filter_weights: FilterWeights<f32>,
        destination: &mut ImageStore<u8, 1>,
        pool: &Option<ThreadPool>,
    ) {
        // Row kernel shape, in argument order: destination width, filter bounds,
        // source pointer, destination pointer, source stride, pointer to `i16` weights.
        type RowKernel = fn(usize, &FilterBounds, *const u8, *mut u8, usize, *const i16);

        let row_kernel: RowKernel = convolve_vertical_rgb_native_row_u8::<1>;
        convolve_vertical_dispatch_u8(self, filter_weights, destination, pool, row_kernel);
    }
}
Loading

0 comments on commit a0183d5

Please sign in to comment.