Skip to content

Commit

Permalink
Refactor, added 4 rows for u8
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Jun 19, 2024
1 parent 9d7ad1f commit 933185b
Show file tree
Hide file tree
Showing 35 changed files with 2,751 additions and 3,325 deletions.
35 changes: 25 additions & 10 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,46 +8,61 @@ use fast_image_resize::{
use image::io::Reader as ImageReader;
use image::{EncodableLayout, GenericImageView};

use pic_scale::{ImageSize, ImageStore, LabScaler, LChScaler, LinearApproxScaler, LinearScaler, LuvScaler, ResamplingFunction, Scaler, Scaling, SigmoidalScaler, ThreadingPolicy, TransferFunction, XYZScaler};
use pic_scale::{
ImageSize, ImageStore, LinearScaler, ResamplingFunction, Scaler, Scaling, ThreadingPolicy,
};

fn main() {
// test_fast_image();

let img = ImageReader::open("./assets/beach_horizon.jpg")
let img = ImageReader::open("./assets/asset.jpg")
.unwrap()
.decode()
.unwrap();
let dimensions = img.dimensions();
let mut bytes = Vec::from(img.as_bytes());

let mut scaler = LinearScaler::new(ResamplingFunction::Lagrange3);
scaler.set_threading_policy(ThreadingPolicy::Single);
// let store =
// ImageStore::<u8, 4>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize);
// let resized = scaler.resize_rgba(
// ImageSize::new(dimensions.0 as usize / 3, dimensions.1 as usize / 3),
// store,
// false,
// );

let mut f_store: Vec<f32> = bytes.iter().map(|&x| x as f32 * (1f32 / 255f32)).collect();

let start_time = Instant::now();

let mut scaler = LChScaler::new(ResamplingFunction::Lanczos3);
scaler.set_threading_policy(ThreadingPolicy::Single);
let store =
ImageStore::<u8, 3>::from_slice(&mut bytes, dimensions.0 as usize, dimensions.1 as usize);
let resized = scaler.resize_rgb(
ImageSize::new(dimensions.0 as usize / 2, dimensions.1 as usize / 2),
ImageStore::<f32, 3>::from_slice(&mut f_store, dimensions.0 as usize, dimensions.1 as usize);
let resized = scaler.resize_rgb_f32(
ImageSize::new(dimensions.0 as usize / 3, dimensions.1 as usize / 3),
store,
);

let elapsed_time = start_time.elapsed();
// Print the elapsed time in milliseconds
println!("Scaler: {:.2?}", elapsed_time);

let j_store: Vec<u8> = resized.as_bytes().iter().map(|&x| (x * 255f32) as u8).collect();
let dst = j_store;

if resized.channels == 4 {
image::save_buffer(
"converted.png",
resized.as_bytes(),
&dst,
resized.width as u32,
resized.height as u32,
image::ExtendedColorType::Rgba8,
)
.unwrap();
} else {
image::save_buffer(
"converted_lch.jpg",
resized.as_bytes(),
"converted.jpg",
&dst,
resized.width as u32,
resized.height as u32,
image::ExtendedColorType::Rgb8,
Expand Down
File renamed without changes.
5 changes: 1 addition & 4 deletions src/lch_scaler.rs → src/colors/lch_scaler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
* // license that can be found in the LICENSE file.
*/

use colorutils_rs::{
lch_to_rgb, lch_with_alpha_to_rgba,
rgb_to_lch, rgba_to_lch_with_alpha,
};
use colorutils_rs::{lch_to_rgb, lch_with_alpha_to_rgba, rgb_to_lch, rgba_to_lch_with_alpha};

use crate::{ImageSize, ImageStore, ResamplingFunction, Scaler, Scaling, ThreadingPolicy};

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
15 changes: 15 additions & 0 deletions src/colors/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
mod lab_scaler;
mod lch_scaler;
mod linear_precise_scaler;
mod linear_scaler;
mod luv_scaler;
mod sigmoidal_scaler;
mod xyz_scaler;

pub use lab_scaler::*;
pub use lch_scaler::*;
pub use linear_precise_scaler::*;
pub use linear_scaler::*;
pub use luv_scaler::*;
pub use sigmoidal_scaler::*;
pub use xyz_scaler::*;
File renamed without changes.
File renamed without changes.
253 changes: 253 additions & 0 deletions src/convolve_naive_f32.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
/*
* // Copyright (c) the Radzivon Bartoshyk. All rights reserved.
* //
* // Use of this source code is governed by a BSD-style
* // license that can be found in the LICENSE file.
*/

use crate::filter_weights::{FilterBounds, FilterWeights};

/// Applies a vertical filter to `PART` horizontally adjacent pixels of
/// `CHANNELS` interleaved `f32` components each and stores the result.
///
/// For every tap `j` in `0..bounds.size` the weight `filter[j]` is multiplied
/// with the samples of source row `start_y + j` and accumulated per component.
/// The accumulated values are then written to `dst` at the same horizontal
/// element offset, `(start_x + x) * CHANNELS`. Note that `bounds.start` is not
/// consulted here: the first source row is taken from `start_y`.
///
/// * `src` - base pointer of the source image; rows are `src_stride` `f32`
///   elements apart.
/// * `dst` - pointer to the start of the destination row (no vertical offset
///   is applied to it).
/// * `filter` - pointer to at least `bounds.size` weights.
///
/// # Safety
///
/// The caller must guarantee that:
/// * `filter` is valid for reading `bounds.size` consecutive `f32` values;
/// * for every row `start_y..start_y + bounds.size`, `src` is valid for reading
///   elements `start_x * CHANNELS..(start_x + PART) * CHANNELS` of that row;
/// * `dst` is valid for writing elements
///   `start_x * CHANNELS..(start_x + PART) * CHANNELS`.
///
/// All accesses go through `read_unaligned` / `write_unaligned`, so the
/// pointers do not need to be aligned for `f32`.
#[inline(always)]
pub(crate) unsafe fn convolve_vertical_part_f32<const PART: usize, const CHANNELS: usize>(
    start_y: usize,
    start_x: usize,
    src: *const f32,
    src_stride: usize,
    dst: *mut f32,
    filter: *const f32,
    bounds: &FilterBounds,
) {
    // One accumulator per (pixel, component) pair handled by this call.
    let mut accumulators = [[0f32; CHANNELS]; PART];

    for tap in 0..bounds.size {
        let coefficient = filter.add(tap).read_unaligned();
        let row = src.add(src_stride * (start_y + tap));
        for (column, pixel) in accumulators.iter_mut().enumerate() {
            let sample = row.add((start_x + column) * CHANNELS);
            for (channel, acc) in pixel.iter_mut().enumerate() {
                *acc += sample.add(channel).read_unaligned() * coefficient;
            }
        }
    }

    // Flush the accumulators to the destination row.
    for (column, pixel) in accumulators.iter().enumerate() {
        let out = dst.add((start_x + column) * CHANNELS);
        for (channel, &value) in pixel.iter().enumerate() {
            out.add(channel).write_unaligned(value);
        }
    }
}

/// Horizontally convolves a single row of interleaved `f32` pixels.
///
/// For each destination pixel `x` in `0..dst_width` the filter window
/// `filter_weights.bounds[x]` selects `size` source pixels starting at `start`;
/// they are weighted by the coefficients stored at
/// `filter_weights.weights[x * aligned_size..]` and summed per component. The
/// second (unnamed) parameter is ignored.
///
/// Component 0 is always processed, component 1 when `CHANNELS > 1`,
/// component 2 when `CHANNELS > 2`, and component 3 only when `CHANNELS == 4`.
///
/// NOTE(review): this function is not marked `unsafe` although it dereferences
/// the raw pointers it receives. Callers must ensure that
/// `unsafe_source_ptr_0` is readable for every pixel addressed by the filter
/// bounds, that `unsafe_destination_ptr_0` is writable for `dst_width` pixels,
/// and that `filter_weights.bounds` holds at least `dst_width` entries.
#[inline(always)]
pub(crate) fn convolve_horizontal_rgb_native_row<const CHANNELS: usize>(
    dst_width: usize,
    _: usize,
    filter_weights: &FilterWeights<f32>,
    unsafe_source_ptr_0: *const f32,
    unsafe_destination_ptr_0: *mut f32,
) {
    // Number of leading components that are filtered for this layout.
    let lanes = match CHANNELS {
        0 | 1 => 1,
        2 => 2,
        4 => 4,
        _ => 3,
    };

    // SAFETY: relies on the caller-provided pointers and filter table being
    // large enough for `dst_width` output pixels (see the note above).
    unsafe {
        let kernel = filter_weights.weights.as_ptr();
        let mut kernel_base = 0usize;

        for out_x in 0..dst_width {
            let mut sums = [0f32; 4];

            let window = filter_weights.bounds.get_unchecked(out_x);
            for tap in 0..window.size {
                let coefficient = kernel.add(tap + kernel_base).read_unaligned();
                let pixel = unsafe_source_ptr_0.add((window.start + tap) * CHANNELS);
                for (lane, sum) in sums.iter_mut().take(lanes).enumerate() {
                    *sum += pixel.add(lane).read_unaligned() * coefficient;
                }
            }

            let out = unsafe_destination_ptr_0.add(out_x * CHANNELS);
            for (lane, &sum) in sums.iter().take(lanes).enumerate() {
                out.add(lane).write_unaligned(sum);
            }

            // Coefficients of consecutive output pixels are `aligned_size` apart.
            kernel_base += filter_weights.aligned_size;
        }
    }
}

/// Horizontally convolves four consecutive rows of interleaved `f32` pixels
/// in one pass, sharing each filter coefficient between the rows.
///
/// Row `r` (0..4) is read from `unsafe_source_ptr_0 + r * src_stride` and
/// written to `unsafe_destination_ptr_0 + r * dst_stride`; both strides are
/// counted in `f32` elements. For each destination pixel `x` in `0..dst_width`
/// the window `filter_weights.bounds[x]` and the coefficients at
/// `filter_weights.weights[x * aligned_size..]` are applied to every row. The
/// second (unnamed) parameter is ignored.
///
/// Component 0 is always processed, component 1 when `CHANNELS > 1`,
/// component 2 when `CHANNELS > 2`, and component 3 only when `CHANNELS == 4`.
///
/// NOTE(review): this function is not marked `unsafe` although it dereferences
/// the raw pointers it receives. Callers must ensure all four source rows are
/// readable for every pixel addressed by the filter bounds, all four
/// destination rows are writable for `dst_width` pixels, and
/// `filter_weights.bounds` holds at least `dst_width` entries.
// NOTE(review): the reason for `allow(unused)` is not visible here; presumably
// not every build configuration calls this variant - confirm.
#[allow(unused)]
pub(crate) fn convolve_horizontal_rgba_4_row_f32<const CHANNELS: usize>(
    dst_width: usize,
    _: usize,
    filter_weights: &FilterWeights<f32>,
    unsafe_source_ptr_0: *const f32,
    src_stride: usize,
    unsafe_destination_ptr_0: *mut f32,
    dst_stride: usize,
) {
    const ROWS: usize = 4;

    // Number of leading components that are filtered for this layout.
    let lanes = match CHANNELS {
        0 | 1 => 1,
        2 => 2,
        4 => 4,
        _ => 3,
    };

    // SAFETY: relies on the caller-provided pointers, strides and filter table
    // describing four valid rows (see the note above).
    unsafe {
        let mut kernel_base = 0usize;
        let kernel = filter_weights.weights.as_ptr();

        let src_rows: [*const f32; ROWS] = [
            unsafe_source_ptr_0,
            unsafe_source_ptr_0.add(src_stride),
            unsafe_source_ptr_0.add(src_stride * 2),
            unsafe_source_ptr_0.add(src_stride * 3),
        ];
        let dst_rows: [*mut f32; ROWS] = [
            unsafe_destination_ptr_0,
            unsafe_destination_ptr_0.add(dst_stride),
            unsafe_destination_ptr_0.add(dst_stride * 2),
            unsafe_destination_ptr_0.add(dst_stride * 3),
        ];

        for out_x in 0..dst_width {
            // sums[row][component]
            let mut sums = [[0f32; 4]; ROWS];

            let window = filter_weights.bounds.get_unchecked(out_x);
            for tap in 0..window.size {
                let offset = (window.start + tap) * CHANNELS;
                let coefficient = kernel.add(tap + kernel_base).read_unaligned();

                for (row_sums, &row) in sums.iter_mut().zip(src_rows.iter()) {
                    let pixel = row.add(offset);
                    for (lane, sum) in row_sums.iter_mut().take(lanes).enumerate() {
                        *sum += pixel.add(lane).read_unaligned() * coefficient;
                    }
                }
            }

            let offset = out_x * CHANNELS;
            for (row_sums, &row) in sums.iter().zip(dst_rows.iter()) {
                let out = row.add(offset);
                for (lane, &sum) in row_sums.iter().take(lanes).enumerate() {
                    out.add(lane).write_unaligned(sum);
                }
            }

            // Coefficients of consecutive output pixels are `aligned_size` apart.
            kernel_base += filter_weights.aligned_size;
        }
    }
}
Loading

0 comments on commit 933185b

Please sign in to comment.