Skip to content

Commit

Permalink
Dot 8 RGB8
Browse files (browse the repository at this point in the history)
  • Loading branch information
awxkee committed Jan 6, 2025
1 parent 5733ef7 commit de97e0f
Show file tree
Hide file tree
Showing 4 changed files with 555 additions and 7 deletions.
6 changes: 6 additions & 0 deletions src/avx2/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ mod alpha_u16;
mod alpha_u8;
mod check_alpha;
mod rgb_u8;
+ #[cfg(feature = "nightly_avx512")]
+ mod rgb_u8_dot_i8;
#[cfg(feature = "half")]
mod rgba_f16;
mod rgba_f32;
Expand All @@ -57,6 +59,10 @@ pub(crate) use check_alpha::{
avx_has_non_constant_cap_alpha_rgba16, avx_has_non_constant_cap_alpha_rgba8,
};
pub(crate) use rgb_u8::{convolve_horizontal_rgb_avx_row_one, convolve_horizontal_rgb_avx_rows_4};
+ #[cfg(feature = "nightly_avx512")]
+ pub(crate) use rgb_u8_dot_i8::{
+ convolve_horizontal_rgb_avx_row_i8_one, convolve_horizontal_rgb_avx_rows_4_i8,
+ };
#[cfg(feature = "half")]
pub(crate) use rgba_f16::{
convolve_horizontal_rgba_avx_row_one_f16, convolve_horizontal_rgba_avx_rows_4_f16,
Expand Down
14 changes: 7 additions & 7 deletions src/avx2/rgb_u8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub(crate) fn convolve_horizontal_rgb_avx_rows_4(
#[cfg(feature = "nightly_avx512")]
{
if std::arch::is_x86_feature_detected!("avxvnni") {
- return convolve_horizontal_rgb_sse_rows_4_vnni(
+ return convolve_horizontal_rgb_avx_rows_4_vnni(
src,
src_stride,
dst,
Expand All @@ -56,19 +56,19 @@ pub(crate) fn convolve_horizontal_rgb_avx_rows_4(
);
}
}
- convolve_horizontal_rgb_sse_rows_4_reg(src, src_stride, dst, dst_stride, filter_weights);
+ convolve_horizontal_rgb_avx_rows_4_reg(src, src_stride, dst, dst_stride, filter_weights);
}
}

#[target_feature(enable = "avx2")]
- unsafe fn convolve_horizontal_rgb_sse_rows_4_reg(
+ unsafe fn convolve_horizontal_rgb_avx_rows_4_reg(
src: &[u8],
src_stride: usize,
dst: &mut [u8],
dst_stride: usize,
filter_weights: &FilterWeights<i16>,
) {
- convolve_horizontal_rgb_sse_rows_4_impl::<false>(
+ convolve_horizontal_rgb_avx_rows_4_impl::<false>(
src,
src_stride,
dst,
Expand All @@ -79,14 +79,14 @@ unsafe fn convolve_horizontal_rgb_sse_rows_4_reg(

#[cfg(feature = "nightly_avx512")]
#[target_feature(enable = "avx2", enable = "avxvnni")]
- unsafe fn convolve_horizontal_rgb_sse_rows_4_vnni(
+ unsafe fn convolve_horizontal_rgb_avx_rows_4_vnni(
src: &[u8],
src_stride: usize,
dst: &mut [u8],
dst_stride: usize,
filter_weights: &FilterWeights<i16>,
) {
- convolve_horizontal_rgb_sse_rows_4_impl::<true>(
+ convolve_horizontal_rgb_avx_rows_4_impl::<true>(
src,
src_stride,
dst,
Expand Down Expand Up @@ -167,7 +167,7 @@ unsafe fn make_second_4(pixel: __m128i, pixel2: __m128i, shuf: __m256i) -> __m25
}

#[inline(always)]
- unsafe fn convolve_horizontal_rgb_sse_rows_4_impl<const HAS_DOT: bool>(
+ unsafe fn convolve_horizontal_rgb_avx_rows_4_impl<const HAS_DOT: bool>(
src: &[u8],
src_stride: usize,
dst: &mut [u8],
Expand Down
Loading

0 comments on commit de97e0f

Please sign in to comment.