// Patch overview (preamble text ahead of the first "diff --git" header; it is not part of any hunk).
//
// Adds 90-degree rotation helpers ("transpose clockwise" / "transpose counter-clockwise") in four files:
//
//   include/sparkyuv-basic.h
//     Macro-generated declarations TransposeClockwise<Fmt> and TransposeCounterClockwise<Fmt> for
//     RGBA, RGB, Channel, RGBA1010102 (uint8_t storage) and RGBA16, RGB16, Channel16 (uint16_t storage).
//     Each takes (src, srcStride, dst, dstStride, width, height).
//
//   src/Transpose-inl.h
//     TransposeClockwiseImpl / TransposeCounterClockwiseImpl: plain nested loops over y in [0, height)
//     and x in [0, width), copying 1, 3 or 4 components per pixel depending on Surface.
//       clockwise:          source pixel (x, y) -> destination row x,             pixel (height - 1 - y)
//       counter-clockwise:  source pixel (x, y) -> destination row (width - 1 - x), pixel y
//     Destination rows are addressed as mDestinationPtr + row * newStride.
//     Per-format wrappers named <Function><Fmt>HWY forward to these templates.
//
//   src/Transpose.cpp
//     HWY_EXPORT for every <Function><Fmt>HWY wrapper, plus the public functions, which forward
//     their arguments unchanged through HWY_DYNAMIC_DISPATCH.
//
//   tools/main.cpp
//     Re-indents two existing calls, drops the second FlipHorizontalRGBA pass, and adds a
//     TransposeCounterClockwiseRGBA call whose output is encoded to "jpeg_trns.jpeg" with width and
//     height swapped (trnsStride = sizeof(uint8_t) * height * 4, buffer size trnsStride * width).
//
// NOTE(review): this copy of the patch appears to have lost its line breaks and everything that stood
//   between angle brackets ("template" has no parameter list, reinterpret_cast has no target type,
//   std::vector has no element type). It will presumably not apply or compile in this form - restore
//   it from the original commit before use.
// NOTE(review): in both Impl functions mSourcePtr is advanced by srcStride per row but is never read
//   in the visible code; the row pointer is recomputed as "src + y * srcStride", which is pointer
//   arithmetic on const T*. If strides are byte counts (the sizeof-based stride in tools/main.cpp
//   suggests so - TODO confirm), that offset looks too large for the uint16_t formats.
// NOTE(review): the per-pixel copy chain only has branches for SURFACE_CHANNEL, SURFACE_CHANNELS_3 and
//   SURFACE_CHANNELS_4. How the RGBA1010102 instantiation maps onto Surface is not visible here -
//   verify that it reaches a copying branch.
diff --git a/include/sparkyuv-basic.h b/include/sparkyuv-basic.h index ec16853..e6d3a52 100644 --- a/include/sparkyuv-basic.h +++ b/include/sparkyuv-basic.h @@ -151,6 +151,38 @@ FLIP_HORIZONTAL_DECLARATION_H(RGB16, uint16_t, CHANNELS_3) FLIP_HORIZONTAL_DECLARATION_H(Channel16, uint16_t, CHANNEL) #undef FLIP_HORIZONTAL_DECLARATION_H + +#define TRANSPOSE_CLOCKWISE_DECLARATION_H(srcPixel, storageType) \ + void TransposeClockwise##srcPixel(const storageType* src, const uint32_t srcStride,\ + storageType * dst, const uint32_t dstStride,\ + const uint32_t width, const uint32_t height); + +TRANSPOSE_CLOCKWISE_DECLARATION_H(RGBA, uint8_t) +TRANSPOSE_CLOCKWISE_DECLARATION_H(RGB, uint8_t) +TRANSPOSE_CLOCKWISE_DECLARATION_H(Channel, uint8_t) +TRANSPOSE_CLOCKWISE_DECLARATION_H(RGBA1010102, uint8_t) + +TRANSPOSE_CLOCKWISE_DECLARATION_H(RGBA16, uint16_t) +TRANSPOSE_CLOCKWISE_DECLARATION_H(RGB16, uint16_t) +TRANSPOSE_CLOCKWISE_DECLARATION_H(Channel16, uint16_t) + +#undef TRANSPOSE_CLOCKWISE_DECLARATION_H + +#define TRANSPOSE_CCW_DECLARATION_H(srcPixel, storageType) \ + void TransposeCounterClockwise##srcPixel(const storageType * src, const uint32_t srcStride,\ + storageType * dst, const uint32_t dstStride,\ + const uint32_t width, const uint32_t height); + +TRANSPOSE_CCW_DECLARATION_H(RGBA, uint8_t) +TRANSPOSE_CCW_DECLARATION_H(RGB, uint8_t) +TRANSPOSE_CCW_DECLARATION_H(Channel, uint8_t) +TRANSPOSE_CCW_DECLARATION_H(RGBA1010102, uint8_t) + +TRANSPOSE_CCW_DECLARATION_H(RGBA16, uint16_t) +TRANSPOSE_CCW_DECLARATION_H(RGB16, uint16_t) +TRANSPOSE_CCW_DECLARATION_H(Channel16, uint16_t) + +#undef TRANSPOSE_CCW_DECLARATION_H } #endif //YUV_INCLUDE_SPARKYUV_BASIC_H_ diff --git a/src/Transpose-inl.h b/src/Transpose-inl.h index 2a6429a..e769912 100644 --- a/src/Transpose-inl.h +++ b/src/Transpose-inl.h @@ -28,6 +28,122 @@ HWY_BEFORE_NAMESPACE(); namespace sparkyuv::HWY_NAMESPACE { +using namespace hwy; +using namespace hwy::HWY_NAMESPACE; + +template +void +TransposeClockwiseImpl(const T 
*SPARKYUV_RESTRICT src, const uint32_t srcStride, + T *SPARKYUV_RESTRICT dst, const uint32_t newStride, + const uint32_t width, const uint32_t height) { + + auto mSourcePtr = reinterpret_cast(src); + auto mDestinationPtr = reinterpret_cast(dst); + + const int channels = Surface == SURFACE_CHANNEL ? 1 : (Surface == SURFACE_CHANNELS_3 ? 3 : 4); + + for (uint32_t y = 0; y < height; ++y) { + auto mSrc = reinterpret_cast(src + y * srcStride); + for (uint32_t x = 0; x < width; ++x) { + + auto mTransposed = reinterpret_cast(mDestinationPtr + x * newStride); + mTransposed += (height - 1 - y) * channels; + + if (Surface == SURFACE_CHANNEL) { + mTransposed[0] = mSrc[0]; + } else if (Surface == SURFACE_CHANNELS_3) { + mTransposed[0] = mSrc[0]; + mTransposed[1] = mSrc[1]; + mTransposed[2] = mSrc[2]; + } else if (Surface == SURFACE_CHANNELS_4) { + mTransposed[0] = mSrc[0]; + mTransposed[1] = mSrc[1]; + mTransposed[2] = mSrc[2]; + mTransposed[3] = mSrc[3]; + } + + mSrc += channels; + } + + mSourcePtr += srcStride; + } +} + +#define TRANSPOSE_CLOCKWISE_DECLARATION(srcPixel, storageType, surfaceType) \ + void TransposeClockwise##srcPixel##HWY(const storageType *SPARKYUV_RESTRICT src, const uint32_t srcStride,\ + storageType *SPARKYUV_RESTRICT dst, const uint32_t dstStride,\ + const uint32_t width, const uint32_t height) {\ + TransposeClockwiseImpl(src, srcStride, dst, dstStride,\ + width, height); \ + } + +TRANSPOSE_CLOCKWISE_DECLARATION(RGBA, uint8_t, CHANNELS_4) +TRANSPOSE_CLOCKWISE_DECLARATION(RGB, uint8_t, CHANNELS_3) +TRANSPOSE_CLOCKWISE_DECLARATION(Channel, uint8_t, CHANNEL) +TRANSPOSE_CLOCKWISE_DECLARATION(RGBA1010102, uint8_t, RGBA1010102) + +TRANSPOSE_CLOCKWISE_DECLARATION(RGBA16, uint16_t, CHANNELS_4) +TRANSPOSE_CLOCKWISE_DECLARATION(RGB16, uint16_t, CHANNELS_3) +TRANSPOSE_CLOCKWISE_DECLARATION(Channel16, uint16_t, CHANNEL) + +#undef TRANSPOSE_CLOCKWISE_DECLARATION + +template +void +TransposeCounterClockwiseImpl(const T *SPARKYUV_RESTRICT src, const uint32_t srcStride, 
+ T *SPARKYUV_RESTRICT dst, const uint32_t newStride, + const uint32_t width, const uint32_t height) { + + auto mSourcePtr = reinterpret_cast(src); + auto mDestinationPtr = reinterpret_cast(dst); + + const int channels = Surface == SURFACE_CHANNEL ? 1 : (Surface == SURFACE_CHANNELS_3 ? 3 : 4); + + for (uint32_t y = 0; y < height; ++y) { + auto mSrc = reinterpret_cast(src + y * srcStride); + for (uint32_t x = 0; x < width; ++x) { + + auto mTransposed = reinterpret_cast(mDestinationPtr + (width - 1 - x) * newStride); + mTransposed += y * channels; + + if (Surface == SURFACE_CHANNEL) { + mTransposed[0] = mSrc[0]; + } else if (Surface == SURFACE_CHANNELS_3) { + mTransposed[0] = mSrc[0]; + mTransposed[1] = mSrc[1]; + mTransposed[2] = mSrc[2]; + } else if (Surface == SURFACE_CHANNELS_4) { + mTransposed[0] = mSrc[0]; + mTransposed[1] = mSrc[1]; + mTransposed[2] = mSrc[2]; + mTransposed[3] = mSrc[3]; + } + + mSrc += channels; + } + + mSourcePtr += srcStride; + } +} + +#define TRANSPOSE_CCW_DECLARATION(srcPixel, storageType, surfaceType) \ + void TransposeCounterClockwise##srcPixel##HWY(const storageType *SPARKYUV_RESTRICT src, const uint32_t srcStride,\ + storageType *SPARKYUV_RESTRICT dst, const uint32_t dstStride,\ + const uint32_t width, const uint32_t height) {\ + TransposeCounterClockwiseImpl(src, srcStride, dst, dstStride,\ + width, height); \ + } + +TRANSPOSE_CCW_DECLARATION(RGBA, uint8_t, CHANNELS_4) +TRANSPOSE_CCW_DECLARATION(RGB, uint8_t, CHANNELS_3) +TRANSPOSE_CCW_DECLARATION(Channel, uint8_t, CHANNEL) +TRANSPOSE_CCW_DECLARATION(RGBA1010102, uint8_t, RGBA1010102) + +TRANSPOSE_CCW_DECLARATION(RGBA16, uint16_t, CHANNELS_4) +TRANSPOSE_CCW_DECLARATION(RGB16, uint16_t, CHANNELS_3) +TRANSPOSE_CCW_DECLARATION(Channel16, uint16_t, CHANNEL) + +#undef TRANSPOSE_CCW_DECLARATION } HWY_AFTER_NAMESPACE(); diff --git a/src/Transpose.cpp b/src/Transpose.cpp index 9b73b72..c942385 100644 --- a/src/Transpose.cpp +++ b/src/Transpose.cpp @@ -25,6 +25,71 @@ #if HWY_ONCE namespace 
sparkyuv { +#define TRANSPOSE_CLOCKWISE_EXPORT(srcPixel, storageType)\ + HWY_EXPORT(TransposeClockwise##srcPixel##HWY); + +TRANSPOSE_CLOCKWISE_EXPORT(RGBA, uint8_t) +TRANSPOSE_CLOCKWISE_EXPORT(RGB, uint8_t) +TRANSPOSE_CLOCKWISE_EXPORT(Channel, uint8_t) +TRANSPOSE_CLOCKWISE_EXPORT(RGBA1010102, uint8_t) + +TRANSPOSE_CLOCKWISE_EXPORT(RGBA16, uint16_t) +TRANSPOSE_CLOCKWISE_EXPORT(RGB16, uint16_t) +TRANSPOSE_CLOCKWISE_EXPORT(Channel16, uint16_t) + +#undef TRANSPOSE_CLOCKWISE_EXPORT + +#define TRANSPOSE_CLOCKWISE_DECLARATION_R(srcPixel, storageType, surfaceType) \ + void TransposeClockwise##srcPixel(const storageType *SPARKYUV_RESTRICT src, const uint32_t srcStride,\ + storageType *SPARKYUV_RESTRICT dst, const uint32_t dstStride,\ + const uint32_t width, const uint32_t height) {\ + HWY_DYNAMIC_DISPATCH(TransposeClockwise##srcPixel##HWY)(src, srcStride, dst, dstStride,\ + width, height); \ + } + +TRANSPOSE_CLOCKWISE_DECLARATION_R(RGBA, uint8_t, CHANNELS_4) +TRANSPOSE_CLOCKWISE_DECLARATION_R(RGB, uint8_t, CHANNELS_3) +TRANSPOSE_CLOCKWISE_DECLARATION_R(Channel, uint8_t, CHANNEL) +TRANSPOSE_CLOCKWISE_DECLARATION_R(RGBA1010102, uint8_t, RGBA1010102) + +TRANSPOSE_CLOCKWISE_DECLARATION_R(RGBA16, uint16_t, CHANNELS_4) +TRANSPOSE_CLOCKWISE_DECLARATION_R(RGB16, uint16_t, CHANNELS_3) +TRANSPOSE_CLOCKWISE_DECLARATION_R(Channel16, uint16_t, CHANNEL) + +#undef TRANSPOSE_CLOCKWISE_DECLARATION_R + +#define TRANSPOSE_CCW_EXPORT(srcPixel, storageType)\ + HWY_EXPORT(TransposeCounterClockwise##srcPixel##HWY); + +TRANSPOSE_CCW_EXPORT(RGBA, uint8_t) +TRANSPOSE_CCW_EXPORT(RGB, uint8_t) +TRANSPOSE_CCW_EXPORT(Channel, uint8_t) +TRANSPOSE_CCW_EXPORT(RGBA1010102, uint8_t) + +TRANSPOSE_CCW_EXPORT(RGBA16, uint16_t) +TRANSPOSE_CCW_EXPORT(RGB16, uint16_t) +TRANSPOSE_CCW_EXPORT(Channel16, uint16_t) + +#undef TRANSPOSE_CCW_EXPORT + +#define TRANSPOSE_CCW_DECLARATION_E(srcPixel, storageType, surfaceType) \ + void TransposeCounterClockwise##srcPixel(const storageType *SPARKYUV_RESTRICT src, const uint32_t 
srcStride,\ + storageType *SPARKYUV_RESTRICT dst, const uint32_t dstStride,\ + const uint32_t width, const uint32_t height) {\ + HWY_DYNAMIC_DISPATCH(TransposeCounterClockwise##srcPixel##HWY)(src, srcStride, dst, dstStride,\ + width, height); \ + } + +TRANSPOSE_CCW_DECLARATION_E(RGBA, uint8_t, CHANNELS_4) +TRANSPOSE_CCW_DECLARATION_E(RGB, uint8_t, CHANNELS_3) +TRANSPOSE_CCW_DECLARATION_E(Channel, uint8_t, CHANNEL) +TRANSPOSE_CCW_DECLARATION_E(RGBA1010102, uint8_t, RGBA1010102) + +TRANSPOSE_CCW_DECLARATION_E(RGBA16, uint16_t, CHANNELS_4) +TRANSPOSE_CCW_DECLARATION_E(RGB16, uint16_t, CHANNELS_3) +TRANSPOSE_CCW_DECLARATION_E(Channel16, uint16_t, CHANNEL) + +#undef TRANSPOSE_CCW_DECLARATION_E } #endif \ No newline at end of file diff --git a/tools/main.cpp b/tools/main.cpp index 3224b5e..5df6935 100644 --- a/tools/main.cpp +++ b/tools/main.cpp @@ -151,9 +151,9 @@ int main() { bench(1, ANSI_COLOR_GREEN, "RGBA -> YCbCr420", [&]() { sparkyuv::RGBAToYCbCr444(rgbaData.data(), rgbaStride, width, height, - yPlane.data(), yPlaneStride, - uPlane.data(), uvPlaneStride, - vPlane.data(), uvPlaneStride, 0.299f, 0.114f, sparkyuv::YUV_RANGE_TV); + yPlane.data(), yPlaneStride, + uPlane.data(), uvPlaneStride, + vPlane.data(), uvPlaneStride, 0.299f, 0.114f, sparkyuv::YUV_RANGE_TV); // libyuv::ABGRToI420(rgbaData.data(), rgbaStride, yPlane.data(), yPlaneStride, // uPlane.data(), uvPlaneStride, // vPlane.data(), uvPlaneStride, width, height ); @@ -166,9 +166,9 @@ int main() { // uPlane.data(), uvPlaneStride, // vPlane.data(), uvPlaneStride, rgbaData.data(), rgbaStride, width, height); sparkyuv::YCbCr444ToRGBA(rgbaData.data(), rgbaStride, width, height, - yPlane.data(), yPlaneStride, - uPlane.data(), uvPlaneStride, - vPlane.data(), uvPlaneStride, 0.299f, 0.114f, sparkyuv::YUV_RANGE_TV); + yPlane.data(), yPlaneStride, + uPlane.data(), uvPlaneStride, + vPlane.data(), uvPlaneStride, 0.299f, 0.114f, sparkyuv::YUV_RANGE_TV); // sparkyuv::YCbCr400ToRGBA(rgbaData.data(), rgbaStride, width, 
height, // yPlane.data(), yPlaneStride,0.299f, 0.114f, sparkyuv::YUV_RANGE_PC); }); @@ -192,8 +192,6 @@ int main() { // vPlane.data(), uvPlaneStride, rgbaData.data(), rgbaStride, width, height); sparkyuv::FlipHorizontalRGBA(rgbaData.data(), rgbaStride, flipped2.data(), rgbaStride, width, height); rgbaData = flipped2; - sparkyuv::FlipHorizontalRGBA(rgbaData.data(), rgbaStride, flipped2.data(), rgbaStride, width, height); - rgbaData = flipped2; // sparkyuv::YCbCr400ToRGBA(rgbaData.data(), rgbaStride, width, height, // yPlane.data(), yPlaneStride,0.299f, 0.114f, sparkyuv::YUV_RANGE_PC); }); @@ -326,10 +324,18 @@ int main() { // rgbaStride, inWidth, // inHeight); + int trnsStride = sizeof(uint8_t) * height * 4; + std::vector transposed(trnsStride * width); + sparkyuv::TransposeCounterClockwiseRGBA(rgbaData.data(), rgbaStride, transposed.data(), trnsStride, width, height); + aire::JPEGEncoder encoder(rgbaData.data(), rgbaStride, width, height); auto encoded = encoder.encode(); saveVectorToFile(encoded, "jpeg.jpeg"); + aire::JPEGEncoder encoderTrns(transposed.data(), trnsStride, height, width); + auto encodedTrns = encoderTrns.encode(); + saveVectorToFile(encodedTrns, "jpeg_trns.jpeg"); + aire::JPEGEncoder encoderNV(rgbaNVData.data(), rgbaNVStride, width, height); auto encodedNV = encoderNV.encode(); saveVectorToFile(encodedNV, "jpeg_nv.jpeg");