diff --git a/tt_llk_blackhole/common/inc/cunpack_common.h b/tt_llk_blackhole/common/inc/cunpack_common.h index 5e859574..ee18f246 100644 --- a/tt_llk_blackhole/common/inc/cunpack_common.h +++ b/tt_llk_blackhole/common/inc/cunpack_common.h @@ -192,7 +192,7 @@ namespace ckernel::unpacker cfg_reg_rmw_tensix(alu_payload.val); } - template + template inline void configure_unpack_AB( const uint unpA_src_format, const uint unpB_src_format, @@ -272,6 +272,9 @@ namespace ckernel::unpacker cfg_reg_rmw_tensix(alu_payload.val); uint32_t src_zeroflags_disable = ((uint)unpA_dst_format == (uint)DataFormat::UInt16) || ((uint)unpB_dst_format == (uint)DataFormat::UInt16); + if constexpr (disable_src_zero_flag) { + src_zeroflags_disable = true; + } cfg_reg_rmw_tensix(src_zeroflags_disable); //Set FP8 E4M3 mode, bit is accessible by unpacker/packer diff --git a/tt_llk_blackhole/llk_lib/llk_unpack_A.h b/tt_llk_blackhole/llk_lib/llk_unpack_A.h index bdb2eb86..b357553d 100644 --- a/tt_llk_blackhole/llk_lib/llk_unpack_A.h +++ b/tt_llk_blackhole/llk_lib/llk_unpack_A.h @@ -140,13 +140,13 @@ inline void _llk_unpack_A_mop_config_(const bool transpose_of_faces, const std:: } } -template +template inline void _llk_unpack_A_hw_configure_(const std::uint32_t unpack_src_format, const std::uint32_t unpack_dst_format, const std::uint32_t face_r_dim = FACE_R_DIM, const std::uint32_t within_face_16x16_transpose = 0, const std::uint32_t num_faces = 4) { constexpr bool is_row_pool = false; constexpr bool stoch_rnd_en = (stoch_rnd_mode == StochRndType::All); constexpr bool fpu_srnd_en = stoch_rnd_en || (stoch_rnd_mode == StochRndType::Fpu); constexpr bool pack_srnd_en = stoch_rnd_en ||(stoch_rnd_mode == StochRndType::Pack); - configure_unpack_AB( + configure_unpack_AB( unpack_src_format, unpack_src_format, unpack_dst_format, diff --git a/tt_llk_wormhole_b0/common/inc/cunpack_common.h b/tt_llk_wormhole_b0/common/inc/cunpack_common.h index bc70e419..3d44a701 100644 --- a/tt_llk_wormhole_b0/common/inc/cunpack_common.h +++ b/tt_llk_wormhole_b0/common/inc/cunpack_common.h @@ -192,7 +192,7 @@ namespace ckernel::unpacker cfg_reg_rmw_tensix(alu_payload.val); } - template + template inline void configure_unpack_AB( const uint unpA_src_format, const uint unpB_src_format, @@ -267,6 +267,9 @@ namespace ckernel::unpacker cfg_reg_rmw_tensix(alu_payload.val); uint32_t src_zeroflags_disable = ((uint)unpA_dst_format == (uint)DataFormat::UInt16) || ((uint)unpB_dst_format == (uint)DataFormat::UInt16); + if constexpr (disable_src_zero_flag) { + src_zeroflags_disable = true; + } cfg_reg_rmw_tensix(src_zeroflags_disable); t6_mutex_release(mutex::REG_RMW); diff --git a/tt_llk_wormhole_b0/llk_lib/llk_unpack_A.h b/tt_llk_wormhole_b0/llk_lib/llk_unpack_A.h index 3ecbc43f..3ff0352a 100644 --- a/tt_llk_wormhole_b0/llk_lib/llk_unpack_A.h +++ b/tt_llk_wormhole_b0/llk_lib/llk_unpack_A.h @@ -141,13 +141,13 @@ inline void _llk_unpack_A_mop_config_(const bool transpose_of_faces, const std:: } } -template +template inline void _llk_unpack_A_hw_configure_(const std::uint32_t unpack_src_format, const std::uint32_t unpack_dst_format, const std::uint32_t face_r_dim = FACE_R_DIM, const std::uint32_t within_face_16x16_transpose = 0, const std::uint32_t num_faces = 4) { constexpr bool is_row_pool = false; constexpr bool stoch_rnd_en = (stoch_rnd_mode == StochRndType::All); constexpr bool fpu_srnd_en = stoch_rnd_en || (stoch_rnd_mode == StochRndType::Fpu); constexpr bool pack_srnd_en = stoch_rnd_en ||(stoch_rnd_mode == StochRndType::Pack); - configure_unpack_AB( + configure_unpack_AB( unpack_src_format, unpack_src_format, unpack_dst_format,