Skip to content

Commit

Permalink
#13953: Incorrectly handled bfloat16 -0.0 in ttnn.signbit (#28)
Browse files Browse the repository at this point in the history
### Ticket
[Link to GitHub Issue](tenstorrent/tt-metal#13953)

### Problem description
`-0.0` is incorrectly handled as `0.0` in `ttnn.signbit`. The bfloat16 data
format should preserve `-0.0` values through the unpacker, math, and packer
pipeline. However, `-0.0` in bfloat16 format gets passed into the kernel
as `0.0`, and `signbit` reads this as a positive value.

The detailed reasoning is as follows. When unpacking to the Dest
register, bfloat16 values are first unpacked into the Src registers and then
moved to Dest. When these values are moved from Src, `-0.0` is interpreted
as `0.0` and the src zero flags are set. As a result, the `-0.0` values are
not moved; instead, zeroes are written into the corresponding locations
in Dest.

### What's changed
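Disabled src zero flags for eltwise unary/sfpu operations. This is exposed
through a new `disable_src_zero_flag` template parameter (default `false`) on
`configure_unpack_AB` and `_llk_unpack_A_hw_configure_`. `-0.0` values
are now moved to Dest instead of being replaced with zeroes.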

### Checklist
- [x] [All post commit](https://github.com/tenstorrent/tt-metal/actions/runs/13592906337) CI passes
- [x] [Blackhole Post commit](https://github.com/tenstorrent/tt-metal/actions/runs/13592910095) CI passes (if applicable)
  • Loading branch information
atatuzunerTT authored Mar 3, 2025
1 parent 3717876 commit b82ac6a
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 6 deletions.
5 changes: 4 additions & 1 deletion tt_llk_blackhole/common/inc/cunpack_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ namespace ckernel::unpacker
cfg_reg_rmw_tensix<ALU_FORMAT_SPEC_REG0_SrcA_ADDR32, 0, ALU_ACC_CTRL_INT8_math_enabled_MASK>(alu_payload.val);
}

template<bool row_pool=false, bool is_fp32_dest_acc_en = false, bool fpu_srnd_en = false, bool pack_srnd_en = false>
template<bool row_pool=false, bool is_fp32_dest_acc_en = false, bool fpu_srnd_en = false, bool pack_srnd_en = false, bool disable_src_zero_flag = false>
inline void configure_unpack_AB(
const uint unpA_src_format,
const uint unpB_src_format,
Expand Down Expand Up @@ -272,6 +272,9 @@ namespace ckernel::unpacker
cfg_reg_rmw_tensix<ALU_FORMAT_SPEC_REG0_SrcA_ADDR32, 0, alu_mask>(alu_payload.val);

uint32_t src_zeroflags_disable = ((uint)unpA_dst_format == (uint)DataFormat::UInt16) || ((uint)unpB_dst_format == (uint)DataFormat::UInt16);
if constexpr (disable_src_zero_flag) {
src_zeroflags_disable = true;
}
cfg_reg_rmw_tensix<ALU_ACC_CTRL_Zero_Flag_disabled_src_RMW>(src_zeroflags_disable);

//Set FP8 E4M3 mode, bit is accessible by unpacker/packer
Expand Down
4 changes: 2 additions & 2 deletions tt_llk_blackhole/llk_lib/llk_unpack_A.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,13 +140,13 @@ inline void _llk_unpack_A_mop_config_(const bool transpose_of_faces, const std::
}
}

template <bool is_fp32_dest_acc_en = false, StochRndType stoch_rnd_mode = StochRndType::None>
template <bool is_fp32_dest_acc_en = false, StochRndType stoch_rnd_mode = StochRndType::None, bool disable_src_zero_flag = false>
inline void _llk_unpack_A_hw_configure_(const std::uint32_t unpack_src_format, const std::uint32_t unpack_dst_format, const std::uint32_t face_r_dim = FACE_R_DIM, const std::uint32_t within_face_16x16_transpose = 0, const std::uint32_t num_faces = 4) {
constexpr bool is_row_pool = false;
constexpr bool stoch_rnd_en = (stoch_rnd_mode == StochRndType::All);
constexpr bool fpu_srnd_en = stoch_rnd_en || (stoch_rnd_mode == StochRndType::Fpu);
constexpr bool pack_srnd_en = stoch_rnd_en ||(stoch_rnd_mode == StochRndType::Pack);
configure_unpack_AB<is_row_pool, is_fp32_dest_acc_en, fpu_srnd_en, pack_srnd_en>(
configure_unpack_AB<is_row_pool, is_fp32_dest_acc_en, fpu_srnd_en, pack_srnd_en, disable_src_zero_flag>(
unpack_src_format,
unpack_src_format,
unpack_dst_format,
Expand Down
5 changes: 4 additions & 1 deletion tt_llk_wormhole_b0/common/inc/cunpack_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ namespace ckernel::unpacker
cfg_reg_rmw_tensix<ALU_FORMAT_SPEC_REG0_SrcA_ADDR32, 0, ALU_ACC_CTRL_INT8_math_enabled_MASK>(alu_payload.val);
}

template<bool row_pool=false, bool is_fp32_dest_acc_en = false, bool fpu_srnd_en = false, bool pack_srnd_en = false>
template<bool row_pool=false, bool is_fp32_dest_acc_en = false, bool fpu_srnd_en = false, bool pack_srnd_en = false, bool disable_src_zero_flag = false>
inline void configure_unpack_AB(
const uint unpA_src_format,
const uint unpB_src_format,
Expand Down Expand Up @@ -267,6 +267,9 @@ namespace ckernel::unpacker
cfg_reg_rmw_tensix<ALU_FORMAT_SPEC_REG0_SrcA_ADDR32, 0, alu_mask>(alu_payload.val);

uint32_t src_zeroflags_disable = ((uint)unpA_dst_format == (uint)DataFormat::UInt16) || ((uint)unpB_dst_format == (uint)DataFormat::UInt16);
if constexpr (disable_src_zero_flag) {
src_zeroflags_disable = true;
}
cfg_reg_rmw_tensix<ALU_ACC_CTRL_Zero_Flag_disabled_src_RMW>(src_zeroflags_disable);

t6_mutex_release(mutex::REG_RMW);
Expand Down
4 changes: 2 additions & 2 deletions tt_llk_wormhole_b0/llk_lib/llk_unpack_A.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,13 @@ inline void _llk_unpack_A_mop_config_(const bool transpose_of_faces, const std::
}
}

template <bool is_fp32_dest_acc_en = false, StochRndType stoch_rnd_mode = StochRndType::None>
template <bool is_fp32_dest_acc_en = false, StochRndType stoch_rnd_mode = StochRndType::None, bool disable_src_zero_flag = false>
inline void _llk_unpack_A_hw_configure_(const std::uint32_t unpack_src_format, const std::uint32_t unpack_dst_format, const std::uint32_t face_r_dim = FACE_R_DIM, const std::uint32_t within_face_16x16_transpose = 0, const std::uint32_t num_faces = 4) {
constexpr bool is_row_pool = false;
constexpr bool stoch_rnd_en = (stoch_rnd_mode == StochRndType::All);
constexpr bool fpu_srnd_en = stoch_rnd_en || (stoch_rnd_mode == StochRndType::Fpu);
constexpr bool pack_srnd_en = stoch_rnd_en ||(stoch_rnd_mode == StochRndType::Pack);
configure_unpack_AB<is_row_pool, is_fp32_dest_acc_en, fpu_srnd_en, pack_srnd_en>(
configure_unpack_AB<is_row_pool, is_fp32_dest_acc_en, fpu_srnd_en, pack_srnd_en, disable_src_zero_flag>(
unpack_src_format,
unpack_src_format,
unpack_dst_format,
Expand Down

0 comments on commit b82ac6a

Please sign in to comment.