diff --git a/tests/firmware/riscv/blackhole/l1_address_map.h b/tests/firmware/riscv/blackhole/l1_address_map.h index eef7bd6..7e9e472 100644 --- a/tests/firmware/riscv/blackhole/l1_address_map.h +++ b/tests/firmware/riscv/blackhole/l1_address_map.h @@ -3,10 +3,10 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once -#include +#include // Aux variable used to align addresses to platform specific width. BH requires 64B alignment. -#define NOC_ADDRESS_ALIGNMENT (64) +constexpr auto NOC_ADDRESS_ALIGNMENT = 64; namespace l1_mem { diff --git a/tests/firmware/riscv/wormhole/l1_address_map.h b/tests/firmware/riscv/wormhole/l1_address_map.h index 9f9e8c9..2e40fd8 100644 --- a/tests/firmware/riscv/wormhole/l1_address_map.h +++ b/tests/firmware/riscv/wormhole/l1_address_map.h @@ -6,7 +6,7 @@ #include // Aux variable used to align addresses to platform specific width. WH requires 32B alignment. -#define NOC_ADDRESS_ALIGNMENT (32) +constexpr auto NOC_ADDRESS_ALIGNMENT = 32; namespace l1_mem { diff --git a/tests/helpers/include/ckernel_helper.h b/tests/helpers/include/ckernel_helper.h index ce5c2e9..617d4fa 100644 --- a/tests/helpers/include/ckernel_helper.h +++ b/tests/helpers/include/ckernel_helper.h @@ -2,8 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#ifndef CKERNEL_HELPER_H -#define CKERNEL_HELPER_H +#pragma once namespace ckernel { @@ -16,10 +15,6 @@ volatile std::uint32_t tt_reg_ptr *mailbox_base[4] = { reinterpret_cast(TENSIX_MAILBOX2_BASE), reinterpret_cast(TENSIX_MAILBOX3_BASE)}; -std::uint32_t cfg_state_id __attribute__((section(".bss"))) = 0; // Flip between 0 and 1 to keep state between kernel calls -std::uint32_t dest_offset_id __attribute__((section(".bss"))) = 0; // Flip between 0 and 1 to keep dest pointer between kernel calls +std::uint32_t cfg_state_id = 0; // Flip between 0 and 1 to keep state between kernel calls +std::uint32_t dest_offset_id = 0; // Flip between 0 and 1 to keep dest pointer between kernel calls } // namespace ckernel - -using 
namespace ckernel; - -#endif diff --git a/tests/helpers/include/params.h b/tests/helpers/include/params.h index dce9b5d..ad81046 100644 --- a/tests/helpers/include/params.h +++ b/tests/helpers/include/params.h @@ -2,122 +2,84 @@ // // SPDX-License-Identifier: Apache-2.0 -#ifndef PARAMS_H -#define PARAMS_H +#pragma once #include -#include +#include -#define L1_ADDRESS(buffer) ((reinterpret_cast(buffer) / 16) - 1) +#include "ckernel_sfpu_log.h" +#include "ckernel_sfpu_sqrt.h" +#include "ckernel_sfpu_square.h" +#include "llk_defs.h" +#include "llk_sfpu_types.h" +#include "tensix_types.h" -#ifdef LLK_TRISC_UNPACK - -#ifdef FORMAT_FLOAT16_B -#define DATA_FORMAT (uint32_t)DataFormat::Float16_b -#endif -#ifdef FORMAT_FLOAT16 -#define DATA_FORMAT (uint32_t)DataFormat::Float16 -#endif -#ifdef FORMAT_FLOAT32 -#define DATA_FORMAT (uint32_t)DataFormat::Float32 -#endif -#ifdef FORMAT_INT32 -#define DATA_FORMAT (uint32_t)DataFormat::Int32 -#endif -#ifdef FORMAT_BFP8_B -#define DATA_FORMAT (uint32_t)DataFormat::Bfp8_b -#endif - -#endif +inline uint32_t L1_ADDRESS(const volatile void* buffer) +{ + return (reinterpret_cast(buffer) / 16) - 1; +} -#ifdef LLK_TRISC_MATH +namespace +{ +constexpr std::underlying_type_t get_data_format(DataFormat format) +{ + return static_cast>(format); +} +} // namespace #ifdef FORMAT_FLOAT16_B -#define DATA_FORMAT (uint32_t)DataFormat::Float16_b +constexpr auto DATA_FORMAT = get_data_format(DataFormat::Float16_b); #endif #ifdef FORMAT_FLOAT16 -#define DATA_FORMAT (uint32_t)DataFormat::Float16 +constexpr auto DATA_FORMAT = get_data_format(DataFormat::Float16); #endif #ifdef FORMAT_FLOAT32 -#define DATA_FORMAT (uint32_t)DataFormat::Float32 +constexpr auto DATA_FORMAT = get_data_format(DataFormat::Float32); #endif #ifdef FORMAT_INT32 -#define DATA_FORMAT (uint32_t)DataFormat::Int32 +constexpr auto DATA_FORMAT = get_data_format(DataFormat::Int32); #endif #ifdef FORMAT_BFP8_B -#define DATA_FORMAT (uint32_t)DataFormat::Bfp8_b +constexpr auto DATA_FORMAT = 
get_data_format(DataFormat::Bfp8_b); #endif #ifdef ELTWISE_BINARY_ADD -#define ELTWISE_BINARY_OP EltwiseBinaryType::ELWADD +constexpr auto ELTWISE_BINARY_OP = ckernel::EltwiseBinaryType::ELWADD; #endif #ifdef ELTWISE_BINARY_SUB -#define ELTWISE_BINARY_OP EltwiseBinaryType::ELWSUB +constexpr auto ELTWISE_BINARY_OP = ckernel::EltwiseBinaryType::ELWSUB; #endif #ifdef ELTWISE_BINARY_MUL -#define ELTWISE_BINARY_OP EltwiseBinaryType::ELWMUL +constexpr auto ELTWISE_BINARY_OP = ckernel::EltwiseBinaryType::ELWMUL; #endif // TO BE IMPLEMENTED IN LLKs #ifdef ELTWISE_BINARY_DIV -#define ELTWISE_BINARY_OP EltwiseBinaryType::ELWDIV +constexpr auto ELTWISE_BINARY_OP = ckernel::EltwiseBinaryType::ELWDIV; #endif #ifdef ELTWISE_BINARY_LESS -#define ELTWISE_BINARY_OP EltwiseBinaryType::ELWLESS +constexpr auto ELTWISE_BINARY_OP = ckernel::EltwiseBinaryType::ELWLESS; #endif -// SFPU operation macros - #ifdef SFPU_OP_SQRT -#define SFPU_OPERATION SfpuType::sqrt -#define SFPU_CALLS \ - _init_sqrt_(); \ - _calculate_sqrt_(10); +constexpr auto SFPU_OPERATION = SfpuType::sqrt; #endif #ifdef SFPU_OP_LOG -#define SFPU_OPERATION SfpuType::log -#define SFPU_CALLS \ - _init_log_(); \ - _calculate_log_(10, 0); +constexpr auto SFPU_OPERATION = SfpuType::log; #endif #ifdef SFPU_OP_SQUARE -#define SFPU_OPERATION SfpuType::square -#define SFPU_CALLS _calculate_square_(10); +constexpr auto SFPU_OPERATION = SfpuType::square; #endif -#endif - -#ifdef LLK_TRISC_PACK - inline void process_addresses(volatile uint32_t* buffer_Dest[], int n, int first, ...) 
{ - buffer_Dest[0] = (volatile uint32_t*)first; + buffer_Dest[0] = reinterpret_cast(first); va_list args; va_start(args, first); for (int i = 1; i < n; ++i) { int num = va_arg(args, int); - buffer_Dest[i] = (volatile uint32_t*)num; + buffer_Dest[i] = reinterpret_cast(num); } va_end(args); } - -#ifdef FORMAT_FLOAT16_B -#define DATA_FORMAT (uint32_t)DataFormat::Float16_b -#endif -#ifdef FORMAT_FLOAT16 -#define DATA_FORMAT (uint32_t)DataFormat::Float16 -#endif -#ifdef FORMAT_FLOAT32 -#define DATA_FORMAT (uint32_t)DataFormat::Float32 -#endif -#ifdef FORMAT_INT32 -#define DATA_FORMAT (uint32_t)DataFormat::Int32 -#endif -#ifdef FORMAT_BFP8_B -#define DATA_FORMAT (uint32_t)DataFormat::Bfp8_b -#endif - -#endif - -#endif diff --git a/tests/helpers/src/trisc.cpp b/tests/helpers/src/trisc.cpp index 060f441..54b787c 100644 --- a/tests/helpers/src/trisc.cpp +++ b/tests/helpers/src/trisc.cpp @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 +#include #include #include "ckernel.h" @@ -16,19 +17,15 @@ int main() { #ifdef LLK_TRISC_UNPACK - volatile std::uint32_t* mailbox = (volatile std::uint32_t*)(0x19FFC); + volatile std::uint32_t* mailbox = reinterpret_cast(0x19FFC); #elif defined(LLK_TRISC_MATH) - volatile std::uint32_t* mailbox = (volatile std::uint32_t*)(0x19FF8); + volatile std::uint32_t* mailbox = reinterpret_cast(0x19FF8); #elif defined(LLK_TRISC_PACK) - volatile std::uint32_t* mailbox = (volatile std::uint32_t*)(0x19FF4); + volatile std::uint32_t* mailbox = reinterpret_cast(0x19FF4); #endif - *mailbox = 0x2; // write value different than 1 to mailbox to indicate kernel is running - for (int i = 0; i < 64; i++) - { - ckernel::regfile[i] = 0; - } + std::fill(ckernel::regfile, ckernel::regfile + 64, 0); ckernel::reset_cfg_state_id(); ckernel::reset_dest_offset_id(); @@ -38,7 +35,11 @@ int main() *mailbox = ckernel::KERNEL_COMPLETE; // 0x1 - for (;;) + // Use a volatile variable to prevent the compiler from optimizing away the loop + volatile bool run = true; + + // 
Infinite loop + while (run) { } } diff --git a/tests/python_tests/helpers/device.py b/tests/python_tests/helpers/device.py index e70e8e4..9be0986 100644 --- a/tests/python_tests/helpers/device.py +++ b/tests/python_tests/helpers/device.py @@ -32,7 +32,7 @@ def run_elf_files(testname, core_loc="0,0", run_brisc=True): # and now cores are run in revese order PACK, MATH, UNOPACK # Once that issue is reolved with tt-exalens code will be returned to normal for loop - for i in range(2, -1, -1): + for i in reversed(range(3)): run_elf(f"{ELF_LOCATION}{testname}_trisc{i}.elf", core_loc, risc_id=i + 1) diff --git a/tests/python_tests/helpers/pack.py b/tests/python_tests/helpers/pack.py index 3c2557e..4e18f98 100644 --- a/tests/python_tests/helpers/pack.py +++ b/tests/python_tests/helpers/pack.py @@ -12,20 +12,7 @@ def flatten_list(sublists): def int_to_bytes_list(n): - binary_str = bin(n)[2:].zfill(32) - return [int(binary_str[i : i + 8], 2) for i in range(0, 32, 8)] - - -def float16_to_bytes(value): - float16_value = torch.tensor(value, dtype=torch.float16) - packed_bytes = struct.pack(">e", float16_value.item()) - return list(packed_bytes) + [0] * (4 - len(packed_bytes)) - - -def bfloat16_to_bytes(number): - number_unpacked = struct.unpack("!I", struct.pack("!f", number))[0] - res_masked = number_unpacked & 0xFFFF0000 - return int_to_bytes_list(res_masked) + return [(n >> (8 * i)) & 0xFF for i in range(3, -1, -1)] def fp32_to_bytes(number): @@ -33,19 +20,12 @@ def fp32_to_bytes(number): return int_to_bytes_list(number_unpacked) -def int32_to_bytes(number): - number = int(number) - number_unpacked = struct.unpack("!I", struct.pack("!I", number))[0] - return int_to_bytes_list(number_unpacked) - - -def bfloat16_to_binary(value): - float_value = value.to(torch.float32).item() - bfloat16_bytes = bfloat16_to_bytes(float_value) - return f"{bfloat16_bytes[0]:08b}{bfloat16_bytes[1]:08b}" - - def pack_bfp16(torch_tensor): + def bfloat16_to_bytes(number): + number_unpacked = 
struct.unpack("!I", struct.pack("!f", number))[0] + res_masked = number_unpacked & 0xFFFF0000 + return int_to_bytes_list(res_masked) + packed_bytes = [] for i in range(0, len(torch_tensor), 2): half1 = bfloat16_to_bytes(torch_tensor[i]) @@ -55,29 +35,44 @@ def pack_bfp16(torch_tensor): def pack_fp16(torch_tensor): + def float16_to_bytes(value): + packed_bytes = struct.pack("> 16 + return f"{(bfloat16_value >> 8) & 0xFF:08b}{bfloat16_value & 0xFF:08b}" + exponents = [] mantissas = [] signs = [] @@ -109,22 +104,16 @@ def float_to_bfp8_block(block): def pack_bfp8_b(tensor, block_size=16): - flattened_tensor = tensor.flatten() num_blocks = len(flattened_tensor) // block_size - blocks = [ - flattened_tensor[i * block_size : (i + 1) * block_size] - for i in range(num_blocks) - ] exponents = [] mantissas = [] - for block in blocks: + for i in range(num_blocks): + block = flattened_tensor[i * block_size : (i + 1) * block_size] shared_exponent, bfp8_mantissas = float_to_bfp8_block(block) exponents.append(shared_exponent) mantissas.extend(bfp8_mantissas) - bfp8_result = exponents + mantissas - - return bfp8_result + return exponents + mantissas diff --git a/tests/python_tests/helpers/stimuli_generator.py b/tests/python_tests/helpers/stimuli_generator.py index 94e113d..4df9cf0 100644 --- a/tests/python_tests/helpers/stimuli_generator.py +++ b/tests/python_tests/helpers/stimuli_generator.py @@ -12,14 +12,12 @@ def flatten_list(sublists): def generate_random_face(stimuli_format="Float16_b", const_value=1, const_face=False): if stimuli_format in ["Float16_b", "Float16", "Float32"]: - if const_face: srcA_face = torch.ones(256, dtype=format_dict[stimuli_format]) * const_value else: # random for both faces srcA_face = torch.rand(256, dtype=format_dict[stimuli_format]) + 0.1 elif stimuli_format == "Bfp8_b": - size = 256 integer_part = torch.randint(0, 3, (size,)) fraction = torch.randint(0, 16, (size,)).to(dtype=torch.bfloat16) / 16.0 @@ -54,27 +52,22 @@ def generate_stimuli( srcA = 
[] srcB = [] - for i in range(4 * tile_cnt): + for _ in range(4 * tile_cnt): face_a, face_b = generate_random_face_ab( stimuli_format, const_face, const_value_A, const_value_B ) - srcA.append(face_a.tolist()) - srcB.append(face_b.tolist()) - - srcA = flatten_list(srcA) - srcB = flatten_list(srcB) + srcA.extend(face_a.tolist()) + srcB.extend(face_b.tolist()) if not sfpu: - if stimuli_format != "Bfp8_b": - return torch.tensor(srcA, dtype=format_dict[stimuli_format]), torch.tensor( - srcB, dtype=format_dict[stimuli_format] - ) - else: - return torch.tensor(srcA, dtype=torch.bfloat16), torch.tensor( - srcB, dtype=torch.bfloat16 - ) + dtype = ( + format_dict[stimuli_format] + if stimuli_format != "Bfp8_b" + else torch.bfloat16 + ) + return torch.tensor(srcA, dtype=dtype), torch.tensor(srcB, dtype=dtype) else: srcA = generate_random_face(stimuli_format, const_value_A, const_face) - srcB = torch.full((256,), 0) + srcB = torch.zeros(256) srcA = torch.cat((srcA, torch.zeros(1024 - 256))) return srcA, srcB diff --git a/tests/python_tests/helpers/test_config.py b/tests/python_tests/helpers/test_config.py index 26ff8b0..7aa98d7 100644 --- a/tests/python_tests/helpers/test_config.py +++ b/tests/python_tests/helpers/test_config.py @@ -23,8 +23,7 @@ def generate_make_command(test_config): approx_mode = test_config.get("approx_mode", "false") math_fidelity = test_config.get("math_fidelity", 0) - make_cmd += f" math_fidelity={math_fidelity} " - make_cmd += f" approx_mode={approx_mode} " + make_cmd += f" math_fidelity={math_fidelity} approx_mode={approx_mode} " reduce_dim = test_config.get("reduce_dim", "no_reduce_dim") pool_type = test_config.get("pool_type", "no_reduce_dim") @@ -36,23 +35,20 @@ def generate_make_command(test_config): make_cmd += f"reduce_dim={reduce_dim_args[reduce_dim]} " make_cmd += f"pool_type={reduce_pool_args[pool_type]} " else: - make_cmd += f"mathop={ mathop_args_dict[mathop]} " + make_cmd += f"mathop={mathop_args_dict[mathop]} " else: # multiple tiles 
handles mathop as int - - if mathop == 1: - make_cmd += " mathop=ELTWISE_BINARY_ADD " - elif mathop == 2: - make_cmd += " mathop=ELTWISE_BINARY_SUB " - else: - make_cmd += " mathop=ELTWISE_BINARY_MUL " + mathop_map = { + 1: "ELTWISE_BINARY_ADD", + 2: "ELTWISE_BINARY_SUB", + 3: "ELTWISE_BINARY_MUL", + } + make_cmd += f"mathop={mathop_map.get(mathop, 'ELTWISE_BINARY_MUL')} " kern_cnt = str(test_config.get("kern_cnt")) pack_addr_cnt = str(test_config.get("pack_addr_cnt")) pack_addrs = test_config.get("pack_addrs") - make_cmd += f" kern_cnt={kern_cnt} " - make_cmd += f" pack_addr_cnt={pack_addr_cnt} pack_addrs={pack_addrs}" + make_cmd += f" kern_cnt={kern_cnt} pack_addr_cnt={pack_addr_cnt} pack_addrs={pack_addrs}" print(make_cmd) - return make_cmd diff --git a/tests/python_tests/helpers/unpack.py b/tests/python_tests/helpers/unpack.py index 01e55f1..b3cd124 100644 --- a/tests/python_tests/helpers/unpack.py +++ b/tests/python_tests/helpers/unpack.py @@ -9,61 +9,54 @@ def int_to_bytes_list(n): - binary_str = bin(n)[2:].zfill(32) - return [int(binary_str[i : i + 8], 2) for i in range(0, 32, 8)] - - -def bytes_to_float16(byte_list): - bytes_data = bytes(byte_list[:2]) - unpacked_value = struct.unpack(">e", bytes_data)[0] - return torch.tensor(unpacked_value, dtype=torch.float16) - - -def bytes_to_bfloat16(byte_list): - bytes_data = bytes(byte_list[:2] + [0, 0]) # Ensure we include padding - unpacked_value = struct.unpack(">f", bytes_data)[0] - return torch.tensor(unpacked_value, dtype=torch.float32) - - -def bytes_to_float32(byte_list): - bytes_data = bytes(byte_list) - unpacked_value = struct.unpack(">f", bytes_data)[0] - return torch.tensor(unpacked_value, dtype=torch.float32) - - -def bytes_to_int32(byte_list): - bytes_data = bytes(byte_list) - unpacked_value = struct.unpack(">I", bytes_data)[0] - return torch.tensor(unpacked_value, dtype=torch.int32) + return [(n >> (24 - i * 8)) & 0xFF for i in range(4)] def unpack_fp16(packed_list): + def bytes_to_float16(byte_list): 
+ bytes_data = bytes(byte_list[:2]) + unpacked_value = struct.unpack(">e", bytes_data)[0] + return unpacked_value + limited_packed_list = packed_list[:2048] return [ - bytes_to_float16(limited_packed_list[i : i + 2]).item() + bytes_to_float16(limited_packed_list[i : i + 2]) for i in range(0, len(limited_packed_list), 2) ] def unpack_bfp16(packed_list): + def bytes_to_bfloat16(byte_list): + bytes_data = bytes(byte_list[:2] + [0, 0]) # Ensure we include padding + unpacked_value = struct.unpack(">f", bytes_data)[0] + return unpacked_value + limited_packed_list = packed_list[:2048] return [ - bytes_to_bfloat16(limited_packed_list[i : i + 2]).item() + bytes_to_bfloat16(limited_packed_list[i : i + 2]) for i in range(0, len(limited_packed_list), 2) ] def unpack_float32(packed_list): + def bytes_to_float32(byte_list): + bytes_data = bytes(byte_list) + unpacked_value = struct.unpack(">f", bytes_data)[0] + return unpacked_value + return [ - bytes_to_float32(packed_list[i : i + 4]).item() - for i in range(0, len(packed_list), 4) + bytes_to_float32(packed_list[i : i + 4]) for i in range(0, len(packed_list), 4) ] def unpack_int32(packed_list): + def bytes_to_int32(byte_list): + bytes_data = bytes(byte_list) + unpacked_value = struct.unpack(">I", bytes_data)[0] + return unpacked_value + return [ - bytes_to_int32(packed_list[i : i + 4]).item() - for i in range(0, len(packed_list), 4) + bytes_to_int32(packed_list[i : i + 4]) for i in range(0, len(packed_list), 4) ] @@ -96,12 +89,11 @@ def unpack_bfp8_b(bfp8_block, sfpu=False): if not sfpu: exponents = bfp8_block[:64] - reversed_exponents = reverse_endian_chunk(exponents) mantissas = bfp8_block[64:] else: exponents = bfp8_block[:16] - reversed_exponents = reverse_endian_chunk(exponents) mantissas = bfp8_block[16:272] + reversed_exponents = reverse_endian_chunk(exponents) bfloat16_values = [] for i in range(len(reversed_exponents)): diff --git a/tests/python_tests/helpers/utils.py b/tests/python_tests/helpers/utils.py index 
c96d88a..cf0c19e 100644 --- a/tests/python_tests/helpers/utils.py +++ b/tests/python_tests/helpers/utils.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC # SPDX-License-Identifier: Apache-2.0 +import os import torch import numpy as np import subprocess @@ -41,12 +42,11 @@ def run_shell_command(command: str): command, shell=True, text=True, capture_output=False, stdout=subprocess.DEVNULL ) if result.returncode != 0: - raise RuntimeError(f"Command failed: {command}\n{result.stderr}") + raise RuntimeError(f"Build failed: {command}\n{result.stderr}") return result def calculate_read_words_count(format, sfpu=False): - if format not in format_sizes: raise ValueError(f"Unsupported format: {format}") @@ -64,14 +64,8 @@ def reverse_endian_chunk(input_list, chunk_size=4): def format_kernel_list(kernels, as_hex=False): - formatted_str = "" - for i in kernels: - # Use hex formatting if the flag is set, otherwise use decimal - if as_hex: - formatted_str += str(hex(i)) + "," - else: - formatted_str += str(i) + "," - return formatted_str[:-1] # Remove the trailing comma + formatter = hex if as_hex else str + return ",".join(formatter(i) for i in kernels) def compare_pcc(golden, calculated, pcc=0.99): @@ -127,3 +121,10 @@ def compare_pcc(golden, calculated, pcc=0.99): return True, 1.0 return cal_pcc >= pcc, cal_pcc + + +def get_chip_architecture(): + chip_architecture = os.getenv("CHIP_ARCH") + if chip_architecture is None: + raise ValueError("CHIP_ARCH environment variable is not set") + return chip_architecture diff --git a/tests/setup_env.sh b/tests/setup_env.sh index c746ced..7e059c9 100755 --- a/tests/setup_env.sh +++ b/tests/setup_env.sh @@ -57,7 +57,7 @@ if [[ "$REUSE" == false ]]; then echo "Installing required packages..." pip install . - pip install pytest pytest-cov + pip install pytest pytest-cov pytest-repeat pytest-timeout # Detect architecture for chip echo "Running tt-smi -ls to detect architecture..." 
diff --git a/tests/sources/eltwise_unary_datacopy_test.cpp b/tests/sources/eltwise_unary_datacopy_test.cpp index 88a7e0f..31f91aa 100644 --- a/tests/sources/eltwise_unary_datacopy_test.cpp +++ b/tests/sources/eltwise_unary_datacopy_test.cpp @@ -13,7 +13,6 @@ // Globals uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) __attribute__((aligned(16))); #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; diff --git a/tests/sources/eltwise_unary_sfpu_test.cpp b/tests/sources/eltwise_unary_sfpu_test.cpp index 6666486..98952a3 100644 --- a/tests/sources/eltwise_unary_sfpu_test.cpp +++ b/tests/sources/eltwise_unary_sfpu_test.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "ckernel.h" #include "llk_defs.h" @@ -14,7 +15,6 @@ const bool unpack_to_dest = true; // Globals uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) __attribute__((aligned(16))); #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; @@ -50,6 +50,29 @@ void run_kernel() using namespace ckernel; using namespace ckernel::sfpu; +namespace +{ +void call_sfpu_operation(SfpuType operation) +{ + switch (operation) + { + case SfpuType::sqrt: + ckernel::sfpu::_init_sqrt_(); + ckernel::sfpu::_calculate_sqrt_(10); + break; + case SfpuType::log: + ckernel::sfpu::_init_log_(); + ckernel::sfpu::_calculate_log_(10, 0); + break; + case SfpuType::square: + ckernel::sfpu::_calculate_square_(10); + break; + default: + return; + } +} +} // namespace + void run_kernel() { // copy srca to dest @@ -67,11 +90,9 @@ void run_kernel() // calculation of sfpu operation on dest _llk_math_eltwise_unary_sfpu_init_(); _llk_math_eltwise_unary_sfpu_start_(0); -// calling sfpu function from ckernel -// this part is where parametrization of operation takes part -#ifdef SFPU_CALLS - SFPU_CALLS -#endif + // calling sfpu function from 
ckernel + // this part is where parametrization of operation takes part + call_sfpu_operation(SFPU_OPERATION); _llk_math_eltwise_unary_sfpu_done_(); _llk_math_dest_section_done_(); @@ -85,18 +106,18 @@ void run_kernel() #include "llk_pack_common.h" #include "params.h" -// If data foramt is Bfp8 it is calculated correctly in Dest but packer cannot pack just that one face -// TODO: make it so It can -// So for now It is packed as Float16_b +void run_kernel() +{ + // If data format is Bfp8 it is calculated correctly in Dest but packer cannot pack just that one face + // TODO: make it so It can + // So for now It is packed as Float16_b #ifdef FORMAT_BFP8_B -#define PACK_DEST_FORMAT (uint32_t)DataFormat::Float16_b + constexpr auto PACK_DEST_FORMAT = static_cast>(DataFormat::Float16_b); #else -#define PACK_DEST_FORMAT DATA_FORMAT + constexpr auto PACK_DEST_FORMAT = DATA_FORMAT; #endif -void run_kernel() -{ volatile uint32_t* const buffer_Dest = reinterpret_cast(0x1c000); std::fill(buffer_Dest, buffer_Dest + 16 * 16 * 4, 0xdeadbeef); diff --git a/tests/sources/fill_dest_test.cpp b/tests/sources/fill_dest_test.cpp index 2dd428a..da96837 100644 --- a/tests/sources/fill_dest_test.cpp +++ b/tests/sources/fill_dest_test.cpp @@ -12,7 +12,6 @@ // Globals uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) __attribute__((aligned(16))); #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; @@ -28,8 +27,8 @@ const bool is_fp32_dest_acc_en = false; void run_kernel() { - volatile uint32_t* const buffer_A = (volatile uint32_t*)0x1a000; - volatile uint32_t* const buffer_B = (volatile uint32_t*)0x1b000; + volatile uint32_t* const buffer_A = reinterpret_cast(0x1a000); + volatile uint32_t* const buffer_B = reinterpret_cast(0x1b000); for (uint index = 0; index < 16; index++) { @@ -76,7 +75,7 @@ void run_kernel() void run_kernel() { - volatile uint32_t* const buffer_Dest = (volatile 
uint32_t*)0x1c000; + volatile uint32_t* const buffer_Dest = reinterpret_cast(0x1c000); #ifdef ARCH_BLACKHOLE _llk_pack_hw_configure_(DATA_FORMAT, DATA_FORMAT, 16 * 16 * 4); diff --git a/tests/sources/matmul_test.cpp b/tests/sources/matmul_test.cpp index 2635405..d2cd16d 100644 --- a/tests/sources/matmul_test.cpp +++ b/tests/sources/matmul_test.cpp @@ -14,8 +14,6 @@ uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; uint32_t math_sync_tile_dst_index = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) __attribute__((aligned(16))); - #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; #else diff --git a/tests/sources/multiple_tiles_eltwise_test.cpp b/tests/sources/multiple_tiles_eltwise_test.cpp index 883e4e9..038fe3c 100644 --- a/tests/sources/multiple_tiles_eltwise_test.cpp +++ b/tests/sources/multiple_tiles_eltwise_test.cpp @@ -12,7 +12,6 @@ // Globals uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) __attribute__((aligned(16))); #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; diff --git a/tests/sources/pack_untilize_test.cpp b/tests/sources/pack_untilize_test.cpp index e5a85d6..1dfb934 100644 --- a/tests/sources/pack_untilize_test.cpp +++ b/tests/sources/pack_untilize_test.cpp @@ -12,7 +12,6 @@ // Globals uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) __attribute__((aligned(16))); #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; diff --git a/tests/sources/reduce_test.cpp b/tests/sources/reduce_test.cpp index beb0dcf..5e2c87c 100644 --- a/tests/sources/reduce_test.cpp +++ b/tests/sources/reduce_test.cpp @@ -14,8 +14,6 @@ uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; uint32_t math_sync_tile_dst_index = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) 
__attribute__((aligned(16))); - #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; #else diff --git a/tests/sources/sfpu_binary_test.cpp b/tests/sources/sfpu_binary_test.cpp index b804e9c..6ff4363 100644 --- a/tests/sources/sfpu_binary_test.cpp +++ b/tests/sources/sfpu_binary_test.cpp @@ -13,7 +13,6 @@ const bool unpack_to_dest = true; // Globals uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) __attribute__((aligned(16))); #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; @@ -29,7 +28,7 @@ const bool is_fp32_dest_acc_en = false; void run_kernel() { - volatile uint32_t* buffer_A = (volatile uint32_t*)0x1a000; + volatile uint32_t* buffer_A = reinterpret_cast(0x1a000); _llk_unpack_A_hw_configure_(DATA_FORMAT, DATA_FORMAT, FACE_R_DIM, 0, 4); _llk_unpack_A_init_(0, 0, FACE_R_DIM, 4, DATA_FORMAT, DATA_FORMAT); @@ -49,11 +48,9 @@ void run_kernel() using namespace ckernel; using namespace ckernel::sfpu; -#define ELTWISE_BINARY_SFPU_OP 0 - void run_kernel() { - const bool is_int_fpu_en = false; + constexpr auto ELTWISE_BINARY_SFPU_OP = 0; constexpr bool is_int_fpu_en = false; // copy srca to dest #ifdef ARCH_BLACKHOLE _llk_math_eltwise_unary_datacopy_init_(0, 0, 4, DATA_FORMAT); @@ -98,7 +95,7 @@ void run_kernel() void run_kernel() { - volatile uint32_t* buffer_Dest = (volatile uint32_t*)0x1c000; + volatile uint32_t* buffer_Dest = reinterpret_cast(0x1c000); std::fill(buffer_Dest, buffer_Dest + 16 * 16 * 4, 0xdeadbeef); diff --git a/tests/sources/tilize_calculate_untilize_L1.cpp b/tests/sources/tilize_calculate_untilize_L1.cpp index ff5b8cf..4aafead 100644 --- a/tests/sources/tilize_calculate_untilize_L1.cpp +++ b/tests/sources/tilize_calculate_untilize_L1.cpp @@ -13,7 +13,6 @@ uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; uint32_t math_sync_tile_dst_index = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) 
__attribute__((aligned(16))); #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; diff --git a/tests/sources/unpack_tilize_test.cpp b/tests/sources/unpack_tilize_test.cpp index 01c9dbc..8336288 100644 --- a/tests/sources/unpack_tilize_test.cpp +++ b/tests/sources/unpack_tilize_test.cpp @@ -12,7 +12,6 @@ // Globals uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) __attribute__((aligned(16))); #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; #else diff --git a/tests/sources/unpack_untilize_test.cpp b/tests/sources/unpack_untilize_test.cpp index a42fdb5..da9e4f1 100644 --- a/tests/sources/unpack_untilize_test.cpp +++ b/tests/sources/unpack_untilize_test.cpp @@ -12,7 +12,6 @@ // Globals uint32_t unp_cfg_context = 0; uint32_t pack_sync_tile_dst_ptr = 0; -volatile uint32_t tt_l1_ptr l1_buffer[16] __attribute__((section(".text#"))) __attribute__((aligned(16))); #ifdef DEST_ACC const bool is_fp32_dest_acc_en = true; diff --git a/tt_llk_blackhole/common/inc/ckernel_defs.h b/tt_llk_blackhole/common/inc/ckernel_defs.h index 583a6a2..2c85950 100644 --- a/tt_llk_blackhole/common/inc/ckernel_defs.h +++ b/tt_llk_blackhole/common/inc/ckernel_defs.h @@ -8,6 +8,7 @@ #include "ckernel_ops.h" #include "llk_defs.h" +#include "tensix.h" #include "tensix_types.h" namespace ckernel diff --git a/tt_llk_wormhole_b0/common/inc/ckernel_defs.h b/tt_llk_wormhole_b0/common/inc/ckernel_defs.h index 2fd9f95..83e28e4 100644 --- a/tt_llk_wormhole_b0/common/inc/ckernel_defs.h +++ b/tt_llk_wormhole_b0/common/inc/ckernel_defs.h @@ -8,6 +8,7 @@ #include "ckernel_ops.h" #include "llk_defs.h" +#include "tensix.h" #include "tensix_types.h" namespace ckernel