From 07a0500cd7bbba1a2ed42bb8d9758be497f0b08e Mon Sep 17 00:00:00 2001 From: Samridhi Date: Thu, 27 Feb 2025 16:06:38 -0500 Subject: [PATCH] Added fix to make topology agnostic to eth fw version - Update how local board info is calculated so it's in line with remote_info - breaking changes to eth fw update affect both values equally. - Added traceback printing to exceptions in main program for clarity in debugging. - chore: bumped version and changelog. Signed-off-by: Samridhi --- CHANGELOG.md | 8 ++++++++ pyproject.toml | 4 ++-- tt_topology/backend.py | 34 ++++++++++++++++++++++++---------- tt_topology/constants.py | 5 +++++ tt_topology/tt_topology.py | 5 +++-- 5 files changed, 42 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3deee1f..5d52f8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 1.2.0 - 06/03/2025 + +### Fixed + +- Updated how local eth board info is calculated to make it agnostic to eth fw version +- bumped tt-tools-common version +- Added traceback printing when catching exceptions in main. 
+ ## 1.1.5 - 14/05/2024 ### Updated diff --git a/pyproject.toml b/pyproject.toml index 8f0ef4f..5ecd13a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "tt-topology" -version = "1.1.5" +version = "1.2.0" description = "ethernet topology configuration tool for Tenstorrent silicon" readme = "README.md" requires-python = ">=3.7" @@ -28,7 +28,7 @@ dependencies = [ 'elasticsearch==8.11.0', 'pydantic>=1.2', 'pyluwen @ git+https://github.com/tenstorrent/luwen.git@v0.4.9#subdirectory=crates/pyluwen', - 'tt_tools_common @ git+https://github.com/tenstorrent/tt-tools-common.git@v1.4.11', + 'tt_tools_common @ git+https://github.com/tenstorrent/tt-tools-common.git@v1.4.14', 'pre-commit==3.5.0', 'networkx==3.1', 'matplotlib==3.7.4' diff --git a/tt_topology/backend.py b/tt_topology/backend.py index 6778243..479be60 100644 --- a/tt_topology/backend.py +++ b/tt_topology/backend.py @@ -206,7 +206,7 @@ def get_eth_config_state(self): for data in config_state: assert ( data["fw_version"] == config_state[0]["fw_version"] - ), "Firmware versions do not match" + ), f"Firmware versions do not match: {data['fw_version']} != {config_state[0]['fw_version']}" if not self.log.starting_configs: self.log.starting_configs = config_state_log elif not self.log.post_default_flashing_configs: @@ -296,6 +296,24 @@ def flash_to_default_state(self): CMD_LINE_COLOR.ENDC, ) + def get_local_eth_board_info(self, chip): + """ + Get the local board info from noc, making it eth fw version agnostic + """ + local_board_id = bytearray(4) + local_board_type = bytearray(4) + + for port in range(16): + eth_x, eth_y = self.eth_xy_decode(port) + chip.noc_read(0, eth_x, eth_y, constants.ETH_TEST_RESULT_LOCAL_TYPE, local_board_type) + + if int.from_bytes(local_board_type, "little") != 0: + chip.noc_read(0, eth_x, eth_y, constants.ETH_TEST_RESULT_LOCAL_ID, local_board_id) + local_board_type = int.from_bytes(local_board_type, "little") + local_board_id = int.from_bytes(local_board_id, 
"little") + local_board_info = f"{(local_board_type << 32) | local_board_id:016x}" + return local_board_info + def generate_connection_map(self): """ Generate an map with chip data and a list of connections @@ -314,15 +332,7 @@ def generate_connection_map(self): chip = device.as_wh() board_id = str(hex(device.board_id())).replace("0x", "") board_type = get_board_type(board_id) - - # print(board_id, board_type) - eth_board_type = bytearray(4) - eth_board_id = bytearray(4) - chip.noc_read(0, 9, 0, constants.ETH_L1_PARAM_BOARD_TYPE, eth_board_type) - chip.noc_read(0, 9, 0, constants.ETH_L1_PARAM_BOARD_ID, eth_board_id) - eth_board_type = int.from_bytes(eth_board_type, "little") - eth_board_id = int.from_bytes(eth_board_id, "little") - eth_board_info = f"{(eth_board_type << 32) | eth_board_id:016x}" + eth_board_info = self.get_local_eth_board_info(chip) chip_data[eth_board_info] = { "id": idx, @@ -357,6 +367,10 @@ def generate_connection_map(self): connection_map_log_obj = log_obj break + # get fw version and collect remote_info accordingly + chip_eth_fw_ver = bytearray(4) + chip.spi_read(int(constants.ETH_FW_VERSION_ADDR), chip_eth_fw_ver) + chip_eth_fw_ver = int.from_bytes(chip_eth_fw_ver, "little") # Go through all 16 ETH ports and read their remote chip ids (if applicable) # Use those IDs to construct the vectorized representation for port in range(16): diff --git a/tt_topology/constants.py b/tt_topology/constants.py index bb7e0d4..578d4ce 100644 --- a/tt_topology/constants.py +++ b/tt_topology/constants.py @@ -25,3 +25,8 @@ ETH_TEST_RESULT_REMOTE_ID = ETH_TEST_RESULT_BASE_ADDR + (0x4) * 73 ETH_TEST_RESULT_REMOTE_COORD = ETH_TEST_RESULT_BASE_ADDR + (0x4) * 74 # 0x0000YYXX ETH_TEST_RESULT_REMOTE_SHELF_RACK = ETH_TEST_RESULT_BASE_ADDR + (0x4) * 75 # 0x0000SSRR + +ETH_TEST_RESULT_LOCAL_TYPE = ETH_TEST_RESULT_BASE_ADDR + (0x4) * 64 +ETH_TEST_RESULT_LOCAL_ID = ETH_TEST_RESULT_BASE_ADDR + (0x4) * 65 +ETH_TEST_RESULT_LOCAL_COORD = ETH_TEST_RESULT_BASE_ADDR + (0x4) * 66 # 
0x0000YYXX +ETH_TEST_RESULT_LOCAL_SHELF_RACK = ETH_TEST_RESULT_BASE_ADDR + (0x4) * 67 # 0x0000SSRR diff --git a/tt_topology/tt_topology.py b/tt_topology/tt_topology.py index e4a6e94..659a482 100644 --- a/tt_topology/tt_topology.py +++ b/tt_topology/tt_topology.py @@ -9,6 +9,7 @@ import sys import time import argparse +import traceback import pkg_resources from tt_tools_common.reset_common.wh_reset import WHChipReset from tt_tools_common.ui_common.themes import CMD_LINE_COLOR @@ -467,10 +468,10 @@ def main(): except Exception as e: print( CMD_LINE_COLOR.RED, - e, + traceback.format_exc(), CMD_LINE_COLOR.ENDC, ) - topo_backend.log.errors = str(e) + topo_backend.log.errors = str(traceback.format_exc()) errors = True finally: # Still collect the log if something went wrong