Skip to content

Commit

Permalink
Merge branch 'main' into brosko/simulator_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
broskoTT authored Mar 7, 2025
2 parents 873a0e8 + 3db7334 commit bd456dd
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 24 deletions.
1 change: 1 addition & 0 deletions device/api/umd/device/architecture_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class architecture_implementation {
virtual uint32_t get_dynamic_tlb_16m_cfg_addr() const = 0;
virtual uint32_t get_mem_large_read_tlb() const = 0;
virtual uint32_t get_mem_large_write_tlb() const = 0;
virtual uint32_t get_num_eth_channels() const = 0;
virtual uint32_t get_static_tlb_cfg_addr() const = 0;
virtual uint32_t get_static_tlb_size() const = 0;
virtual uint32_t get_read_checking_offset() const = 0;
Expand Down
2 changes: 2 additions & 0 deletions device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ class blackhole_implementation : public architecture_implementation {

uint32_t get_mem_large_write_tlb() const override { return blackhole::MEM_LARGE_WRITE_TLB; }

uint32_t get_num_eth_channels() const override { return blackhole::NUM_ETH_CHANNELS; }

uint32_t get_static_tlb_cfg_addr() const override { return blackhole::STATIC_TLB_CFG_ADDR; }

uint32_t get_static_tlb_size() const override { return blackhole::STATIC_TLB_SIZE; }
Expand Down
2 changes: 2 additions & 0 deletions device/api/umd/device/grayskull_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ class grayskull_implementation : public architecture_implementation {

uint32_t get_mem_large_write_tlb() const override { return grayskull::MEM_LARGE_WRITE_TLB; }

uint32_t get_num_eth_channels() const override { return 0; }

uint32_t get_static_tlb_cfg_addr() const override { return grayskull::STATIC_TLB_CFG_ADDR; }

uint32_t get_static_tlb_size() const override { return grayskull::STATIC_TLB_SIZE; }
Expand Down
2 changes: 2 additions & 0 deletions device/api/umd/device/types/cluster_descriptor_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ inline BoardType get_board_type_from_board_id(const uint64_t board_id) {
return BoardType::P100;
} else if (upi == 0x40 || upi == 0x41 || upi == 0x42) {
return BoardType::P150;
} else if (upi == 0x44 || upi == 0x45 || upi == 0x46) {
return BoardType::P300;
}

throw std::runtime_error(fmt::format("No existing board type for board id {}", board_id));
Expand Down
2 changes: 2 additions & 0 deletions device/api/umd/device/wormhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ class wormhole_implementation : public architecture_implementation {

uint32_t get_mem_large_write_tlb() const override { return wormhole::MEM_LARGE_WRITE_TLB; }

uint32_t get_num_eth_channels() const override { return wormhole::NUM_ETH_CHANNELS; }

uint32_t get_static_tlb_cfg_addr() const override { return wormhole::STATIC_TLB_CFG_ADDR; }

uint32_t get_read_checking_offset() const override { return wormhole::ARC_SCRATCH_6_OFFSET; }
Expand Down
26 changes: 24 additions & 2 deletions device/tt_cluster_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <memory>
#include <sstream>

#include "api/umd/device/blackhole_implementation.h"
#include "api/umd/device/wormhole_implementation.h"
#include "disjoint_set.hpp"
#include "fmt/core.h"
#include "libs/create_ethernet_map.h"
Expand Down Expand Up @@ -426,10 +428,10 @@ std::unique_ptr<tt_ClusterDescriptor> tt_ClusterDescriptor::create_from_yaml(

YAML::Node yaml = YAML::LoadFile(cluster_descriptor_file_path);
tt_ClusterDescriptor::load_chips_from_connectivity_descriptor(yaml, *desc);
tt_ClusterDescriptor::load_harvesting_information(yaml, *desc);
tt_ClusterDescriptor::load_ethernet_connections_from_connectivity_descriptor(yaml, *desc);
tt_ClusterDescriptor::merge_cluster_ids(*desc);
tt_ClusterDescriptor::fill_galaxy_connections(*desc);
tt_ClusterDescriptor::load_harvesting_information(yaml, *desc);
desc->enable_all_devices();

desc->fill_chips_grouped_by_closest_mmio();
Expand Down Expand Up @@ -482,6 +484,20 @@ std::unique_ptr<tt_ClusterDescriptor> tt_ClusterDescriptor::create_mock_cluster(
void tt_ClusterDescriptor::load_ethernet_connections_from_connectivity_descriptor(
YAML::Node &yaml, tt_ClusterDescriptor &desc) {
log_assert(yaml["ethernet_connections"].IsSequence(), "Invalid YAML");

// Preload idle eth channels.
for (const auto &chip : desc.all_chips) {
int num_harvested_channels = desc.eth_harvesting_masks.empty()
? 0
: CoordinateManager::get_num_harvested(desc.eth_harvesting_masks.at(chip));
int num_channels =
tt::umd::architecture_implementation::create(desc.chip_arch.at(chip))->get_num_eth_channels() -
num_harvested_channels;
for (int i = 0; i < num_channels; i++) {
desc.idle_eth_channels[chip].insert(i);
}
}

for (YAML::Node &connected_endpoints : yaml["ethernet_connections"].as<std::vector<YAML::Node>>()) {
log_assert(connected_endpoints.IsSequence(), "Invalid YAML");

Expand All @@ -508,6 +524,10 @@ void tt_ClusterDescriptor::load_ethernet_connections_from_connectivity_descripto
} else {
desc.ethernet_connections[chip_1][channel_1] = {chip_0, channel_0};
}
desc.active_eth_channels[chip_0].insert(channel_0);
desc.idle_eth_channels[chip_0].erase(channel_0);
desc.active_eth_channels[chip_1].insert(channel_1);
desc.idle_eth_channels[chip_1].erase(channel_1);
}

log_debug(LogSiliconDriver, "Ethernet Connectivity Descriptor:");
Expand Down Expand Up @@ -756,7 +776,9 @@ void tt_ClusterDescriptor::load_chips_from_connectivity_descriptor(YAML::Node &y
chip_board_type.second == "p150" || chip_board_type.second == "p150A" ||
chip_board_type.second == "p150C") {
board_type = BoardType::P150;
} else if (chip_board_type.second == "p300") {
} else if (
chip_board_type.second == "p300" || chip_board_type.second == "p300A" ||
chip_board_type.second == "p300C") {
board_type = BoardType::P300;
} else if (chip_board_type.second == "GALAXY") {
board_type = BoardType::GALAXY;
Expand Down
56 changes: 56 additions & 0 deletions tests/api/test_cluster_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,59 @@ TEST(ApiClusterDescriptorTest, SeparateClusters) {
EXPECT_TRUE(chip_clusters.are_same_set(chip, closest_mmio_chip));
}
}

TEST(ApiClusterDescriptorTest, EthernetConnectivity) {
std::unique_ptr<tt_ClusterDescriptor> cluster_desc = tt::umd::Cluster::create_cluster_descriptor();

if (cluster_desc == nullptr) {
GTEST_SKIP() << "No chips present on the system. Skipping test.";
}

auto ethernet_connections = cluster_desc->get_ethernet_connections();
for (auto [chip, connections] : ethernet_connections) {
for (auto [channel, remote_chip_and_channel] : connections) {
std::cout << "Ethernet connection from chip " << chip << " channel " << channel << " to chip "
<< std::get<0>(remote_chip_and_channel) << " channel " << std::get<1>(remote_chip_and_channel)
<< std::endl;
}
}

auto chips_with_mmio = cluster_desc->get_chips_with_mmio();
for (auto [chip, mmio_chip] : chips_with_mmio) {
std::cout << "Chip " << chip << " has MMIO on PCI id " << mmio_chip << std::endl;
}

for (auto chip : cluster_desc->get_all_chips()) {
// Wormhole has 16 and Blackhole has 14 ethernet channels.
for (int eth_chan = 0;
eth_chan <
tt::umd::architecture_implementation::create(cluster_desc->get_arch(chip))->get_num_eth_channels();
eth_chan++) {
bool has_active_link = cluster_desc->ethernet_core_has_active_ethernet_link(chip, eth_chan);
std::cout << "Chip " << chip << " channel " << eth_chan << " has active link: " << has_active_link
<< std::endl;

if (!has_active_link) {
continue;
}
std::tuple<chip_id_t, ethernet_channel_t> remote_chip_and_channel =
cluster_desc->get_chip_and_channel_of_remote_ethernet_core(chip, eth_chan);
std::cout << "Chip " << chip << " channel " << eth_chan << " has remote chip "
<< std::get<0>(remote_chip_and_channel) << " channel " << std::get<1>(remote_chip_and_channel)
<< std::endl;
}
}

for (auto chip : cluster_desc->get_all_chips()) {
std::cout << "Chip " << chip << " has the following active ethernet channels: ";
for (auto eth_chan : cluster_desc->get_active_eth_channels(chip)) {
std::cout << eth_chan << " ";
}
std::cout << std::endl;
std::cout << " and following idle ethernet channels: ";
for (auto eth_chan : cluster_desc->get_idle_eth_channels(chip)) {
std::cout << eth_chan << " ";
}
std::cout << std::endl;
}
}
40 changes: 18 additions & 22 deletions tests/blackhole/test_arc_messages_bh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,36 +27,32 @@ TEST(BlackholeArcMessages, BlackholeArcMessagesBasic) {
}
}

// TODO: enable this test once 80.15 fw is flashed to CI machines
// TEST(BlackholeArcMessages, BlackholeArcMessageHigherAIClock) {
// const uint32_t ms_sleep = 2000;
TEST(BlackholeArcMessages, BlackholeArcMessageHigherAIClock) {
const uint32_t ms_sleep = 2000;

// std::vector<int> pci_device_ids = PCIDevice::enumerate_devices();

// for (int pci_device_id : pci_device_ids) {
// std::unique_ptr<TTDevice> tt_device = TTDevice::create(pci_device_id);
std::vector<int> pci_device_ids = PCIDevice::enumerate_devices();

// std::unique_ptr<ArcMessenger> bh_arc_messenger = ArcMessenger::create_arc_messenger(tt_device.get());
for (int pci_device_id : pci_device_ids) {
std::unique_ptr<TTDevice> tt_device = TTDevice::create(pci_device_id);

// uint32_t response = bh_arc_messenger->send_message((uint32_t)ArcMessageType::AICLK_GO_BUSY);
std::unique_ptr<ArcMessenger> bh_arc_messenger = ArcMessenger::create_arc_messenger(tt_device.get());

// // Wait for telemetry to update AICLK.
// std::this_thread::sleep_for(std::chrono::milliseconds(ms_sleep));
uint32_t response = bh_arc_messenger->send_message((uint32_t)ArcMessageType::AICLK_GO_BUSY);

// std::unique_ptr<blackhole::BlackholeArcTelemetryReader> blackhole_arc_telemetry_reader =
// std::make_unique<blackhole::BlackholeArcTelemetryReader>(tt_device.get());
// Wait for telemetry to update AICLK.
std::this_thread::sleep_for(std::chrono::milliseconds(ms_sleep));

// uint32_t aiclk = blackhole_arc_telemetry_reader->read_entry(blackhole::TAG_AICLK);
uint32_t aiclk = tt_device->get_clock();

// EXPECT_EQ(aiclk, blackhole::AICLK_BUSY_VAL);
EXPECT_EQ(aiclk, blackhole::AICLK_BUSY_VAL);

// response = bh_arc_messenger->send_message((uint32_t)ArcMessageType::AICLK_GO_LONG_IDLE);
response = bh_arc_messenger->send_message((uint32_t)ArcMessageType::AICLK_GO_LONG_IDLE);

// // Wait for telemetry to update AICLK.
// std::this_thread::sleep_for(std::chrono::milliseconds(ms_sleep));
// Wait for telemetry to update AICLK.
std::this_thread::sleep_for(std::chrono::milliseconds(ms_sleep));

// aiclk = blackhole_arc_telemetry_reader->read_entry(blackhole::TAG_AICLK);
aiclk = tt_device->get_clock();

// EXPECT_EQ(aiclk, blackhole::AICLK_IDLE_VAL);
// }
// }
EXPECT_EQ(aiclk, blackhole::AICLK_IDLE_VAL);
}
}

0 comments on commit bd456dd

Please sign in to comment.