From 81150bd87314d6d676cc757c744441e5122116b5 Mon Sep 17 00:00:00 2001
From: Joel Smith
Date: Fri, 13 Dec 2024 02:30:12 +0000
Subject: [PATCH] Revert "Abolish get_available_num_host_mem_channels"

This reverts commit 3210bd959bc75e5e2d97b30631bbb606585a48e5.
---
Reviewer notes (below the "---" marker, so not part of the commit message):

* The line structure of this patch was restored from a whitespace-collapsed
  copy. Blank context lines follow from the hunk line counts. Leading
  indentation could not be recovered and is reconstructed assuming 4-space
  steps (the base indent inside Cluster::create_device is a guess), so check
  it against the tree or apply with `git apply --ignore-whitespace`.
* Re-adds get_available_num_host_mem_channels(): declared in hugepage.h,
  defined in hugepage.cpp, and called again from Cluster::create_device()
  in place of using num_host_mem_ch_per_mmio_device directly.
* The function returns
  min(target, max(1, total_hugepages / num_devices_matching_id_and_revision)),
  or 0 (with a warning) when no device matches the given PCI device_id and
  revision. It only warns when hugepages are short; it asserts that the
  result does not exceed g_MAX_HOST_MEM_CHANNELS.
* NOTE(review): pcie_revision is a uint32_t local but is passed to the
  uint16_t revision_id parameter, and the uint32_t return value is stored in
  an int. Both conversions are implicit; confirm they are intended.

 device/api/umd/device/hugepage.h |  4 +++
 device/cluster.cpp               |  9 ++++-
 device/hugepage.cpp              | 62 ++++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/device/api/umd/device/hugepage.h b/device/api/umd/device/hugepage.h
index 9b4c9a5b..f53246f9 100644
--- a/device/api/umd/device/hugepage.h
+++ b/device/api/umd/device/hugepage.h
@@ -16,6 +16,10 @@ namespace tt::umd {
 // Get number of 1GB host hugepages installed.
 uint32_t get_num_hugepages();
 
+// Dynamically figure out how many host memory channels (based on hugepages installed) for each device, based on arch.
+uint32_t get_available_num_host_mem_channels(
+    const uint32_t num_channels_per_device_target, const uint16_t device_id, const uint16_t revision_id);
+
 // Looks for hugetlbfs inside /proc/mounts matching desired pagesize (typically 1G)
 std::string find_hugepage_dir(std::size_t pagesize);
 
diff --git a/device/cluster.cpp b/device/cluster.cpp
index e1981064..b79c1158 100644
--- a/device/cluster.cpp
+++ b/device/cluster.cpp
@@ -240,7 +240,14 @@ void Cluster::create_device(
         }
 
         auto pci_device = m_tt_device_map.at(logical_device_id)->get_pci_device();
-        int num_host_mem_channels = num_host_mem_ch_per_mmio_device;
+        uint16_t pcie_device_id = pci_device->get_pci_device_id();
+        uint32_t pcie_revision = pci_device->get_pci_revision();
+        // TODO: get rid of this, it doesn't make any sense.
+        // Update: I did get rid of it and it broke Metal CI, which is passing
+        // tests that ask for more hugepages than exist. That's wrong, but it
+        // isn't fixed yet, so until then...
+        int num_host_mem_channels =
+            get_available_num_host_mem_channels(num_host_mem_ch_per_mmio_device, pcie_device_id, pcie_revision);
 
         log_debug(
             LogSiliconDriver,
diff --git a/device/hugepage.cpp b/device/hugepage.cpp
index 31aad9b3..8883bff2 100644
--- a/device/hugepage.cpp
+++ b/device/hugepage.cpp
@@ -37,6 +37,68 @@ uint32_t get_num_hugepages() {
     return num_hugepages;
 }
 
+uint32_t get_available_num_host_mem_channels(
+    const uint32_t num_channels_per_device_target, const uint16_t device_id, const uint16_t revision_id) {
+    // To minimally support hybrid dev systems with mix of ARCH, get only devices matching current ARCH's device_id.
+    uint32_t total_num_tt_mmio_devices = tt::cpuset::tt_cpuset_allocator::get_num_tt_pci_devices();
+    uint32_t num_tt_mmio_devices_for_arch =
+        tt::cpuset::tt_cpuset_allocator::get_num_tt_pci_devices_by_pci_device_id(device_id, revision_id);
+    uint32_t total_hugepages = get_num_hugepages();
+
+    // This shouldn't happen on silicon machines.
+    if (num_tt_mmio_devices_for_arch == 0) {
+        log_warning(
+            LogSiliconDriver,
+            "No TT devices found that match PCI device_id: 0x{:x} revision: {}, returning NumHostMemChannels:0",
+            device_id,
+            revision_id);
+        return 0;
+    }
+
+    // GS will use P2P + 1 channel, others may support 4 host channels. Apply min of 1 to not completely break setups
+    // that were incomplete ie fewer hugepages than devices, which would partially work previously for some devices.
+    uint32_t num_channels_per_device_available =
+        std::min(num_channels_per_device_target, std::max((uint32_t)1, total_hugepages / num_tt_mmio_devices_for_arch));
+
+    // Perform some helpful assertion checks to guard against common pitfalls that would show up as runtime issues later
+    // on.
+    if (total_num_tt_mmio_devices > num_tt_mmio_devices_for_arch) {
+        log_warning(
+            LogSiliconDriver,
+            "Hybrid system mixing different TTDevices - this is not well supported. Ensure sufficient "
+            "Hugepages/HostMemChannels per device.");
+    }
+
+    if (total_hugepages < num_tt_mmio_devices_for_arch) {
+        log_warning(
+            LogSiliconDriver,
+            "Insufficient NumHugepages: {} should be at least NumMMIODevices: {} for device_id: 0x{:x} revision: {}. "
+            "NumHostMemChannels would be 0, bumping to 1.",
+            total_hugepages,
+            num_tt_mmio_devices_for_arch,
+            device_id,
+            revision_id);
+    }
+
+    if (num_channels_per_device_available < num_channels_per_device_target) {
+        log_warning(
+            LogSiliconDriver,
+            "NumHostMemChannels: {} used for device_id: 0x{:x} less than target: {}. Workload will fail if it exceeds "
+            "NumHostMemChannels. Increase Number of Hugepages.",
+            num_channels_per_device_available,
+            device_id,
+            num_channels_per_device_target);
+    }
+
+    log_assert(
+        num_channels_per_device_available <= g_MAX_HOST_MEM_CHANNELS,
+        "NumHostMemChannels: {} exceeds supported maximum: {}, this is unexpected.",
+        num_channels_per_device_available,
+        g_MAX_HOST_MEM_CHANNELS);
+
+    return num_channels_per_device_available;
+}
+
 std::string find_hugepage_dir(std::size_t pagesize) {
     static const std::regex hugetlbfs_mount_re(
         fmt::format("^(nodev|hugetlbfs) ({}) hugetlbfs ([^ ]+) 0 0$", hugepage_dir));