diff --git a/device/cluster.cpp b/device/cluster.cpp index 6efa1752..1e8e1ffc 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -171,11 +171,14 @@ struct tt_4_byte_aligned_buffer { namespace tt::umd { bool Cluster::address_in_tlb_space( - uint32_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, std::uint32_t chip) { - return ( - (tlb_config_map.at(chip).find(tlb_index) != tlb_config_map.at(chip).end()) && - address >= tlb_config_map.at(chip).at(tlb_index) && - (address + size_in_bytes <= tlb_config_map.at(chip).at(tlb_index) + tlb_size)); + uint64_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, std::uint32_t chip) { + const auto& tlb_map = tlb_config_map.at(chip); + const auto it = tlb_map.find(tlb_index); + if (it != tlb_map.end()) { + auto mapped_address = it->second; + return address >= mapped_address && (address + size_in_bytes <= mapped_address + tlb_size); + } + return false; } std::unordered_map& Cluster::get_virtual_soc_descriptors() { @@ -1113,7 +1116,7 @@ void Cluster::write_device_memory( const void* mem_ptr, uint32_t size_in_bytes, tt_cxy_pair target, - std::uint32_t address, + uint64_t address, const std::string& fallback_tlb) { PCIDevice* dev = get_pci_device(target.chip); const uint8_t* buffer_addr = static_cast(mem_ptr); @@ -1164,13 +1167,7 @@ void Cluster::write_device_memory( } void Cluster::read_device_memory( - void* mem_ptr, - tt_cxy_pair target, - std::uint32_t address, - std::uint32_t size_in_bytes, - const std::string& fallback_tlb) { - // Assume that mem_ptr has been allocated adequate memory on host when this function is called. Otherwise, this - // function will cause a segfault. + void* mem_ptr, tt_cxy_pair target, uint64_t address, uint32_t size_in_bytes, const std::string& fallback_tlb) { log_debug( LogSiliconDriver, "Cluster::read_device_memory to chip:{} {}-{} at 0x{:x} size_in_bytes: {}", diff --git a/device/cluster.h b/device/cluster.h index 822bdb96..130150b7 100644 --- a/device/cluster.h +++ b/device/cluster.h @@ -877,7 +877,7 @@ class Cluster : public tt_device { const void* mem_ptr, uint32_t size_in_bytes, tt_cxy_pair target, - std::uint32_t address, + uint64_t address, const std::string& fallback_tlb); void write_to_non_mmio_device( const void* mem_ptr, @@ -887,11 +887,7 @@ class Cluster : public tt_device { bool broadcast = false, std::vector broadcast_header = {}); void read_device_memory( - void* mem_ptr, - tt_cxy_pair target, - std::uint32_t address, - std::uint32_t size_in_bytes, - const std::string& fallback_tlb); + void* mem_ptr, tt_cxy_pair target, uint64_t address, uint32_t size_in_bytes, const std::string& fallback_tlb); void read_from_non_mmio_device(void* mem_ptr, tt_cxy_pair core, uint64_t address, uint32_t size_in_bytes); void read_mmio_device_register( void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb); @@ -948,7 +944,7 @@ class Cluster : public tt_device { uint32_t* return_3 = nullptr, uint32_t* return_4 = nullptr); bool address_in_tlb_space( - uint32_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, uint32_t chip); + uint64_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, uint32_t chip); std::shared_ptr get_mutex(const std::string& tlb_name, int pci_interface_id); virtual uint32_t get_harvested_noc_rows_for_chip( int logical_device_id); // Returns one-hot encoded harvesting mask for PCIe mapped chips @@ -1012,7 +1008,7 @@ class Cluster : public tt_device { std::unordered_map> workers_per_chip = {}; std::unordered_set eth_cores = {}; std::unordered_set dram_cores = {}; - std::map> tlb_config_map = {}; + std::map> tlb_config_map = {}; std::set all_target_mmio_devices; // Note that these maps holds only entries for local PCIe chips.