Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix address types #328

Merged
merged 4 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 11 additions & 14 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,14 @@ struct tt_4_byte_aligned_buffer {
namespace tt::umd {

bool Cluster::address_in_tlb_space(
uint32_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, std::uint32_t chip) {
return (
(tlb_config_map.at(chip).find(tlb_index) != tlb_config_map.at(chip).end()) &&
address >= tlb_config_map.at(chip).at(tlb_index) &&
(address + size_in_bytes <= tlb_config_map.at(chip).at(tlb_index) + tlb_size));
uint64_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, std::uint32_t chip) {
const auto& tlb_map = tlb_config_map.at(chip);
const auto it = tlb_map.find(tlb_index);
if (it != tlb_map.end()) {
auto mapped_address = it->second;
return address >= mapped_address && (address + size_in_bytes <= mapped_address + tlb_size);
}
return false;
}

std::unordered_map<chip_id_t, tt_SocDescriptor>& Cluster::get_virtual_soc_descriptors() {
Expand Down Expand Up @@ -1113,7 +1116,7 @@ void Cluster::write_device_memory(
const void* mem_ptr,
uint32_t size_in_bytes,
tt_cxy_pair target,
std::uint32_t address,
uint64_t address,
const std::string& fallback_tlb) {
PCIDevice* dev = get_pci_device(target.chip);
const uint8_t* buffer_addr = static_cast<const uint8_t*>(mem_ptr);
Expand Down Expand Up @@ -1164,13 +1167,7 @@ void Cluster::write_device_memory(
}

void Cluster::read_device_memory(
void* mem_ptr,
tt_cxy_pair target,
std::uint32_t address,
std::uint32_t size_in_bytes,
const std::string& fallback_tlb) {
// Assume that mem_ptr has been allocated adequate memory on host when this function is called. Otherwise, this
// function will cause a segfault.
void* mem_ptr, tt_cxy_pair target, uint64_t address, uint32_t size_in_bytes, const std::string& fallback_tlb) {
log_debug(
LogSiliconDriver,
"Cluster::read_device_memory to chip:{} {}-{} at 0x{:x} size_in_bytes: {}",
Expand Down Expand Up @@ -1379,7 +1376,7 @@ std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(
}

void Cluster::configure_tlb(
chip_id_t logical_device_id, tt_xy_pair core, std::int32_t tlb_index, std::int32_t address, uint64_t ordering) {
chip_id_t logical_device_id, tt_xy_pair core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
log_assert(
ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed,
"Invalid ordering specified in Cluster::configure_tlb");
Expand Down
20 changes: 8 additions & 12 deletions device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,8 @@ class tt_device {
virtual void configure_tlb(
chip_id_t logical_device_id,
tt_xy_pair core,
std::int32_t tlb_index,
std::int32_t address,
int32_t tlb_index,
uint64_t address,
uint64_t ordering = TLB_DATA::Relaxed) {
throw std::runtime_error("---- tt_device::configure_tlb is not implemented\n");
}
Expand Down Expand Up @@ -721,8 +721,8 @@ class Cluster : public tt_device {
virtual void configure_tlb(
chip_id_t logical_device_id,
tt_xy_pair core,
std::int32_t tlb_index,
std::int32_t address,
int32_t tlb_index,
uint64_t address,
uint64_t ordering = TLB_DATA::Posted);
virtual void set_fallback_tlb_ordering_mode(const std::string& fallback_tlb, uint64_t ordering = TLB_DATA::Posted);
virtual void setup_core_to_tlb_map(
Expand Down Expand Up @@ -877,7 +877,7 @@ class Cluster : public tt_device {
const void* mem_ptr,
uint32_t size_in_bytes,
tt_cxy_pair target,
std::uint32_t address,
uint64_t address,
const std::string& fallback_tlb);
void write_to_non_mmio_device(
const void* mem_ptr,
Expand All @@ -887,11 +887,7 @@ class Cluster : public tt_device {
bool broadcast = false,
std::vector<int> broadcast_header = {});
void read_device_memory(
void* mem_ptr,
tt_cxy_pair target,
std::uint32_t address,
std::uint32_t size_in_bytes,
const std::string& fallback_tlb);
void* mem_ptr, tt_cxy_pair target, uint64_t address, uint32_t size_in_bytes, const std::string& fallback_tlb);
void read_from_non_mmio_device(void* mem_ptr, tt_cxy_pair core, uint64_t address, uint32_t size_in_bytes);
void read_mmio_device_register(
void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb);
Expand Down Expand Up @@ -948,7 +944,7 @@ class Cluster : public tt_device {
uint32_t* return_3 = nullptr,
uint32_t* return_4 = nullptr);
bool address_in_tlb_space(
uint32_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, uint32_t chip);
uint64_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, uint32_t chip);
std::shared_ptr<boost::interprocess::named_mutex> get_mutex(const std::string& tlb_name, int pci_interface_id);
virtual uint32_t get_harvested_noc_rows_for_chip(
int logical_device_id); // Returns one-hot encoded harvesting mask for PCIe mapped chips
Expand Down Expand Up @@ -1012,7 +1008,7 @@ class Cluster : public tt_device {
std::unordered_map<chip_id_t, std::unordered_set<tt_xy_pair>> workers_per_chip = {};
std::unordered_set<tt_xy_pair> eth_cores = {};
std::unordered_set<tt_xy_pair> dram_cores = {};
std::map<chip_id_t, std::unordered_map<std::int32_t, std::int32_t>> tlb_config_map = {};
std::map<chip_id_t, std::unordered_map<int32_t, uint64_t>> tlb_config_map = {};
std::set<chip_id_t> all_target_mmio_devices;

// Note that these maps holds only entries for local PCIe chips.
Expand Down
66 changes: 33 additions & 33 deletions src/firmware/riscv/blackhole/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,35 @@ namespace eth_l1_mem {


struct address_map {

// Sizes
static constexpr std::int32_t FIRMWARE_SIZE = 32 * 1024;
static constexpr std::int32_t ERISC_BARRIER_SIZE = 0x20; // 32 bytes reserved for Barrier
static constexpr std::int32_t COMMAND_Q_SIZE = 4 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE_HOST = 4 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE_ETH = 4 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE_NOC = 16 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE = 24 * 1024;
static constexpr std::int32_t EPOCH_RUNTIME_CONFIG_SIZE = 128; //
static constexpr std::int32_t OVERLAY_BLOB_SIZE = (32 * 1024) - EPOCH_RUNTIME_CONFIG_SIZE;
static constexpr std::int32_t OVERLAY_MAX_EXTRA_BLOB_SIZE = (0 * 1024);
static constexpr std::int32_t TILE_HEADER_BUF_SIZE = 32 * 1024; //
static constexpr std::int32_t NCRISC_L1_EPOCH_Q_SIZE = 32;
static constexpr std::int32_t FW_L1_BLOCK_SIZE = OVERLAY_BLOB_SIZE + EPOCH_RUNTIME_CONFIG_SIZE + TILE_HEADER_BUF_SIZE;
static constexpr std::int32_t FW_DRAM_BLOCK_SIZE = OVERLAY_BLOB_SIZE + OVERLAY_MAX_EXTRA_BLOB_SIZE + EPOCH_RUNTIME_CONFIG_SIZE + TILE_HEADER_BUF_SIZE;
static constexpr uint32_t FIRMWARE_SIZE = 32 * 1024;
static constexpr uint32_t ERISC_BARRIER_SIZE = 0x20; // 32 bytes reserved for Barrier
static constexpr uint32_t COMMAND_Q_SIZE = 4 * 1024;
static constexpr uint32_t DATA_BUFFER_SIZE_HOST = 4 * 1024;
static constexpr uint32_t DATA_BUFFER_SIZE_ETH = 4 * 1024;
static constexpr uint32_t DATA_BUFFER_SIZE_NOC = 16 * 1024;
static constexpr uint32_t DATA_BUFFER_SIZE = 24 * 1024;
static constexpr uint32_t EPOCH_RUNTIME_CONFIG_SIZE = 128; //
static constexpr uint32_t OVERLAY_BLOB_SIZE = (32 * 1024) - EPOCH_RUNTIME_CONFIG_SIZE;
static constexpr uint32_t OVERLAY_MAX_EXTRA_BLOB_SIZE = (0 * 1024);
static constexpr uint32_t TILE_HEADER_BUF_SIZE = 32 * 1024; //
static constexpr uint32_t NCRISC_L1_EPOCH_Q_SIZE = 32;
static constexpr uint32_t FW_L1_BLOCK_SIZE = OVERLAY_BLOB_SIZE + EPOCH_RUNTIME_CONFIG_SIZE + TILE_HEADER_BUF_SIZE;
static constexpr uint32_t FW_DRAM_BLOCK_SIZE = OVERLAY_BLOB_SIZE + OVERLAY_MAX_EXTRA_BLOB_SIZE + EPOCH_RUNTIME_CONFIG_SIZE + TILE_HEADER_BUF_SIZE;
// Base addresses
static constexpr std::int32_t FIRMWARE_BASE = 0x9040;
static constexpr std::int32_t ERISC_BARRIER_BASE = 0x11FE0;
static constexpr std::int32_t L1_EPOCH_Q_BASE = 0x9000; // Epoch Q start in L1.
static constexpr std::int32_t L1_DRAM_POLLING_CTRL_BASE = 0x9020;
static constexpr std::int32_t COMMAND_Q_BASE = L1_EPOCH_Q_BASE + FIRMWARE_SIZE;
static constexpr std::int32_t DATA_BUFFER_BASE = COMMAND_Q_BASE + COMMAND_Q_SIZE;
static constexpr std::int32_t TILE_HEADER_BUFFER_BASE = DATA_BUFFER_BASE + DATA_BUFFER_SIZE;
static constexpr std::int32_t EPOCH_RUNTIME_CONFIG_BASE = TILE_HEADER_BUFFER_BASE + TILE_HEADER_BUF_SIZE;
static constexpr std::int32_t OVERLAY_BLOB_BASE = EPOCH_RUNTIME_CONFIG_BASE + EPOCH_RUNTIME_CONFIG_SIZE;
static constexpr std::int32_t DATA_BUFFER_SPACE_BASE = OVERLAY_BLOB_BASE + OVERLAY_BLOB_SIZE;

static std::int32_t OVERLAY_FULL_BLOB_SIZE() {
static constexpr uint32_t FIRMWARE_BASE = 0x9040;
static constexpr uint32_t ERISC_BARRIER_BASE = 0x11FE0;
static constexpr uint32_t L1_EPOCH_Q_BASE = 0x9000; // Epoch Q start in L1.
static constexpr uint32_t L1_DRAM_POLLING_CTRL_BASE = 0x9020;
static constexpr uint32_t COMMAND_Q_BASE = L1_EPOCH_Q_BASE + FIRMWARE_SIZE;
static constexpr uint32_t DATA_BUFFER_BASE = COMMAND_Q_BASE + COMMAND_Q_SIZE;
static constexpr uint32_t TILE_HEADER_BUFFER_BASE = DATA_BUFFER_BASE + DATA_BUFFER_SIZE;
static constexpr uint32_t EPOCH_RUNTIME_CONFIG_BASE = TILE_HEADER_BUFFER_BASE + TILE_HEADER_BUF_SIZE;
static constexpr uint32_t OVERLAY_BLOB_BASE = EPOCH_RUNTIME_CONFIG_BASE + EPOCH_RUNTIME_CONFIG_SIZE;
static constexpr uint32_t DATA_BUFFER_SPACE_BASE = OVERLAY_BLOB_BASE + OVERLAY_BLOB_SIZE;

static uint32_t OVERLAY_FULL_BLOB_SIZE() {
return OVERLAY_BLOB_SIZE + OVERLAY_MAX_EXTRA_BLOB_SIZE;
};

Expand All @@ -48,15 +48,15 @@ template<std::size_t A, std::size_t B> struct TAssertEquality {

static constexpr bool _DATA_BUFFER_SPACE_BASE_CORRECT = TAssertEquality<DATA_BUFFER_SPACE_BASE, 0x28000>::_cResult;

static constexpr std::int32_t MAX_SIZE = 256*1024;
static constexpr std::int32_t MAX_L1_LOADING_SIZE = 1 * 256 * 1024;
static constexpr std::int32_t RISC_LOCAL_MEM_BASE = 0xffb00000; // Actaul local memory address as seen from risc firmware
static constexpr uint32_t MAX_SIZE = 256*1024;
static constexpr uint32_t MAX_L1_LOADING_SIZE = 1 * 256 * 1024;

static constexpr uint32_t RISC_LOCAL_MEM_BASE = 0xffb00000; // Actaul local memory address as seen from risc firmware
// As part of the init risc firmware will copy local memory data from
// l1 locations listed above into internal local memory that starts
// l1 locations listed above into internal local memory that starts
// at RISC_LOCAL_MEM_BASE address

static constexpr std::uint32_t FW_VERSION_ADDR = 0x210;
static constexpr uint32_t FW_VERSION_ADDR = 0x210;
};
} // namespace llk

Loading
Loading