Commit 400a0dc

support firmware_core store in bmodel for bm1684x

Change-Id: Id8a0ef06850ee069be54c5e8e0ecaaa9a844cbdd

HarmonyHu committed May 20, 2023
1 parent 7d3a5d4 commit 400a0dc
Showing 14 changed files with 143 additions and 48 deletions.
2 changes: 1 addition & 1 deletion include/tpu_mlir/Backend/Arch.h
@@ -28,7 +28,7 @@ class Arch {
static int64_t LMEM_BYTES;
static int64_t LMEM_BANKS;
static int64_t LMEM_BANK_BYTES;
- static llvm::StringRef LIB_NAME;
+ static llvm::StringRef LIB_BACKEND_NAME;
static bool ALIGN_4N;
static module::Chip chip;
// dbytes is 0.5 for INT4
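Note: the rename from LIB_NAME to LIB_BACKEND_NAME, repeated across every backend in this commit, separates the per-arch backend codegen library from the kernel module (firmware_core) that BM1684X now also carries as LIB_KERNEL_NAME.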
2 changes: 1 addition & 1 deletion include/tpu_mlir/Backend/BM168x/BM1684.h
@@ -435,7 +435,7 @@ class BM1684 : public BM168x {
GDMA_VALUE_FORMAT_INT8 = 3;
GDMA_VALUE_FORMAT_FLOAT16 = 4;
GDMA_VALUE_FORMAT_NUM = 5;
- LIB_NAME = "libbackend_1684.so";
+ LIB_BACKEND_NAME = "libbackend_1684.so";
ALIGN_4N = true;
start_env();
};
8 changes: 6 additions & 2 deletions include/tpu_mlir/Backend/BM168x/BM1684X.h
@@ -34,11 +34,15 @@ class BM1684X : public BM168x {

public:
virtual void after_codegen(int64_t flops = 0) override;

// arch info
virtual uint32_t get_bdc_len(int bdc_num, int group_id) override;
virtual uint32_t get_gdma_len(int gdma_num, int group_id) override;

+ public:
+ // specific global info
+ static constexpr llvm::StringRef LIB_KERNEL_NAME =
+ "libbm1684x_kernel_module.so";

protected:
BM1684X() {
if (chip != module::Chip::BM1684X) {
@@ -64,7 +68,7 @@ class BM1684X : public BM168x {
GDMA_VALUE_FORMAT_BFLOAT16 = 5;
GDMA_VALUE_FORMAT_INT4 = 6;
GDMA_VALUE_FORMAT_NUM = 7;
- LIB_NAME = "libbackend_1684x.so";
+ LIB_BACKEND_NAME = "libbackend_1684x.so";
start_env();
};
virtual ~BM1684X() { end_env(); };
2 changes: 1 addition & 1 deletion include/tpu_mlir/Backend/BM168x/BM1686.h
@@ -32,7 +32,7 @@ class BM1686 : public BM1684X {
GMEM_START_ADDR = 0x100000000ull;
LMEM_BANK_BYTES = LMEM_BYTES / LMEM_BANKS;
CTX_START_ADDR = GMEM_START_ADDR;
- LIB_NAME = "libbackend_1686.so";
+ LIB_BACKEND_NAME = "libbackend_1686.so";
// GDMA format
GDMA_VALUE_FORMAT_INT8 = 0;
GDMA_VALUE_FORMAT_FLOAT16 = 1;
10 changes: 9 additions & 1 deletion include/tpu_mlir/Builder/BM168x/bmodel.hpp
@@ -64,7 +64,8 @@ class ModelGen {
void AddNet(const flatbuffers::Offset<Net> &net);
void AddNet(std::string net_name, const flatbuffers::Offset<NetParameter> &parameter,
uint32_t *net_idx = NULL, uint32_t *stage_idx = NULL);

+ // firmware_core.so save into bmodel
+ void AddKernelModule(std::string &version, Binary &tpu_module);
// finish and save to file
void Finish(const std::string &filename);

@@ -84,13 +85,20 @@ class ModelGen {
std::vector<flatbuffers::Offset<NetParameter>> parameters;
} NET_INFO_T;

+ typedef struct {
+ std::string file_name;
+ Binary binary;
+ } KERNEL_MODULE_T;
+
std::string chip_;
flatbuffers::FlatBufferBuilder builder_;
std::vector<uint8_t> binary_;
std::vector<Binary> binary_vector_;
std::vector<NET_INFO_T> net_vector_;
std::vector<flatbuffers::Offset<bmodel::Net>> nets_;
uint64_t max_neuron_size_;
+ // Binary tpu_module_;
+ KERNEL_MODULE_T kernel_module_;
};

class ModelCtx {
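A small aside on the new API: the header declares the first parameter of AddKernelModule as version, while bmodel.cpp stores it as file_name and the codegen caller passes the module's file name. Below is a minimal usage sketch, not part of the commit, assuming ModelGen is default-constructible in this context and using a stand-in payload in place of the real kernel .so:

#include "tpu_mlir/Builder/BM168x/bmodel.hpp"
#include <cstdint>
#include <string>
#include <vector>

int main() {
  bmodel::ModelGen model_gen;
  model_gen.AddChip("BM1684X");
  // Stand-in payload; the compiler reads the real
  // libbm1684x_kernel_module.so from disk instead.
  std::vector<uint8_t> data = {0x7f, 'E', 'L', 'F'};
  // WriteBinary copies the bytes into the bmodel binary section and
  // returns a Binary handle referencing them.
  bmodel::Binary binary = model_gen.WriteBinary(data.size(), data.data());
  std::string name = "libbm1684x_kernel_module.so";
  model_gen.AddKernelModule(name, binary); // record name + payload
  model_gen.Finish("net.bmodel");          // module travels inside the bmodel
  return 0;
}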
4 changes: 2 additions & 2 deletions lib/Backend/Arch.cpp
@@ -26,7 +26,7 @@ int64_t Arch::LMEM_BYTES = 0;
int64_t Arch::LMEM_BANKS = 0;
int64_t Arch::LMEM_BANK_BYTES = 0;
bool Arch::ALIGN_4N = false;
- llvm::StringRef Arch::LIB_NAME = "";
+ llvm::StringRef Arch::LIB_BACKEND_NAME = "";
module::Chip Arch::chip;

Arch *Arch::inst = nullptr;
@@ -138,7 +138,7 @@ Arch::~Arch() {}
void Arch::load_library() {
if (!DL.isValid()) {
std::string Err;
- DL = llvm::sys::DynamicLibrary::getPermanentLibrary(LIB_NAME.data(), &Err);
+ DL = llvm::sys::DynamicLibrary::getPermanentLibrary(LIB_BACKEND_NAME.data(), &Err);
if (DL.isValid() == false) {
llvm_unreachable(Err.c_str());
}
2 changes: 1 addition & 1 deletion lib/Backend/CV18xx/CV18xx.cpp
@@ -669,7 +669,7 @@ void CV18xx::load_ctx(module::Chip chip) {
}

CV18xx::CV18xx(module::Chip chip) {
- LIB_NAME = "libcvikernel.so";
+ LIB_BACKEND_NAME = "libcvikernel.so";
load_library();
load_ctx(chip);
NPU_NUM = cvk_ctx_->info.npu_num;
12 changes: 12 additions & 0 deletions lib/Builder/bmodel.cpp
@@ -219,6 +219,11 @@ void ModelGen::AddChip(const std::string &arch_name)
chip_ = arch_name;
}

+ void ModelGen::AddKernelModule(std::string &file_name, Binary &tpu_module) {
+ kernel_module_.file_name = file_name;
+ kernel_module_.binary = tpu_module;
+ }
+
void ModelGen::Finish(const string &filename)
{
this->Finish();
@@ -254,6 +259,12 @@ size_t ModelGen::Finish()
auto chip = builder_.CreateString(chip_);
auto now = time(0);
auto time = builder_.CreateString(ctime(&now));
+ auto module_name = builder_.CreateString(kernel_module_.file_name);
+
+ bmodel::KernelModuleBuilder kb(builder_);
+ kb.add_file_name(module_name);
+ kb.add_binary(&kernel_module_.binary);
+ auto kernel_module = kb.Finish();

bmodel::ModelBuilder mb(builder_);
mb.add_chip(chip);
@@ -262,6 +273,7 @@ size_t ModelGen::Finish()
mb.add_version(version);
mb.add_net(net);
mb.add_neuron_size(max_neuron_size_);
+ mb.add_kernel_module(kernel_module);

auto model = mb.Finish();
builder_.Finish(model);
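One detail worth calling out in ModelGen::Finish: FlatBuffers forbids starting a table while another is under construction, so the KernelModule sub-table is finished before ModelBuilder opens and only its offset is passed in. A distilled sketch of that ordering, reusing the names from this diff (the other Model fields are elided here):

#include "tpu_mlir/Builder/BM168x/bmodel.hpp"

void BuildModelSkeleton(flatbuffers::FlatBufferBuilder &builder,
                        bmodel::Binary &kernel_binary) {
  // 1. Create children first: strings, vectors, sub-tables.
  auto module_name = builder.CreateString("libbm1684x_kernel_module.so");
  bmodel::KernelModuleBuilder kb(builder);
  kb.add_file_name(module_name);
  kb.add_binary(&kernel_binary);
  auto kernel_module = kb.Finish(); // child closed before parent opens
  // 2. Only now open the parent table and store the child's offset.
  bmodel::ModelBuilder mb(builder); // chip, net, version, ... elided
  mb.add_kernel_module(kernel_module);
  builder.Finish(mb.Finish());
}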
98 changes: 64 additions & 34 deletions lib/Dialect/Tpu/Transforms/Codegen/BM168xCodegen.cpp
@@ -13,6 +13,7 @@
#include "ProfileCtx.h"
#include "TensorLocation.hpp"
#include "tpu_mlir/Backend/BM168x/BM168x.h"
#include "tpu_mlir/Backend/BM168x/BM1684X.h"
#include "tpu_mlir/Builder/BM168x/bmodel.hpp"
#include "tpu_mlir/Dialect/Tpu/Transforms/Codegen/Dynamic/DynamicLayer.hpp"
#include "tpu_mlir/Dialect/Tpu/Transforms/Codegen/Dynamic/DynamicNetIr.hpp"
@@ -23,6 +24,7 @@
#include "tpu_mlir/Support/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include <llvm/Support/Debug.h>
+ #include <stdlib.h>

#include <fstream>
#include <set>
@@ -39,6 +41,29 @@
namespace tpu_mlir {
namespace tpu {

+ static bmodel::Binary CreateBinaryFromFile(bmodel::ModelGen *model_gen,
+                                            FILE *fp) {
+ std::vector<u8> data;
+ fseek(fp, 0, SEEK_END);
+ uint32_t size = ftell(fp);
+ fseek(fp, 0, SEEK_SET);
+ data.resize(size);
+ fread(data.data(), 1, size, fp);
+ fclose(fp);
+ auto binary = model_gen->WriteBinary(data.size(), data.data());
+ return binary;
+ }
+
+ static bmodel::Binary CreateBinaryFromFile(bmodel::ModelGen *model_gen,
+                                            const std::string &filename) {
+ auto fp = fopen(filename.c_str(), "rb");
+ if (!fp) {
+ llvm_unreachable((std::string("can't find file: ") + filename).c_str());
+ return bmodel::Binary();
+ }
+ return CreateBinaryFromFile(model_gen, fp);
+ }
+
void BMCodegen::run(ModuleOp &module, std::string &filename) {
// record the line number of operation in module.
llvm::raw_null_ostream os;
@@ -72,6 +97,14 @@ void BMCodegen::run(ModuleOp &module, std::string &filename) {
model_gen = std::make_shared<bmodel::ModelGen>();
// add chip name
model_gen->AddChip(chip);
+ if (module::isBM1684X()) {
+ std::string kernel_name = backend::BM1684X::LIB_KERNEL_NAME.str();
+ std::string root_path = getenv("TPUC_ROOT");
+ std::string kernel_path = root_path + std::string("/lib/") + kernel_name;
+ bmodel::Binary kernel_module =
+ CreateBinaryFromFile(&(*model_gen), kernel_path);
+ model_gen->AddKernelModule(kernel_name, kernel_module);
+ }
auto &builder = model_gen->Builder();
auto input_tensor = CreateTensorVector(inputs);
auto output_tensor = CreateTensorVector(outputs);
@@ -87,7 +120,7 @@ void BMCodegen::run(ModuleOp &module, std::string &filename) {
bool is_first = true;
int dynamic_mode = module::isBM1684XFamily() ? 2 : 1;

- module.walk<WalkOrder::PreOrder>([&](Operation* op){
+ module.walk<WalkOrder::PreOrder>([&](Operation *op) {
if (isa<func::CallOp>(op)) {
auto call = dyn_cast<func::CallOp>(op);
auto func = module::getFuncOp(call.getCallee());
@@ -122,9 +155,9 @@ void BMCodegen::run(ModuleOp &module, std::string &filename) {
first_dynamic = true;
}
}
- } else if (isa<func::ReturnOp>(op)
- && isa<tpu::IfOp>(op->getOperand(0).getDefiningOp())) {
- //codegen merge subnet
+ } else if (isa<func::ReturnOp>(op) &&
+ isa<tpu::IfOp>(op->getOperand(0).getDefiningOp())) {
+ // codegen merge subnet
auto ifOp = dyn_cast<tpu::IfOp>(op->getOperand(0).getDefiningOp());
auto subnet = CreateMergeSubNet(ifOp);
subnet_v.push_back(subnet);
@@ -339,8 +372,8 @@ BMCodegen::CreateCmdGroupVector() {
}

Offset<bmodel::SwitchParam>
- BMCodegen::CreateSwitchParamVector(vector<int>& output_from,
- vector<int>& output_branch) {
+ BMCodegen::CreateSwitchParamVector(vector<int> &output_from,
+ vector<int> &output_branch) {
auto &builder = model_gen->Builder();
auto out_from = builder.CreateVector(output_from);
auto out_branch = builder.CreateVector(output_branch);
@@ -351,17 +384,17 @@ BMCodegen::CreateSwitchParamVector(vector<int>& output_from,
}

Offset<bmodel::MergeParam>
- BMCodegen::CreateMergeParamVector(vector<vector<int>>& output_from) {
- auto& builder = model_gen->Builder();
+ BMCodegen::CreateMergeParamVector(vector<vector<int>> &output_from) {
+ auto &builder = model_gen->Builder();
vector<Offset<Vector<int>>> indice_v;
- for(auto& indice: output_from){
- indice_v.push_back(builder.CreateVector(indice));
+ for (auto &indice : output_from) {
+ indice_v.push_back(builder.CreateVector(indice));
}
vector<Offset<bmodel::OutputFrom>> output_from_v;
- for(auto idx: indice_v){
- bmodel::OutputFromBuilder ofb(builder);
- ofb.add_indice(idx);
- output_from_v.push_back(ofb.Finish());
+ for (auto idx : indice_v) {
+ bmodel::OutputFromBuilder ofb(builder);
+ ofb.add_indice(idx);
+ output_from_v.push_back(ofb.Finish());
}
auto output_froms = builder.CreateVector(output_from_v);
bmodel::MergeParamBuilder mpb(builder);
@@ -659,9 +692,7 @@ void BMCodegen::codegen(Operation *op) {
Offset<bmodel::SubNet> BMCodegen::CreateSubNet(func::CallOp call) {
bm168x->before_codegen();
auto func = module::getFuncOp(call.getCallee());
- func.walk([&](Operation *op) {
- codegen(op);
- });
+ func.walk([&](Operation *op) { codegen(op); });
bm168x->after_codegen(module::getFLOPs());
int subnet_id = func->getAttrOfType<IntegerAttr>("id").getInt() + merge_num;
next_id = subnet_id + 1;
@@ -675,12 +706,12 @@ Offset<bmodel::SubNet> BMCodegen::CreateSubNet(func::CallOp call) {
std::vector<bool> user_is_cpu;
tensor_is_cpu[v_name] = user_is_cpu;
for (auto user : v.value().getUsers()) {
- if (isa<tpu::IfOp>(call->getParentOp())
- && isa<tpu::YieldOp>(user)) {
+ if (isa<tpu::IfOp>(call->getParentOp()) && isa<tpu::YieldOp>(user)) {
tensor_is_cpu[v_name].push_back(false);
auto funcOp = call->getParentOp()->getParentOp();
- //id is the ifOp's id + 3
- int merge_id = funcOp->getAttrOfType<IntegerAttr>("id").getInt() + merge_num + 3;
+ // id is the ifOp's id + 3
+ int merge_id =
+ funcOp->getAttrOfType<IntegerAttr>("id").getInt() + merge_num + 3;
next_id_v.push_back(merge_id);
} else if (isa<ReturnOp>(user)) {
tensor_is_cpu[v_name].push_back(false);
@@ -826,7 +857,7 @@ Offset<bmodel::SubNet> BMCodegen::CreateCPUSubNet(func::CallOp call) {
return snb.Finish();
}

- Offset<bmodel::SubNet> BMCodegen::CreateSwitchSubNet(func::CallOp call){
+ Offset<bmodel::SubNet> BMCodegen::CreateSwitchSubNet(func::CallOp call) {
auto func = module::getFuncOp(call.getCallee());
std::vector<Value> inputs;
std::vector<Value> outputs;
@@ -853,8 +884,8 @@ Offset<bmodel::SubNet> BMCodegen::CreateSwitchSubNet(func::CallOp call){
auto next_ids = builder.CreateVector(next_id_v);
vector<int> output_from;
vector<int> output_branch;
- Offset<bmodel::SwitchParam> switch_param
- = CreateSwitchParamVector(output_from, output_branch);
+ Offset<bmodel::SwitchParam> switch_param =
+ CreateSwitchParamVector(output_from, output_branch);

bmodel::SubNetBuilder snb(builder);
snb.add_switch_param(switch_param);
@@ -867,30 +898,30 @@ Offset<bmodel::SubNet> BMCodegen::CreateSwitchSubNet(func::CallOp call){
return snb.Finish();
}

- Offset<bmodel::SubNet>
- BMCodegen::CreateMergeSubNet(tpu::IfOp ifOp) {
+ Offset<bmodel::SubNet> BMCodegen::CreateMergeSubNet(tpu::IfOp ifOp) {
std::vector<Value> inputs;
std::vector<Value> outputs;
std::vector<int> next_id_v = {};
- int subnet_id = next_id++; merge_num++;
+ int subnet_id = next_id++;
+ merge_num++;
LLVM_DEBUG(llvm::dbgs() << "subnet id: '" << subnet_id << "'\n");
for (int k = 0; k < ifOp.getNumResults(); k++) {
for (int i = 0; i < ifOp.getNumRegions(); i++) {
- Region& region = ifOp.getRegion(i);
- Operation* yieldOp = region.back().getTerminator();
+ Region &region = ifOp.getRegion(i);
+ Operation *yieldOp = region.back().getTerminator();
inputs.emplace_back(module::getOriValue(yieldOp->getOperand(k)));
}
}

- for (int i = 0; i < ifOp.getNumResults(); i++){
+ for (int i = 0; i < ifOp.getNumResults(); i++) {
outputs.emplace_back(module::getOriValue(ifOp.getResult(i)));
}
int next = std::numeric_limits<int>::max();
- //get the nearest subnet_id
+ // get the nearest subnet_id
if (auto funcOp = dyn_cast<func::FuncOp>(ifOp->getParentOp())) {
auto callOp = module::getCallOp(funcOp);
for (int i = 0; i < callOp.getNumResults(); i++) {
- for (Operation *op: callOp.getResult(i).getUsers()) {
+ for (Operation *op : callOp.getResult(i).getUsers()) {
if (isa<func::CallOp>(op)) {
auto func = module::getFuncOp(dyn_cast<func::CallOp>(op).getCallee());
int id = func->getAttrOfType<IntegerAttr>("id").getInt() + merge_num;
@@ -917,8 +948,7 @@ BMCodegen::CreateMergeSubNet(tpu::IfOp ifOp) {
for (int i = 0; i < outputs.size(); i++) {
output_from.emplace_back(vector{index++, index++});
}
- Offset<bmodel::MergeParam> merge_param
- = CreateMergeParamVector(output_from);
+ Offset<bmodel::MergeParam> merge_param = CreateMergeParamVector(output_from);

bmodel::SubNetBuilder snb(builder);
snb.add_merge_param(merge_param);
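Two robustness gaps in the new helpers are worth noting for anyone adapting this code: CreateBinaryFromFile ignores the return values of ftell and fread, and BMCodegen::run builds a std::string directly from getenv("TPUC_ROOT"), which crashes if the variable is unset. A more defensive sketch of the file-reading path, under the same assumptions (same ModelGen::WriteBinary API; <fstream> is already included by this file):

static bmodel::Binary CreateBinaryFromFile(bmodel::ModelGen *model_gen,
                                           const std::string &filename) {
  std::ifstream in(filename, std::ios::binary | std::ios::ate);
  if (!in) {
    llvm_unreachable(("can't find file: " + filename).c_str());
  }
  auto size = static_cast<size_t>(in.tellg());
  in.seekg(0, std::ios::beg);
  std::vector<uint8_t> data(size);
  // std::istream::read reports short reads, unlike the unchecked fread.
  if (!in.read(reinterpret_cast<char *>(data.data()),
               static_cast<std::streamsize>(size))) {
    llvm_unreachable(("failed to read: " + filename).c_str());
  }
  return model_gen->WriteBinary(data.size(), data.data());
}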
1 change: 1 addition & 0 deletions third_party/CMakeLists.txt
@@ -22,6 +22,7 @@ install(FILES oneDNN/lib/libdnnl.so
nntoolchain/lib/libcmodel.so
nntoolchain/lib/libbmrt.so
nntoolchain/lib/libcpuop.so
+ nntoolchain/lib/libbm1684x_kernel_module.so
# cvitek
CV18xx/lib/libcvikernel.so
CV18xx/lib/libcviruntime.so
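This install rule is what makes the TPUC_ROOT lookup in BMCodegen::run resolve: the kernel module ships into ${TPUC_ROOT}/lib next to the other runtime libraries. A guarded version of that lookup, as a sketch (the committed code dereferences getenv directly):

#include "tpu_mlir/Backend/BM168x/BM1684X.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdlib>
#include <string>

static std::string GetKernelModulePath() {
  const char *root = std::getenv("TPUC_ROOT");
  if (!root) {
    llvm_unreachable("TPUC_ROOT is not set");
  }
  return std::string(root) + "/lib/" +
         backend::BM1684X::LIB_KERNEL_NAME.str();
}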
(4 more changed files not loaded)