Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

try fix tgs agitate #751

Merged
merged 23 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@

namespace dipu {

constexpr size_t kMaxAsyncResourcePoolLength = 3;

template <class T>
class AsyncResourcePool {
public:
Expand All @@ -31,7 +29,11 @@ class AsyncResourcePoolImpl : public AsyncResourcePool<T> {
public:
void add(const T& t, std::deque<DIPUEvent>& events) override {
std::lock_guard<mutex_t> lk(mutex_);
list_.emplace_back(t, std::move(events));
if (events.size() > 0) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里 events 为空需要加入 list 吗,我以为可以忽略它

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

需要的,不然会内存泄漏

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

建议后续把这块逻辑整体改下, 对于没有在流上等待的 tensor, 析构时直接 restore()。

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里实际上是故意没有在析构的时候 restore。主要目的: 1. 加快 tensor 析构的速度 2. tensor 析构时 restore 没有什么用,只有在申请的时候才需要尽可能多的内存已经回收。 3. 析构时若立即回收,有可能流上还没有读写完毕;延后回收可以减小竞争的概率 4. restore 时可能会有碎片整理等操作, 把潜在的耗时放在申请的时候,可以让一部分 wait 变成有意义的 cpu 操作

list_.emplace_back(t, std::move(events));
} else {
list_.emplace_front(t, std::move(events));
}
}

T get() override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@

namespace dipu {

namespace {

// Converts the textual value of the DIPU_MAX_EXTEND_SIZE environment variable
// (a size expressed in MiB) into a byte count.
//
// `text`        raw environment value, or nullptr when the variable is unset.
// `fallbackMiB` size in MiB used whenever `text` cannot be used.
//
// Returns `fallbackMiB` converted to bytes when `text` is null, does not start
// with a decimal number, is negative, or is so large that the byte count would
// not fit in size_t. Characters following the leading number are ignored,
// which matches what std::atoi accepted before. An explicit "0" is honoured.
size_t maxExtendSizeBytesFromEnv(const char* text, size_t fallbackMiB) {
  constexpr unsigned kMiBShift = 20U;
  // Largest MiB count whose byte value still fits in size_t after the shift.
  constexpr size_t kMaxMiB = static_cast<size_t>(-1) >> kMiBShift;

  size_t sizeMiB = fallbackMiB;
  if (text != nullptr) {
    char* end = nullptr;
    // strtoll saturates to LLONG_MIN/LLONG_MAX on overflow; both saturated
    // values are rejected by the range check below, so errno is not needed.
    const long long parsed = std::strtoll(text, &end, 10);
    const bool hasDigits = end != text;
    if (hasDigits && parsed >= 0 &&
        static_cast<unsigned long long>(parsed) <= kMaxMiB) {
      sizeMiB = static_cast<size_t>(parsed);
    }
  }
  return sizeMiB << kMiBShift;
}

}  // namespace

// Size in bytes read once at static-initialisation time from
// DIPU_MAX_EXTEND_SIZE (given in MiB); defaults to 1024 MiB.
const size_t kMaxExtendSize =
    maxExtendSizeBytesFromEnv(std::getenv("DIPU_MAX_EXTEND_SIZE"), 1024);

class BFCachingAllocatorImpl {
public:
using allocate_fn_t = std::function<void*(size_t)>;
Expand All @@ -29,7 +40,6 @@ class BFCachingAllocatorImpl {
static constexpr size_t kMinAllocationSize = 512;
static constexpr size_t kMaxInternalFragmentation = 8U << 20U; // 8MB
static constexpr size_t kMinExtendSize = 8U << 20U; // 8MB
static constexpr size_t kMaxExtendSize = 1U << 30U; // 1GB

size_t cachedBytes = 0;
size_t allocatedBytes = 0;
Expand Down Expand Up @@ -425,10 +435,18 @@ class BFCachingAllocator : public CacheAllocator {

void empty_resource_pool() const {
std::lock_guard<mutex_t> lk(resource_pool_mutex_);
auto start = std::chrono::system_clock::now();
while (!async_mem_pool()->empty()) {
if (!async_mem_pool()->ready()) {
std::this_thread::yield();
continue;
auto elasped = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now() - start)
.count();
if (elasped < 32) {
std::this_thread::yield();
continue;
} else {
return;
}
}
const auto block = async_mem_pool()->get();
void* ptr = std::get<0>(block);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ namespace dipu {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
std::mutex DIPURawDeviceAllocator::mutex_;

namespace {

// Converts the textual value of the DIPU_MAX_ASYNC_RESOURCE_POOL_LENGTH
// environment variable into a length.
//
// `text`     raw environment value, or nullptr when the variable is unset.
// `fallback` length used whenever `text` cannot be used.
//
// Returns `fallback` when `text` is null, does not start with a decimal
// number, is negative, or does not fit in size_t. Characters following the
// leading number are ignored, which matches what std::atoi accepted before.
// An explicit "0" is honoured. Values beyond the range of long long saturate
// to the largest long long instead of invoking undefined behaviour.
size_t asyncResourcePoolLengthFromEnv(const char* text, size_t fallback) {
  if (text == nullptr) {
    return fallback;
  }
  char* end = nullptr;
  const long long parsed = std::strtoll(text, &end, 10);
  const bool hasDigits = end != text;
  if (!hasDigits || parsed < 0) {
    return fallback;
  }
  // Only relevant where size_t is narrower than long long.
  if (static_cast<unsigned long long>(parsed) > static_cast<size_t>(-1)) {
    return fallback;
  }
  return static_cast<size_t>(parsed);
}

}  // namespace

// Length read once at static-initialisation time from
// DIPU_MAX_ASYNC_RESOURCE_POOL_LENGTH; defaults to 64. Kept as a mutable
// global because it is declared `extern size_t` elsewhere.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
size_t kMaxAsyncResourcePoolLength = asyncResourcePoolLengthFromEnv(
    std::getenv("DIPU_MAX_ASYNC_RESOURCE_POOL_LENGTH"), 64);

namespace {

// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ namespace dipu {

constexpr size_t kDefaultMermoryAlignment = 512;

extern size_t kMaxAsyncResourcePoolLength;

class MemoryAlignmentStrategy {
size_t kBytesAlign = kDefaultMermoryAlignment;
size_t alpha = 1; // reserved
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const static int ascend_init = []() {
// – 内存大小向上对齐成32整数倍+32字节(m=ALIGN_UP[len,32]+32字节);
// – 内存起始地址需满足64字节对齐(ALIGN_UP[m,64])。
// nbytes = align_64(1 * nbytes + 32);
static dipu::MemoryAlignmentStrategy memoryAlignStrategy(64, 1, 32);
static dipu::MemoryAlignmentStrategy memoryAlignStrategy(512, 1, 32);
dipu::setMemoryAlignmentStrategy(&memoryAlignStrategy);
return 0;
}();
Loading