diff --git a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUAsyncResourcePool.h b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUAsyncResourcePool.h index 4533ac3fa..fe99c5c49 100644 --- a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUAsyncResourcePool.h +++ b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUAsyncResourcePool.h @@ -9,8 +9,6 @@ namespace dipu { -constexpr size_t kMaxAsyncResourcePoolLength = 3; - template class AsyncResourcePool { public: @@ -31,7 +29,11 @@ class AsyncResourcePoolImpl : public AsyncResourcePool { public: void add(const T& t, std::deque& events) override { std::lock_guard lk(mutex_); - list_.emplace_back(t, std::move(events)); + if (events.empty()) { + list_.emplace_front(t, std::move(events)); + } else { + list_.emplace_back(t, std::move(events)); + } } T get() override { diff --git a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUBFCachingAllocator.cpp b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUBFCachingAllocator.cpp index cd7e191f1..a13ce575d 100644 --- a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUBFCachingAllocator.cpp +++ b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUBFCachingAllocator.cpp @@ -7,11 +7,17 @@ #include #include +#include "csrc_dipu/utils/env.hpp" + #include "DIPUCachingAllocator.h" #include "DIPUSpinMutex.h" namespace dipu { +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +const size_t kMaxExtendSize = get_env_or_default("DIPU_MAX_EXTEND_SIZE", 1024) + << 20U; + class BFCachingAllocatorImpl { public: using allocate_fn_t = std::function; @@ -29,7 +35,6 @@ class BFCachingAllocatorImpl { static constexpr size_t kMinAllocationSize = 512; static constexpr size_t kMaxInternalFragmentation = 8U << 20U; // 8MB static constexpr size_t kMinExtendSize = 8U << 20U; // 8MB - static constexpr size_t kMaxExtendSize = 1U << 30U; // 1GB size_t cachedBytes = 0; size_t allocatedBytes = 0; @@ -441,6 +446,33 @@ class BFCachingAllocator : public CacheAllocator { } } + bool try_empty_resource_pool() const { + using namespace std::chrono_literals; + std::lock_guard lk(resource_pool_mutex_); + auto start = std::chrono::steady_clock::now(); + constexpr auto maxWaitTime = 32us; + while (!async_mem_pool()->empty()) { + if (!async_mem_pool()->ready()) { + auto now = std::chrono::steady_clock::now(); + auto elapsed = now - start; + if (elapsed < maxWaitTime) { + std::this_thread::yield(); + continue; + } + return false; + } + const auto block = async_mem_pool()->get(); + void* ptr = std::get<0>(block); + int id = static_cast(std::get<1>(block)); + DIPU_DEBUG_ALLOCATOR( + 8, "BFCachingAllocator: " << __FUNCTION__ << " ,ptr:" << ptr + << " ,id:" << id << " ,allocator:" << this + << ", device:" << device()); + impl->releaseRaw(ptr, id); + } + return true; + } + void check_impl() const { if (impl) { return; @@ -507,7 +539,7 @@ class BFCachingAllocator : public CacheAllocator { c10::DataPtr allocate(size_t size) const override { restore(); if (async_mem_pool()->size() > kMaxAsyncResourcePoolLength) { - empty_resource_pool(); + try_empty_resource_pool(); } size = getMemoryAlignmentStrategy()->roundBytes(size); std::tuple block = impl->allocateRaw(size); diff --git a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUCachingAllocator.cpp b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUCachingAllocator.cpp index 29333f15e..fbf9c6704 100644 --- a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUCachingAllocator.cpp +++ b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUCachingAllocator.cpp @@ -12,12 +12,18 @@ #include "csrc_dipu/base/basedef.h" #include "csrc_dipu/runtime/devproxy/deviceproxy.h" +#include "csrc_dipu/utils/env.hpp" namespace dipu { // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) std::mutex DIPURawDeviceAllocator::mutex_; +constexpr size_t kDefaultMaxAsyncResourcePoolLength = 64; +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +const size_t kMaxAsyncResourcePoolLength = get_env_or_default( + "DIPU_MAX_ASYNC_RESOURCE_POOL_LENGTH", kDefaultMaxAsyncResourcePoolLength); + namespace { // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) diff --git a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUCachingAllocator.h b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUCachingAllocator.h index 87d608375..fe21919b7 100644 --- a/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUCachingAllocator.h +++ b/dipu/torch_dipu/csrc_dipu/runtime/core/allocator/DIPUCachingAllocator.h @@ -13,6 +13,8 @@ namespace dipu { constexpr size_t kDefaultMermoryAlignment = 512; +extern const size_t kMaxAsyncResourcePoolLength; + class MemoryAlignmentStrategy { size_t kBytesAlign = kDefaultMermoryAlignment; size_t alpha = 1; // reserved diff --git a/dipu/torch_dipu/csrc_dipu/utils/env.hpp b/dipu/torch_dipu/csrc_dipu/utils/env.hpp new file mode 100644 index 000000000..2ad796613 --- /dev/null +++ b/dipu/torch_dipu/csrc_dipu/utils/env.hpp @@ -0,0 +1,18 @@ +// Copyright (c) 2024, DeepLink. +#pragma once +#include + +namespace dipu { + +template +T get_env_or_default(const char* env_name, const T& default_value) { + const char* env = std::getenv(env_name); + if (env == nullptr) { + return default_value; + } + T value = default_value; + std::istringstream(env) >> value; + return value; +} + +} // namespace dipu