Skip to content

Commit 696defe

Browse files
committed
Fix uneven loading of GPU memory when using multiple devices
Signed-off-by: yusheng.ma <yusheng.ma@zilliz.com>
1 parent 3750734 commit 696defe

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

src/common/raft/integration/raft_knowhere_index.cuh

+18-1
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,24 @@ struct raft_knowhere_index<IndexKind>::impl {
626626
}
627627

628628
auto static deserialize(std::istream& is) {
629-
auto new_device_id = select_device_id();
629+
auto static device_count = []() {
630+
auto result = 0;
631+
RAFT_CUDA_TRY(cudaGetDeviceCount(&result));
632+
RAFT_EXPECTS(result != 0, "No CUDA devices found");
633+
return result;
634+
}();
635+
// The lazy allocation mode cannot completely eliminate uneven distribution, but it can alleviate it well.
636+
int new_device_id = 0;
637+
size_t free, total;
638+
size_t max_free = 0;
639+
for (int i = 0; i < device_count; ++i) {
640+
auto scoped_device = raft::device_setter{i};
641+
RAFT_CUDA_TRY(cudaMemGetInfo(&free, &total));
642+
if (max_free < free) {
643+
max_free = free;
644+
new_device_id = i;
645+
}
646+
}
630647
auto scoped_device = raft::device_setter{new_device_id};
631648
auto const& res = get_device_resources_without_mempool();
632649
auto des_index = raft_index_type::template deserialize<data_type, indexing_type>(res, is);

src/index/gpu_raft/gpu_raft.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141

4242
namespace knowhere {
4343

44-
auto static constexpr cuda_concurrent_size_per_device = std::uint32_t{8};
44+
auto static constexpr cuda_concurrent_size_per_device = std::uint32_t{4};
4545

4646
template <raft_proto::raft_index_kind K>
4747
struct KnowhereConfigType {};

0 commit comments

Comments
 (0)