From f4e4d4a2aae0c82db4ab06d069b7e5409dc4276b Mon Sep 17 00:00:00 2001 From: hellozmz <40790054@qq.com> Date: Sat, 30 Mar 2024 04:19:28 +0000 Subject: [PATCH 1/3] test1: does not need getDevice, cause setDevice() will set currentDeviceIndex --- dipu/torch_dipu/csrc_dipu/vendor/ascend/deviceimpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/torch_dipu/csrc_dipu/vendor/ascend/deviceimpl.cpp b/dipu/torch_dipu/csrc_dipu/vendor/ascend/deviceimpl.cpp index e48e69ef0..0e6c1dc27 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/ascend/deviceimpl.cpp +++ b/dipu/torch_dipu/csrc_dipu/vendor/ascend/deviceimpl.cpp @@ -29,7 +29,7 @@ void finalizeVendor() { DIPU_CALLACLRT(aclFinalize()); } deviceId_t current_device() { if (currentDeviceIndex < 0) { setDevice(-1); - DIPU_CALLACLRT(::aclrtGetDevice(&currentDeviceIndex)) + // DIPU_CALLACLRT(::aclrtGetDevice(&currentDeviceIndex)) } return static_cast<deviceId_t>(currentDeviceIndex); } From f510bd40bc38f94262076d6b15a529a035715004 Mon Sep 17 00:00:00 2001 From: hellozmz <40790054@qq.com> Date: Sat, 30 Mar 2024 04:40:01 +0000 Subject: [PATCH 2/3] change current_device position --- dipu/tests/python/individual_scripts/test_rt_ddp.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dipu/tests/python/individual_scripts/test_rt_ddp.py b/dipu/tests/python/individual_scripts/test_rt_ddp.py index 1fed653da..33483ed27 100644 --- a/dipu/tests/python/individual_scripts/test_rt_ddp.py +++ b/dipu/tests/python/individual_scripts/test_rt_ddp.py @@ -65,8 +65,9 @@ def forward(self, x): def demo_basic_ddp(rank, world_size, port): import torch_dipu - print(f"Running basic DDP example on rank {rank} {torch.cuda.current_device()}") + # print(f"Running basic DDP example on rank {rank} {torch.cuda.current_device()}") torch.cuda.set_device(rank) + print(f"Running basic DDP example on rank {rank} {torch.cuda.current_device()}") backend = "nccl" dev1 = rank @@ -100,8 +101,9 @@ def demo_basic_ddp(rank, world_size, port): def 
demo_allreduce(rank, world_size, port): import torch_dipu - print(f"Running basic DDP example on rank {rank} {torch.cuda.current_device()}") + # print(f"Running basic DDP example on rank {rank} {torch.cuda.current_device()}") torch.cuda.set_device(rank) + print(f"Running basic DDP example on rank {rank} {torch.cuda.current_device()}") dev1 = rank setup(rank, world_size, port) @@ -151,7 +153,7 @@ def demo_allgather(rank, world_size, port): import torch_dipu setup(rank, world_size, port) - + print(f'rank={rank}') src1 = torch.ones((2, 4)).to(rank) dests = torch.zeros((world_size * 2, 4)).to(rank) dests = [ From 694c3abb3ccdd2dd991c856f1589e735b230f25e Mon Sep 17 00:00:00 2001 From: hellozmz <40790054@qq.com> Date: Sat, 30 Mar 2024 05:59:00 +0000 Subject: [PATCH 3/3] test --- dipu/tests/python/individual_scripts/test_rt_ddp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/tests/python/individual_scripts/test_rt_ddp.py b/dipu/tests/python/individual_scripts/test_rt_ddp.py index 33483ed27..257804982 100644 --- a/dipu/tests/python/individual_scripts/test_rt_ddp.py +++ b/dipu/tests/python/individual_scripts/test_rt_ddp.py @@ -153,7 +153,7 @@ def demo_allgather(rank, world_size, port): import torch_dipu setup(rank, world_size, port) - print(f'rank={rank}') + print(f"rank={rank}") src1 = torch.ones((2, 4)).to(rank) dests = torch.zeros((world_size * 2, 4)).to(rank) dests = [