From 09e6f7b916110803413cae8f900b2dbb32d6ddcd Mon Sep 17 00:00:00 2001
From: Insop Song
Date: Fri, 15 Nov 2024 19:02:17 +0000
Subject: [PATCH] fix: handle data parallel world size and rank when mpu is None

Co-authored-by: Genie
---
 deepspeed/runtime/engine.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/deepspeed/runtime/engine.py b/deepspeed/runtime/engine.py
index ba0e0f875625..37776159ade1 100755
--- a/deepspeed/runtime/engine.py
+++ b/deepspeed/runtime/engine.py
@@ -1768,6 +1768,9 @@ def deepspeed_io(self,
         if self.mpu is not None:
             data_parallel_world_size = self.mpu.get_data_parallel_world_size()
             data_parallel_rank = self.mpu.get_data_parallel_rank()
+        else:
+            data_parallel_world_size = self.dp_world_size
+            data_parallel_rank = groups._get_sequence_data_parallel_rank()
 
         if data_sampler is None and (route == ROUTE_PREDICT or route == ROUTE_EVAL):
             data_sampler = torch.utils.data.DistributedSampler(