Fix numerics: skip the forward FP8 tensor reduction on fp8_autocast exit when gradients are disabled

Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
NVIDIA · Apr 10, 2024 · 19d0bd4
1 parent 31dc133
commit 19d0bd4
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/transformer_engine/pytorch/fp8.py b/transformer_engine/pytorch/fp8.py
@@ -435,7 +435,7 @@ def fp8_autocast_exit(cls, enabled: bool, _graph: bool) -> None:
         # Reduce only the non-FP8 weight modules here.
         # FP8 weight modules are reduced at the end of the optimizer
         # step after the weight amax is populated.
-        if enabled and cls.FP8_AUTOCAST_DEPTH == 0 and not _graph:
+        if enabled and cls.FP8_AUTOCAST_DEPTH == 0 and not _graph and torch.is_grad_enabled():
             cls.reduce_and_update_fp8_tensors(forward=True, fp8_weights=False)
 
     @classmethod