set ACL_RT_OVERFLOW_MODE to INFNAN for Ascend (DeepLink-org#644)

* set ACL_RT_OVERFLOW_MODE to INFNAN
* add epsilon as an attribute to BNTrainingReduce in diopiBatchNorm
* update printTensorHelper0 for debug
* remove skips for batch_norm in device_configs.py for Ascend
* remove skips for fill_, mean, sum in device_configs.py
* set ACL_RT_OVERFLOW_MODE to INFNAN for ascend_npu
* remove skips for reciprocal, addcdiv in device_configs.py
* make cpp-linter happy
* revert batch_norm skips in device_configs.py
* add a comment for batch_norm skips in device_configs.py
DeepLink-org · Dec 4, 2023 · 9c18323 · 9c18323
1 parent a93df80
commit 9c18323
Show file tree

Hide file tree

Showing 5 changed files with 6 additions and 82 deletions.
diff --git a/impl/ascend/common/debug.cpp b/impl/ascend/common/debug.cpp
@@ -29,7 +29,7 @@ void printTensorHelper0(const AscendTensor& at, void* ptrHost) {
             printf("item[0]: %d\n", reinterpret_cast<bool*>(ptrHost)[0]);
             break;
         default:
-            printf("unsupport dtype %s", diopiDtypeToStr(at.dtype()));
+            printf("unsupport dtype %s\n", diopiDtypeToStr(at.dtype()));
             break;
     }
 }

diff --git a/impl/ascend/device_configs.py b/impl/ascend/device_configs.py
@@ -115,22 +115,8 @@
             args=[
                 {
                     "ins": ['input'],
-                    # "dtype": [Skip(np.float16),],
-                    # temp for 910B
-                    "dtype": [Skip(np.float16),Skip(np.float32),Skip(np.float64),],
-                },
-            ]
-        ),
-    ),
-
-    # temp for 910B
-    'batch_norm_nan': dict(
-        name=['batch_norm'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.float16),Skip(np.float32),Skip(np.float64),],
+                    # Skip due to low precision
+                    "dtype": [Skip(np.float16),],
                 },
             ]
         ),
@@ -803,18 +789,6 @@
         ),
     ),
 
-    'addcdiv_specific': dict(
-        name=['addcdiv'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "shape": [Skip(()),Skip((128,)),Skip((576, 192)),Skip((64, 3, 3, 3)),Skip((10, 3, 5)),Skip((0,)),Skip((0, 5)),Skip((2, 0, 9)),],
-                },
-            ]
-        ),
-    ),
-
     'matmul': dict(
         name=['matmul'],
         tensor_para=dict(
@@ -827,44 +801,16 @@
         ),
     ),
 
-    'fill': dict(
-        name=['fill_'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.float16)],
-                },
-            ]
-        ),
-    ),
-
     'reduce_op': dict(
         name=['mean', 'sum'],
         atol=1e-3,
         rtol=1e-3,
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.float16)],
-                },
-            ],
-        ),
     ),
 
     'reduce_partial_op': dict(
         atol=1e-3,
         rtol=1e-3,
         name=['mean', 'sum'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.float16)],
-                },
-            ],
-        ),
     ),
 
     'reduce_partial_op_1': dict(
@@ -1343,30 +1289,6 @@
         ),
     ),
 
-    'reciprocal': dict(
-        name=['reciprocal'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.float16)],
-                },
-            ]
-        ),
-    ),
-
-    'reciprocal_zero': dict(
-        name=['reciprocal'],
-        tensor_para=dict(
-            args=[
-                {
-                    "ins": ['input'],
-                    "dtype": [Skip(np.float16)],
-                },
-            ]
-        ),
-    ),
-
     'conv_transpose2d': dict(
         name=['conv_transpose2d'],
         tensor_para=dict(

diff --git a/impl/ascend/functions/batch_norm.cpp b/impl/ascend/functions/batch_norm.cpp
@@ -93,7 +93,7 @@ diopiError_t diopiBatchNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, d
         diopiGetTensorStride(runningMeanTemp, &stride);
         diopiRequireTensor(ctx, &sum, &shape, &stride, diopiDtype_t::diopi_dtype_float32, diopi_device);
         diopiRequireTensor(ctx, &squareSum, &shape, &stride, diopiDtype_t::diopi_dtype_float32, diopi_device);
-        AclOpRunner<1, 2>("BNTrainingReduce", ctx).addInput(inputAt).addOutput(sum).addOutput(squareSum).run();
+        AclOpRunner<1, 2>("BNTrainingReduce", ctx).addInput(inputAt).addOutput(sum).setAttr("epsilon", static_cast<float>(eps)).addOutput(squareSum).run();
         AclOpRunner<7, 5>("BNTrainingUpdate", ctx)
             .addInput(inputAt)
             .addInput(sum)

diff --git a/impl/ascend/test/conform_test.cpp b/impl/ascend/test/conform_test.cpp
@@ -71,6 +71,7 @@ diopiError_t device_memcpy_d2d_async(diopiStreamHandle_t streamHandle, void* dst
 diopiError_t initLibrary() {
     CALL_ACLRT(aclInit(nullptr));
     CALL_ACLRT(aclrtSetDevice(0));
+    CALL_ACLRT(aclrtSetDeviceSatMode(ACL_RT_OVERFLOW_MODE_INFNAN));
     aclrtContext context;
     CALL_ACLRT(aclrtCreateContext(&context, 0));
     return diopiSuccess;

diff --git a/impl/ascend_npu/test/conform_test.cpp b/impl/ascend_npu/test/conform_test.cpp
@@ -69,6 +69,7 @@ diopiError_t device_memcpy_d2d_async(diopiStreamHandle_t streamHandle, void* dst
 diopiError_t initLibrary() {
     CALL_ACLRT(aclInit(nullptr));
     CALL_ACLRT(aclrtSetDevice(0));
+    CALL_ACLRT(aclrtSetDeviceSatMode(ACL_RT_OVERFLOW_MODE_INFNAN));
     // aclrtContext context;
     // CALL_ACLRT(aclrtCreateContext(&context, 0));
     return diopiSuccess;