Fix the CPU reference: skip blocks that start out of bounds, and update the smallest test matrix sizes

Signed-off-by: Przemek Tredak <ptredak@nvidia.com>
NVIDIA · Feb 4, 2025 · 8339f0b
1 parent 6e1a2be
commit 8339f0b
Showing 1 changed file with 4 additions and 2 deletions.
diff --git a/tests/cpp/operator/test_cast_mxfp8_gated_swiglu.cu b/tests/cpp/operator/test_cast_mxfp8_gated_swiglu.cu
@@ -130,12 +130,14 @@ void compute_ref_x1(const IType* grad,
                 const size_t block_idx_Y = tile_Y * blocks_per_tile_Y + ii;
                 const size_t block_offset_Y = ii * block_size_Y;
                 const size_t i_min = tile_offset_Y + block_offset_Y;
+                if (i_min >= rows) continue;
                 const size_t i_max = std::min(i_min + block_size_Y, rows);
 
                 for (size_t jj = 0; jj < blocks_per_tile_X; ++jj) {
                     const size_t block_idx_X = tile_X * blocks_per_tile_X + jj;
                     const size_t block_offset_X = jj * block_size_X;
                     const size_t j_min = tile_offset_X + block_offset_X;
+                    if (j_min >= cols) continue;
                     const size_t j_max = std::min(j_min + block_size_X, cols);
 
                     const size_t mx_scale_idx = block_idx_Y * scales_stride + block_idx_X;
@@ -364,8 +366,8 @@ void performTest_x2(const size_t rows,
 }
 
 std::vector<std::pair<size_t, size_t>> matrix_sizes = {
-    {1, 16},
-    {16, 48},
+    {1, 32},
+    {16, 64},
     {65, 96},
     {128, 128},
     {256, 256},