tenstorrent · jdh8 · Sep 25, 2024 · Sep 25, 2024 · Nov 9, 2024 · Nov 9, 2024
diff --git a/tests/lowering/reduction/test_sum_dim.py b/tests/lowering/reduction/test_sum_dim.py
@@ -0,0 +1,62 @@
+import torch
+import torch_ttnn
+import pytest
+import ttnn
+
+from tests.utils import assert_with_pcc
+
+
+class SumDimModule(torch.nn.Module):
+    """Stateless module whose forward pass is a single ``torch.sum`` call."""
+
+    def forward(self, input, dim, keepdim=False):
+        # `input` shadows the builtin, but it is part of the caller-visible signature.
+        return torch.sum(input, dim, keepdim=keepdim)
+
+
+@pytest.mark.parametrize(
+    ("input_shape", "dim"),
+    [
+        ((1, 32, 32), (-1, -2)),
+        ((1, 1, 768), (0, 1)),
+        ((1, 1000), (0,)),
+        ((1, 1024, 256), (0, 1)),
+        ((1, 1024, 7, 7), (2, 3)),
+        ((1, 12, 16), (1,)),
+        ((1, 12, 16), (2,)),
+        ((1, 512), (1,)),
+        ((1, 64), (0,)),
+        ((1024, 160), (0,)),
+        ((1024, 640), (0,)),
+        ((14, 2048), (0,)),
+        ((14, 512), (0,)),
+        ((16384, 128), (0,)),
+        ((16384, 32), (0,)),
+        ((197, 1024), (0,)),
+        ((197, 3072), (0,)),
+        ((197, 4096), (0,)),
+        ((197, 768), (0,)),
+        ((2, 512), (1,)),
+        ((2, 7, 512), (0,)),
+        ((50, 768), (0,)),
+        ((768, 196), (0,)),
+    ],
+)
+def test_sum_dim(device, input_shape, dim):
+    """Compile a keepdim=True ``torch.sum`` with the ttnn backend and check it."""
+    module = SumDimModule()
+    x = torch.empty(input_shape, dtype=torch.bfloat16).uniform_(-1, 1)
+    expected = module.forward(x, dim, True)
+
+    option = torch_ttnn.TorchTtnnOption(device=device)
+    option.gen_graphviz = True
+    compiled = torch.compile(module, backend=torch_ttnn.backend, options=option)
+    # torch.compile is lazy: this first forward call is what triggers compilation.
+    actual = compiled.forward(x, dim, True)
+    graph = option._out_fx_graphs[0]
+    graph.print_tabular()
+
+    # The rewritten graph needs a ttnn.sum unless every reduced axis has size 1.
+    needs_sum = any(input_shape[n] != 1 for n in dim)
+    assert sum(node.target == ttnn.sum for node in graph.nodes) >= needs_sum
+    assert_with_pcc(expected, actual)
diff --git a/torch_ttnn/passes/lowering/add_data_move_pass.py b/torch_ttnn/passes/lowering/add_data_move_pass.py
@@ -192,13 +192,14 @@ def is_tt_compute(node) -> bool:
             ttnn.zeros_like,
             ttnn.mean,
             ttnn.moreh_cumsum,
+            ttnn.sum,
+            ttnn.global_avg_pool2d,
             ttnn.clip,
             ttnn.squeeze,
+            ttnn.unsqueeze,
             ttnn.full,
             ttnn.as_tensor,
             ttnn.expand,
-            ttnn.moreh_cumsum,
-            ttnn.sum,
             ttnn.typecast,
             ttnn.argmax,
         ]

diff --git a/torch_ttnn/passes/lowering/to_tt_pass.py b/torch_ttnn/passes/lowering/to_tt_pass.py
@@ -933,6 +933,34 @@ def reshape_1d(code, args=args, kwargs=kwargs):
                 else:
                     return None
 
+            if node.target == torch.ops.aten.sum.dim_IntList:
+                # Lower a dim-wise sum to ttnn.sum, then ttnn.squeeze unless keepdim.
+                # Schema: sum.dim_IntList(self, dim, keepdim=False, *, dtype=None);
+                # keepdim has a default, so args may hold only (tensor, dims).
+                tensor, dims, *rest = args
+                keepdim = rest[0] if rest else False
+
+                # NOTE(review): dims of None or [] (reduce over every axis) is not
+                # handled by this branch, and the dtype keyword is not read.
+                # Axes removed when keepdim is False. This keeps the size-1 axes
+                # that the reduction below skips, so the output rank stays correct.
+                reduced_dims = dims
+
+                if (shape := get_shape(gm, tensor)) is not None:
+                    reduced_dims = [n if n >= 0 else len(shape) + n for n in dims]
+                    # Summing over a size-1 axis changes no values, so skip those.
+                    dims = [n for n in reduced_dims if shape[n] > 1]
+
+                if len(dims) > 0:
+                    tensor = g.call_function(ttnn.sum, (tensor, dims))
+
+                if not keepdim and len(reduced_dims) > 0:
+                    # NOTE(review): ttnn.squeeze receives a list of dims, as it did
+                    # before this change; confirm it accepts more than one axis.
+                    tensor = g.call_function(ttnn.squeeze, (tensor, reduced_dims))
+
+                return tensor
+
             if node.target == torch.ops.aten.select.int:
                 tensor, dim, start = args