PR #22593: [XLA:GPU] Fix triton dot op on sm120 (RTX50xx) #22690

Merged 1 commit on Feb 13, 2025.
1 change: 1 addition & 0 deletions third_party/triton/temporary/series.bzl
@@ -16,5 +16,6 @@ those to this list.
 temporary_patch_list = [
     "//third_party/triton:temporary/fix_fence_insertion_race.patch",
     "//third_party/triton:temporary/enable_peer_access.patch",
+    "//third_party/triton:temporary/sm120.patch",
     # Add new patches just above this line
 ]
13 changes: 13 additions & 0 deletions third_party/triton/temporary/sm120.patch
@@ -0,0 +1,13 @@
diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp
index c66c9f4ae..3415d6a91 100644
--- a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp
+++ b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp
@@ -33,6 +33,8 @@ static int getMMAVersionSafe(int computeCapability, DotOp op) {
     versionsSupported = {3, 2};
   } else if (computeCapability < 110) {
     versionsSupported = {5, 2};
+  } else if (computeCapability == 120) {
+    versionsSupported = {2};
   } else {
     assert(false && "computeCapability not supported");
   }
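
In effect, the patch extends Triton's AccelerateMatmul pass so that compute capability 12.0 (consumer Blackwell, sm_120 on RTX 50xx) selects MMAv2 instead of falling through to the assert. A minimal standalone sketch of the dispatch shape follows; the guard on the first branch, and any branches for older architectures, are not visible in the hunk and are hypothetical here:

#include <cassert>
#include <vector>

// Sketch only: mirrors the branch structure shown in the hunk above,
// not the full Triton getMMAVersionSafe function.
std::vector<int> GetMmaVersionsSketch(int computeCapability) {
  std::vector<int> versionsSupported;
  if (computeCapability < 100) {  // hypothetical guard; elided in the hunk
    versionsSupported = {3, 2};
  } else if (computeCapability < 110) {
    versionsSupported = {5, 2};   // datacenter Blackwell (sm_100)
  } else if (computeCapability == 120) {
    versionsSupported = {2};      // new branch: sm_120 advertises only MMAv2
  } else {
    assert(false && "computeCapability not supported");
  }
  return versionsSupported;
}

Without the new branch, computeCapability == 120 reached the assert (and, in release builds where the assert compiles away, produced no supported MMA version at all), which is what forced the XLA-side arch-string workaround removed below.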
5 changes: 0 additions & 5 deletions xla/backends/gpu/codegen/triton/fusion_emitter.cc
@@ -1244,11 +1244,6 @@ absl::StatusOr<TritonWrapperResult> CompileTritonToLLVM(
   const auto& cc = device_info.gpu_compute_capability();
   std::string arch_name =
       std::visit([](auto& cc) { return cc.ToString(); }, cc);
-  if (arch_name == "12.0") {
-    LOG(WARNING) << "Triton does not support sm_120 yet. Passing CC 10.0 to "
-                    "avoid spurious \"unsupported conversion\" errors";
-    arch_name = "10.0";
-  }
   if (std::holds_alternative<se::CudaComputeCapability>(cc)) {
     auto ccCuda = std::get<se::CudaComputeCapability>(cc);
     if (!ccCuda.IsAtLeastAmpere()) {
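
With sm120.patch in place, Triton accepts compute capability 12.0 directly, so the XLA-side workaround that disguised sm_120 as CC 10.0 is deleted and arch_name now passes through unmodified. For reference, a self-contained sketch of the std::visit stringification pattern that remains, using stand-in types (the real se::CudaComputeCapability and se::RocmComputeCapability live in XLA's stream_executor and are not reproduced here):

#include <iostream>
#include <string>
#include <variant>

// Stand-in types for illustration only; not XLA's actual classes.
struct CudaCcStandIn {
  int major, minor;
  std::string ToString() const {
    return std::to_string(major) + "." + std::to_string(minor);
  }
};
struct RocmCcStandIn {
  std::string gfx_version;
  std::string ToString() const { return gfx_version; }
};

int main() {
  std::variant<CudaCcStandIn, RocmCcStandIn> cc = CudaCcStandIn{12, 0};
  // Each alternative provides ToString(), so one generic lambda handles both.
  std::string arch_name =
      std::visit([](auto& c) { return c.ToString(); }, cc);
  std::cout << arch_name << "\n";  // prints "12.0"; no longer rewritten to "10.0"
  return 0;
}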