From 63a0117a7205d670e882cb3696e856acee4760ec Mon Sep 17 00:00:00 2001
From: Alexander Kalistratov <alexander.kalistratov@intel.com>
Date: Mon, 19 Aug 2024 18:39:18 +0200
Subject: [PATCH 1/2] Update llvm

---
 .github/workflows/build.yml                       |  3 +++
 llvm-sha.txt                                      |  2 +-
 mlir/CMakeLists.txt                               |  1 +
 mlir/include/numba/Analysis/MemorySsa.hpp         |  8 ++------
 mlir/include/numba/Conversion/GpuAttributes.hpp   | 13 +++++++++++++
 mlir/include/numba/Transforms/LoopUtils.hpp       |  7 +------
 mlir/include/numba/Transforms/MemoryRewrites.hpp  |  2 +-
 mlir/include/numba/Transforms/SCFVectorize.hpp    |  2 +-
 mlir/lib/Conversion/GpuRuntimeToLlvm.cpp          |  3 ++-
 mlir/lib/Conversion/GpuToGpuRuntime.cpp           |  7 ++++++-
 .../mlir_compiler/lib/NumpyResolver.hpp           |  2 +-
 .../numba_mlir/mlir_compiler/lib/PyModule.cpp     |  5 +++--
 .../mlir_compiler/lib/pipelines/LowerToGpu.cpp    |  9 +++++++--
 patches/llvm.diff                                 | 15 +++++++++++++++
 14 files changed, 57 insertions(+), 22 deletions(-)
 create mode 100644 mlir/include/numba/Conversion/GpuAttributes.hpp
 create mode 100644 patches/llvm.diff
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 82c990df1..79c277e7c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -84,6 +84,7 @@ jobs:
         timeout-minutes: 420
         run: |
           $env:vcvarsPath = (Resolve-Path "$env:GITHUB_WORKSPACE\scripts")
+          $env:patchPath = (Resolve-Path "$env:GITHUB_WORKSPACE\patches\llvm.diff")
           pushd $env:vcvarsPath
           ./vcvars.ps1
           popd
@@ -95,6 +96,7 @@ jobs:
           git clone https://github.com/llvm/llvm-project
           cd llvm-project
           git checkout $env:LLVM_SHA
+          git apply $env:patchPath; if ($LASTEXITCODE -ne 0) { exit 1 }
           mkdir _build
           cd _build
           $env:CXX="cl.exe"
@@ -436,6 +438,7 @@ jobs:
           git clone https://github.com/llvm/llvm-project || exit 1
           cd llvm-project || exit 1
           git checkout $LLVM_SHA || exit 1
+          git apply $GITHUB_WORKSPACE/patches/llvm.diff || exit 1
           mkdir _build || exit 1
           cd _build || exit 1
           export CC=$CONDA_PREFIX/bin/x86_64-conda-linux-gnu-cc
diff --git a/llvm-sha.txt b/llvm-sha.txt
index ed6390e43..3cd0efef3 100644
--- a/llvm-sha.txt
+++ b/llvm-sha.txt
@@ -1 +1 @@
-d58637219463924185614f18911c5f01a1c20aa9
+b1edac0496f47374c9780f3f83c6773eed73a66e
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 8e0b071d4..197e5bc04 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -171,6 +171,7 @@ target_link_libraries(${NUMBA_MLIR_LIB} PRIVATE
     MLIRFuncTransforms
     MLIRIR
     MLIRLLVMDialect
+    MLIRBuiltinToLLVMIRTranslation
     MLIRLinalgTransforms
     MLIRMathToSPIRV
     MLIRTensorTransforms
diff --git a/mlir/include/numba/Analysis/MemorySsa.hpp b/mlir/include/numba/Analysis/MemorySsa.hpp
index 6725e7154..f0e730ddf 100644
--- a/mlir/include/numba/Analysis/MemorySsa.hpp
+++ b/mlir/include/numba/Analysis/MemorySsa.hpp
@@ -12,16 +12,12 @@
 #include <llvm/ADT/simple_ilist.h>
 #include <llvm/Support/Allocator.h>
 
+#include <mlir/IR/BuiltinTypes.h>
+
 namespace llvm {
 class raw_ostream;
 }
 
-namespace mlir {
-struct LogicalResult;
-class Operation;
-class Region;
-} // namespace mlir
-
 namespace numba {
 
 class MemorySSA {
diff --git a/mlir/include/numba/Conversion/GpuAttributes.hpp b/mlir/include/numba/Conversion/GpuAttributes.hpp
new file mode 100644
index 000000000..c991ed71b
--- /dev/null
+++ b/mlir/include/numba/Conversion/GpuAttributes.hpp
@@ -0,0 +1,13 @@
+// SPDX-FileCopyrightText: 2024 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+
+#include <string>
+
+namespace gpu_runtime {
+
+std::string getGpuBinaryAttrName();
+
+} // namespace gpu_runtime
diff --git a/mlir/include/numba/Transforms/LoopUtils.hpp b/mlir/include/numba/Transforms/LoopUtils.hpp
index 2f1a75767..b9eb01f2f 100644
--- a/mlir/include/numba/Transforms/LoopUtils.hpp
+++ b/mlir/include/numba/Transforms/LoopUtils.hpp
@@ -5,12 +5,7 @@
 #pragma once
 
 #include <llvm/ADT/STLExtras.h>
-
-namespace mlir {
-class Operation;
-class Region;
-struct LogicalResult;
-} // namespace mlir
+#include <mlir/IR/BuiltinTypes.h>
 
 namespace numba {
 mlir::LogicalResult naivelyFuseParallelOps(mlir::Region &region);
diff --git a/mlir/include/numba/Transforms/MemoryRewrites.hpp b/mlir/include/numba/Transforms/MemoryRewrites.hpp
index 912182a4b..1072d8b5f 100644
--- a/mlir/include/numba/Transforms/MemoryRewrites.hpp
+++ b/mlir/include/numba/Transforms/MemoryRewrites.hpp
@@ -5,12 +5,12 @@
 #pragma once
 
 #include <memory>
+#include <mlir/IR/BuiltinTypes.h>
 #include <optional>
 
 namespace mlir {
 class AnalysisManager;
 class Pass;
-struct LogicalResult;
 } // namespace mlir
 
 namespace numba {
diff --git a/mlir/include/numba/Transforms/SCFVectorize.hpp b/mlir/include/numba/Transforms/SCFVectorize.hpp
index 235a927e6..634bc08fc 100644
--- a/mlir/include/numba/Transforms/SCFVectorize.hpp
+++ b/mlir/include/numba/Transforms/SCFVectorize.hpp
@@ -5,12 +5,12 @@
 #pragma once
 
 #include <memory>
+#include <mlir/IR/BuiltinTypes.h>
 #include <optional>
 
 namespace mlir {
 class OpBuilder;
 class Pass;
-struct LogicalResult;
 namespace scf {
 class ParallelOp;
 }
diff --git a/mlir/lib/Conversion/GpuRuntimeToLlvm.cpp b/mlir/lib/Conversion/GpuRuntimeToLlvm.cpp
index 19ca1860b..e95197bf8 100644
--- a/mlir/lib/Conversion/GpuRuntimeToLlvm.cpp
+++ b/mlir/lib/Conversion/GpuRuntimeToLlvm.cpp
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 #include "numba/Conversion/GpuRuntimeToLlvm.hpp"
+#include "numba/Conversion/GpuAttributes.hpp"
 
 #include "numba/Dialect/gpu_runtime/IR/GpuRuntimeOps.hpp"
 #include "numba/Dialect/numba_util/Dialect.hpp"
@@ -303,7 +304,7 @@ class ConvertGpuModuleLoadPattern
       return mlir::failure();
 
     auto blobAttr = gpuMod->getAttrOfType<mlir::StringAttr>(
-        mlir::gpu::getDefaultGpuBinaryAnnotation());
+        gpu_runtime::getGpuBinaryAttrName());
     if (!blobAttr)
       return mlir::failure();
 
diff --git a/mlir/lib/Conversion/GpuToGpuRuntime.cpp b/mlir/lib/Conversion/GpuToGpuRuntime.cpp
index baca8dcee..e6537fb2b 100644
--- a/mlir/lib/Conversion/GpuToGpuRuntime.cpp
+++ b/mlir/lib/Conversion/GpuToGpuRuntime.cpp
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 #include "numba/Conversion/GpuToGpuRuntime.hpp"
+#include "numba/Conversion/GpuAttributes.hpp"
 
 #include "GpuCommon.hpp"
 
@@ -1267,7 +1268,7 @@ struct SerializeSPIRVPass
           llvm::StringRef(reinterpret_cast<const char *>(spvBinary.data()),
                           spvBinary.size() * sizeof(uint32_t));
       auto spvAttr = mlir::StringAttr::get(&getContext(), spvData);
-      gpuMod->setAttr(gpu::getDefaultGpuBinaryAnnotation(), spvAttr);
+      gpuMod->setAttr(gpu_runtime::getGpuBinaryAttrName(), spvAttr);
       spvMod->erase();
     }
   }
@@ -2630,6 +2631,10 @@ struct ApplySPIRVFastmathFlags
 };
 } // namespace
 
+namespace gpu_runtime {
+std::string getGpuBinaryAttrName() { return "gpu.binary"; }
+} // namespace gpu_runtime
+
 // Expose the passes to the outside world
 std::unique_ptr<mlir::Pass> gpu_runtime::createAbiAttrsPass() {
   return std::make_unique<AbiAttrsPass>();
diff --git a/numba_mlir/numba_mlir/mlir_compiler/lib/NumpyResolver.hpp b/numba_mlir/numba_mlir/mlir_compiler/lib/NumpyResolver.hpp
index 005a96874..4359d05da 100644
--- a/numba_mlir/numba_mlir/mlir_compiler/lib/NumpyResolver.hpp
+++ b/numba_mlir/numba_mlir/mlir_compiler/lib/NumpyResolver.hpp
@@ -9,6 +9,7 @@
 #include <string>
 
 #include <llvm/ADT/SmallVector.h>
+#include <mlir/IR/BuiltinTypes.h>
 
 namespace llvm {
 class StringRef;
@@ -20,7 +21,6 @@ class Location;
 class OpBuilder;
 class Value;
 class ValueRange;
-struct LogicalResult;
 } // namespace mlir
 
 enum class PrimitiveType { Default = 0, View = 1, SideEffect = 2 };
diff --git a/numba_mlir/numba_mlir/mlir_compiler/lib/PyModule.cpp b/numba_mlir/numba_mlir/mlir_compiler/lib/PyModule.cpp
index f91c8d35e..c90dfd18c 100644
--- a/numba_mlir/numba_mlir/mlir_compiler/lib/PyModule.cpp
+++ b/numba_mlir/numba_mlir/mlir_compiler/lib/PyModule.cpp
@@ -28,8 +28,9 @@ static bool isSyclMKLSupported() {
 }
 
 static unsigned getVectorLength() {
-  llvm::StringMap<bool, llvm::MallocAllocator> features;
-  if (!llvm::sys::getHostCPUFeatures(features))
+  llvm::StringMap<bool, llvm::MallocAllocator> features =
+      llvm::sys::getHostCPUFeatures();
+  if (features.empty())
     return 128;
 
   auto checkFlag = [&](llvm::StringRef name) -> bool {
diff --git a/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp b/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp
index d73ca1334..5fcad545b 100644
--- a/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp
+++ b/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp
@@ -1635,6 +1635,8 @@ static ReduceFuncType getReduceFunc(mlir::gpu::AllReduceOperation op,
   return nullptr;
 }
 
+#include <limits>
+
 class ConvertGroupOpsToSubgroup
     : public mlir::OpRewritePattern<mlir::gpu::AllReduceOp> {
 public:
@@ -1707,7 +1709,9 @@ class ConvertGroupOpsToSubgroup
     mlir::Value subgroupId = [&]() {
       mlir::OpBuilder::InsertionGuard g(rewriter);
       rewriter.setInsertionPointToStart(&launchOp.getBody().front());
-      return rewriter.create<mlir::gpu::SubgroupIdOp>(rewriter.getUnknownLoc());
+      return rewriter.create<mlir::gpu::SubgroupIdOp>(
+          rewriter.getUnknownLoc(),
+          rewriter.getIndexAttr(std::numeric_limits<int64_t>::max()));
     }();
 
     auto loc = op->getLoc();
@@ -1726,7 +1730,8 @@ class ConvertGroupOpsToSubgroup
       mlir::OpBuilder::InsertionGuard g(rewriter);
       rewriter.setInsertionPointToStart(&launchOp.getBody().front());
       return rewriter.create<mlir::gpu::NumSubgroupsOp>(
-          rewriter.getUnknownLoc());
+          rewriter.getUnknownLoc(),
+          rewriter.getIndexAttr(std::numeric_limits<int64_t>::max()));
     }();
 
     mlir::Value zero = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 0);
diff --git a/patches/llvm.diff b/patches/llvm.diff
new file mode 100644
index 000000000..76c1085e1
--- /dev/null
+++ b/patches/llvm.diff
@@ -0,0 +1,15 @@
+diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp
+index 5313a64ed47e..e16a4154c9bf 100644
+--- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp
++++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp
+@@ -188,6 +188,10 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx,
+       if (!block->isEntryBlock() ||
+           !isa<FunctionOpInterface>(block->getParentOp()))
+         return std::nullopt;
++      if (!mlir::isa<mlir::LLVM::LLVMPointerType>(barePtr.getType()))
++        return std::nullopt;
++      if (!resultType.hasStaticShape())
++        return std::nullopt;
+       desc = MemRefDescriptor::fromStaticShape(builder, loc, *this, resultType,
+                                                inputs[0]);
+     } else {

From 8b86774d224fd424d01e43fdda0509d05a9c1ca7 Mon Sep 17 00:00:00 2001
From: Alexander Kalistratov <alexander.kalistratov@intel.com>
Date: Wed, 21 Aug 2024 15:59:56 +0200
Subject: [PATCH 2/2] Review fixes

---
 mlir/include/numba/Conversion/GpuAttributes.hpp               | 4 ++--
 mlir/lib/Conversion/GpuToGpuRuntime.cpp                       | 2 +-
 .../numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp     | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/mlir/include/numba/Conversion/GpuAttributes.hpp b/mlir/include/numba/Conversion/GpuAttributes.hpp
index c991ed71b..9ae1582a3 100644
--- a/mlir/include/numba/Conversion/GpuAttributes.hpp
+++ b/mlir/include/numba/Conversion/GpuAttributes.hpp
@@ -4,10 +4,10 @@
 
 #pragma once
 
-#include <string>
+#include <mlir/Support/LLVM.h>
 
 namespace gpu_runtime {
 
-std::string getGpuBinaryAttrName();
+mlir::StringRef getGpuBinaryAttrName();
 
 } // namespace gpu_runtime
diff --git a/mlir/lib/Conversion/GpuToGpuRuntime.cpp b/mlir/lib/Conversion/GpuToGpuRuntime.cpp
index e6537fb2b..9b2c333f4 100644
--- a/mlir/lib/Conversion/GpuToGpuRuntime.cpp
+++ b/mlir/lib/Conversion/GpuToGpuRuntime.cpp
@@ -2632,7 +2632,7 @@ struct ApplySPIRVFastmathFlags
 } // namespace
 
 namespace gpu_runtime {
-std::string getGpuBinaryAttrName() { return "gpu.binary"; }
+mlir::StringRef getGpuBinaryAttrName() { return "gpu.binary"; }
 } // namespace gpu_runtime
 
 // Expose the passes to the outside world
diff --git a/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp b/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp
index 5fcad545b..74b850955 100644
--- a/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp
+++ b/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp
@@ -4,6 +4,8 @@
 
 #include "pipelines/LowerToGpu.hpp"
 
+#include <limits>
+
 #include <mlir/Conversion/AffineToStandard/AffineToStandard.h>
 #include <mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h>
 #include <mlir/Conversion/ComplexToStandard/ComplexToStandard.h>
@@ -1635,8 +1637,6 @@ static ReduceFuncType getReduceFunc(mlir::gpu::AllReduceOperation op,
   return nullptr;
 }
 
-#include <limits>
-
 class ConvertGroupOpsToSubgroup
     : public mlir::OpRewritePattern<mlir::gpu::AllReduceOp> {
 public: