From 63a0117a7205d670e882cb3696e856acee4760ec Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Mon, 19 Aug 2024 18:39:18 +0200 Subject: [PATCH 1/2] Update llvm --- .github/workflows/build.yml | 3 +++ llvm-sha.txt | 2 +- mlir/CMakeLists.txt | 1 + mlir/include/numba/Analysis/MemorySsa.hpp | 8 ++------ mlir/include/numba/Conversion/GpuAttributes.hpp | 13 +++++++++++++ mlir/include/numba/Transforms/LoopUtils.hpp | 7 +------ mlir/include/numba/Transforms/MemoryRewrites.hpp | 2 +- mlir/include/numba/Transforms/SCFVectorize.hpp | 2 +- mlir/lib/Conversion/GpuRuntimeToLlvm.cpp | 3 ++- mlir/lib/Conversion/GpuToGpuRuntime.cpp | 7 ++++++- .../mlir_compiler/lib/NumpyResolver.hpp | 2 +- .../numba_mlir/mlir_compiler/lib/PyModule.cpp | 5 +++-- .../mlir_compiler/lib/pipelines/LowerToGpu.cpp | 9 +++++++-- patches/llvm.diff | 15 +++++++++++++++ 14 files changed, 57 insertions(+), 22 deletions(-) create mode 100644 mlir/include/numba/Conversion/GpuAttributes.hpp create mode 100644 patches/llvm.diff diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 82c990df1..79c277e7c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -84,6 +84,7 @@ jobs: timeout-minutes: 420 run: | $env:vcvarsPath = (Resolve-Path "$env:GITHUB_WORKSPACE\scripts") + $env:patchPath = (Resolve-Path "$env:GITHUB_WORKSPACE\patches\llvm.diff") pushd $env:vcvarsPath ./vcvars.ps1 popd @@ -95,6 +96,7 @@ jobs: git clone https://github.com/llvm/llvm-project cd llvm-project git checkout $env:LLVM_SHA + git apply $env:patchPath mkdir _build cd _build $env:CXX="cl.exe" @@ -436,6 +438,7 @@ jobs: git clone https://github.com/llvm/llvm-project || exit 1 cd llvm-project || exit 1 git checkout $LLVM_SHA || exit 1 + git apply $GITHUB_WORKSPACE/patches/llvm.diff mkdir _build || exit 1 cd _build || exit 1 export CC=$CONDA_PREFIX/bin/x86_64-conda-linux-gnu-cc diff --git a/llvm-sha.txt b/llvm-sha.txt index ed6390e43..3cd0efef3 100644 --- a/llvm-sha.txt +++ b/llvm-sha.txt @@ -1 +1 @@ -d58637219463924185614f18911c5f01a1c20aa9 +b1edac0496f47374c9780f3f83c6773eed73a66e diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt index 8e0b071d4..197e5bc04 100644 --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -171,6 +171,7 @@ target_link_libraries(${NUMBA_MLIR_LIB} PRIVATE MLIRFuncTransforms MLIRIR MLIRLLVMDialect + MLIRBuiltinToLLVMIRTranslation MLIRLinalgTransforms MLIRMathToSPIRV MLIRTensorTransforms diff --git a/mlir/include/numba/Analysis/MemorySsa.hpp b/mlir/include/numba/Analysis/MemorySsa.hpp index 6725e7154..f0e730ddf 100644 --- a/mlir/include/numba/Analysis/MemorySsa.hpp +++ b/mlir/include/numba/Analysis/MemorySsa.hpp @@ -12,16 +12,12 @@ #include #include +#include + namespace llvm { class raw_ostream; } -namespace mlir { -struct LogicalResult; -class Operation; -class Region; -} // namespace mlir - namespace numba { class MemorySSA { diff --git a/mlir/include/numba/Conversion/GpuAttributes.hpp b/mlir/include/numba/Conversion/GpuAttributes.hpp new file mode 100644 index 000000000..c991ed71b --- /dev/null +++ b/mlir/include/numba/Conversion/GpuAttributes.hpp @@ -0,0 +1,13 @@ +// SPDX-FileCopyrightText: 2024 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include + +namespace gpu_runtime { + +std::string getGpuBinaryAttrName(); + +} // namespace gpu_runtime diff --git a/mlir/include/numba/Transforms/LoopUtils.hpp b/mlir/include/numba/Transforms/LoopUtils.hpp index 2f1a75767..b9eb01f2f 100644 --- a/mlir/include/numba/Transforms/LoopUtils.hpp +++ b/mlir/include/numba/Transforms/LoopUtils.hpp @@ -5,12 +5,7 @@ #pragma once #include - -namespace mlir { -class Operation; -class Region; -struct LogicalResult; -} // namespace mlir +#include namespace numba { mlir::LogicalResult naivelyFuseParallelOps(mlir::Region ®ion); diff --git a/mlir/include/numba/Transforms/MemoryRewrites.hpp b/mlir/include/numba/Transforms/MemoryRewrites.hpp index 912182a4b..1072d8b5f 100644 --- a/mlir/include/numba/Transforms/MemoryRewrites.hpp +++ b/mlir/include/numba/Transforms/MemoryRewrites.hpp @@ -5,12 +5,12 @@ #pragma once #include +#include #include namespace mlir { class AnalysisManager; class Pass; -struct LogicalResult; } // namespace mlir namespace numba { diff --git a/mlir/include/numba/Transforms/SCFVectorize.hpp b/mlir/include/numba/Transforms/SCFVectorize.hpp index 235a927e6..634bc08fc 100644 --- a/mlir/include/numba/Transforms/SCFVectorize.hpp +++ b/mlir/include/numba/Transforms/SCFVectorize.hpp @@ -5,12 +5,12 @@ #pragma once #include +#include #include namespace mlir { class OpBuilder; class Pass; -struct LogicalResult; namespace scf { class ParallelOp; } diff --git a/mlir/lib/Conversion/GpuRuntimeToLlvm.cpp b/mlir/lib/Conversion/GpuRuntimeToLlvm.cpp index 19ca1860b..e95197bf8 100644 --- a/mlir/lib/Conversion/GpuRuntimeToLlvm.cpp +++ b/mlir/lib/Conversion/GpuRuntimeToLlvm.cpp @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "numba/Conversion/GpuRuntimeToLlvm.hpp" +#include "numba/Conversion/GpuAttributes.hpp" #include "numba/Dialect/gpu_runtime/IR/GpuRuntimeOps.hpp" #include "numba/Dialect/numba_util/Dialect.hpp" @@ -303,7 +304,7 @@ class ConvertGpuModuleLoadPattern return mlir::failure(); auto blobAttr = gpuMod->getAttrOfType( - mlir::gpu::getDefaultGpuBinaryAnnotation()); + gpu_runtime::getGpuBinaryAttrName()); if (!blobAttr) return mlir::failure(); diff --git a/mlir/lib/Conversion/GpuToGpuRuntime.cpp b/mlir/lib/Conversion/GpuToGpuRuntime.cpp index baca8dcee..e6537fb2b 100644 --- a/mlir/lib/Conversion/GpuToGpuRuntime.cpp +++ b/mlir/lib/Conversion/GpuToGpuRuntime.cpp @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "numba/Conversion/GpuToGpuRuntime.hpp" +#include "numba/Conversion/GpuAttributes.hpp" #include "GpuCommon.hpp" @@ -1267,7 +1268,7 @@ struct SerializeSPIRVPass llvm::StringRef(reinterpret_cast(spvBinary.data()), spvBinary.size() * sizeof(uint32_t)); auto spvAttr = mlir::StringAttr::get(&getContext(), spvData); - gpuMod->setAttr(gpu::getDefaultGpuBinaryAnnotation(), spvAttr); + gpuMod->setAttr(gpu_runtime::getGpuBinaryAttrName(), spvAttr); spvMod->erase(); } } @@ -2630,6 +2631,10 @@ struct ApplySPIRVFastmathFlags }; } // namespace +namespace gpu_runtime { +std::string getGpuBinaryAttrName() { return "gpu.binary"; } +} // namespace gpu_runtime + // Expose the passes to the outside world std::unique_ptr gpu_runtime::createAbiAttrsPass() { return std::make_unique(); diff --git a/numba_mlir/numba_mlir/mlir_compiler/lib/NumpyResolver.hpp b/numba_mlir/numba_mlir/mlir_compiler/lib/NumpyResolver.hpp index 005a96874..4359d05da 100644 --- a/numba_mlir/numba_mlir/mlir_compiler/lib/NumpyResolver.hpp +++ b/numba_mlir/numba_mlir/mlir_compiler/lib/NumpyResolver.hpp @@ -9,6 +9,7 @@ #include #include +#include namespace llvm { class StringRef; @@ -20,7 +21,6 @@ class Location; class OpBuilder; class Value; class ValueRange; -struct LogicalResult; } // namespace mlir enum class PrimitiveType { Default = 0, View = 1, SideEffect = 2 }; diff --git a/numba_mlir/numba_mlir/mlir_compiler/lib/PyModule.cpp b/numba_mlir/numba_mlir/mlir_compiler/lib/PyModule.cpp index f91c8d35e..c90dfd18c 100644 --- a/numba_mlir/numba_mlir/mlir_compiler/lib/PyModule.cpp +++ b/numba_mlir/numba_mlir/mlir_compiler/lib/PyModule.cpp @@ -28,8 +28,9 @@ static bool isSyclMKLSupported() { } static unsigned getVectorLength() { - llvm::StringMap features; - if (!llvm::sys::getHostCPUFeatures(features)) + llvm::StringMap features = + llvm::sys::getHostCPUFeatures(); + if (features.size() == 0) return 128; auto checkFlag = [&](llvm::StringRef name) -> bool { diff --git a/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp b/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp index d73ca1334..5fcad545b 100644 --- a/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp +++ b/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp @@ -1635,6 +1635,8 @@ static ReduceFuncType getReduceFunc(mlir::gpu::AllReduceOperation op, return nullptr; } +#include + class ConvertGroupOpsToSubgroup : public mlir::OpRewritePattern { public: @@ -1707,7 +1709,9 @@ class ConvertGroupOpsToSubgroup mlir::Value subgroupId = [&]() { mlir::OpBuilder::InsertionGuard g(rewriter); rewriter.setInsertionPointToStart(&launchOp.getBody().front()); - return rewriter.create(rewriter.getUnknownLoc()); + return rewriter.create( + rewriter.getUnknownLoc(), + rewriter.getIndexAttr(std::numeric_limits::max())); }(); auto loc = op->getLoc(); @@ -1726,7 +1730,8 @@ class ConvertGroupOpsToSubgroup mlir::OpBuilder::InsertionGuard g(rewriter); rewriter.setInsertionPointToStart(&launchOp.getBody().front()); return rewriter.create( - rewriter.getUnknownLoc()); + rewriter.getUnknownLoc(), + rewriter.getIndexAttr(std::numeric_limits::max())); }(); mlir::Value zero = rewriter.create(loc, 0); diff --git a/patches/llvm.diff b/patches/llvm.diff new file mode 100644 index 000000000..76c1085e1 --- /dev/null +++ b/patches/llvm.diff @@ -0,0 +1,15 @@ +diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +index 5313a64ed47e..e16a4154c9bf 100644 +--- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp ++++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +@@ -188,6 +188,10 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, + if (!block->isEntryBlock() || + !isa(block->getParentOp())) + return std::nullopt; ++ if (!mlir::isa(barePtr.getType())) ++ return std::nullopt; ++ if (!resultType.hasStaticShape()) ++ return std::nullopt; + desc = MemRefDescriptor::fromStaticShape(builder, loc, *this, resultType, + inputs[0]); + } else { From 8b86774d224fd424d01e43fdda0509d05a9c1ca7 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Wed, 21 Aug 2024 15:59:56 +0200 Subject: [PATCH 2/2] Review fixes --- mlir/include/numba/Conversion/GpuAttributes.hpp | 4 ++-- mlir/lib/Conversion/GpuToGpuRuntime.cpp | 2 +- .../numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mlir/include/numba/Conversion/GpuAttributes.hpp b/mlir/include/numba/Conversion/GpuAttributes.hpp index c991ed71b..9ae1582a3 100644 --- a/mlir/include/numba/Conversion/GpuAttributes.hpp +++ b/mlir/include/numba/Conversion/GpuAttributes.hpp @@ -4,10 +4,10 @@ #pragma once -#include +#include namespace gpu_runtime { -std::string getGpuBinaryAttrName(); +mlir::StringRef getGpuBinaryAttrName(); } // namespace gpu_runtime diff --git a/mlir/lib/Conversion/GpuToGpuRuntime.cpp b/mlir/lib/Conversion/GpuToGpuRuntime.cpp index e6537fb2b..9b2c333f4 100644 --- a/mlir/lib/Conversion/GpuToGpuRuntime.cpp +++ b/mlir/lib/Conversion/GpuToGpuRuntime.cpp @@ -2632,7 +2632,7 @@ struct ApplySPIRVFastmathFlags } // namespace namespace gpu_runtime { -std::string getGpuBinaryAttrName() { return "gpu.binary"; } +mlir::StringRef getGpuBinaryAttrName() { return "gpu.binary"; } } // namespace gpu_runtime // Expose the passes to the outside world diff --git a/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp b/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp index 5fcad545b..74b850955 100644 --- a/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp +++ b/numba_mlir/numba_mlir/mlir_compiler/lib/pipelines/LowerToGpu.cpp @@ -4,6 +4,8 @@ #include "pipelines/LowerToGpu.hpp" +#include + #include #include #include @@ -1635,8 +1637,6 @@ static ReduceFuncType getReduceFunc(mlir::gpu::AllReduceOperation op, return nullptr; } -#include - class ConvertGroupOpsToSubgroup : public mlir::OpRewritePattern { public: