[SYCLomatic] Fix some bugs for migrating APPs using Pytorch as a library (#2568)

Signed-off-by: Jiang, Zhiwei <zhiwei.jiang@intel.com>
zhiweij1 authored Dec 17, 2024
1 parent e4dbf00 commit a5b0ce3
Showing 10 changed files with 77 additions and 31 deletions.
3 changes: 2 additions & 1 deletion clang/lib/DPCT/AnalysisInfo.cpp
@@ -246,7 +246,8 @@ std::shared_ptr<TargetType> makeTextureObjectInfo(const ValueDecl *D,
if (auto VD = dyn_cast<VarDecl>(D)) {
return std::make_shared<TargetType>(VD);
}
} else if (auto PVD = dyn_cast<ParmVarDecl>(D)) {
} else if (const auto *PVD = dyn_cast<ParmVarDecl>(D);
PVD && PVD->getTypeSourceInfo()) {
return std::make_shared<TargetType>(PVD);
}
return std::shared_ptr<TargetType>();
3 changes: 2 additions & 1 deletion clang/lib/DPCT/RulesInclude/InclusionHeaders.cpp
@@ -200,7 +200,8 @@ void IncludesCallbacks::InclusionDirective(
DpctGlobalInfo::getIncludeMapSet().push_back({IncludedFile, Repl});
}
}
return;
if (Global.isInRoot(IncludedFile))
return;
}

if (!Global.isInAnalysisScope(LocInfo.first) &&
3 changes: 1 addition & 2 deletions clang/lib/DPCT/RulesInclude/InclusionHeaders.inc
@@ -50,8 +50,7 @@ REGIST_INCLUSION("curand_kernel.h", FullMatch, Rng, Replace, false,
REGIST_INCLUSION("cusparse.h", FullMatch, Sparse, Replace, false,
HeaderType::HT_DPCT_SPBLAS_Utils)
REGIST_INCLUSION("cusparse_v2.h", FullMatch, Sparse, Replace, false,
HeaderType::HT_DPCT_SPBLAS_Utils,
HeaderType::HT_DPCT_BLAS_Utils)
HeaderType::HT_DPCT_SPBLAS_Utils)

REGIST_INCLUSION("cufft.h", FullMatch, FFT, Replace, false,
HeaderType::HT_DPCT_FFT_Utils)
7 changes: 7 additions & 0 deletions clang/lib/DPCT/RulesLang/RulesLang.cpp
@@ -1082,6 +1082,13 @@ void TypeInDeclRule::runRule(const MatchFinder::MatchResult &Result) {
}
Str = Itr->second;
}
} else if (llvm::StringRef(TypeStr).starts_with("cublas")) {
// In most cases, we do not need to insert blas_utils.hpp manually since
// the cublas_v2.h will be migrated. However, when the include directive
// of cublas_v2.h is not in the in-root, the migrated code cannot be
// built successfully.
DpctGlobalInfo::getInstance().insertHeader(
TL->getBeginLoc(), HeaderType::HT_DPCT_BLAS_Utils);
}

// Add '#include <complex>' directive to the file only once
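The comment in this hunk describes the corner case being fixed: when the #include <cublas_v2.h> directive lives outside the in-root, dpct never rewrites that include, so an in-root file that only uses a cublas* type would be migrated without <dpct/blas_utils.hpp> and would then fail to build. A minimal sketch of that layout (the file names here are hypothetical, not part of this commit; the two_analysis_scopes test below exercises the same pattern):

    // deps/dep.h -- outside --in-root; its #include <cublas_v2.h> is not migrated
    #include <cublas_v2.h>

    // src/app.cu -- inside --in-root; only the cuBLAS type is visible here
    #include <dep.h>
    void foo(cublasHandle_t handle); // TypeInDeclRule now inserts <dpct/blas_utils.hpp> here
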
2 changes: 1 addition & 1 deletion clang/test/dpct/pytorch/ATen.cu
@@ -5,7 +5,7 @@
// RUN: cp -r %S/pytorch_cuda_inc %T/pytorch/ATen/
// RUN: cd %T/pytorch/ATen
// RUN: mkdir dpct_out
// RUN: dpct -out-root dpct_out %T/pytorch/ATen/src/ATen.cu --extra-arg="-I%T/pytorch/ATen/pytorch_cuda_inc" --cuda-include-path="%cuda-path/include" --rule-file=%T/pytorch/ATen/user_defined_rule_pytorch.yaml -- -x cuda --cuda-host-only
// RUN: dpct --out-root dpct_out %T/pytorch/ATen/src/ATen.cu --extra-arg="-I%T/pytorch/ATen/pytorch_cuda_inc" --cuda-include-path="%cuda-path/include" --rule-file=%T/pytorch/ATen/user_defined_rule_pytorch.yaml --analysis-scope-path %T/pytorch/ATen/pytorch_cuda_inc --analysis-scope-path %T/pytorch/ATen/src --in-root %T/pytorch/ATen/src
// RUN: FileCheck --input-file %T/pytorch/ATen/dpct_out/ATen.dp.cpp --match-full-lines %T/pytorch/ATen/src/ATen.cu
// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/pytorch/ATen/dpct_out/ATen.dp.cpp -o %T/pytorch/ATen/dpct_out/ATen.dp.o %}

11 changes: 6 additions & 5 deletions clang/test/dpct/pytorch_api_type/pytoch_api_test1.cpp
@@ -8,15 +8,17 @@
#include <iostream>
#include <stdexcept>

// CHECK: #include "c10/xpu/XPUStream.h"
// CHECK: #include "ATen/xpu/XPUContext.h"
#include "ATen/cuda/CUDAContext.h"

class TensorStub {
namespace torch {
class Tensor {
public:
bool is_cuda() const {
return true;
}
};
} // namespace torch

#define MY_CHECK(condition, message) \
do { \
@@ -25,9 +27,8 @@ class TensorStub {
} \
} while (0)

int main() {
TensorStub x;
// CHECK: MY_CHECK(x.is_xpu(), "x must reside on device");
void foo(torch::Tensor x) {
// CHECK: MY_CHECK(x. is_xpu(), "x must reside on device");
MY_CHECK(x.is_cuda(), "x must reside on device");

return 0;
32 changes: 25 additions & 7 deletions clang/test/dpct/pytorch_api_type/pytoch_api_test2.cu
@@ -6,12 +6,26 @@

#define AT_CUDA_CHECK(stmt) (stmt)

namespace at {
namespace c10 {
using DeviceIndex = int8_t;
namespace cuda {
cudaStream_t getCurrentCUDAStream() {
return nullptr; // Return a dummy stream
class CUDAStream {
public:
CUDAStream() {}
cudaStream_t stream() { return 0; }
operator cudaStream_t() const {
return stream();
}
cudaStream_t stream() const;
};
CUDAStream getCurrentCUDAStream(DeviceIndex device_index = -1) {
return CUDAStream();
}
} // namespace cuda
} // namespace c10

namespace at {
using namespace c10;
} // namespace at

__global__ void kernel() {}
@@ -21,15 +35,19 @@ int main() {
dim3 blockSize(8, 8, 1);
void *args[] = {nullptr};

// CHECK: [&](){
// CHECK-NEXT: &static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream())->parallel_for(
// CHECK:([&](){
// CHECK-NEXT: ((sycl::queue*)(c10::xpu::getCurrentXPUStream()))->parallel_for(
// CHECK-NEXT: sycl::nd_range<3>(gridSize * blockSize, blockSize),
// CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) {
// CHECK-NEXT: kernel();
// CHECK-NEXT: });
// CHECK-NEXT: return 0;
// CHECK-NEXT:}();
// CHECK-NEXT:}());
AT_CUDA_CHECK(cudaLaunchKernel((const void *)kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream()));

at::DeviceIndex d = 1;
// CHECK: c10::xpu::getCurrentXPUStream(d);
at::cuda::getCurrentCUDAStream(d);
// CHECK: dpct::queue_ptr s = &static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream(). queue());
cudaStream_t s = at::cuda::getCurrentCUDAStream().stream();
return 0;
}
5 changes: 3 additions & 2 deletions clang/test/dpct/two_analysis_scopes/app/test.cu
@@ -1,15 +1,16 @@
// RUN: dpct --format-range=none --out-root %T/out %s --analysis-scope-path %S --analysis-scope-path %S/../deps --cuda-include-path="%cuda-path/include" --extra-arg="-I%S/../deps"
// RUN: FileCheck --match-full-lines --input-file %T/out/test.dp.cpp %s
// RUN: FileCheck --match-full-lines --input-file %T/out/test.dp.hpp %S/test.cuh
// RUN: echo "// empty" > %T/out/dep.h
// RUN: %if build_lit %{icpx -c -fsycl %T/out/test.dp.cpp -o %T/out/test.dp.o -I%T/out %}

// CHECK: #include <sycl/sycl.hpp>
// CHECK-NEXT: #include <dpct/dpct.hpp>
// CHECK-NEXT: #include <dep.h>
// CHECK-NEXT: #include "test.dp.hpp"
// CHECK-NEXT: #include <dpct/blas_utils.hpp>
#include "test.cuh"
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <dep.h>

void foo(cublasHandle_t handle, const half *a, const half *b, half *c,
int n, half *alpha, half *beta) {
5 changes: 5 additions & 0 deletions clang/test/dpct/two_analysis_scopes/app/test.cuh
@@ -0,0 +1,5 @@
// CHECK: #include <dep.h>
// CHECK-NEXT: #include <dpct/blas_utils.hpp>
#include <dep.h>

void foo_bar(cublasHandle_t h);
37 changes: 25 additions & 12 deletions clang/tools/dpct/DpctOptRules/pytorch_api.yaml
@@ -40,29 +40,42 @@
Priority: Takeover
In: get_in_order_queue
Out: static_cast<sycl::queue&>(c10::xpu::getCurrentXPUStream())
Includes: ["c10/xpu/XPUStream.h"]

- Rule: rule_process_is_cuda
Kind: PatternRewriter
- Rule: rule_torch_Tensor
Kind: Class
Priority: Takeover
In: is_cuda
Out: is_xpu
In: torch::Tensor
Out: torch::Tensor
Methods:
- In: is_cuda
Out: $method_base is_xpu()

- Rule: rule_getCurrentCUDAStream
Kind: PatternRewriter
Kind: API
Priority: Takeover
In: at::cuda::getCurrentCUDAStream()
Out: |
&static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream())
In: at::cuda::getCurrentCUDAStream
Out: c10::xpu::getCurrentXPUStream($1)
Includes: ["c10/xpu/XPUStream.h"]

- Rule: rule_CUDAStream
Kind: Class
Priority: Takeover
In: c10::cuda::CUDAStream
Out: c10::xpu::XPUStream
Methods:
- In: stream
Out: \&static_cast<sycl::queue &>($method_base queue())

- Rule: rule_remove_AT_CUDA_CHECK
Kind: PatternRewriter
Kind: Macro
Priority: Takeover
In: AT_CUDA_CHECK(${args});
Out: ${args};
In: AT_CUDA_CHECK
Out: |

- Rule: rule_CUDAContext_h
Kind: Header
Priority: Takeover
In: ATen/cuda/CUDAContext.h
Out: c10/xpu/XPUStream.h
Out: ATen/xpu/XPUContext.h
Includes: []
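
Taken together, these rules now use the typed Class/API/Macro/Header kinds instead of textual PatternRewriter matches. A rough before-and-after sketch of their effect, mirroring the CHECK lines in the two pytorch_api_type tests above (exact whitespace in the generated code may differ):

    // CUDA input
    torch::Tensor x;
    bool on_gpu = x.is_cuda();                                    // rule_torch_Tensor
    at::DeviceIndex d = 1;
    at::cuda::getCurrentCUDAStream(d);                            // rule_getCurrentCUDAStream
    cudaStream_t s = at::cuda::getCurrentCUDAStream().stream();   // rule_CUDAStream

    // Migrated SYCL output (per the tests)
    torch::Tensor x;
    bool on_gpu = x.is_xpu();
    at::DeviceIndex d = 1;
    c10::xpu::getCurrentXPUStream(d);
    dpct::queue_ptr s = &static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream().queue());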
