diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp
index d410a7e8ba57..458724d38b1a 100644
--- a/clang/lib/DPCT/AnalysisInfo.cpp
+++ b/clang/lib/DPCT/AnalysisInfo.cpp
@@ -246,7 +246,8 @@ std::shared_ptr<TextureObjectInfo> makeTextureObjectInfo(const ValueDecl *D,
     if (auto VD = dyn_cast<VarDecl>(D)) {
       return std::make_shared<TextureObjectInfo>(VD);
     }
-  } else if (auto PVD = dyn_cast<ParmVarDecl>(D)) {
+  } else if (const auto *PVD = dyn_cast<ParmVarDecl>(D);
+             PVD && PVD->getTypeSourceInfo()) {
     return std::make_shared<TextureObjectInfo>(PVD);
   }
   return std::shared_ptr<TextureObjectInfo>();
diff --git a/clang/lib/DPCT/RulesInclude/InclusionHeaders.cpp b/clang/lib/DPCT/RulesInclude/InclusionHeaders.cpp
index 79673ca76c2e..672a76e3f5b2 100644
--- a/clang/lib/DPCT/RulesInclude/InclusionHeaders.cpp
+++ b/clang/lib/DPCT/RulesInclude/InclusionHeaders.cpp
@@ -200,7 +200,8 @@ void IncludesCallbacks::InclusionDirective(
         DpctGlobalInfo::getIncludeMapSet().push_back({IncludedFile, Repl});
       }
     }
-    return;
+    if (Global.isInRoot(IncludedFile))
+      return;
   }
 
   if (!Global.isInAnalysisScope(LocInfo.first) &&
diff --git a/clang/lib/DPCT/RulesInclude/InclusionHeaders.inc b/clang/lib/DPCT/RulesInclude/InclusionHeaders.inc
index 87991dc3ae14..dad32862ed64 100644
--- a/clang/lib/DPCT/RulesInclude/InclusionHeaders.inc
+++ b/clang/lib/DPCT/RulesInclude/InclusionHeaders.inc
@@ -50,8 +50,7 @@ REGIST_INCLUSION("curand_kernel.h", FullMatch, Rng, Replace, false,
 REGIST_INCLUSION("cusparse.h", FullMatch, Sparse, Replace, false,
                  HeaderType::HT_DPCT_SPBLAS_Utils)
 REGIST_INCLUSION("cusparse_v2.h", FullMatch, Sparse, Replace, false,
-                 HeaderType::HT_DPCT_SPBLAS_Utils,
-                 HeaderType::HT_DPCT_BLAS_Utils)
+                 HeaderType::HT_DPCT_SPBLAS_Utils)
 
 REGIST_INCLUSION("cufft.h", FullMatch, FFT, Replace, false,
                  HeaderType::HT_DPCT_FFT_Utils)
diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp
index af65bd17b184..68ffdaf523b7 100644
--- a/clang/lib/DPCT/RulesLang/RulesLang.cpp
+++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp
@@ -1082,6 +1082,13 @@ void TypeInDeclRule::runRule(const MatchFinder::MatchResult &Result) {
       }
       Str = Itr->second;
     }
+  } else if (llvm::StringRef(TypeStr).starts_with("cublas")) {
+    // In most cases we do not need to insert blas_utils.hpp manually, since
+    // the include of cublas_v2.h will itself be migrated. However, when the
+    // include directive of cublas_v2.h is outside the in-root, the migrated
+    // code cannot be built successfully without this insertion.
+    DpctGlobalInfo::getInstance().insertHeader(
+        TL->getBeginLoc(), HeaderType::HT_DPCT_BLAS_Utils);
   }
 
   // Add '#include ' directive to the file only once
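
Note: the AnalysisInfo.cpp hunk combines a C++17 init-statement with an extra
null check, so a ParmVarDecl that carries no type source info (for example, one
synthesized by the compiler) no longer produces a TextureObjectInfo. A minimal
standalone sketch of the pattern; Decl/ParmDecl below are simplified stand-ins
for the clang classes, not the real API:

    #include <iostream>
    #include <memory>

    struct Decl { virtual ~Decl() = default; };
    struct ParmDecl : Decl {
      const void *TypeInfo = nullptr; // may be null for synthesized parameters
      const void *getTypeSourceInfo() const { return TypeInfo; }
    };

    // C++17 "if (init; cond)": bind the downcast and test the precondition in
    // one condition, mirroring the patched makeTextureObjectInfo.
    std::shared_ptr<ParmDecl> makeInfo(Decl *D) {
      if (const auto *PVD = dynamic_cast<const ParmDecl *>(D);
          PVD && PVD->getTypeSourceInfo())
        return std::make_shared<ParmDecl>(*PVD);
      return nullptr;
    }

    int main() {
      ParmDecl P; // no type source info attached
      std::cout << (makeInfo(&P) ? "created" : "skipped") << '\n'; // "skipped"
    }
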
diff --git a/clang/test/dpct/pytorch/ATen.cu b/clang/test/dpct/pytorch/ATen.cu
index d613ff66d474..8df6fd9be66b 100644
--- a/clang/test/dpct/pytorch/ATen.cu
+++ b/clang/test/dpct/pytorch/ATen.cu
@@ -5,7 +5,7 @@
 // RUN: cp -r %S/pytorch_cuda_inc %T/pytorch/ATen/
 // RUN: cd %T/pytorch/ATen
 // RUN: mkdir dpct_out
-// RUN: dpct -out-root dpct_out %T/pytorch/ATen/src/ATen.cu --extra-arg="-I%T/pytorch/ATen/pytorch_cuda_inc" --cuda-include-path="%cuda-path/include" --rule-file=%T/pytorch/ATen/user_defined_rule_pytorch.yaml -- -x cuda --cuda-host-only
+// RUN: dpct --out-root dpct_out %T/pytorch/ATen/src/ATen.cu --extra-arg="-I%T/pytorch/ATen/pytorch_cuda_inc" --cuda-include-path="%cuda-path/include" --rule-file=%T/pytorch/ATen/user_defined_rule_pytorch.yaml --analysis-scope-path %T/pytorch/ATen/pytorch_cuda_inc --analysis-scope-path %T/pytorch/ATen/src --in-root %T/pytorch/ATen/src
 // RUN: FileCheck --input-file %T/pytorch/ATen/dpct_out/ATen.dp.cpp --match-full-lines %T/pytorch/ATen/src/ATen.cu
 // RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/pytorch/ATen/dpct_out/ATen.dp.cpp -o %T/pytorch/ATen/dpct_out/ATen.dp.o %}
diff --git a/clang/test/dpct/pytorch_api_type/pytoch_api_test1.cpp b/clang/test/dpct/pytorch_api_type/pytoch_api_test1.cpp
index 68bf1013d60f..fb0828bfee5c 100644
--- a/clang/test/dpct/pytorch_api_type/pytoch_api_test1.cpp
+++ b/clang/test/dpct/pytorch_api_type/pytoch_api_test1.cpp
@@ -8,15 +8,17 @@
 
 #include 
 #include 
 
-// CHECK: #include "c10/xpu/XPUStream.h"
+// CHECK: #include "ATen/xpu/XPUContext.h"
 #include "ATen/cuda/CUDAContext.h"
 
-class TensorStub {
+namespace torch {
+class Tensor {
 public:
   bool is_cuda() const { return true; }
 };
+} // namespace torch
 
 #define MY_CHECK(condition, message) \
   do { \
     if (!(condition)) { \
@@ -25,9 +27,8 @@ class TensorStub {
     } \
   } while (0)
 
-int main() {
-  TensorStub x;
-  // CHECK: MY_CHECK(x.is_xpu(), "x must reside on device");
+void foo(torch::Tensor x) {
+  // CHECK: MY_CHECK(x.is_xpu(), "x must reside on device");
   MY_CHECK(x.is_cuda(), "x must reside on device");
   return 0;
 }
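
Note: the test now declares torch::Tensor itself because the rewritten
rule_torch_Tensor (see pytorch_api.yaml at the end of this patch) matches on
the qualified type rather than on the bare token is_cuda, so an ad-hoc
TensorStub would no longer be migrated. A hedged sketch of the method rewrite
the Class-kind rule performs; the stub mirrors the test, and the expansion of
$method_base is inferred from the test's CHECK lines:

    namespace torch {
    class Tensor {
    public:
      bool is_cuda() const { return false; } // matched by "In: is_cuda"
      bool is_xpu() const { return true; }   // named in "Out: $method_base is_xpu()"
    };
    } // namespace torch

    // The rule rewrites the call while keeping the member-access base:
    // "x.is_cuda()" becomes "x.is_xpu()", with $method_base expanding to "x.".
    bool device_ok(const torch::Tensor &x) {
      return x.is_xpu(); // what MY_CHECK(x.is_cuda(), ...) checks after migration
    }

    int main() { return device_ok(torch::Tensor{}) ? 0 : 1; }
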
is_xpu(), "x must reside on device"); MY_CHECK(x.is_cuda(), "x must reside on device"); return 0; diff --git a/clang/test/dpct/pytorch_api_type/pytoch_api_test2.cu b/clang/test/dpct/pytorch_api_type/pytoch_api_test2.cu index 284f67902330..771dc3c2833f 100644 --- a/clang/test/dpct/pytorch_api_type/pytoch_api_test2.cu +++ b/clang/test/dpct/pytorch_api_type/pytoch_api_test2.cu @@ -6,12 +6,26 @@ #define AT_CUDA_CHECK(stmt) (stmt) -namespace at { +namespace c10 { +using DeviceIndex = int8_t; namespace cuda { -cudaStream_t getCurrentCUDAStream() { - return nullptr; // Return a dummy stream +class CUDAStream { +public: + CUDAStream() {} + cudaStream_t stream() { return 0; } + operator cudaStream_t() const { + return stream(); + } + cudaStream_t stream() const; +}; +CUDAStream getCurrentCUDAStream(DeviceIndex device_index = -1) { + return CUDAStream(); } } // namespace cuda +} // namespace c10 + +namespace at { +using namespace c10; } // namespace at __global__ void kernel() {} @@ -21,15 +35,19 @@ int main() { dim3 blockSize(8, 8, 1); void *args[] = {nullptr}; - // CHECK: [&](){ - // CHECK-NEXT: &static_cast(c10::xpu::getCurrentXPUStream())->parallel_for( + // CHECK:([&](){ + // CHECK-NEXT: ((sycl::queue*)(c10::xpu::getCurrentXPUStream()))->parallel_for( // CHECK-NEXT: sycl::nd_range<3>(gridSize * blockSize, blockSize), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { // CHECK-NEXT: kernel(); // CHECK-NEXT: }); // CHECK-NEXT: return 0; - // CHECK-NEXT:}(); + // CHECK-NEXT:}()); AT_CUDA_CHECK(cudaLaunchKernel((const void *)kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream())); - + at::DeviceIndex d = 1; + // CHECK: c10::xpu::getCurrentXPUStream(d); + at::cuda::getCurrentCUDAStream(d); + // CHECK: dpct::queue_ptr s = &static_cast(c10::xpu::getCurrentXPUStream(). 
diff --git a/clang/test/dpct/two_analysis_scopes/app/test.cu b/clang/test/dpct/two_analysis_scopes/app/test.cu
index 247013fddf31..0819f27962e5 100644
--- a/clang/test/dpct/two_analysis_scopes/app/test.cu
+++ b/clang/test/dpct/two_analysis_scopes/app/test.cu
@@ -1,15 +1,16 @@
 // RUN: dpct --format-range=none --out-root %T/out %s --analysis-scope-path %S --analysis-scope-path %S/../deps --cuda-include-path="%cuda-path/include" --extra-arg="-I%S/../deps"
 // RUN: FileCheck --match-full-lines --input-file %T/out/test.dp.cpp %s
+// RUN: FileCheck --match-full-lines --input-file %T/out/test.dp.hpp %S/test.cuh
 // RUN: echo "// empty" > %T/out/dep.h
 // RUN: %if build_lit %{icpx -c -fsycl %T/out/test.dp.cpp -o %T/out/test.dp.o -I%T/out %}
 
 // CHECK: #include <sycl/sycl.hpp>
 // CHECK-NEXT: #include <dpct/dpct.hpp>
-// CHECK-NEXT: #include 
+// CHECK-NEXT: #include "test.dp.hpp"
 // CHECK-NEXT: #include 
+#include "test.cuh"
 #include 
 #include 
-#include 
 
 void foo(cublasHandle_t handle, const half *a, const half *b, half *c, int n,
          half *alpha, half *beta) {
diff --git a/clang/test/dpct/two_analysis_scopes/app/test.cuh b/clang/test/dpct/two_analysis_scopes/app/test.cuh
new file mode 100644
index 000000000000..f90a1831f970
--- /dev/null
+++ b/clang/test/dpct/two_analysis_scopes/app/test.cuh
@@ -0,0 +1,5 @@
+// CHECK: #include <sycl/sycl.hpp>
+// CHECK-NEXT: #include <dpct/dpct.hpp>
+#include <cublas_v2.h>
+
+void foo_bar(cublasHandle_t h);
diff --git a/clang/tools/dpct/DpctOptRules/pytorch_api.yaml b/clang/tools/dpct/DpctOptRules/pytorch_api.yaml
index 169e96229d8b..a363f955fa5e 100644
--- a/clang/tools/dpct/DpctOptRules/pytorch_api.yaml
+++ b/clang/tools/dpct/DpctOptRules/pytorch_api.yaml
@@ -40,29 +40,42 @@
   Priority: Takeover
   In: get_in_order_queue
   Out: static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream())
+  Includes: ["c10/xpu/XPUStream.h"]
 
-- Rule: rule_process_is_cuda
-  Kind: PatternRewriter
+- Rule: rule_torch_Tensor
+  Kind: Class
   Priority: Takeover
-  In: is_cuda
-  Out: is_xpu
+  In: torch::Tensor
+  Out: torch::Tensor
+  Methods:
+    - In: is_cuda
+      Out: $method_base is_xpu()
 
 - Rule: rule_getCurrentCUDAStream
-  Kind: PatternRewriter
+  Kind: API
   Priority: Takeover
-  In: at::cuda::getCurrentCUDAStream()
-  Out: |
-    &static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream())
+  In: at::cuda::getCurrentCUDAStream
+  Out: c10::xpu::getCurrentXPUStream($1)
+  Includes: ["c10/xpu/XPUStream.h"]
+
+- Rule: rule_CUDAStream
+  Kind: Class
+  Priority: Takeover
+  In: c10::cuda::CUDAStream
+  Out: c10::xpu::XPUStream
+  Methods:
+    - In: stream
+      Out: \&static_cast<sycl::queue &>($method_base queue())
 
 - Rule: rule_remove_AT_CUDA_CHECK
-  Kind: PatternRewriter
+  Kind: Macro
   Priority: Takeover
-  In: AT_CUDA_CHECK(${args});
-  Out: ${args};
+  In: AT_CUDA_CHECK
+  Out: |
 
 - Rule: rule_CUDAContext_h
   Kind: Header
   Priority: Takeover
   In: ATen/cuda/CUDAContext.h
-  Out: c10/xpu/XPUStream.h
+  Out: ATen/xpu/XPUContext.h
   Includes: []
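
Note: rule_remove_AT_CUDA_CHECK now has Kind: Macro with an empty literal
block for Out, which erases the macro name and keeps only its parenthesized
argument; together with dpct's usual lambda-wrapping of cudaLaunchKernel this
explains the "([&](){ ... }())" shape checked in pytoch_api_test2.cu. A
standalone sketch of why the leftover text still compiles; launch_stub is a
hypothetical stand-in for the migrated parallel_for:

    #include <cstdio>

    int launch_stub() { return 0; } // stands in for the generated parallel_for call

    int main() {
      // BEFORE: AT_CUDA_CHECK(cudaLaunchKernel(...));
      // AFTER : the macro name is gone; its argument survives as an
      //         immediately-invoked lambda wrapped in the old parentheses.
      ([&]() { return launch_stub(); }());
      std::puts("launched");
      return 0;
    }
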