Skip to content

Commit

Permalink
[SYCLomatic] Enable migration of implicit sync behavior between defau…
Browse files Browse the repository at this point in the history
…lt stream and other stream (#2567)

Add new value for option:  -use-experimental-features=in_order_queue_events to apply SYCL feature: in order queue events during migration (ref to https://github.com/intel/llvm/blob/ca955e538171cb7b7eb07734dd5c2b958c84901c/sycl/doc/extensions/experimental/sycl_ext_oneapi_in_order_queue_events.asciidoc)  

Signed-off-by: intwanghao <hao3.wang@intel.com>
  • Loading branch information
intwanghao authored Dec 19, 2024
1 parent b3b0ed5 commit d236fce
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 2 deletions.
8 changes: 8 additions & 0 deletions clang/include/clang/DPCT/DPCTOptions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,14 @@ DPCT_ENUM_OPTION(
"Experimental extension that allows map an address range onto "
"multiple allocations of physical memory.",
false),
DPCT_OPTION_ENUM_VALUE(
"in_order_queue_events",
int(ExperimentalFeatures::Exp_InOrderQueueEvents),
"Experimental extension that allows get the event from the "
"last command submission into the queue and set an external "
"event as an implicit dependence on the next command submitted to "
"the queue.",
false),
DPCT_OPTION_ENUM_VALUE(
"non-stdandard-sycl-builtins",
int(ExperimentalFeatures::Exp_NonStandardSYCLBuiltins),
Expand Down
19 changes: 18 additions & 1 deletion clang/lib/DPCT/AnalysisInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5721,6 +5721,13 @@ void KernelCallExpr::printSubmit(KernelPrinter &Printer) {
}

printStreamBase(Printer);
if (isDefaultStream()) {
SubmitStmts.DefaultStreamFlag = true;
}
if (DpctGlobalInfo::useExpInOrderQueueEvents() &&
(DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_Restricted)) {
SubmitStmts.ImplicitSyncFlag = true;
}
if (SubmitStmts.empty()) {
printParallelFor(Printer, false);
} else {
Expand Down Expand Up @@ -6519,10 +6526,20 @@ KernelPrinter &KernelCallExpr::SubmitStmtsList::print(KernelPrinter &Printer) {
printList(Printer, NdRangeList,
"ranges to define ND iteration space for the kernel");
printList(Printer, CommandGroupList, "helper variables defined");
if (ImplicitSyncFlag) {
if (DefaultStreamFlag) {
Printer.line("cgh.depends_on(dpct::get_current_device().get_in_order_"
"queues_last_events());");
} else {
Printer.line("cgh.depends_on(dpct::get_default_queue().ext_oneapi_get_"
"last_event());");
}
Printer.newLine();
}
return Printer;
}
bool KernelCallExpr::SubmitStmtsList::empty() const noexcept {
return CommandGroupList.empty() && NdRangeList.empty() &&
return !ImplicitSyncFlag && CommandGroupList.empty() && NdRangeList.empty() &&
AccessorList.empty() && PtrList.empty() && MemoryList.empty() &&
RangeList.empty() && TextureList.empty() && SamplerList.empty() &&
StreamList.empty() && SyncList.empty();
Expand Down
6 changes: 5 additions & 1 deletion clang/lib/DPCT/AnalysisInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,9 @@ class DpctGlobalInfo {
static bool useExpVirtualMemory() {
return getUsingExperimental<ExperimentalFeatures::Exp_VirtualMemory>();
}
static bool useExpInOrderQueueEvents() {
return getUsingExperimental<ExperimentalFeatures::Exp_InOrderQueueEvents>();
}
static bool useExpNonStandardSYCLBuiltins() {
return getUsingExperimental<
ExperimentalFeatures::Exp_NonStandardSYCLBuiltins>();
Expand Down Expand Up @@ -2933,7 +2936,8 @@ class KernelCallExpr : public CallFunctionExpr {
StmtList SamplerList;
StmtList NdRangeList;
StmtList CommandGroupList;

bool ImplicitSyncFlag = false;
bool DefaultStreamFlag = false;
KernelPrinter &print(KernelPrinter &Printer);
bool empty() const noexcept;

Expand Down
1 change: 1 addition & 0 deletions clang/lib/DPCT/CommandOption/ValidateArguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ enum class ExperimentalFeatures : unsigned int {
Exp_NonUniformGroups,
Exp_DeviceGlobal,
Exp_VirtualMemory,
Exp_InOrderQueueEvents,
Exp_ExperimentalFeaturesEnumSize,
Exp_NonStandardSYCLBuiltins,
Exp_All
Expand Down
15 changes: 15 additions & 0 deletions clang/runtime/dpct-rt/include/dpct/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,21 @@ class device_ext : public sycl::device {
lock.lock();
}
std::vector<sycl::event> get_in_order_queues_last_events() {
std::unique_lock<mutex_type> lock(m_mutex);
std::vector<sycl::event> last_events;
std::vector<std::shared_ptr<sycl::queue>> current_queues(_queues);
lock.unlock();
for (const auto &q : current_queues) {
if (q->is_in_order()) {
last_events.push_back(q->ext_oneapi_get_last_event());
}
}
// Guard the destruct of current_queues to make sure the ref count is safe.
lock.lock();
return last_events;
}
sycl::queue *create_queue(bool enable_exception_handler = false) {
#ifdef DPCT_USM_LEVEL_NONE
return create_out_of_order_queue(enable_exception_handler);
Expand Down
40 changes: 40 additions & 0 deletions clang/test/dpct/kernel_implicit_sync.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// RUN: dpct --format-range=none --use-experimental-features=in_order_queue_events -out-root %T/kernel_implicit_sync %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only
// RUN: FileCheck %s --match-full-lines --input-file %T/kernel_implicit_sync/kernel_implicit_sync.dp.cpp
// RUN: %if build_lit %{icpx -c -fsycl %T/kernel_implicit_sync/kernel_implicit_sync.dp.cpp -o %T/kernel_implicit_sync/kernel_implicit_sync.dp.o %}
#include<cuda_runtime.h>

__global__ void kernel(int *a){

}

int main() {
int *a, *b;
cudaStream_t s1;
cudaStreamCreate(&s1);
cudaMallocManaged(&a, 100);
cudaMallocManaged(&b, 100);

// CHECK: q_ct1.submit(
// CHECK: [&](sycl::handler &cgh) {
// CHECK: cgh.depends_on(dpct::get_current_device().get_in_order_queues_last_events());
// CHECK: cgh.parallel_for(
// CHECK: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
// CHECK: [=](sycl::nd_item<3> item_ct1) {
// CHECK: kernel(a);
// CHECK: });
// CHECK: });
kernel<<<1,1>>>(a);

// CHECK: s1->submit(
// CHECK: [&](sycl::handler &cgh) {
// CHECK: cgh.depends_on(dpct::get_default_queue().ext_oneapi_get_last_event());
// CHECK: cgh.parallel_for(
// CHECK: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
// CHECK: [=](sycl::nd_item<3> item_ct1) {
// CHECK: kernel(b);
// CHECK: });
// CHECK: });
kernel<<<1, 1, 0, s1>>>(b);

return 0;
}

0 comments on commit d236fce

Please sign in to comment.