Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new GPU tests from an upcoming blog post #24434

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions test/gpu/native/examples/blog-data/NUMLOCALES
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4
3 changes: 3 additions & 0 deletions test/gpu/native/examples/blog-data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This directory contains the examples from the following blog post and the related chapel-blog pull request:
https://chapel-lang.org/blog/posts/gpu-data-movement/
https://github.com/chapel-lang/chapel-blog/pull/75
10 changes: 10 additions & 0 deletions test/gpu/native/examples/blog-data/allocation.chpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Index range shared by the host-side and device-side arrays.
const indices = 1..5;

// Declared outside of any `on` block, so this array lives in host memory;
// the whole-array assignment below runs on the (multicore) CPU.
var cpuData: [indices] int;
cpuData = 1;

on here.gpus[0] {
  // Declared inside the GPU's `on` block, so this array lives in device memory.
  var gpuData: [indices] int;

  // Whole-array update; executes on the GPU as a kernel.
  gpuData += 1;

  // Elements start out as 0, so this prints "1 1 1 1 1".
  writeln(gpuData);
}

// The host array was never touched by the GPU: prints "1 1 1 1 1".
writeln(cpuData);
2 changes: 2 additions & 0 deletions test/gpu/native/examples/blog-data/allocation.good
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1 1 1 1 1
1 1 1 1 1
28 changes: 28 additions & 0 deletions test/gpu/native/examples/blog-data/distributed.chpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import RangeChunk.chunks;

config const n = 32;         // now, our application has `--n` to set this!
config const sliceSize = 4;  // number of elements per slice

var HostArr: [1..n] int;  // allocated on the host
HostArr = 1;              // executes on [multicore] CPU

// Split 1..n across the locales, split each locale's share across that
// locale's GPUs, and finally process each GPU's share slice by slice.
coforall (loc, locChunk) in zip(Locales, chunks(1..n, numLocales)) {
  on loc {
    const numGpus = here.gpus.size;
    coforall (gpu, gpuChunk) in zip(here.gpus, chunks(locChunk, numGpus)) {
      on gpu {
        // Round the slice count up instead of truncating: when a GPU's share
        // is smaller than `sliceSize` (e.g. 4 locales with 4 GPUs each and
        // n=32 gives 2 elements per GPU), plain integer division yields 0
        // slices and the share would silently be skipped. When the share is
        // a multiple of `sliceSize`, this is identical to plain division.
        const numSlices = (gpuChunk.size + sliceSize - 1) / sliceSize;

        coforall chunk in chunks(gpuChunk, numSlices) {
          var DevArr: [chunk] int;  // allocated on the device

          DevArr = HostArr[chunk];  // copy a slice from host to device
          DevArr += 1;              // executes on GPU as a kernel
          HostArr[chunk] = DevArr;  // copy from device to a slice on host
        }
      }
    }
  }
}

writeln(HostArr);  // prints "2 2 2 2 2 ..."
1 change: 1 addition & 0 deletions test/gpu/native/examples/blog-data/distributed.good
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
12 changes: 12 additions & 0 deletions test/gpu/native/examples/blog-data/movement.chpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Index range shared by the host-side and device-side arrays.
const indices = 1..5;

// Host-resident array; the fill below runs on the (multicore) CPU.
var cpuData: [indices] int;
cpuData = 1;

on here.gpus[0] {
  // Device-resident scratch array of the same shape as the host array.
  var gpuData: [indices] int;

  gpuData = cpuData;  // host -> device copy
  gpuData += 1;       // runs on the GPU as a kernel
  cpuData = gpuData;  // device -> host copy
}

// Every element made the round trip through the GPU: prints "2 2 2 2 2".
writeln(cpuData);
1 change: 1 addition & 0 deletions test/gpu/native/examples/blog-data/movement.good
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 2 2 2 2
21 changes: 21 additions & 0 deletions test/gpu/native/examples/blog-data/overlap.chpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import RangeChunk.chunks;

config const n = 32;         // problem size; settable with `--n`
config const sliceSize = 4;  // elements handled per slice; settable with `--sliceSize`

// How many slices cover 1..n. For simplicity, `n` is assumed to be a
// multiple of `sliceSize`.
const sliceCount = n/sliceSize;

// Host-resident array; the fill below runs on the (multicore) CPU.
var cpuData: [1..n] int;
cpuData = 1;

// One task per slice, all targeting the first GPU. Each task owns a private
// device array sized to its slice.
on here.gpus[0] do
  coforall slice in chunks(1..n, sliceCount) {
    var gpuData: [slice] int;  // device allocation, one per task

    gpuData = cpuData[slice];  // host slice -> device
    gpuData += 1;              // runs on the GPU as a kernel
    cpuData[slice] = gpuData;  // device -> host slice
  }

writeln(cpuData);  // prints "2 2 2 2 2 ..."
1 change: 1 addition & 0 deletions test/gpu/native/examples/blog-data/overlap.good
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
24 changes: 24 additions & 0 deletions test/gpu/native/examples/blog-data/parallel.chpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import RangeChunk.chunks;

config const n = 32;         // now, our application has `--n` to set this!
config const sliceSize = 4;  // number of elements per slice

var HostArr: [1..n] int;  // allocated on the host
HostArr = 1;              // executes on [multicore] CPU

// Split 1..n across all GPUs of this locale, then process each GPU's share
// slice by slice with one task per slice.
const numGpus = here.gpus.size;  // number of GPUs on the locale
coforall (gpu, gpuChunk) in zip(here.gpus, chunks(1..n, numGpus)) {
  on gpu {
    // Round the slice count up instead of truncating: when a GPU's share is
    // smaller than `sliceSize`, plain integer division yields 0 slices and
    // the share would silently be skipped. When the share is a multiple of
    // `sliceSize`, this is identical to plain division.
    const numSlices = (gpuChunk.size + sliceSize - 1) / sliceSize;

    coforall chunk in chunks(gpuChunk, numSlices) {
      var DevArr: [chunk] int;  // allocated on the device

      DevArr = HostArr[chunk];  // copy a slice from host to device
      DevArr += 1;              // executes on GPU as a kernel
      HostArr[chunk] = DevArr;  // copy from device to a slice on host
    }
  }
}

writeln(HostArr);  // prints "2 2 2 2 2 ..."
1 change: 1 addition & 0 deletions test/gpu/native/examples/blog-data/parallel.good
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
21 changes: 21 additions & 0 deletions test/gpu/native/examples/blog-data/slices.chpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import RangeChunk.chunks;

config const n = 32;         // now, our application has `--n` to set this!
config const sliceSize = 4;  // number of elements per slice

const numSlices = n/sliceSize;  // assume divisibility for simplicity

var HostArr: [1..n] int;  // allocated on the host
HostArr = 1;              // executes on [multicore] CPU

on here.gpus[0] {
  // A single device buffer, reused for every slice. It must hold exactly one
  // slice (`sliceSize` elements), not the whole problem: the whole-array
  // assignments below require both sides to have the same number of elements.
  var DevArr: [1..sliceSize] int;  // allocated on the device

  // Slices are processed one after another, reusing the same device buffer.
  for chunk in chunks(1..n, numSlices) {
    DevArr = HostArr[chunk];  // copy a slice from host to device
    DevArr += 1;              // executes on GPU as a kernel
    HostArr[chunk] = DevArr;  // copy from device to a slice on host
  }
}

writeln(HostArr);  // prints "2 2 2 2 2 ..."
1 change: 1 addition & 0 deletions test/gpu/native/examples/blog-data/slices.good
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
Loading
Oops, something went wrong.