From 82a9389d62d82f60f5c85599712f5439f5591426 Mon Sep 17 00:00:00 2001
From: Engin Kayraklioglu <e-kayrakli@users.noreply.github.com>
Date: Fri, 16 Feb 2024 10:44:20 -0800
Subject: [PATCH 1/4] Add a directory of tests for an upcoming blog post

Signed-off-by: Engin Kayraklioglu <e-kayrakli@users.noreply.github.com>
---
 test/gpu/native/examples/blog-data/README.md  |  2 ++
 .../native/examples/blog-data/allocation.chpl | 10 +++++++
 .../native/examples/blog-data/allocation.good |  2 ++
 .../examples/blog-data/distributed.chpl       | 28 +++++++++++++++++++
 .../examples/blog-data/distributed.good       |  1 +
 .../native/examples/blog-data/movement.chpl   | 12 ++++++++
 .../native/examples/blog-data/movement.good   |  1 +
 .../native/examples/blog-data/overlap.chpl    | 21 ++++++++++++++
 .../native/examples/blog-data/overlap.good    |  1 +
 .../native/examples/blog-data/parallel.chpl   | 24 ++++++++++++++++
 .../native/examples/blog-data/parallel.good   |  1 +
 .../gpu/native/examples/blog-data/slices.chpl | 21 ++++++++++++++
 .../gpu/native/examples/blog-data/slices.good |  1 +
 13 files changed, 125 insertions(+)
 create mode 100644 test/gpu/native/examples/blog-data/README.md
 create mode 100644 test/gpu/native/examples/blog-data/allocation.chpl
 create mode 100644 test/gpu/native/examples/blog-data/allocation.good
 create mode 100644 test/gpu/native/examples/blog-data/distributed.chpl
 create mode 100644 test/gpu/native/examples/blog-data/distributed.good
 create mode 100644 test/gpu/native/examples/blog-data/movement.chpl
 create mode 100644 test/gpu/native/examples/blog-data/movement.good
 create mode 100644 test/gpu/native/examples/blog-data/overlap.chpl
 create mode 100644 test/gpu/native/examples/blog-data/overlap.good
 create mode 100644 test/gpu/native/examples/blog-data/parallel.chpl
 create mode 100644 test/gpu/native/examples/blog-data/parallel.good
 create mode 100644 test/gpu/native/examples/blog-data/slices.chpl
 create mode 100644 test/gpu/native/examples/blog-data/slices.good

diff --git a/test/gpu/native/examples/blog-data/README.md b/test/gpu/native/examples/blog-data/README.md
new file mode 100644
index 000000000000..b169c3e45dd7
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/README.md
@@ -0,0 +1,2 @@
+This directory contains examples from the blog post whose draft is in
+  https://github.com/chapel-lang/chapel-blog/pull/75
diff --git a/test/gpu/native/examples/blog-data/allocation.chpl b/test/gpu/native/examples/blog-data/allocation.chpl
new file mode 100644
index 000000000000..af5e7274e272
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/allocation.chpl
@@ -0,0 +1,10 @@
+var HostArr: [1..5] int;  // allocated on the host
+HostArr = 1;              // executes on [multicore] CPU
+
+on here.gpus[0] {
+  var DevArr: [1..5] int;  // allocated on the device
+  DevArr += 1;             // executes on GPU as a kernel
+  writeln(DevArr);         // prints "1 1 1 1 1"
+}
+
+writeln(HostArr);  // prints "1 1 1 1 1"
diff --git a/test/gpu/native/examples/blog-data/allocation.good b/test/gpu/native/examples/blog-data/allocation.good
new file mode 100644
index 000000000000..ac144a1d28ee
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/allocation.good
@@ -0,0 +1,2 @@
+1 1 1 1 1
+1 1 1 1 1
diff --git a/test/gpu/native/examples/blog-data/distributed.chpl b/test/gpu/native/examples/blog-data/distributed.chpl
new file mode 100644
index 000000000000..af2ff901a180
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/distributed.chpl
@@ -0,0 +1,28 @@
+import RangeChunk.chunks;
+
+config const n = 10;         // now, our application has `--n` to set this!
+config const sliceSize = 5;  // number of elements per slice
+
+var HostArr: [1..n] int;  // allocated on the host
+HostArr = 1;              // executes on [multicore] CPU
+
+coforall (loc, locChunk) in zip(Locales, chunks(1..n, numLocales)) {
+  on loc {
+    const numGpus = here.gpus.size;
+    coforall (gpu, gpuChunk) in zip(here.gpus, chunks(locChunk, numGpus)) {
+      on gpu {
+        const numSlices = gpuChunk.size/sliceSize;  // assume divisibility
+
+        coforall chunk in chunks(gpuChunk, numSlices) {
+          var DevArr: [chunk] int;  // allocated per device
+
+          DevArr = HostArr[chunk];  // copy a slice from host to device
+          DevArr += 1;              // executes on GPU as a kernel
+          HostArr[chunk] = DevArr;  // copy from device to a slice on host
+        }
+      }
+    }
+  }
+}
+
+writeln(HostArr);  // prints "2 2 2 2 2 ..."
diff --git a/test/gpu/native/examples/blog-data/distributed.good b/test/gpu/native/examples/blog-data/distributed.good
new file mode 100644
index 000000000000..35ba99cb515e
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/distributed.good
@@ -0,0 +1 @@
+2 2 2 2 2 2 2 2 2 2
diff --git a/test/gpu/native/examples/blog-data/movement.chpl b/test/gpu/native/examples/blog-data/movement.chpl
new file mode 100644
index 000000000000..2bf1671f6a03
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/movement.chpl
@@ -0,0 +1,12 @@
+var HostArr: [1..5] int;  // allocated on the host
+HostArr += 1;             // executes on [multicore] CPU
+
+on here.gpus[0] {
+  var DevArr: [1..5] int;  // allocated on the device
+
+  DevArr = HostArr;  // copy from host to device
+  DevArr += 1;       // executes on GPU as a kernel
+  HostArr = DevArr;  // copy from device to host
+}
+
+writeln(HostArr);  // prints "2 2 2 2 2"
diff --git a/test/gpu/native/examples/blog-data/movement.good b/test/gpu/native/examples/blog-data/movement.good
new file mode 100644
index 000000000000..fd4deaa8d583
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/movement.good
@@ -0,0 +1 @@
+2 2 2 2 2
diff --git a/test/gpu/native/examples/blog-data/overlap.chpl b/test/gpu/native/examples/blog-data/overlap.chpl
new file mode 100644
index 000000000000..7671d3bd2ad8
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/overlap.chpl
@@ -0,0 +1,21 @@
+import RangeChunk.chunks;
+
+config const n = 10;         // now, our application has `--n` to set this!
+config const sliceSize = 5;  // number of elements per slice
+
+const numSlices = n/sliceSize; // assume divisibility for simplicity
+
+var HostArr: [1..n] int;  // allocated on the host
+HostArr = 1;              // executes on [multicore] CPU
+
+on here.gpus[0] {
+  coforall chunk in chunks(1..n, numSlices) {
+    var DevArr: [chunk] int;  // allocated on the device *per task*
+
+    DevArr = HostArr[chunk];  // copy a slice from host to device
+    DevArr += 1;              // executes on GPU as a kernel
+    HostArr[chunk] = DevArr;  // copy from device to a slice on host
+  }
+}
+
+writeln(HostArr);  // prints "2 2 2 2 2 ..."
diff --git a/test/gpu/native/examples/blog-data/overlap.good b/test/gpu/native/examples/blog-data/overlap.good
new file mode 100644
index 000000000000..35ba99cb515e
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/overlap.good
@@ -0,0 +1 @@
+2 2 2 2 2 2 2 2 2 2
diff --git a/test/gpu/native/examples/blog-data/parallel.chpl b/test/gpu/native/examples/blog-data/parallel.chpl
new file mode 100644
index 000000000000..031dfb49d9a1
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/parallel.chpl
@@ -0,0 +1,24 @@
+import RangeChunk.chunks;
+
+config const n = 10;         // now, our application has `--n` to set this!
+config const sliceSize = 5;  // number of elements per slice
+
+var HostArr: [1..n] int;  // allocated on the host
+HostArr = 1;              // executes on [multicore] CPU
+
+const numGpus = here.gpus.size;   // number of GPUs on the locale
+coforall (gpu, gpuChunk) in zip(here.gpus, chunks(1..n, numGpus)) {
+  on gpu {
+    const numSlices = gpuChunk.size/sliceSize;  // assume divisibility
+
+    coforall chunk in chunks(gpuChunk, numSlices) {
+      var DevArr: [chunk] int;  // allocated on the device
+
+      DevArr = HostArr[chunk];  // copy a slice from host to device
+      DevArr += 1;              // executes on GPU as a kernel
+      HostArr[chunk] = DevArr;  // copy from device to a slice on host
+    }
+  }
+}
+
+writeln(HostArr);  // prints "2 2 2 2 2 ..."
diff --git a/test/gpu/native/examples/blog-data/parallel.good b/test/gpu/native/examples/blog-data/parallel.good
new file mode 100644
index 000000000000..35ba99cb515e
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/parallel.good
@@ -0,0 +1 @@
+2 2 2 2 2 2 2 2 2 2
diff --git a/test/gpu/native/examples/blog-data/slices.chpl b/test/gpu/native/examples/blog-data/slices.chpl
new file mode 100644
index 000000000000..ddd971d44e0e
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/slices.chpl
@@ -0,0 +1,21 @@
+import RangeChunk;
+
+config const n = 10;           // now, our application has `--n` to set this!
+config const sliceSize = 5;    // number of elements per slice
+
+const numSlices = n/sliceSize; // assume divisibility for simplicity
+
+var HostArr: [1..n] int;  // allocated on the host
+HostArr = 1;              // executes on [multicore] CPU
+
+on here.gpus[0] {
+  var DevArr: [1..n] int;  // allocated on the device
+
+  for chunk in RangeChunk.chunks(1..n, numSlices) {
+    DevArr = HostArr[chunk];        // copy a slice from host to device
+    DevArr += 1;                    // executes on GPU as a kernel
+    HostArr[chunk] = DevArr;        // copy from device to a slice on host
+  }
+}
+
+writeln(HostArr);          // prints "2 2 2 2 2 ..."
diff --git a/test/gpu/native/examples/blog-data/slices.good b/test/gpu/native/examples/blog-data/slices.good
new file mode 100644
index 000000000000..35ba99cb515e
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/slices.good
@@ -0,0 +1 @@
+2 2 2 2 2 2 2 2 2 2

From 94a9ea0c27b3e8d433d7189d467cd68cd490df6e Mon Sep 17 00:00:00 2001
From: Engin Kayraklioglu <e-kayrakli@users.noreply.github.com>
Date: Fri, 16 Feb 2024 11:02:22 -0800
Subject: [PATCH 2/4] Small adjustments for multilocale, multigpu

Signed-off-by: Engin Kayraklioglu <e-kayrakli@users.noreply.github.com>
---
 test/gpu/native/examples/blog-data/NUMLOCALES       | 1 +
 test/gpu/native/examples/blog-data/distributed.chpl | 4 ++--
 test/gpu/native/examples/blog-data/distributed.good | 2 +-
 test/gpu/native/examples/blog-data/overlap.chpl     | 4 ++--
 test/gpu/native/examples/blog-data/overlap.good     | 2 +-
 test/gpu/native/examples/blog-data/parallel.chpl    | 4 ++--
 test/gpu/native/examples/blog-data/parallel.good    | 2 +-
 test/gpu/native/examples/blog-data/slices.chpl      | 8 ++++----
 test/gpu/native/examples/blog-data/slices.good      | 2 +-
 9 files changed, 15 insertions(+), 14 deletions(-)
 create mode 100644 test/gpu/native/examples/blog-data/NUMLOCALES

diff --git a/test/gpu/native/examples/blog-data/NUMLOCALES b/test/gpu/native/examples/blog-data/NUMLOCALES
new file mode 100644
index 000000000000..b8626c4cff28
--- /dev/null
+++ b/test/gpu/native/examples/blog-data/NUMLOCALES
@@ -0,0 +1 @@
+4
diff --git a/test/gpu/native/examples/blog-data/distributed.chpl b/test/gpu/native/examples/blog-data/distributed.chpl
index af2ff901a180..d9f7330c95b9 100644
--- a/test/gpu/native/examples/blog-data/distributed.chpl
+++ b/test/gpu/native/examples/blog-data/distributed.chpl
@@ -1,7 +1,7 @@
 import RangeChunk.chunks;
 
-config const n = 10;         // now, our application has `--n` to set this!
-config const sliceSize = 5;  // number of elements per slice
+config const n = 32;         // now, our application has `--n` to set this!
+config const sliceSize = 4;  // number of elements per slice
 
 var HostArr: [1..n] int;  // allocated on the host
 HostArr = 1;              // executes on [multicore] CPU
diff --git a/test/gpu/native/examples/blog-data/distributed.good b/test/gpu/native/examples/blog-data/distributed.good
index 35ba99cb515e..3a8b696ad930 100644
--- a/test/gpu/native/examples/blog-data/distributed.good
+++ b/test/gpu/native/examples/blog-data/distributed.good
@@ -1 +1 @@
-2 2 2 2 2 2 2 2 2 2
+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
diff --git a/test/gpu/native/examples/blog-data/overlap.chpl b/test/gpu/native/examples/blog-data/overlap.chpl
index 7671d3bd2ad8..13b4301e6d18 100644
--- a/test/gpu/native/examples/blog-data/overlap.chpl
+++ b/test/gpu/native/examples/blog-data/overlap.chpl
@@ -1,7 +1,7 @@
 import RangeChunk.chunks;
 
-config const n = 10;         // now, our application has `--n` to set this!
-config const sliceSize = 5;  // number of elements per slice
+config const n = 32;         // now, our application has `--n` to set this!
+config const sliceSize = 4;  // number of elements per slice
 
 const numSlices = n/sliceSize; // assume divisibility for simplicity
 
diff --git a/test/gpu/native/examples/blog-data/overlap.good b/test/gpu/native/examples/blog-data/overlap.good
index 35ba99cb515e..3a8b696ad930 100644
--- a/test/gpu/native/examples/blog-data/overlap.good
+++ b/test/gpu/native/examples/blog-data/overlap.good
@@ -1 +1 @@
-2 2 2 2 2 2 2 2 2 2
+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
diff --git a/test/gpu/native/examples/blog-data/parallel.chpl b/test/gpu/native/examples/blog-data/parallel.chpl
index 031dfb49d9a1..25ebff8ad7f1 100644
--- a/test/gpu/native/examples/blog-data/parallel.chpl
+++ b/test/gpu/native/examples/blog-data/parallel.chpl
@@ -1,7 +1,7 @@
 import RangeChunk.chunks;
 
-config const n = 10;         // now, our application has `--n` to set this!
-config const sliceSize = 5;  // number of elements per slice
+config const n = 32;         // now, our application has `--n` to set this!
+config const sliceSize = 4;  // number of elements per slice
 
 var HostArr: [1..n] int;  // allocated on the host
 HostArr = 1;              // executes on [multicore] CPU
diff --git a/test/gpu/native/examples/blog-data/parallel.good b/test/gpu/native/examples/blog-data/parallel.good
index 35ba99cb515e..3a8b696ad930 100644
--- a/test/gpu/native/examples/blog-data/parallel.good
+++ b/test/gpu/native/examples/blog-data/parallel.good
@@ -1 +1 @@
-2 2 2 2 2 2 2 2 2 2
+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
diff --git a/test/gpu/native/examples/blog-data/slices.chpl b/test/gpu/native/examples/blog-data/slices.chpl
index ddd971d44e0e..07faaf8d2d1f 100644
--- a/test/gpu/native/examples/blog-data/slices.chpl
+++ b/test/gpu/native/examples/blog-data/slices.chpl
@@ -1,7 +1,7 @@
-import RangeChunk;
+import RangeChunk.chunks;
 
-config const n = 10;           // now, our application has `--n` to set this!
-config const sliceSize = 5;    // number of elements per slice
+config const n = 32;           // now, our application has `--n` to set this!
+config const sliceSize = 4;    // number of elements per slice
 
 const numSlices = n/sliceSize; // assume divisibility for simplicity
 
@@ -11,7 +11,7 @@ HostArr = 1;              // executes on [multicore] CPU
 on here.gpus[0] {
   var DevArr: [1..n] int;  // allocated on the device
 
-  for chunk in RangeChunk.chunks(1..n, numSlices) {
+  for chunk in chunks(1..n, numSlices) {
     DevArr = HostArr[chunk];        // copy a slice from host to device
     DevArr += 1;                    // executes on GPU as a kernel
     HostArr[chunk] = DevArr;        // copy from device to a slice on host
diff --git a/test/gpu/native/examples/blog-data/slices.good b/test/gpu/native/examples/blog-data/slices.good
index 35ba99cb515e..3a8b696ad930 100644
--- a/test/gpu/native/examples/blog-data/slices.good
+++ b/test/gpu/native/examples/blog-data/slices.good
@@ -1 +1 @@
-2 2 2 2 2 2 2 2 2 2
+2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2

From 5ac12fb6f653715b53c8d1e5866aff2c89fd5b99 Mon Sep 17 00:00:00 2001
From: Engin Kayraklioglu <e-kayrakli@users.noreply.github.com>
Date: Fri, 16 Feb 2024 11:06:57 -0800
Subject: [PATCH 3/4] Minor adjustments for consistency

Signed-off-by: Engin Kayraklioglu <e-kayrakli@users.noreply.github.com>
---
 test/gpu/native/examples/blog-data/allocation.chpl  |  2 +-
 test/gpu/native/examples/blog-data/distributed.chpl |  2 +-
 test/gpu/native/examples/blog-data/movement.chpl    |  2 +-
 test/gpu/native/examples/blog-data/overlap.chpl     |  2 +-
 test/gpu/native/examples/blog-data/slices.chpl      | 12 ++++++------
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/test/gpu/native/examples/blog-data/allocation.chpl b/test/gpu/native/examples/blog-data/allocation.chpl
index af5e7274e272..185fdc7636cc 100644
--- a/test/gpu/native/examples/blog-data/allocation.chpl
+++ b/test/gpu/native/examples/blog-data/allocation.chpl
@@ -7,4 +7,4 @@ on here.gpus[0] {
   writeln(DevArr);         // prints "1 1 1 1 1"
 }
 
-writeln(HostArr);  // prints "1 1 1 1 1"
+writeln(HostArr);  // prints "2 2 2 2 2"
diff --git a/test/gpu/native/examples/blog-data/distributed.chpl b/test/gpu/native/examples/blog-data/distributed.chpl
index d9f7330c95b9..384ef9ca2bdf 100644
--- a/test/gpu/native/examples/blog-data/distributed.chpl
+++ b/test/gpu/native/examples/blog-data/distributed.chpl
@@ -14,7 +14,7 @@ coforall (loc, locChunk) in zip(Locales, chunks(1..n, numLocales)) {
         const numSlices = gpuChunk.size/sliceSize;  // assume divisibility
 
         coforall chunk in chunks(gpuChunk, numSlices) {
-          var DevArr: [chunk] int;  // allocated per device
+          var DevArr: [chunk] int;  // allocated on the device
 
           DevArr = HostArr[chunk];  // copy a slice from host to device
           DevArr += 1;              // executes on GPU as a kernel
diff --git a/test/gpu/native/examples/blog-data/movement.chpl b/test/gpu/native/examples/blog-data/movement.chpl
index 2bf1671f6a03..7a674a168b2b 100644
--- a/test/gpu/native/examples/blog-data/movement.chpl
+++ b/test/gpu/native/examples/blog-data/movement.chpl
@@ -1,5 +1,5 @@
 var HostArr: [1..5] int;  // allocated on the host
-HostArr += 1;             // executes on [multicore] CPU
+HostArr = 1;              // executes on [multicore] CPU
 
 on here.gpus[0] {
   var DevArr: [1..5] int;  // allocated on the device
diff --git a/test/gpu/native/examples/blog-data/overlap.chpl b/test/gpu/native/examples/blog-data/overlap.chpl
index 13b4301e6d18..48e1ae32c5f5 100644
--- a/test/gpu/native/examples/blog-data/overlap.chpl
+++ b/test/gpu/native/examples/blog-data/overlap.chpl
@@ -3,7 +3,7 @@ import RangeChunk.chunks;
 config const n = 32;         // now, our application has `--n` to set this!
 config const sliceSize = 4;  // number of elements per slice
 
-const numSlices = n/sliceSize; // assume divisibility for simplicity
+const numSlices = n/sliceSize;  // assume divisibility for simplicity
 
 var HostArr: [1..n] int;  // allocated on the host
 HostArr = 1;              // executes on [multicore] CPU
diff --git a/test/gpu/native/examples/blog-data/slices.chpl b/test/gpu/native/examples/blog-data/slices.chpl
index 07faaf8d2d1f..d1055ca82197 100644
--- a/test/gpu/native/examples/blog-data/slices.chpl
+++ b/test/gpu/native/examples/blog-data/slices.chpl
@@ -1,9 +1,9 @@
 import RangeChunk.chunks;
 
-config const n = 32;           // now, our application has `--n` to set this!
-config const sliceSize = 4;    // number of elements per slice
+config const n = 32;         // now, our application has `--n` to set this!
+config const sliceSize = 4;  // number of elements per slice
 
-const numSlices = n/sliceSize; // assume divisibility for simplicity
+const numSlices = n/sliceSize;  // assume divisibility for simplicity
 
 var HostArr: [1..n] int;  // allocated on the host
 HostArr = 1;              // executes on [multicore] CPU
@@ -12,9 +12,11 @@ on here.gpus[0] {
-  var DevArr: [1..n] int;  // allocated on the device
+  // The device buffer is reused for every slice, so it holds exactly
+  // sliceSize elements; whole-array assignment needs matching shapes.
+  var DevArr: [1..sliceSize] int;  // allocated on the device
 
   for chunk in chunks(1..n, numSlices) {
-    DevArr = HostArr[chunk];        // copy a slice from host to device
-    DevArr += 1;                    // executes on GPU as a kernel
-    HostArr[chunk] = DevArr;        // copy from device to a slice on host
+    DevArr = HostArr[chunk];  // copy a slice from host to device
+    DevArr += 1;              // executes on GPU as a kernel
+    HostArr[chunk] = DevArr;  // copy from device to a slice on host
   }
 }
 

From c940b496592a3aa7fdef58d2c8372b715a5814e9 Mon Sep 17 00:00:00 2001
From: Engin Kayraklioglu <e-kayrakli@users.noreply.github.com>
Date: Mon, 1 Jul 2024 16:34:32 -0700
Subject: [PATCH 4/4] Update README and a comment

Signed-off-by: Engin Kayraklioglu <e-kayrakli@users.noreply.github.com>
---
 test/gpu/native/examples/blog-data/README.md       | 3 ++-
 test/gpu/native/examples/blog-data/allocation.chpl | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/gpu/native/examples/blog-data/README.md b/test/gpu/native/examples/blog-data/README.md
index b169c3e45dd7..d04576cc38d9 100644
--- a/test/gpu/native/examples/blog-data/README.md
+++ b/test/gpu/native/examples/blog-data/README.md
@@ -1,2 +1,3 @@
-This directory contains examples from the blog post whose draft is in
+This directory contains examples from the blog post below (second link: its draft PR)
+  https://chapel-lang.org/blog/posts/gpu-data-movement/
   https://github.com/chapel-lang/chapel-blog/pull/75
diff --git a/test/gpu/native/examples/blog-data/allocation.chpl b/test/gpu/native/examples/blog-data/allocation.chpl
index 185fdc7636cc..af5e7274e272 100644
--- a/test/gpu/native/examples/blog-data/allocation.chpl
+++ b/test/gpu/native/examples/blog-data/allocation.chpl
@@ -7,4 +7,4 @@ on here.gpus[0] {
   writeln(DevArr);         // prints "1 1 1 1 1"
 }
 
-writeln(HostArr);  // prints "2 2 2 2 2"
+writeln(HostArr);  // prints "1 1 1 1 1"