Skip to content

Commit 0a8c5d0

Browse files
Add `@inbounds` to several interpolation functions
1 parent 7624132 commit 0a8c5d0

File tree

3 files changed

+27
-20
lines changed

3 files changed

+27
-20
lines changed

ext/cuda/remapping_interpolate_array.jl

+18-13
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,11 @@ function interpolate_slab_kernel!(
4646
weights::AbstractArray{Tuple{A, A}},
4747
) where {A}
4848
index = threadIdx().x + (blockIdx().x - 1) * blockDim().x
49+
50+
index <= length(output_array) || return nothing
4951
space = axes(field)
5052
FT = Spaces.undertype(space)
51-
52-
if index <= length(output_array)
53+
@inbounds begin
5354
I1, I2 = weights[index]
5455
Nq1, Nq2 = length(I1), length(I2)
5556

@@ -74,10 +75,11 @@ function interpolate_slab_kernel!(
7475
weights::AbstractArray{Tuple{A}},
7576
) where {A}
7677
index = threadIdx().x + (blockIdx().x - 1) * blockDim().x
77-
space = axes(field)
78-
FT = Spaces.undertype(space)
7978

80-
if index <= length(output_array)
79+
index <= length(output_array) || return nothing
80+
@inbounds begin
81+
space = axes(field)
82+
FT = Spaces.undertype(space)
8183
I1, = weights[index]
8284
Nq = length(I1)
8385

@@ -130,11 +132,12 @@ function interpolate_slab_level_kernel!(
130132
(I1, I2)::Tuple{<:AbstractArray, <:AbstractArray},
131133
)
132134
index = threadIdx().x + (blockIdx().x - 1) * blockDim().x
133-
space = axes(field)
134-
FT = Spaces.undertype(space)
135-
Nq1, Nq2 = length(I1), length(I2)
136135

137-
if index <= length(vidx_ref_coordinates)
136+
index <= length(vidx_ref_coordinates) || return nothing
137+
@inbounds begin
138+
space = axes(field)
139+
FT = Spaces.undertype(space)
140+
Nq1, Nq2 = length(I1), length(I2)
138141
v_lo, v_hi, ξ3 = vidx_ref_coordinates[index]
139142

140143
f_lo = zero(FT)
@@ -165,11 +168,13 @@ function interpolate_slab_level_kernel!(
165168
(I1,)::Tuple{<:AbstractArray},
166169
)
167170
index = threadIdx().x + (blockIdx().x - 1) * blockDim().x
168-
space = axes(field)
169-
FT = Spaces.undertype(space)
170-
Nq = length(I1)
171171

172-
if index <= length(vidx_ref_coordinates)
172+
index <= length(vidx_ref_coordinates) || return nothing
173+
@inbounds begin
174+
space = axes(field)
175+
FT = Spaces.undertype(space)
176+
Nq = length(I1)
177+
173178
v_lo, v_hi, ξ3 = vidx_ref_coordinates[index]
174179

175180
f_lo = zero(FT)

src/Remapping/distributed_remapping.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ function _set_interpolated_values_device!(
685685

686686
for (field_index, field) in enumerate(fields)
687687
field_values = Fields.field_values(field)
688-
for (out_index, h) in enumerate(local_horiz_indices)
688+
@inbounds for (out_index, h) in enumerate(local_horiz_indices)
689689
out[out_index, field_index] = zero(FT)
690690
if hdims == 2
691691
for j in 1:Nq, i in 1:Nq

src/Remapping/interpolate_array.jl

+8-6
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ function interpolate_slab!(
2929
space = axes(field)
3030
FT = Spaces.undertype(space)
3131

32-
for index in 1:length(output_array)
32+
@inbounds for index in 1:length(output_array)
3333
(I1, I2) = weights[index]
3434
Nq1, Nq2 = length(I1), length(I2)
3535

@@ -56,7 +56,7 @@ function interpolate_slab!(
5656
space = axes(field)
5757
FT = Spaces.undertype(space)
5858

59-
for index in 1:length(output_array)
59+
@inbounds for index in 1:length(output_array)
6060
(I1,) = weights[index]
6161
Nq = length(I1)
6262

@@ -178,7 +178,7 @@ function interpolate_slab_level!(
178178
FT = Spaces.undertype(space)
179179
Nq1, Nq2 = length(I1), length(I2)
180180

181-
for index in 1:length(vidx_ref_coordinates)
181+
@inbounds for index in 1:length(vidx_ref_coordinates)
182182
v_lo, v_hi, ξ3 = vidx_ref_coordinates[index]
183183

184184
f_lo = zero(FT)
@@ -213,7 +213,7 @@ function interpolate_slab_level!(
213213
FT = Spaces.undertype(space)
214214
Nq = length(I1)
215215

216-
for index in 1:length(vidx_ref_coordinates)
216+
@inbounds for index in 1:length(vidx_ref_coordinates)
217217
v_lo, v_hi, ξ3 = vidx_ref_coordinates[index]
218218

219219
f_lo = zero(FT)
@@ -274,7 +274,7 @@ function interpolate_array(
274274
vertical_indices_ref_coordinates =
275275
[vertical_indices_ref_coordinate(space, zcoord) for zcoord in zpts]
276276

277-
for (ix, xcoord) in enumerate(xpts)
277+
@inbounds for (ix, xcoord) in enumerate(xpts)
278278
hcoord = xcoord
279279
helem = Meshes.containing_element(horz_mesh, hcoord)
280280
quad = Spaces.quadrature_style(space)
@@ -313,7 +313,9 @@ function interpolate_array(
313313
vertical_indices_ref_coordinates =
314314
[vertical_indices_ref_coordinate(space, zcoord) for zcoord in zpts]
315315

316-
for (iy, ycoord) in enumerate(ypts), (ix, xcoord) in enumerate(xpts)
316+
@inbounds for (iy, ycoord) in enumerate(ypts),
317+
(ix, xcoord) in enumerate(xpts)
318+
317319
hcoord = Geometry.product_coordinates(xcoord, ycoord)
318320
helem = Meshes.containing_element(horz_mesh, hcoord)
319321
quad = Spaces.quadrature_style(space)

0 commit comments

Comments
 (0)