Skip to content

Commit ef81982

Browse files
Change FH -> HF
Swap F and H inds
1 parent 90572d1 commit ef81982

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+750
-738
lines changed

NEWS.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@ PR [#2033](https://github.com/CliMA/ClimaCore.jl/pull/2033) introduces new
2121
constructors for `DataLayout`s. Instead of writing
2222
```julia
2323
array = rand(FT, Nv, Nij, Nij, 3, Nh)
24-
data = VIJFH{S, Nv, Nij}(array)
24+
data = VIJHF{S, Nv, Nij}(array)
2525
```
2626

2727
You can now write
2828
```julia
29-
data = VIJFH{S}(ArrayType{FT}, rand; Nv, Nij, Nh)
29+
data = VIJHF{S}(ArrayType{FT}, rand; Nv, Nij, Nh)
3030
```
3131
and grab the `array` with `parent(data)` (if you need).
3232

docs/src/api.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ DataLayouts.DataF
2828
DataLayouts.IF
2929
DataLayouts.IJF
3030
DataLayouts.VF
31-
DataLayouts.IFH
32-
DataLayouts.IJFH
33-
DataLayouts.VIFH
34-
DataLayouts.VIJFH
31+
DataLayouts.IHF
32+
DataLayouts.IJHF
33+
DataLayouts.VIHF
34+
DataLayouts.VIJHF
3535
```
3636

3737
## Geometry

examples/hybrid/sphere/solid_body_rotation_3d.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ discrete_hydrostatic_balance!(ρ, p, z_top / n_vert, grav)
247247
# set up initial condition: not discretely balanced; only create a Field as a place holder
248248
Yc = map(coord -> init_sbr_thermo(coord.z), c_coords)
249249
# put the discretely balanced ρ and ρe into Yc
250-
parent(Yc.ρ) .= ρ # Yc.ρ is a VIJFH layout
250+
parent(Yc.ρ) .= ρ # Yc.ρ is a VIJHF layout
251251
parent(Yc.ρe) .= ρe
252252

253253
# initialize velocity: at rest

ext/cuda/data_layouts.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11

22
import ClimaCore.DataLayouts: AbstractData
33
import ClimaCore.DataLayouts: FusedMultiBroadcast
4-
import ClimaCore.DataLayouts: IJKFVH, IJFH, VIJFH, VIFH, IFH, IJF, IF, VF, DataF
5-
import ClimaCore.DataLayouts: IJFHStyle, VIJFHStyle, VFStyle, DataFStyle
4+
import ClimaCore.DataLayouts: IJKFVH, IJHF, VIJHF, VIHF, IHF, IJF, IF, VF, DataF
5+
import ClimaCore.DataLayouts: IJHFStyle, VIJHFStyle, VFStyle, DataFStyle
66
import ClimaCore.DataLayouts: promote_parent_array_type
77
import ClimaCore.DataLayouts: parent_array_type
88
import ClimaCore.DataLayouts: isascalar

ext/cuda/data_layouts_mapreduce.jl

+10-10
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ end
2020
function mapreduce_cuda(
2121
f,
2222
op,
23-
data::Union{DataLayouts.VF, DataLayouts.IJFH, DataLayouts.VIJFH};
23+
data::Union{DataLayouts.VF, DataLayouts.IJHF, DataLayouts.VIJHF};
2424
weighted_jacobian = OnesArray(parent(data)),
2525
opargs...,
2626
)
@@ -83,7 +83,7 @@ function mapreduce_cuda_kernel!(
8383
gidx = _get_gidx(tidx, bidx, effective_blksize)
8484
reduction = CUDA.CuStaticSharedArray(T, shmemsize)
8585
reduction[tidx] = 0
86-
(Nv, Nij, Nf, Nh) = _get_dims(dataview)
86+
(Nv, Nij, Nh, Nf) = _get_dims(dataview)
8787
nitems = Nv * Nij * Nij * Nf * Nh
8888

8989
# load shmem
@@ -115,21 +115,21 @@ end
115115
@inline _dataview(pdata::AbstractArray{FT, 2}, fidx) where {FT} =
116116
view(pdata, :, fidx:fidx)
117117

118-
# for IJFH DataLayout
118+
# for IJHF DataLayout
119119
@inline function _get_dims(pdata::AbstractArray{FT, 4}) where {FT}
120-
(Nij, _, Nf, Nh) = size(pdata)
121-
return (1, Nij, Nf, Nh)
120+
(Nij, _, Nh, Nf) = size(pdata)
121+
return (1, Nij, Nh, Nf)
122122
end
123123
@inline _dataview(pdata::AbstractArray{FT, 4}, fidx) where {FT} =
124-
view(pdata, :, :, fidx:fidx, :)
124+
view(pdata, :, :, :, fidx:fidx)
125125

126-
# for VIJFH DataLayout
126+
# for VIJHF DataLayout
127127
@inline function _get_dims(pdata::AbstractArray{FT, 5}) where {FT}
128-
(Nv, Nij, _, Nf, Nh) = size(pdata)
129-
return (Nv, Nij, Nf, Nh)
128+
(Nv, Nij, _, Nh, Nf) = size(pdata)
129+
return (Nv, Nij, Nh, Nf)
130130
end
131131
@inline _dataview(pdata::AbstractArray{FT, 5}, fidx) where {FT} =
132-
view(pdata, :, :, :, fidx:fidx, :)
132+
view(pdata, :, :, :, :, fidx:fidx)
133133

134134
@inline function _cuda_reduce!(op, reduction, tidx, reduction_size, N)
135135
if reduction_size > N

ext/cuda/data_layouts_threadblock.jl

+17-17
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ criteria:
2424
in the thread partition
2525
- The order of the thread partition should
2626
follow the fastest changing index in the
27-
datalayout (e.g., VIJ in VIJFH)
27+
datalayout (e.g., VIJ in VIJHF)
2828
"""
2929
function partition end
3030

@@ -46,25 +46,25 @@ bounds to ensure that the result of
4646
"""
4747
function is_valid_index end
4848

49-
##### VIJFH
50-
@inline function partition(data::DataLayouts.VIJFH, n_max_threads::Integer)
49+
##### VIJHF
50+
@inline function partition(data::DataLayouts.VIJHF, n_max_threads::Integer)
5151
(Nij, _, _, Nv, Nh) = DataLayouts.universal_size(data)
5252
Nv_thread = min(Int(fld(n_max_threads, Nij * Nij)), Nv)
5353
Nv_blocks = cld(Nv, Nv_thread)
5454
@assert prod((Nv_thread, Nij, Nij)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nv_thread, Nij, Nij))),$n_max_threads)"
5555
return (; threads = (Nv_thread, Nij, Nij), blocks = (Nv_blocks, Nh))
5656
end
57-
@inline function universal_index(::DataLayouts.VIJFH)
57+
@inline function universal_index(::DataLayouts.VIJHF)
5858
(tv, i, j) = CUDA.threadIdx()
5959
(bv, h) = CUDA.blockIdx()
6060
v = tv + (bv - 1) * CUDA.blockDim().x
6161
return CartesianIndex((i, j, 1, v, h))
6262
end
63-
@inline is_valid_index(::DataLayouts.VIJFH, I::CI5, us::UniversalSize) =
63+
@inline is_valid_index(::DataLayouts.VIJHF, I::CI5, us::UniversalSize) =
6464
1 ≤ I[4] ≤ DataLayouts.get_Nv(us)
6565

66-
##### IJFH
67-
@inline function partition(data::DataLayouts.IJFH, n_max_threads::Integer)
66+
##### IJHF
67+
@inline function partition(data::DataLayouts.IJHF, n_max_threads::Integer)
6868
(Nij, _, _, _, Nh) = DataLayouts.universal_size(data)
6969
Nh_thread = min(
7070
Int(fld(n_max_threads, Nij * Nij)),
@@ -75,30 +75,30 @@ end
7575
@assert prod((Nij, Nij)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nij, Nij))),$n_max_threads)"
7676
return (; threads = (Nij, Nij, Nh_thread), blocks = (Nh_blocks,))
7777
end
78-
@inline function universal_index(::DataLayouts.IJFH)
78+
@inline function universal_index(::DataLayouts.IJHF)
7979
(i, j, th) = CUDA.threadIdx()
8080
(bh,) = CUDA.blockIdx()
8181
h = th + (bh - 1) * CUDA.blockDim().z
8282
return CartesianIndex((i, j, 1, 1, h))
8383
end
84-
@inline is_valid_index(::DataLayouts.IJFH, I::CI5, us::UniversalSize) =
84+
@inline is_valid_index(::DataLayouts.IJHF, I::CI5, us::UniversalSize) =
8585
1 ≤ I[5] ≤ DataLayouts.get_Nh(us)
8686

87-
##### IFH
88-
@inline function partition(data::DataLayouts.IFH, n_max_threads::Integer)
87+
##### IHF
88+
@inline function partition(data::DataLayouts.IHF, n_max_threads::Integer)
8989
(Ni, _, _, _, Nh) = DataLayouts.universal_size(data)
9090
Nh_thread = min(Int(fld(n_max_threads, Ni)), Nh)
9191
Nh_blocks = cld(Nh, Nh_thread)
9292
@assert prod((Ni, Nh_thread)) ≤ n_max_threads "threads,n_max_threads=($(prod((Ni, Nh_thread))),$n_max_threads)"
9393
return (; threads = (Ni, Nh_thread), blocks = (Nh_blocks,))
9494
end
95-
@inline function universal_index(::DataLayouts.IFH)
95+
@inline function universal_index(::DataLayouts.IHF)
9696
(i, th) = CUDA.threadIdx()
9797
(bh,) = CUDA.blockIdx()
9898
h = th + (bh - 1) * CUDA.blockDim().y
9999
return CartesianIndex((i, 1, 1, 1, h))
100100
end
101-
@inline is_valid_index(::DataLayouts.IFH, I::CI5, us::UniversalSize) =
101+
@inline is_valid_index(::DataLayouts.IHF, I::CI5, us::UniversalSize) =
102102
1 ≤ I[5] ≤ DataLayouts.get_Nh(us)
103103

104104
##### IJF
@@ -125,21 +125,21 @@ end
125125
end
126126
@inline is_valid_index(::DataLayouts.IF, I::CI5, us::UniversalSize) = true
127127

128-
##### VIFH
129-
@inline function partition(data::DataLayouts.VIFH, n_max_threads::Integer)
128+
##### VIHF
129+
@inline function partition(data::DataLayouts.VIHF, n_max_threads::Integer)
130130
(Ni, _, _, Nv, Nh) = DataLayouts.universal_size(data)
131131
Nv_thread = min(Int(fld(n_max_threads, Ni)), Nv)
132132
Nv_blocks = cld(Nv, Nv_thread)
133133
@assert prod((Nv_thread, Ni)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nv_thread, Ni))),$n_max_threads)"
134134
return (; threads = (Nv_thread, Ni), blocks = (Nv_blocks, Nh))
135135
end
136-
@inline function universal_index(::DataLayouts.VIFH)
136+
@inline function universal_index(::DataLayouts.VIHF)
137137
(tv, i) = CUDA.threadIdx()
138138
(bv, h) = CUDA.blockIdx()
139139
v = tv + (bv - 1) * CUDA.blockDim().x
140140
return CartesianIndex((i, 1, 1, v, h))
141141
end
142-
@inline is_valid_index(::DataLayouts.VIFH, I::CI5, us::UniversalSize) =
142+
@inline is_valid_index(::DataLayouts.VIHF, I::CI5, us::UniversalSize) =
143143
1 ≤ I[4] ≤ DataLayouts.get_Nv(us)
144144

145145
##### VF

0 commit comments

Comments
 (0)