Skip to content

Commit 2c7b404

Browse files
Limit z-threads in columnwise_partition
1 parent 4e7c44d commit 2c7b404

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

ext/cuda/data_layouts_threadblock.jl

+5-1
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,11 @@ end
176176
n_max_threads::Integer,
177177
)
178178
(Nij, _, _, _, Nh) = DataLayouts.universal_size(us)
179-
Nh_thread = min(Int(fld(n_max_threads, Nij * Nij)), Nh)
179+
Nh_thread = min(
180+
Int(fld(n_max_threads, Nij * Nij)),
181+
maximum_allowable_threads()[3],
182+
Nh,
183+
)
180184
Nh_blocks = cld(Nh, Nh_thread)
181185
@assert prod((Nij, Nij, Nh_thread)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nij, Nij, Nh_thread))),$n_max_threads)"
182186
return (; threads = (Nij, Nij, Nh_thread), blocks = (Nh_blocks,))

0 commit comments

Comments
 (0)