diff --git a/examples/histogram.jl b/examples/histogram.jl index 3e91c29d..d4a65c50 100644 --- a/examples/histogram.jl +++ b/examples/histogram.jl @@ -13,7 +13,7 @@ function create_histogram(input) end # This a 1D histogram kernel where the histogramming happens on shmem -@kernel function histogram_kernel!(histogram_output, input) +@kernel implicit_validindex = false function histogram_kernel!(histogram_output, input) tid = @index(Global, Linear) lid = @index(Local, Linear) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index ce4a6493..00320904 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -50,7 +50,7 @@ synchronize(backend) ``` """ macro kernel(expr) - return __kernel(expr, #=force_inbounds=# false) + return __kernel(expr, #=force_inbounds=# false, #=implicit_validindex=# true) end """ @@ -71,8 +71,9 @@ This allows for two different configurations: """ macro kernel(ex...) if length(ex) == 1 - return __kernel(ex[1], false) + return __kernel(ex[1], false, true) else + implicit_validindex = true force_inbounds = false for i in 1:(length(ex) - 1) if ex[i] isa Expr && ex[i].head == :(=) && @@ -81,6 +82,9 @@ macro kernel(ex...) elseif ex[i] isa Expr && ex[i].head == :(=) && ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool force_inbounds = ex[i].args[2] + elseif ex[i] isa Expr && ex[i].head == :(=) && + ex[i].args[1] == :implicit_validindex && ex[i].args[2] isa Bool + implicit_validindex = ex[i].args[2] else error( "Configuration should be of form:\n" * @@ -90,7 +94,7 @@ macro kernel(ex...) ) end end - return __kernel(ex[end], force_inbounds) + return __kernel(ex[end], force_inbounds, implicit_validindex) end end diff --git a/src/macros.jl b/src/macros.jl index 205a4888..2431a82c 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -10,7 +10,7 @@ function find_return(stmt) end # XXX: Proper errors -function __kernel(expr, force_inbounds = false) +function __kernel(expr, force_inbounds = false, implicit_validindex = true) def = splitdef(expr) name = def[:name] args = def[:args] @@ -30,7 +30,7 @@ function __kernel(expr, force_inbounds = false) def_gpu = deepcopy(def) def_gpu[:name] = gpu_name = Symbol(:gpu_, name) - transform_gpu!(def_gpu, constargs, force_inbounds) + transform_gpu!(def_gpu, constargs, force_inbounds, implicit_validindex) gpu_function = combinedef(def_gpu) # create constructor functions @@ -50,7 +50,7 @@ end # The easy case, transform the function for GPU execution # - mark constant arguments by applying `constify`. -function transform_gpu!(def, constargs, force_inbounds) +function transform_gpu!(def, constargs, force_inbounds, implicit_validindex) let_constargs = Expr[] for (i, arg) in enumerate(def[:args]) if constargs[i] @@ -64,11 +64,18 @@ function transform_gpu!(def, constargs, force_inbounds) @inbounds $(body) end end - body = quote - if $__validindex(__ctx__) + if implicit_validindex + body = quote + if $__validindex(__ctx__) + $(body) + end + return nothing + end + else + body = quote $(body) + return nothing end - return nothing end def[:body] = Expr( :let,