
Change usage of DEFAULT_CPU TUNING_TARGET to DEFAULT (#515)
* Add skipping of broken tests for the default config on AMD and NVIDIA

* Add CMake warning for disabled tests

* Change TUNING_TARGET from DEFAULT_CPU to DEFAULT

* Update README
s-Nick authored May 10, 2024
1 parent 30ed01a commit 5b80c99
Showing 14 changed files with 71 additions and 37 deletions.
9 changes: 8 additions & 1 deletion README.md
@@ -416,6 +416,13 @@ advisable for NVIDIA and **mandatory for AMD** to provide the specific device
architecture through `-DDPCPP_SYCL_ARCH=<arch>`, e.g., `<arch>` can be `sm_80`
for NVIDIA or `gfx908` for AMD.

It is possible to use the `DEFAULT` target even for AMD and NVIDIA GPUs, but
defining `-DDPCPP_SYCL_TARGET` and `-DDPCPP_SYCL_ARCH` is mandatory. The rules
mentioned above also apply in this case.
Using `DEFAULT` as the target will speed up compilation at the expense of
runtime performance. Additionally, some operators will be disabled.
For full compatibility and best performance, set the `TUNING_TARGET` appropriately.
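
For illustration, a configure invocation for an NVIDIA device using the
`DEFAULT` target might look like the sketch below; the Ninja generator, the
`nvptx64-nvidia-cuda` target triple, and the `sm_80` architecture are
assumptions to adjust for your toolchain and device:

```bash
# Sketch of a DEFAULT-target build for an NVIDIA GPU (adjust the target triple
# and architecture to match your device).
cmake -GNinja ../ \
  -DSYCL_COMPILER=dpcpp \
  -DTUNING_TARGET=DEFAULT \
  -DDPCPP_SYCL_TARGET=nvptx64-nvidia-cuda \
  -DDPCPP_SYCL_ARCH=sm_80
```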

#### DPC++ Compiler Support

As DPCPP SYCL compiler the project is fully compatible with `icpx` provided by
@@ -487,7 +494,7 @@ Some of the supported options are:
| `BLAS_ENABLE_TESTING` | `ON`/`OFF` | Set it to `OFF` to avoid building the tests (`ON` is the default value) |
| `BLAS_ENABLE_BENCHMARK` | `ON`/`OFF` | Set it to `OFF` to avoid building the benchmarks (`ON` is the default value) |
| `SYCL_COMPILER` | name | Used to determine which SYCL implementation to use. By default, the first implementation found is used. Supported values are: `dpcpp`, `adaptivecpp` and `computecpp`*(deprecated)*. |
| `TUNING_TARGET` | name | By default, this flag is set to `DEFAULT_CPU` to restrict any device specific compiler optimizations. Use this flag to tune the code for a target (**highly recommended** for performance). The supported targets are: `INTEL_GPU`, `NVIDIA_GPU`, `AMD_GPU` |
| `TUNING_TARGET` | name | By default, this flag is set to `DEFAULT` to restrict any device specific compiler optimizations. Use this flag to tune the code for a target (**highly recommended** for performance). The supported targets are: `INTEL_GPU`, `NVIDIA_GPU`, `AMD_GPU` |
| `CMAKE_PREFIX_PATH` | path | List of paths to check when searching for dependencies |
| `CMAKE_INSTALL_PREFIX` | path | Specify the install location, used when invoking `ninja install` |
| `BUILD_SHARED_LIBS` | `ON`/`OFF` | Build as shared library (`ON` by default) |
8 changes: 4 additions & 4 deletions cmake/CmakeFunctionHelper.cmake
@@ -98,11 +98,11 @@ function(set_target_compile_def in_target)
elseif(${TUNING_TARGET} STREQUAL "NVIDIA_GPU")
target_compile_definitions(${in_target} PUBLIC NVIDIA_GPU=1)
else()
if(NOT ${TUNING_TARGET} STREQUAL "DEFAULT_CPU")
message(STATUS "${TUNING_TARGET} not supported. Switching to DEFAULT_CPU instead.")
set(TUNING_TARGET "DEFAULT_CPU")
if(NOT ${TUNING_TARGET} STREQUAL "DEFAULT")
message(STATUS "${TUNING_TARGET} not supported. Switching to DEFAULT instead.")
set(TUNING_TARGET "DEFAULT")
endif()
target_compile_definitions(${in_target} PUBLIC DEFAULT_CPU=1)
target_compile_definitions(${in_target} PUBLIC DEFAULT=1)
endif()
message(STATUS "Adding ${TUNING_TARGET} backend to target ${in_target}")
#setting tall skinny support
2 changes: 1 addition & 1 deletion cmake/Modules/ConfigurePORTBLAS.cmake
@@ -56,7 +56,7 @@ if(NAIVE_GEMM)
endif()

# the TUNING_TARGET variable defines the platform for which the sycl library is tuned
SET(TUNING_TARGET "DEFAULT_CPU" CACHE STRING "Default Platform 'DEFAULT_CPU'")
SET(TUNING_TARGET "DEFAULT" CACHE STRING "Default Platform 'DEFAULT'")
message(STATUS "${TUNING_TARGET} is chosen as a tuning target")

if(DEFINED TARGET)
2 changes: 1 addition & 1 deletion cmake/Modules/SYCL.cmake
@@ -97,7 +97,7 @@ elseif(is_adaptivecpp)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
get_target_property(SYCL_INCLUDE_DIRS AdaptiveCpp::acpp-rt INTERFACE_INCLUDE_DIRECTORIES)
set(HIP_BENCH_UNSUPPORTED_TARGETS "INTEL_GPU" "DEFAULT_CPU")
set(HIP_BENCH_UNSUPPORTED_TARGETS "INTEL_GPU" "DEFAULT")
if((${BLAS_ENABLE_BENCHMARK}) AND (${TUNING_TARGET} IN_LIST HIP_BENCH_UNSUPPORTED_TARGETS))
message(STATUS "Benchmarks are not supported when targetting OpenCL/LevelZero backend
devices. portBLAS Benchmarks are disabled.")
4 changes: 2 additions & 2 deletions doc/Gemm.md
@@ -172,7 +172,7 @@ This cmake variable causes a corresponding define for the selected platform to b
#elif defined POWER_VR
#include "interface/blas3/backend/power_vr.hpp"
#else
#include "interface/blas3/backend/default_cpu.hpp"
#include "interface/blas3/backend/default.hpp"
#endif
```

@@ -307,7 +307,7 @@ The relevant parameters are:
- Vector size, the number of elements to use in vectorized loads/stores.
- Batch type, whether to use strided (most `GEMM` kernels) or the interleaved `GEMM` for batched calls.

For an example of a backend target header and some of the ways that configurations are selected let's look at `src/interface/blas3/backend/default_cpu.hpp` :
For an example of a backend target header and some of the ways that configurations are selected let's look at `src/interface/blas3/backend/default.hpp` :

```c++
template <bool _t_a, bool _t_b, bool is_beta_zero, typename sb_handle_t,
2 changes: 1 addition & 1 deletion src/interface/blas1/backend/backend.hpp
@@ -29,6 +29,6 @@
#elif NVIDIA_GPU
#include "interface/blas1/backend/nvidia_gpu.hpp"
#else
#include "interface/blas1/backend/default_cpu.hpp"
#include "interface/blas1/backend/default.hpp"
#endif

src/interface/blas1/backend/default_cpu.hpp → default.hpp
@@ -22,8 +22,8 @@
* @filename defaul_cpu.hpp
*
**************************************************************************/
#ifndef PORTBLAS_ASUM_DEFAULT_CPU_BACKEND_HPP
#define PORTBLAS_ASUM_DEFAULT_CPU_BACKEND_HPP
#ifndef PORTBLAS_ASUM_DEFAULT_BACKEND_HPP
#define PORTBLAS_ASUM_DEFAULT_BACKEND_HPP
#include "interface/blas1_interface.h"

namespace blas {
2 changes: 1 addition & 1 deletion src/interface/blas2/backend/backend.hpp
@@ -31,5 +31,5 @@
#elif NVIDIA_GPU
#include "interface/blas2/backend/nvidia_gpu.hpp"
#else
#include "interface/blas2/backend/default_cpu.hpp"
#include "interface/blas2/backend/default.hpp"
#endif
src/interface/blas2/backend/default_cpu.hpp → default.hpp
@@ -19,11 +19,11 @@
*
* portBLAS: BLAS implementation using SYCL
*
* @filename default_cpu.hpp
* @filename default.hpp
*
**************************************************************************/
#ifndef PORTBLAS_GEMV_DEFAULT_CPU_BACKEND_HPP
#define PORTBLAS_GEMV_DEFAULT_CPU_BACKEND_HPP
#ifndef PORTBLAS_GEMV_DEFAULT_BACKEND_HPP
#define PORTBLAS_GEMV_DEFAULT_BACKEND_HPP
#include "interface/blas2_interface.h"

namespace blas {
2 changes: 1 addition & 1 deletion src/interface/blas3/backend/backend.hpp
@@ -31,5 +31,5 @@
#elif defined NVIDIA_GPU
#include "interface/blas3/backend/nvidia_gpu.hpp"
#else
#include "interface/blas3/backend/default_cpu.hpp"
#include "interface/blas3/backend/default.hpp"
#endif
src/interface/blas3/backend/default_cpu.hpp → default.hpp
@@ -19,11 +19,11 @@
*
* portBLAS: BLAS implementation using SYCL
*
* @filename default_cpu.hpp
* @filename default.hpp
*
**************************************************************************/
#ifndef PORTBLAS_GEMM_DEFAULT_CPU_BACKEND_HPP
#define PORTBLAS_GEMM_DEFAULT_CPU_BACKEND_HPP
#ifndef PORTBLAS_GEMM_DEFAULT_BACKEND_HPP
#define PORTBLAS_GEMM_DEFAULT_BACKEND_HPP
#include "interface/gemm_launcher.h"

namespace blas {
2 changes: 1 addition & 1 deletion src/interface/extension/backend/backend.hpp
@@ -29,5 +29,5 @@
#elif defined NVIDIA_GPU
#include "interface/extension/backend/nvidia_gpu.hpp"
#else
#include "interface/extension/backend/default_cpu.hpp"
#include "interface/extension/backend/default.hpp"
#endif
src/interface/extension/backend/default_cpu.hpp → default.hpp
@@ -19,11 +19,11 @@
*
* portBLAS: BLAS implementation using SYCL
*
* @filename default_cpu.hpp
* @filename default.hpp
*
**************************************************************************/
#ifndef PORTBLAS_TRANSPOSE_DEFAULT_CPU_BACKEND_HPP
#define PORTBLAS_TRANSPOSE_DEFAULT_CPU_BACKEND_HPP
#ifndef PORTBLAS_TRANSPOSE_DEFAULT_BACKEND_HPP
#define PORTBLAS_TRANSPOSE_DEFAULT_BACKEND_HPP
#include "interface/extension_interface.h"

namespace blas {
53 changes: 40 additions & 13 deletions test/unittest/CMakeLists.txt
@@ -73,18 +73,20 @@ set(SYCL_UNITTEST_SRCS
${PORTBLAS_UNITTEST}/buffers/sycl_buffer_test.cpp
)

# Skip these tests for AdaptiveCpp for SPIRV/OpenCL targets
# that use SYCL 2020 features like group reduction or hang
# during execution (https://github.com/AdaptiveCpp/AdaptiveCpp/issues/1309)
set(ADAPTIVE_CPP_SKIP
${PORTBLAS_UNITTEST}/blas1/blas1_asum_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_sdsdot_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_nrm2_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_dot_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_rot_test.cpp
# Hang during execution (without failing)
${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp
)
if(is_adaptivecpp)
# Skip these tests for AdaptiveCpp for SPIRV/OpenCL targets
# that use SYCL 2020 features like group reduction or hang
# during execution (https://github.com/AdaptiveCpp/AdaptiveCpp/issues/1309)
set(TESTS_TO_SKIP
${PORTBLAS_UNITTEST}/blas1/blas1_asum_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_sdsdot_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_nrm2_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_dot_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_rot_test.cpp
# Hang during execution (without failing)
${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp
)
endif()

if(${BLAS_ENABLE_EXTENSIONS})
list(APPEND SYCL_UNITTEST_SRCS ${PORTBLAS_UNITTEST}/extension/transpose_test.cpp
@@ -101,6 +103,31 @@ if(is_dpcpp)
)
endif()

if(is_dpcpp AND ${TUNING_TARGET} STREQUAL "DEFAULT")
if (${DPCPP_SYCL_TARGET} MATCHES "nvidia")
set(TESTS_TO_SKIP
${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp
${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp
${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp
${PORTBLAS_UNITTEST}/blas2/blas2_trsv_test.cpp
${PORTBLAS_UNITTEST}/blas3/blas3_trsm_test.cpp
)
message(WARNING "Targetting NVIDIA hardware with DEFAULT TUNING_TARGET.
Disabling tests for following operators: iamax, iamin, trsv, tbsv, tpsv, trsm.")
elseif (${DPCPP_SYCL_TARGET} MATCHES "amd")
set(TESTS_TO_SKIP
${PORTBLAS_UNITTEST}/blas1/blas1_iamax_test.cpp
${PORTBLAS_UNITTEST}/blas1/blas1_iamin_test.cpp
${PORTBLAS_UNITTEST}/blas2/blas2_tbsv_test.cpp
${PORTBLAS_UNITTEST}/blas2/blas2_tpsv_test.cpp
${PORTBLAS_UNITTEST}/blas2/blas2_trsv_test.cpp
)
message(WARNING "Targetting AMD hardware with DEFAULT TUNING_TARGET.
Disabling tests for following operators: iamax, iamin, tbsv, tpsv, trsv.")
endif()
endif()

if(GEMM_TALL_SKINNY_SUPPORT)
list(APPEND SYCL_UNITTEST_SRCS ${PORTBLAS_UNITTEST}/blas3/blas3_gemm_tall_skinny_test.cpp)
endif()
Expand All @@ -112,7 +139,7 @@ set(HALF_DATA_OPS "blas1_axpy_test"
)

foreach(blas_test ${SYCL_UNITTEST_SRCS})
if(is_adaptivecpp AND ${blas_test} IN_LIST ADAPTIVE_CPP_SKIP)
if(${blas_test} IN_LIST TESTS_TO_SKIP)
continue()
endif()
get_filename_component(test_exec ${blas_test} NAME_WE)
