Skip to content

Commit

Permalink
NVRTC kernels for cast-transpose (NVIDIA#258)
Browse files (browse the repository at this point in the history)
* Add NVRTC kernels for cast-transpose

Signed-off-by: Tim Moon <tmoon@nvidia.com>

* Update copyright year

Signed-off-by: Tim Moon <tmoon@nvidia.com>

* Add noop flag to NVRTC cast-transpose kernel

Signed-off-by: Tim Moon <tmoon@nvidia.com>

* Apply suggestions from code review

Signed-off-by: Tim Moon <4406448+timmoon10@users.noreply.github.com>

---------

Signed-off-by: Tim Moon <tmoon@nvidia.com>
Signed-off-by: Tim Moon <4406448+timmoon10@users.noreply.github.com>
Signed-off-by: Pawel Gadzinski <pgadzinski@nvidia.com>
  • Loading branch information
timmoon10 authored and pggPL committed May 16, 2024
1 parent 35dfb3a commit d5c62b9
Show file tree
Hide file tree
Showing 5 changed files with 550 additions and 457 deletions.
5 changes: 4 additions & 1 deletion tests/cpp/operator/test_cast_transpose.cu
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ std::vector<std::pair<size_t, size_t>> test_cases = {{2048, 12288},
{65536, 128},
{256, 256},
{120, 2080},
{8, 8}};
{8, 8},
{1, 3221}, // Prime 456
{2333, 1}, // Prime 345
{1481, 677}}; // Primes 234, 123
} // namespace

class CTTestSuite : public ::testing::TestWithParam<std::tuple<transformer_engine::DType,
Expand Down
6 changes: 4 additions & 2 deletions transformer_engine/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,12 @@ endfunction()
list(GET CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES 0 cuda_include_path)
make_string_header("${cuda_include_path}"
string_path_cuda_include)
make_string_header_from_file(utils.cuh
string_code_utils_cuh)
make_string_header_from_file(transpose/rtc/cast_transpose.cu
string_code_transpose_rtc_cast_transpose_cu)
make_string_header_from_file(transpose/rtc/transpose.cu
string_code_transpose_rtc_transpose_cu)
make_string_header_from_file(utils.cuh
string_code_utils_cuh)
target_include_directories(transformer_engine PRIVATE
"${CMAKE_CURRENT_BINARY_DIR}/string_headers")

Expand Down
Loading

0 comments on commit d5c62b9

Please sign in to comment.