diff --git a/LICENSE b/LICENSE index 19f87ab..2250188 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2017, King Abdullah University of Science and Technology +Copyright (c) 2012-, King Abdullah University of Science and Technology All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/src/batch_triangular/Xblas_core.ch b/src/batch_triangular/Xblas_core.ch index 1ec84f3..2d55061 100644 --- a/src/batch_triangular/Xblas_core.ch +++ b/src/batch_triangular/Xblas_core.ch @@ -31,7 +31,9 @@ void kblas_gemm_batch_wsquery(kblasHandle_t handle, int B_row_off, int B_col_off, int C_row_off, int C_col_off); -/// Non-strided batch gemm with offset +/** + * @brief Uniform-size batch non-strided GEMM with offset wrapper routine + */ int kblas_gemm_batch( kblasHandle_t handle, char transA, char transB, const int m, const int n, const int k, diff --git a/src/batch_triangular/Xgemm_batch.cu b/src/batch_triangular/Xgemm_batch.cu index 48e233c..3e525a2 100644 --- a/src/batch_triangular/Xgemm_batch.cu +++ b/src/batch_triangular/Xgemm_batch.cu @@ -42,10 +42,15 @@ //Non-Strided form /** - * @brief Uniform-size batch non-strided GEMM with offset wrapper routine - * * Workspace needed: device pointers * + * @param[in] A_row_off row offset to sub-matrix of all A's + * @param[in] A_col_off column offset to sub-matrix of all A's + * @param[in] B_row_off row offset to sub-matrix of all B's + * @param[in] B_col_off column offset to sub-matrix of all B's + * @param[in] C_row_off row offset to sub-matrix of all C's + * @param[in] C_col_off column offset to sub-matrix of all C's + * @see kblasSgemm_batch() for details about rest of params. * A, B, C: host pointer to array of device pointers to device buffers */ int kblas_gemm_batch( kblasHandle_t handle, @@ -68,6 +73,7 @@ int kblas_gemm_batch( kblasHandle_t handle, batchCount); } +// Workspace needed: none int kblas_gemm_batch( kblasHandle_t handle, char transA, char transB, const int m, const int n, const int k, @@ -87,6 +93,8 @@ int kblas_gemm_batch( kblasHandle_t handle, C_array, 0, 0, ldc, batchCount); } + +// Workspace needed: none extern "C" int kblasXgemm_batch( kblasHandle_t handle, char transA, char transB,