Skip to content

Commit

Permalink
First version of MATX sparse2dense conversion (dispatch to cuSPARSE) (#…
Browse files Browse the repository at this point in the history
…856)

* First version of MATX dense2sparse conversion (using dispatch to cuSPARSE)
  • Loading branch information
aartbik authored Feb 4, 2025
1 parent 55dd664 commit 8bf818a
Show file tree
Hide file tree
Showing 7 changed files with 446 additions and 42 deletions.
25 changes: 17 additions & 8 deletions examples/sparse_tensor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -90,24 +90,33 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
//
// A very naive way to convert the sparse matrix back to a dense
// matrix. Note that one should **never** use the ()-operator in
// performance critical code, since sparse data structures do
// performance critical code, since sparse storage formats do
// not provide O(1) random access to their elements (compressed
// levels will use some form of search to determine if an element
// is present). Instead, conversions (and other operations) should
// use sparse operations that are tailored for the sparse data
// structure (such as scanning by row for CSR).
// use sparse operations that are tailored for the sparse storage
// format (such as scanning by row for CSR).
//
auto A = make_tensor<float>({4, 8});
auto A1 = make_tensor<float>({4, 8});
for (index_t i = 0; i < 4; i++) {
for (index_t j = 0; j < 8; j++) {
A(i, j) = Acoo(i, j);
A1(i, j) = Acoo(i, j);
}
}
print(A);
print(A1);

//
// SpMM is implemented on COO through cuSPARSE. This is the
// correct way of performing an efficient sparse operation.
// A direct sparse2dense conversion. This is the correct way of
// performing the conversion, since the underlying implementation
// knows how to properly manipulate the sparse storage format.
//
auto A2 = make_tensor<float>({4, 8});
(A2 = sparse2dense(Acoo)).run(exec);
print(A2);

//
// Perform a direct SpMM. This is also the correct way of performing
// an efficient sparse operation.
//
auto B = make_tensor<float, 2>({8, 4});
auto C = make_tensor<float>({4, 4});
Expand Down
18 changes: 18 additions & 0 deletions include/matx/core/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <cublas_v2.h>
#include <cuda/std/complex>
#include <cuda/std/tuple>
#include <cusparse.h>
#include <type_traits>

#include "cuda_fp16.h"
Expand Down Expand Up @@ -1166,6 +1167,23 @@ template <typename T> constexpr cublasComputeType_t MatXTypeToCudaComputeType()

return CUBLAS_COMPUTE_32F;
}

/**
 * Map an integral index type onto the corresponding cuSPARSE index type
 * enumerator.
 *
 * Supported types are uint16_t, int32_t, int64_t, and index_t. Any other type
 * is rejected at compile time. (Without that rejection, control would flow off
 * the end of this non-void function for an unsupported type, which is
 * undefined behavior.)
 *
 * @tparam T
 *   Integral type to translate
 *
 * @return
 *   cuSPARSE index type enumerator that corresponds to T
 */
template <typename T>
constexpr cusparseIndexType_t MatXTypeToCuSparseIndexType() {
  if constexpr (std::is_same_v<T, uint16_t>) {
    return CUSPARSE_INDEX_16U;
  } else if constexpr (std::is_same_v<T, int32_t>) {
    return CUSPARSE_INDEX_32I;
  } else if constexpr (std::is_same_v<T, int64_t> ||
                       std::is_same_v<T, index_t>) {
    // index_t is tested explicitly in addition to int64_t; presumably it can
    // be a distinct type from int64_t on some platforms -- TODO confirm.
    return CUSPARSE_INDEX_64I;
  } else {
    // The condition depends on T, so it is only evaluated when this branch is
    // actually instantiated, i.e. for an unsupported type.
    static_assert(sizeof(T) == 0,
                  "MatXTypeToCuSparseIndexType: unsupported index type");
  }
}

} // end namespace detail

} // end namespace matx
1 change: 1 addition & 0 deletions include/matx/operators/operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
#include "matx/operators/shift.h"
#include "matx/operators/sign.h"
#include "matx/operators/slice.h"
#include "matx/operators/sparse2dense.h"
#include "matx/operators/solve.h"
#include "matx/operators/sort.h"
#include "matx/operators/sph2cart.h"
Expand Down
146 changes: 146 additions & 0 deletions include/matx/operators/sparse2dense.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
////////////////////////////////////////////////////////////////////////////////
// BSD 3-Clause License
//
// Copyright (c) 2025, NVIDIA Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/////////////////////////////////////////////////////////////////////////////////

#pragma once

#include "matx/core/type_utils.h"
#include "matx/operators/base_operator.h"
#include "matx/transforms/convert/sparse2dense_cusparse.h"

namespace matx {
namespace detail {

/**
 * Transform operator that materializes a sparse tensor as a dense tensor.
 *
 * The operator records the sizes of its input at construction time. When used
 * inside a larger expression, PreRun() sets up a temporary dense tensor,
 * fills it by calling Exec(), and operator() then reads from that temporary.
 * PostRun() releases the temporary again.
 *
 * @tparam OpA
 *   Type of the input operand; must be a sparse tensor type
 */
template <typename OpA>
class Sparse2DenseOp : public BaseOp<Sparse2DenseOp<OpA>> {
private:
  // Input operand that is converted.
  typename detail::base_type_t<OpA> a_;

  // The output has the same rank and the same per-dimension sizes as the
  // input.
  static constexpr int out_rank = OpA::Rank();
  cuda::std::array<index_t, out_rank> out_dims_;

  // Temporary dense result and its backing pointer. Both are set up through
  // detail::AllocateTempTensor() in PreRun(); ptr is passed to matxFree() in
  // PostRun().
  mutable detail::tensor_impl_t<typename OpA::value_type, out_rank> tmp_out_;
  mutable typename OpA::value_type *ptr = nullptr;

public:
  using matxop = bool;
  using matx_transform_op = bool;
  using sparse2dense_xform_op = bool;
  using value_type = typename OpA::value_type;

  /**
   * Construct the operator from its input and capture the input's sizes.
   *
   * @param a
   *   Input operand
   */
  __MATX_INLINE__ Sparse2DenseOp(const OpA &a) : a_(a) {
    for (int dim = 0; dim < Rank(); ++dim) {
      out_dims_[dim] = a_.Size(dim);
    }
  }

  /// Human-readable description of this operator and its input.
  __MATX_INLINE__ std::string str() const {
    std::string description = "sparse2dense(";
    description += get_type_str(a_);
    description += ")";
    return description;
  }

  /// Pointer that backs the temporary result (nullptr until PreRun() ran).
  __MATX_HOST__ __MATX_INLINE__ auto Data() const noexcept { return ptr; }

  /// Element access; forwards the indices to the temporary dense result.
  template <typename... Is>
  __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto)
  operator()(Is... indices) const {
    return tmp_out_(indices...);
  }

  /// Rank of the result, identical to the rank of the input.
  static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t
  Rank() {
    return remove_cvref_t<OpA>::Rank();
  }

  /// Size of the result along dimension dim, as captured at construction.
  constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t
  Size(int dim) const {
    return out_dims_[dim];
  }

  /**
   * Run the conversion into the first element of the output tuple.
   *
   * Throws matxNotSupported when the input is not a sparse tensor or when
   * the output is a sparse tensor.
   *
   * @param out
   *   Tuple whose first element receives the dense result
   * @param ex
   *   Executor that is handed on to sparse2dense_impl()
   */
  template <typename Out, typename Executor>
  void Exec([[maybe_unused]] Out &&out, [[maybe_unused]] Executor &&ex) const {
    if constexpr (!is_sparse_tensor_v<OpA>) {
      MATX_THROW(matxNotSupported, "Cannot use sparse2dense on dense input");
    } else {
      auto dense_out = cuda::std::get<0>(out);
      using OutType = decltype(dense_out);
      if constexpr (is_sparse_tensor_v<OutType>) {
        MATX_THROW(matxNotSupported,
                   "Cannot use sparse2dense for sparse output");
      } else {
        sparse2dense_impl(dense_out, a_, ex);
      }
    }
  }

  /// Compile-time check only: the input must be a sparse tensor.
  template <typename ShapeType, typename Executor>
  __MATX_INLINE__ void
  InnerPreRun([[maybe_unused]] ShapeType &&shape,
              [[maybe_unused]] Executor &&ex) const noexcept {
    static_assert(is_sparse_tensor_v<OpA>,
                  "Cannot use sparse2dense on dense input");
  }

  /**
   * Prepare the operator for element access: set up the temporary result
   * and run the conversion into it.
   */
  template <typename ShapeType, typename Executor>
  __MATX_INLINE__ void PreRun([[maybe_unused]] ShapeType &&shape,
                              [[maybe_unused]] Executor &&ex) const noexcept {
    InnerPreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));

    // The temporary must exist before Exec() writes the dense result to it.
    detail::AllocateTempTensor(tmp_out_, std::forward<Executor>(ex), out_dims_,
                               &ptr);

    Exec(cuda::std::make_tuple(tmp_out_), std::forward<Executor>(ex));
  }

  /// Release the pointer that was handed out for the temporary result.
  template <typename ShapeType, typename Executor>
  __MATX_INLINE__ void PostRun([[maybe_unused]] ShapeType &&shape,
                               [[maybe_unused]] Executor &&ex) const noexcept {
    static_assert(is_sparse_tensor_v<OpA>,
                  "Cannot use sparse2dense on dense input");
    matxFree(ptr);
  }
};

} // end namespace detail

/**
 * Create an operator that converts a sparse tensor into a dense tensor.
 *
 * @tparam OpA
 *   Type of the input operand A
 *
 * @param A
 *   Sparse input tensor
 *
 * @return
 *   Operator that yields the dense form of A
 */
template <typename OpA> __MATX_INLINE__ auto sparse2dense(const OpA &A) {
  // The class template argument is deduced from A.
  auto dense_op = detail::Sparse2DenseOp(A);
  return dense_op;
}

} // end namespace matx
Loading

0 comments on commit 8bf818a

Please sign in to comment.