First version of MATX sparse2dense conversion (dispatch to cuSPARSE) (#856)

* First version of MATX sparse2dense conversion (using dispatch to cuSPARSE)
NVIDIA · Feb 4, 2025 · 8bf818a · 8bf818a
1 parent 55dd664
commit 8bf818a
Show file tree

Hide file tree

Showing 7 changed files with 446 additions and 42 deletions.
diff --git a/examples/sparse_tensor.cu b/examples/sparse_tensor.cu
@@ -90,24 +90,33 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   //
   // A very naive way to convert the sparse matrix back to a dense
   // matrix. Note that one should **never** use the ()-operator in
-  // performance critical code, since sparse data structures do
+  // performance critical code, since sparse storage formats do
   // not provide O(1) random access to their elements (compressed
   // levels will use some form of search to determine if an element
   // is present). Instead, conversions (and other operations) should
-  // use sparse operations that are tailored for the sparse data
-  // structure (such as scanning by row for CSR).
+  // use sparse operations that are tailored for the sparse storage
+  // format (such as scanning by row for CSR).
   //
-  auto A = make_tensor<float>({4, 8});
+  auto A1 = make_tensor<float>({4, 8});
   for (index_t i = 0; i < 4; i++) {
     for (index_t j = 0; j < 8; j++) {
-      A(i, j) = Acoo(i, j);
+      A1(i, j) = Acoo(i, j);
     }
   }
-  print(A);
+  print(A1);
 
   //
-  // SpMM is implemented on COO through cuSPARSE. This is the
-  // correct way of performing an efficient sparse operation.
+  // A direct sparse2dense conversion. This is the correct way of
+  // performing the conversion, since the underlying implementation
+  // knows how to properly manipulate the sparse storage format.
+  //
+  auto A2 = make_tensor<float>({4, 8});
+  (A2 = sparse2dense(Acoo)).run(exec);
+  print(A2);
+
+  //
+  // Perform a direct SpMM. This is also the correct way of performing
+  // an efficient sparse operation.
   //
   auto B = make_tensor<float, 2>({8, 4});
   auto C = make_tensor<float>({4, 4});

diff --git a/include/matx/core/type_utils.h b/include/matx/core/type_utils.h
@@ -38,6 +38,7 @@
 #include <cublas_v2.h>
 #include <cuda/std/complex>
 #include <cuda/std/tuple>
+#include <cusparse.h>
 #include <type_traits>
 
 #include "cuda_fp16.h"
@@ -1166,6 +1167,23 @@ template <typename T> constexpr cublasComputeType_t MatXTypeToCudaComputeType()
 
   return CUBLAS_COMPUTE_32F;
 }
+
+/**
+ * Translate an index type into the corresponding cuSPARSE index type
+ * enumerator.
+ *
+ * @tparam T
+ *   Index type to translate; one of uint16_t, int32_t, int64_t, or index_t
+ *
+ * @return
+ *   The cusparseIndexType_t enumerator selected for T. Any other T is
+ *   rejected at compile time instead of flowing off the end of the function
+ *   without returning a value.
+ */
+template <typename T>
+constexpr cusparseIndexType_t MatXTypeToCuSparseIndexType() {
+  if constexpr (std::is_same_v<T, uint16_t>) {
+    return CUSPARSE_INDEX_16U;
+  } else if constexpr (std::is_same_v<T, int32_t>) {
+    return CUSPARSE_INDEX_32I;
+  } else if constexpr (std::is_same_v<T, int64_t>) {
+    return CUSPARSE_INDEX_64I;
+  } else if constexpr (std::is_same_v<T, index_t>) {
+    // Only reached when index_t is a type distinct from the fixed-width
+    // types tested above.
+    // NOTE(review): assumes index_t is 64 bits wide in that case -- confirm.
+    return CUSPARSE_INDEX_64I;
+  } else {
+    // The condition depends on T, so it is only evaluated for instantiations
+    // that actually reach this branch.
+    static_assert(!std::is_same_v<T, T>,
+                  "Unsupported index type for cuSPARSE");
+  }
+}
+
 } // end namespace detail
 
 } // end namespace matx
diff --git a/include/matx/operators/operators.h b/include/matx/operators/operators.h
@@ -99,6 +99,7 @@
 #include "matx/operators/shift.h"
 #include "matx/operators/sign.h"
 #include "matx/operators/slice.h"
+#include "matx/operators/sparse2dense.h"
 #include "matx/operators/solve.h"
 #include "matx/operators/sort.h"
 #include "matx/operators/sph2cart.h"

diff --git a/include/matx/operators/sparse2dense.h b/include/matx/operators/sparse2dense.h
@@ -0,0 +1,146 @@
+////////////////////////////////////////////////////////////////////////////////
+// BSD 3-Clause License
+//
+// Copyright (c) 2025, NVIDIA Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+//    list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include "matx/core/type_utils.h"
+#include "matx/operators/base_operator.h"
+#include "matx/transforms/convert/sparse2dense_cusparse.h"
+
+namespace matx {
+namespace detail {
+
+/**
+ * Transform operator behind sparse2dense().
+ *
+ * The operator remembers the extents of its input at construction. It can
+ * either write straight into a caller-provided output through Exec(), or
+ * materialize the result in a temporary tensor during PreRun() that is then
+ * read element-wise through operator().
+ *
+ * @tparam OpA
+ *   Type of the input operand; must be a sparse tensor
+ */
+template <typename OpA>
+class Sparse2DenseOp : public BaseOp<Sparse2DenseOp<OpA>> {
+private:
+  // Input operand.
+  typename detail::base_type_t<OpA> a_;
+
+  // The output has the same rank and extents as the input.
+  static constexpr int out_rank = OpA::Rank();
+  cuda::std::array<index_t, out_rank> out_dims_;
+
+  // Temporary output tensor, set up by PreRun().
+  mutable detail::tensor_impl_t<typename OpA::value_type, out_rank> tmp_out_;
+
+  // Pointer handed to AllocateTempTensor() in PreRun() and passed to
+  // matxFree() in PostRun().
+  mutable typename OpA::value_type *ptr = nullptr;
+
+public:
+  using matxop = bool;
+  using matx_transform_op = bool;
+  using sparse2dense_xform_op = bool;
+  using value_type = typename OpA::value_type;
+
+  /**
+   * Store the input operand and record its size in every dimension.
+   *
+   * @param a
+   *   Input operand
+   */
+  __MATX_INLINE__ Sparse2DenseOp(const OpA &a) : a_(a) {
+    for (int dim = 0; dim < out_rank; ++dim) {
+      out_dims_[dim] = a_.Size(dim);
+    }
+  }
+
+  /// Textual description of this operator, including its operand.
+  __MATX_INLINE__ std::string str() const {
+    const auto operand = get_type_str(a_);
+    return "sparse2dense(" + operand + ")";
+  }
+
+  /// Raw pointer associated with the temporary output (nullptr by default).
+  __MATX_HOST__ __MATX_INLINE__ auto Data() const noexcept { return ptr; }
+
+  /// Element access; reads from the temporary output tensor.
+  template <typename... Is>
+  __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto)
+  operator()(Is... indices) const {
+    return tmp_out_(indices...);
+  }
+
+  /// Rank of the output, identical to the rank of the input.
+  static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t
+  Rank() {
+    return remove_cvref_t<OpA>::Rank();
+  }
+
+  /// Size of the output along dimension `dim`, as recorded at construction.
+  constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t
+  Size(int dim) const {
+    return out_dims_[dim];
+  }
+
+  /**
+   * Run the conversion into the first element of `out`.
+   *
+   * @param out
+   *   Tuple whose first element is the output tensor
+   * @param ex
+   *   Executor forwarded to sparse2dense_impl()
+   *
+   * Throws matxNotSupported when the input is not a sparse tensor or when
+   * the output is a sparse tensor.
+   */
+  template <typename Out, typename Executor>
+  void Exec([[maybe_unused]] Out &&out, [[maybe_unused]] Executor &&ex) const {
+    if constexpr (!is_sparse_tensor_v<OpA>) {
+      MATX_THROW(matxNotSupported, "Cannot use sparse2dense on dense input");
+    } else {
+      auto dst = cuda::std::get<0>(out);
+      if constexpr (is_sparse_tensor_v<decltype(dst)>) {
+        MATX_THROW(matxNotSupported,
+                   "Cannot use sparse2dense for sparse output");
+      } else {
+        sparse2dense_impl(dst, a_, ex);
+      }
+    }
+  }
+
+  /// Compile-time check only: the input must be a sparse tensor.
+  template <typename ShapeType, typename Executor>
+  __MATX_INLINE__ void
+  InnerPreRun([[maybe_unused]] ShapeType &&shape,
+              [[maybe_unused]] Executor &&ex) const noexcept {
+    static_assert(is_sparse_tensor_v<OpA>,
+                  "Cannot use sparse2dense on dense input");
+  }
+
+  /**
+   * Set up the temporary output tensor and run the conversion into it, so
+   * that operator() can subsequently read the dense result.
+   */
+  template <typename ShapeType, typename Executor>
+  __MATX_INLINE__ void PreRun([[maybe_unused]] ShapeType &&shape,
+                              [[maybe_unused]] Executor &&ex) const noexcept {
+    InnerPreRun(std::forward<ShapeType>(shape), std::forward<Executor>(ex));
+    detail::AllocateTempTensor(tmp_out_, std::forward<Executor>(ex), out_dims_,
+                               &ptr);
+    auto outputs = cuda::std::make_tuple(tmp_out_);
+    Exec(outputs, std::forward<Executor>(ex));
+  }
+
+  /// Hand the pointer obtained in PreRun() to matxFree().
+  template <typename ShapeType, typename Executor>
+  __MATX_INLINE__ void PostRun([[maybe_unused]] ShapeType &&shape,
+                               [[maybe_unused]] Executor &&ex) const noexcept {
+    static_assert(is_sparse_tensor_v<OpA>,
+                  "Cannot use sparse2dense on dense input");
+    matxFree(ptr);
+  }
+};
+
+} // end namespace detail
+
+/**
+ * Create an operator that converts a sparse tensor into a dense tensor.
+ *
+ * @tparam OpA
+ *    Type of the sparse input A
+ *
+ * @param A
+ *   Sparse input tensor
+ *
+ * @return
+ *   Operator that produces the dense tensor
+ */
+template <typename OpA>
+__MATX_INLINE__ auto sparse2dense(const OpA &A) {
+  return detail::Sparse2DenseOp(A);
+}
+
+} // end namespace matx