From 5c96f23d1d15c9bdde1cd65f389ff79d29534d8f Mon Sep 17 00:00:00 2001 From: "Aart J.C. Bik" Date: Wed, 22 Jan 2025 11:26:26 -0800 Subject: [PATCH] Implement the () operator on sparse tensors Note that this is a fully functional "locator" for the () operator that works for *all* versatile sparse tensors. Currently, the operator is defined at sparse_tensor level, but it should be moved into tensor_impl after this. Also, clients should always be aware that the () operator for compressed levels is *not* random-access, but involves a search to find if the element is stored. --- examples/sparse_tensor.cu | 36 ++++++++++- include/matx/core/make_sparse_tensor.h | 16 ++++- include/matx/core/sparse_tensor.h | 76 ++++++++++++++++++++++++ include/matx/core/sparse_tensor_format.h | 58 ++++++++++-------- include/matx/core/tensor_impl.h | 3 +- 5 files changed, 159 insertions(+), 30 deletions(-) diff --git a/examples/sparse_tensor.cu b/examples/sparse_tensor.cu index 40cca436..2ac5cb14 100644 --- a/examples/sparse_tensor.cu +++ b/examples/sparse_tensor.cu @@ -41,6 +41,22 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) cudaStream_t stream = 0; cudaExecutor exec{stream}; + // + // Print some formats that are used for the versatile sparse tensor + // type. Note that common formats like COO and CSR have good library + // support in e.g. cuSPARSE, but MatX provides a much more general + // way to define the sparse tensor storage through a DSL (see doc). 
+ // + experimental::Scalar::print(); // scalars + experimental::SpVec::print(); // sparse vectors + experimental::COO::print(); // various sparse matrix formats + experimental::CSR::print(); + experimental::CSC::print(); + experimental::DCSR::print(); + experimental::BSR<2,2>::print(); // 2x2 blocks + experimental::COO4::print(); // 4-dim tensor in COO + experimental::CSF5::print(); // 5-dim tensor in CSF + // // Creates a COO matrix for the following 4x8 dense matrix with 5 nonzero // elements, using the factory method that uses MatX tensors for the 1-dim @@ -71,7 +87,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) // // This shows: // - // tensor_impl_2_f32: Tensor{float} Rank: 2, Sizes:[4, 8], Levels:[4, 8] + // tensor_impl_2_f32: SparseTensor{float} Rank: 2, Sizes:[4, 8], Levels:[4, 8] // nse = 5 // format = ( d0, d1 ) -> ( d0 : compressed(non-unique), d1 : singleton ) // crd[0] = ( 0 0 3 3 3 ) @@ -81,6 +97,24 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) // print(Acoo); + // + // A very naive way to convert the sparse matrix back to a dense + // matrix. Note that one should **never** use the ()-operator in + // performance critical code, since sparse data structures do + // not provide O(1) random access to their elements (compressed + // levels will use some form of search to determine if an element + // is present). Instead, conversions (and other operations) should + // use sparse operations that are tailored for the sparse data + // structure (such as scanning by row for CSR). 
+ // + tensor_t Dense{{m, n}}; + for (index_t i = 0; i < m; i++) { + for (index_t j = 0; j < n; j++) { + Dense(i, j) = Acoo(i, j); + } + } + print(Dense); + // TODO: operations on Acoo MATX_EXIT_HANDLER(); diff --git a/include/matx/core/make_sparse_tensor.h b/include/matx/core/make_sparse_tensor.h index 3cffa46c..6d81565b 100644 --- a/include/matx/core/make_sparse_tensor.h +++ b/include/matx/core/make_sparse_tensor.h @@ -55,11 +55,23 @@ auto make_tensor_coo(ValTensor &val, CrdTensor &row, CrdTensor &col, static_assert(ValTensor::Rank() == 1 && CrdTensor::Rank() == 1); using VAL = typename ValTensor::value_type; using CRD = typename CrdTensor::value_type; - using POS = int; // no positions, although some forms use [0,nse] + using POS = index_t; + // Note that the COO API typically does not involve positions. + // However, under the formal DSL specifications, the top level + // compression should set up pos[0] = {0, nse}. This is done + // here, using the same memory space as the other data. 
+ POS *ptr; + matxMemorySpace_t space = GetPointerKind(val.GetStorage().data()); + matxAlloc((void **)&ptr, 2 * sizeof(POS), space, 0); + ptr[0] = 0; + ptr[1] = val.Size(0); + raw_pointer_buffer> topp{ptr, 2 * sizeof(POS), + owning}; + basic_storage tp{std::move(topp)}; raw_pointer_buffer> emptyp{nullptr, 0, owning}; basic_storage ep{std::move(emptyp)}; return sparse_tensor_t( - shape, val.GetStorage(), {row.GetStorage(), col.GetStorage()}, {ep, ep}); + shape, val.GetStorage(), {row.GetStorage(), col.GetStorage()}, {tp, ep}); } // Constructs a sparse matrix in CSR format directly from the values, the row diff --git a/include/matx/core/sparse_tensor.h b/include/matx/core/sparse_tensor.h index 473325b0..e0a1e03d 100644 --- a/include/matx/core/sparse_tensor.h +++ b/include/matx/core/sparse_tensor.h @@ -109,6 +109,82 @@ class sparse_tensor_t index_t crdSize(int l) const { return coordinates_[l].size() / sizeof(CRD); } index_t posSize(int l) const { return positions_[l].size() / sizeof(POS); } + // Locates position of an element at given indices, or returns -1 when not + // found. + template + __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t + GetPos(index_t *lvlsz, index_t *lvl, index_t pos) const { + if constexpr (L < LVL) { + using ftype = std::tuple_element_t; + if constexpr (ftype::lvltype == LvlType::Dense) { + // Dense level: pos * size + i. + // TODO: see below, use a constexpr GetLvlSize(L) instead? + const index_t dpos = pos * lvlsz[L] + lvl[L]; + if constexpr (L + 1 < LVL) { + return GetPos(lvlsz, lvl, dpos); + } else { + return dpos; + } + } else if constexpr (ftype::lvltype == LvlType::Singleton) { + // Singleton level: pos if crd[pos] == i and next levels match. 
+ if (this->CRDData(L)[pos] == lvl[L]) { + if constexpr (L + 1 < LVL) { + return GetPos(lvlsz, lvl, pos); + } else { + return pos; + } + } + } else if constexpr (ftype::lvltype == LvlType::Compressed || + ftype::lvltype == LvlType::CompressedNonUnique) { + // Compressed level: scan for match on i and test next levels. + const CRD *c = this->CRDData(L); + const POS *p = this->POSData(L); + for (index_t pp = p[pos], hi = p[pos + 1]; pp < hi; pp++) { + if (c[pp] == lvl[L]) { + if constexpr (L + 1 < LVL) { + const index_t cpos = GetPos(lvlsz, lvl, pp); + if constexpr (ftype::lvltype == LvlType::Compressed) { + return cpos; // always end scan (unique) + } else if (cpos != -1) { + return cpos; // only end scan on success (non-unique) + } + } else { + return pp; + } + } + } + } + } + return -1; // not found + } + + // Element getter (viz. "lhs = Acoo(0,0);"). Note that due to the compact + // nature of sparse data structures, these storage formats do not provide + // cheap random access to their elements. Instead, the implementation will + // search for a stored element at the given position (which involves a scan + // at each compressed level). The implicit value zero is returned when the + // element cannot be found. So, although functional for testing, clients + // should avoid using getters inside performance critical regions, since + // the implementation is far worse than O(1). + template + __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ VAL + operator()(Is... indices) const noexcept { + static_assert( + sizeof...(Is) == DIM, + "Number of indices of operator() must match rank of sparse tensor"); + cuda::std::array dim{indices...}; + cuda::std::array lvl; + cuda::std::array lvlsz; + TF::dim2lvl(dim.data(), lvl.data(), /*asSize=*/false); + // TODO: only compute once and provide a constexpr LvlSize(l) instead? 
+ TF::dim2lvl(this->Shape().data(), lvlsz.data(), /*asSize=*/true); + const index_t pos = GetPos(lvlsz.data(), lvl.data(), 0); + if (pos != -1) { + return this->Data()[pos]; + } + return static_cast(0); // implicit zero + } + private: // Primary storage of sparse tensor (explicitly stored element values). StorageV values_; diff --git a/include/matx/core/sparse_tensor_format.h b/include/matx/core/sparse_tensor_format.h index f5b733ef..cdba702d 100644 --- a/include/matx/core/sparse_tensor_format.h +++ b/include/matx/core/sparse_tensor_format.h @@ -255,42 +255,48 @@ template class SparseTensorFormat { template __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ static void dim2lvl(const CRD *dims, CRD *lvls, bool asSize) { - using ftype = std::tuple_element_t; - if constexpr (ftype::expr::op == LvlOp::Id) { - lvls[L] = dims[ftype::expr::di]; - } else if constexpr (ftype::expr::op == LvlOp::Div) { - lvls[L] = dims[ftype::expr::di] / ftype::expr::cj; - } else if constexpr (ftype::expr::op == LvlOp::Mod) { - lvls[L] = - asSize ? ftype::expr::cj : (dims[ftype::expr::di] % ftype::expr::cj); - } - if constexpr (L + 1 < LVL) { - dim2lvl(dims, lvls, asSize); + if constexpr (L < LVL) { + using ftype = std::tuple_element_t; + if constexpr (ftype::expr::op == LvlOp::Id) { + lvls[L] = dims[ftype::expr::di]; + } else if constexpr (ftype::expr::op == LvlOp::Div) { + lvls[L] = dims[ftype::expr::di] / ftype::expr::cj; + } else if constexpr (ftype::expr::op == LvlOp::Mod) { + lvls[L] = asSize ? 
ftype::expr::cj + : (dims[ftype::expr::di] % ftype::expr::cj); + } + if constexpr (L + 1 < LVL) { + dim2lvl(dims, lvls, asSize); + } } } template __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ static void lvl2dim(const CRD *lvls, CRD *dims) { - using ftype = std::tuple_element_t; - if constexpr (ftype::expr::op == LvlOp::Id) { - dims[ftype::expr::di] = lvls[L]; - } else if constexpr (ftype::expr::op == LvlOp::Div) { - dims[ftype::expr::di] = lvls[L] * ftype::expr::cj; - } else if constexpr (ftype::expr::op == LvlOp::Mod) { - dims[ftype::expr::di] += lvls[L]; // update (seen second) - } - if constexpr (L + 1 < LVL) { - lvl2dim(lvls, dims); + if constexpr (L < LVL) { + using ftype = std::tuple_element_t; + if constexpr (ftype::expr::op == LvlOp::Id) { + dims[ftype::expr::di] = lvls[L]; + } else if constexpr (ftype::expr::op == LvlOp::Div) { + dims[ftype::expr::di] = lvls[L] * ftype::expr::cj; + } else if constexpr (ftype::expr::op == LvlOp::Mod) { + dims[ftype::expr::di] += lvls[L]; // update (seen second) + } + if constexpr (L + 1 < LVL) { + lvl2dim(lvls, dims); + } } } template static void printLevel() { - using ftype = std::tuple_element_t; - std::cout << " " << ftype::toString(); - if constexpr (L + 1 < LVL) { - std::cout << ","; - printLevel(); + if constexpr (L < LVL) { + using ftype = std::tuple_element_t; + std::cout << " " << ftype::toString(); + if constexpr (L + 1 < LVL) { + std::cout << ","; + printLevel(); + } } } diff --git a/include/matx/core/tensor_impl.h b/include/matx/core/tensor_impl.h index 4bfe1586..12fff7ce 100644 --- a/include/matx/core/tensor_impl.h +++ b/include/matx/core/tensor_impl.h @@ -637,7 +637,8 @@ MATX_IGNORE_WARNING_POP_GCC * @return * A shape of the data with the appropriate dimensions set */ - __MATX_INLINE__ auto Shape() const noexcept { return this->desc_.Shape(); } + __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ + auto Shape() const noexcept { return this->desc_.Shape(); } /** * Get the strides the tensor from the underlying data