lib/ExecutionEngine/SparseTensorUtils.cpp

8a91bc7bSHarrietAkot//===- SparseTensorUtils.cpp - Sparse Tensor Utils for MLIR execution -----===//
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8a91bc7bSHarrietAkot// See https://llvm.org/LICENSE.txt for license information.
8a91bc7bSHarrietAkot// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot//===----------------------------------------------------------------------===//
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// This file implements a light-weight runtime support library that is useful
8a91bc7bSHarrietAkot// for sparse tensor manipulations. The functionality provided in this library
8a91bc7bSHarrietAkot// is meant to simplify benchmarking, testing, and debugging MLIR code that
8a91bc7bSHarrietAkot// operates on sparse tensors. The provided functionality is **not** part
8a91bc7bSHarrietAkot// of core MLIR, however.
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot//===----------------------------------------------------------------------===//
8a91bc7bSHarrietAkot
845561ecSwren romano#include "mlir/ExecutionEngine/SparseTensorUtils.h"
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot#ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot#include <algorithm>
8a91bc7bSHarrietAkot#include <cassert>
8a91bc7bSHarrietAkot#include <cctype>
8a91bc7bSHarrietAkot#include <cstdio>
8a91bc7bSHarrietAkot#include <cstdlib>
8a91bc7bSHarrietAkot#include <cstring>
efa15f41SAart Bik#include <fstream>
753fe330Swren romano#include <functional>
efa15f41SAart Bik#include <iostream>
4d0a18d0Swren romano#include <limits>
8a91bc7bSHarrietAkot#include <numeric>
736c1b66SAart Bik
8a91bc7bSHarrietAkot//===----------------------------------------------------------------------===//
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// Internal support for storing and reading sparse tensors.
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// The following memory-resident sparse storage schemes are supported:
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// (a) A coordinate scheme for temporarily storing and lexicographically
8a91bc7bSHarrietAkot//     sorting a sparse tensor by index (SparseTensorCOO).
8a91bc7bSHarrietAkot//
// (b) A "one-size-fits-all" sparse tensor storage scheme defined by
//     per-dimension sparse/dense annotations together with a dimension
//     ordering used by MLIR compiler-generated code (SparseTensorStorage).
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// The following external formats are supported:
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// (1) Matrix Market Exchange (MME): *.mtx
8a91bc7bSHarrietAkot//     https://math.nist.gov/MatrixMarket/formats.html
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns
8a91bc7bSHarrietAkot//     http://frostt.io/tensors/file-formats.html
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// Two public APIs are supported:
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// (I) Methods operating on MLIR buffers (memrefs) to interact with sparse
8a91bc7bSHarrietAkot//     tensors. These methods should be used exclusively by MLIR
8a91bc7bSHarrietAkot//     compiler-generated code.
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// (II) Methods that accept C-style data structures to interact with sparse
8a91bc7bSHarrietAkot//      tensors. These methods can be used by any external runtime that wants
8a91bc7bSHarrietAkot//      to interact with MLIR compiler-generated code.
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot// In both cases (I) and (II), the SparseTensorStorage format is externally
8a91bc7bSHarrietAkot// only visible as an opaque pointer.
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot//===----------------------------------------------------------------------===//
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkotnamespace {
8a91bc7bSHarrietAkot
/// Fixed width constant of 1025 characters.
/// NOTE(review): presumably the size of the line buffer used by the file
/// readers further down in this file -- confirm against its users.
constexpr int kColWidth = 1025;
03fe15ceSAart Bik
/// Multiplies two `uint64_t` values and returns the product.  When
/// assertions are enabled, the call aborts if the mathematical product
/// would not fit into 64 bits.
static inline uint64_t checkedMul(uint64_t lhs, uint64_t rhs) {
  constexpr uint64_t kMax = std::numeric_limits<uint64_t>::max();
  // A zero `lhs` can never overflow; testing it first also keeps the
  // division below from dividing by zero.
  const bool fits = (lhs == 0) || (rhs <= kMax / lhs);
  assert(fits && "Integer overflow");
  (void)fits; // Only consumed by the assertion.
  return lhs * rhs;
}
72ec2f76Swren romano
// Reports a fatal error and terminates the process.
//
// The arguments are a `printf`-style format string literal followed by its
// arguments.  The message is written to `stderr` with a "SparseTensorUtils: "
// prefix and the process then exits with status 1.  The prefix gives some
// indication of where the error is coming from: since `fprintf` doesn't
// provide a stacktrace, this makes it easier to tell whether an error
// originates in this file or somewhere else in MLIR.
//
// The body is wrapped in `do { ... } while (0)` so that an invocation
// followed by a semicolon forms exactly one statement; this keeps the macro
// usable as the unbraced branch of an `if`/`else`.
#define FATAL(...)                                                             \
  do {                                                                         \
    fprintf(stderr, "SparseTensorUtils: " __VA_ARGS__);                        \
    exit(1);                                                                   \
  } while (0)
774674ceSwren romano
// TODO: adjust this so it can be used by `openSparseTensorCOO` too.
// That version doesn't have the permutation, and the `dimSizes` are
// a pointer/C-array rather than `std::vector`.
//
/// Debug-only consistency check between stored sizes and a requested shape.
/// `dimSizes` is given in target-order, `shape` in semantic-order, and
/// `perm` maps semantic-order to target-order.  For every semantic
/// dimension, the requested size must either be zero (which is accepted
/// for any stored size) or equal the stored size of the permuted dimension.
/// All checks are assertions, so nothing is verified when assertions are
/// compiled out.
///
/// Precondition: `perm` and `shape` must be valid for `rank`.
static inline void
assertPermutedSizesMatchShape(const std::vector<uint64_t> &dimSizes,
                              uint64_t rank, const uint64_t *perm,
                              const uint64_t *shape) {
  assert(perm != nullptr && shape != nullptr);
  assert(dimSizes.size() == rank && "Rank mismatch");
  for (uint64_t d = 0; d != rank; ++d) {
    assert((shape[d] == 0 || dimSizes[perm[d]] == shape[d]) &&
           "Dimension size mismatch");
  }
}
8cb33240Swren romano
/// One entry of a sparse tensor in coordinate scheme: a value together
/// with the indices at which it lives.  A rank-1 vector element looks like
///   ({i}, a[i])
/// and a rank-5 tensor element like
///   ({i,j,k,l,m}, a[i,j,k,l,m])
/// The indices are not stored inline; the element only holds a pointer
/// into an index pool shared by all elements.  Compared to e.g. a direct
/// vector per element this (1) reduces the per-element memory footprint,
/// and (2) centralizes the memory reservation and (re)allocation to one
/// place.
template <typename V>
struct Element final {
  /// Pairs the given index pointer with the given value; the pointed-to
  /// indices are neither copied nor owned.
  Element(uint64_t *ind, V val) : indices(ind), value(val) {}

  /// Start of this element's indices inside the shared index pool.
  uint64_t *indices;
  /// The stored value.
  V value;
};
8a91bc7bSHarrietAkot
/// Callback type for code that receives tensor elements one at a time.
/// The callback is handed the coordinates (as a vector) and the value as
/// two separate arguments instead of a packaged `Element` object, because
/// this helps keep code somewhat cleaner.  Note that the alias itself is
/// already a const-reference type.
template <typename V>
using ElementConsumer =
    const std::function<void(const std::vector<uint64_t> &, V)> &;
753fe330Swren romano
8a91bc7bSHarrietAkot/// A memory-resident sparse tensor in coordinate scheme (collection of
8a91bc7bSHarrietAkot/// elements). This data structure is used to read a sparse tensor from
8a91bc7bSHarrietAkot/// any external format into memory and sort the elements lexicographically
8a91bc7bSHarrietAkot/// by indices before passing it back to the client (most packed storage
8a91bc7bSHarrietAkot/// formats require the elements to appear in lexicographic index order).
8a91bc7bSHarrietAkottemplate <typename V>
76944420Swren romanostruct SparseTensorCOO final {
8a91bc7bSHarrietAkotpublic:
fa6aed2aSwren romano  SparseTensorCOO(const std::vector<uint64_t> &dimSizes, uint64_t capacity)
fa6aed2aSwren romano      : dimSizes(dimSizes) {
ccd047cbSAart Bik    if (capacity) {
8a91bc7bSHarrietAkot      elements.reserve(capacity);
ccd047cbSAart Bik      indices.reserve(capacity * getRank());
8a91bc7bSHarrietAkot    }
ccd047cbSAart Bik  }
ccd047cbSAart Bik
8a91bc7bSHarrietAkot  /// Adds element as indices and value.
8a91bc7bSHarrietAkot  void add(const std::vector<uint64_t> &ind, V val) {
8a91bc7bSHarrietAkot    assert(!iteratorLocked && "Attempt to add() after startIterator()");
ccd047cbSAart Bik    uint64_t *base = indices.data();
ccd047cbSAart Bik    uint64_t size = indices.size();
8a91bc7bSHarrietAkot    uint64_t rank = getRank();
fa6aed2aSwren romano    assert(ind.size() == rank && "Element rank mismatch");
ccd047cbSAart Bik    for (uint64_t r = 0; r < rank; r++) {
fa6aed2aSwren romano      assert(ind[r] < dimSizes[r] && "Index is too large for the dimension");
ccd047cbSAart Bik      indices.push_back(ind[r]);
8a91bc7bSHarrietAkot    }
ccd047cbSAart Bik    // This base only changes if indices were reallocated. In that case, we
ccd047cbSAart Bik    // need to correct all previous pointers into the vector. Note that this
ccd047cbSAart Bik    // only happens if we did not set the initial capacity right, and then only
ccd047cbSAart Bik    // for every internal vector reallocation (which with the doubling rule
ccd047cbSAart Bik    // should only incur an amortized linear overhead).
298d2fa1SMehdi Amini    uint64_t *newBase = indices.data();
298d2fa1SMehdi Amini    if (newBase != base) {
ccd047cbSAart Bik      for (uint64_t i = 0, n = elements.size(); i < n; i++)
298d2fa1SMehdi Amini        elements[i].indices = newBase + (elements[i].indices - base);
298d2fa1SMehdi Amini      base = newBase;
ccd047cbSAart Bik    }
ccd047cbSAart Bik    // Add element as (pointer into shared index pool, value) pair.
ccd047cbSAart Bik    elements.emplace_back(base + size, val);
ccd047cbSAart Bik  }
ccd047cbSAart Bik
8a91bc7bSHarrietAkot  /// Sorts elements lexicographically by index.
8a91bc7bSHarrietAkot  void sort() {
8a91bc7bSHarrietAkot    assert(!iteratorLocked && "Attempt to sort() after startIterator()");
cf358253Swren romano    // TODO: we may want to cache an `isSorted` bit, to avoid
cf358253Swren romano    // unnecessary/redundant sorting.
ccd047cbSAart Bik    uint64_t rank = getRank();
aff9c89fSwren romano    std::sort(elements.begin(), elements.end(),
aff9c89fSwren romano              [rank](const Element<V> &e1, const Element<V> &e2) {
ccd047cbSAart Bik                for (uint64_t r = 0; r < rank; r++) {
ccd047cbSAart Bik                  if (e1.indices[r] == e2.indices[r])
ccd047cbSAart Bik                    continue;
ccd047cbSAart Bik                  return e1.indices[r] < e2.indices[r];
8a91bc7bSHarrietAkot                }
ccd047cbSAart Bik                return false;
ccd047cbSAart Bik              });
ccd047cbSAart Bik  }
ccd047cbSAart Bik
fa6aed2aSwren romano  /// Get the rank of the tensor.
fa6aed2aSwren romano  uint64_t getRank() const { return dimSizes.size(); }
ccd047cbSAart Bik
fa6aed2aSwren romano  /// Getter for the dimension-sizes array.
fa6aed2aSwren romano  const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }
ccd047cbSAart Bik
fa6aed2aSwren romano  /// Getter for the elements array.
8a91bc7bSHarrietAkot  const std::vector<Element<V>> &getElements() const { return elements; }
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot  /// Switch into iterator mode.
8a91bc7bSHarrietAkot  void startIterator() {
8a91bc7bSHarrietAkot    iteratorLocked = true;
8a91bc7bSHarrietAkot    iteratorPos = 0;
8a91bc7bSHarrietAkot  }
ccd047cbSAart Bik
8a91bc7bSHarrietAkot  /// Get the next element.
8a91bc7bSHarrietAkot  const Element<V> *getNext() {
8a91bc7bSHarrietAkot    assert(iteratorLocked && "Attempt to getNext() before startIterator()");
8a91bc7bSHarrietAkot    if (iteratorPos < elements.size())
8a91bc7bSHarrietAkot      return &(elements[iteratorPos++]);
8a91bc7bSHarrietAkot    iteratorLocked = false;
8a91bc7bSHarrietAkot    return nullptr;
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot  /// Factory method. Permutes the original dimensions according to
8a91bc7bSHarrietAkot  /// the given ordering and expects subsequent add() calls to honor
8a91bc7bSHarrietAkot  /// that same ordering for the given indices. The result is a
8a91bc7bSHarrietAkot  /// fully permuted coordinate scheme.
8d8b566fSwren romano  ///
fa6aed2aSwren romano  /// Precondition: `dimSizes` and `perm` must be valid for `rank`.
8a91bc7bSHarrietAkot  static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t rank,
fa6aed2aSwren romano                                                const uint64_t *dimSizes,
8a91bc7bSHarrietAkot                                                const uint64_t *perm,
8a91bc7bSHarrietAkot                                                uint64_t capacity = 0) {
8a91bc7bSHarrietAkot    std::vector<uint64_t> permsz(rank);
d83a7068Swren romano    for (uint64_t r = 0; r < rank; r++) {
fa6aed2aSwren romano      assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage");
fa6aed2aSwren romano      permsz[perm[r]] = dimSizes[r];
d83a7068Swren romano    }
8a91bc7bSHarrietAkot    return new SparseTensorCOO<V>(permsz, capacity);
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkotprivate:
fa6aed2aSwren romano  const std::vector<uint64_t> dimSizes; // per-dimension sizes
ccd047cbSAart Bik  std::vector<Element<V>> elements;     // all COO elements
ccd047cbSAart Bik  std::vector<uint64_t> indices;        // shared index pool
db6796dfSMehdi Amini  bool iteratorLocked = false;
db6796dfSMehdi Amini  unsigned iteratorPos = 0;
8a91bc7bSHarrietAkot};
8a91bc7bSHarrietAkot
8cb33240Swren romano// Forward.
8cb33240Swren romanotemplate <typename V>
8cb33240Swren romanoclass SparseTensorEnumeratorBase;
8cb33240Swren romano
// Helper macro for generating error messages when some
// `SparseTensorStorage<P,I,V>` is cast to `SparseTensorStorageBase`
// and then the wrong "partial method specialization" is called.
// The stringified argument is appended to the message, which is terminated
// by a newline.  The expansion carries no trailing semicolon of its own;
// the invocation site supplies it, like for an ordinary statement.
#define FATAL_PIV(NAME) FATAL("<P,I,V> type mismatch for: " #NAME "\n")
774674ceSwren romano
8d8b566fSwren romano/// Abstract base class for `SparseTensorStorage<P,I,V>`.  This class
8d8b566fSwren romano/// takes responsibility for all the `<P,I,V>`-independent aspects
8d8b566fSwren romano/// of the tensor (e.g., shape, sparsity, permutation).  In addition,
8d8b566fSwren romano/// we use function overloading to implement "partial" method
8d8b566fSwren romano/// specialization, which the C-API relies on to catch type errors
8d8b566fSwren romano/// arising from our use of opaque pointers.
8a91bc7bSHarrietAkotclass SparseTensorStorageBase {
8a91bc7bSHarrietAkotpublic:
8d8b566fSwren romano  /// Constructs a new storage object.  The `perm` maps the tensor's
8d8b566fSwren romano  /// semantic-ordering of dimensions to this object's storage-order.
fa6aed2aSwren romano  /// The `dimSizes` and `sparsity` arrays are already in storage-order.
8d8b566fSwren romano  ///
fa6aed2aSwren romano  /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`.
fa6aed2aSwren romano  SparseTensorStorageBase(const std::vector<uint64_t> &dimSizes,
8d8b566fSwren romano                          const uint64_t *perm, const DimLevelType *sparsity)
fa6aed2aSwren romano      : dimSizes(dimSizes), rev(getRank()),
8d8b566fSwren romano        dimTypes(sparsity, sparsity + getRank()) {
753fe330Swren romano    assert(perm && sparsity);
8d8b566fSwren romano    const uint64_t rank = getRank();
8d8b566fSwren romano    // Validate parameters.
8d8b566fSwren romano    assert(rank > 0 && "Trivial shape is unsupported");
8d8b566fSwren romano    for (uint64_t r = 0; r < rank; r++) {
8d8b566fSwren romano      assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage");
8d8b566fSwren romano      assert((dimTypes[r] == DimLevelType::kDense ||
8d8b566fSwren romano              dimTypes[r] == DimLevelType::kCompressed) &&
8d8b566fSwren romano             "Unsupported DimLevelType");
8d8b566fSwren romano    }
8d8b566fSwren romano    // Construct the "reverse" (i.e., inverse) permutation.
8d8b566fSwren romano    for (uint64_t r = 0; r < rank; r++)
8d8b566fSwren romano      rev[perm[r]] = r;
8d8b566fSwren romano  }
8d8b566fSwren romano
8d8b566fSwren romano  virtual ~SparseTensorStorageBase() = default;
8d8b566fSwren romano
8d8b566fSwren romano  /// Get the rank of the tensor.
8d8b566fSwren romano  uint64_t getRank() const { return dimSizes.size(); }
8d8b566fSwren romano
8d8b566fSwren romano  /// Getter for the dimension-sizes array, in storage-order.
8d8b566fSwren romano  const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }
8d8b566fSwren romano
8d8b566fSwren romano  /// Safely lookup the size of the given (storage-order) dimension.
8d8b566fSwren romano  uint64_t getDimSize(uint64_t d) const {
8d8b566fSwren romano    assert(d < getRank());
8d8b566fSwren romano    return dimSizes[d];
8d8b566fSwren romano  }
8d8b566fSwren romano
8d8b566fSwren romano  /// Getter for the "reverse" permutation, which maps this object's
8d8b566fSwren romano  /// storage-order to the tensor's semantic-order.
8d8b566fSwren romano  const std::vector<uint64_t> &getRev() const { return rev; }
8d8b566fSwren romano
8d8b566fSwren romano  /// Getter for the dimension-types array, in storage-order.
8d8b566fSwren romano  const std::vector<DimLevelType> &getDimTypes() const { return dimTypes; }
8d8b566fSwren romano
8d8b566fSwren romano  /// Safely check if the (storage-order) dimension uses compressed storage.
8d8b566fSwren romano  bool isCompressedDim(uint64_t d) const {
8d8b566fSwren romano    assert(d < getRank());
8d8b566fSwren romano    return (dimTypes[d] == DimLevelType::kCompressed);
8d8b566fSwren romano  }
8a91bc7bSHarrietAkot
8cb33240Swren romano  /// Allocate a new enumerator.
1313f5d3Swren romano#define DECL_NEWENUMERATOR(VNAME, V)                                           \
1313f5d3Swren romano  virtual void newEnumerator(SparseTensorEnumeratorBase<V> **, uint64_t,       \
1313f5d3Swren romano                             const uint64_t *) const {                         \
774674ceSwren romano    FATAL_PIV("newEnumerator" #VNAME);                                         \
8cb33240Swren romano  }
1313f5d3Swren romano  FOREVERY_V(DECL_NEWENUMERATOR)
1313f5d3Swren romano#undef DECL_NEWENUMERATOR
8cb33240Swren romano
4f2ec7f9SAart Bik  /// Overhead storage.
a9a19f59Swren romano#define DECL_GETPOINTERS(PNAME, P)                                             \
a9a19f59Swren romano  virtual void getPointers(std::vector<P> **, uint64_t) {                      \
a9a19f59Swren romano    FATAL_PIV("getPointers" #PNAME);                                           \
774674ceSwren romano  }
a9a19f59Swren romano  FOREVERY_FIXED_O(DECL_GETPOINTERS)
a9a19f59Swren romano#undef DECL_GETPOINTERS
a9a19f59Swren romano#define DECL_GETINDICES(INAME, I)                                              \
a9a19f59Swren romano  virtual void getIndices(std::vector<I> **, uint64_t) {                       \
a9a19f59Swren romano    FATAL_PIV("getIndices" #INAME);                                            \
774674ceSwren romano  }
a9a19f59Swren romano  FOREVERY_FIXED_O(DECL_GETINDICES)
a9a19f59Swren romano#undef DECL_GETINDICES
8a91bc7bSHarrietAkot
4f2ec7f9SAart Bik  /// Primary storage.
1313f5d3Swren romano#define DECL_GETVALUES(VNAME, V)                                               \
774674ceSwren romano  virtual void getValues(std::vector<V> **) { FATAL_PIV("getValues" #VNAME); }
1313f5d3Swren romano  FOREVERY_V(DECL_GETVALUES)
1313f5d3Swren romano#undef DECL_GETVALUES
8a91bc7bSHarrietAkot
4f2ec7f9SAart Bik  /// Element-wise insertion in lexicographic index order.
1313f5d3Swren romano#define DECL_LEXINSERT(VNAME, V)                                               \
774674ceSwren romano  virtual void lexInsert(const uint64_t *, V) { FATAL_PIV("lexInsert" #VNAME); }
1313f5d3Swren romano  FOREVERY_V(DECL_LEXINSERT)
1313f5d3Swren romano#undef DECL_LEXINSERT
4f2ec7f9SAart Bik
4f2ec7f9SAart Bik  /// Expanded insertion.
1313f5d3Swren romano#define DECL_EXPINSERT(VNAME, V)                                               \
1313f5d3Swren romano  virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t) {      \
774674ceSwren romano    FATAL_PIV("expInsert" #VNAME);                                             \
4f2ec7f9SAart Bik  }
1313f5d3Swren romano  FOREVERY_V(DECL_EXPINSERT)
1313f5d3Swren romano#undef DECL_EXPINSERT
4f2ec7f9SAart Bik
4f2ec7f9SAart Bik  /// Finishes insertion.
f66e5769SAart Bik  virtual void endInsert() = 0;
f66e5769SAart Bik
753fe330Swren romanoprotected:
753fe330Swren romano  // Since this class is virtual, we must disallow public copying in
753fe330Swren romano  // order to avoid "slicing".  Since this class has data members,
753fe330Swren romano  // that means making copying protected.
753fe330Swren romano  // <https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rc-copy-virtual>
753fe330Swren romano  SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
753fe330Swren romano  // Copy-assignment would be implicitly deleted (because `dimSizes`
753fe330Swren romano  // is const), so we explicitly delete it for clarity.
753fe330Swren romano  SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;
753fe330Swren romano
8a91bc7bSHarrietAkotprivate:
8d8b566fSwren romano  const std::vector<uint64_t> dimSizes;
8d8b566fSwren romano  std::vector<uint64_t> rev;
8d8b566fSwren romano  const std::vector<DimLevelType> dimTypes;
8a91bc7bSHarrietAkot};
8a91bc7bSHarrietAkot
774674ceSwren romano#undef FATAL_PIV
774674ceSwren romano
753fe330Swren romano// Forward.
753fe330Swren romanotemplate <typename P, typename I, typename V>
753fe330Swren romanoclass SparseTensorEnumerator;
753fe330Swren romano
8a91bc7bSHarrietAkot/// A memory-resident sparse tensor using a storage scheme based on
8a91bc7bSHarrietAkot/// per-dimension sparse/dense annotations. This data structure provides a
8a91bc7bSHarrietAkot/// bufferized form of a sparse tensor type. In contrast to generating setup
8a91bc7bSHarrietAkot/// methods for each differently annotated sparse tensor, this method provides
8a91bc7bSHarrietAkot/// a convenient "one-size-fits-all" solution that simply takes an input tensor
8a91bc7bSHarrietAkot/// and annotations to implement all required setup in a general manner.
8a91bc7bSHarrietAkottemplate <typename P, typename I, typename V>
76944420Swren romanoclass SparseTensorStorage final : public SparseTensorStorageBase {
8cb33240Swren romano  /// Private constructor to share code between the other constructors.
8cb33240Swren romano  /// Beware that the object is not necessarily guaranteed to be in a
8cb33240Swren romano  /// valid state after this constructor alone; e.g., `isCompressedDim(d)`
8cb33240Swren romano  /// doesn't entail `!(pointers[d].empty())`.
8cb33240Swren romano  ///
fa6aed2aSwren romano  /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`.
fa6aed2aSwren romano  SparseTensorStorage(const std::vector<uint64_t> &dimSizes,
fa6aed2aSwren romano                      const uint64_t *perm, const DimLevelType *sparsity)
fa6aed2aSwren romano      : SparseTensorStorageBase(dimSizes, perm, sparsity), pointers(getRank()),
8cb33240Swren romano        indices(getRank()), idx(getRank()) {}
8cb33240Swren romano
8a91bc7bSHarrietAkotpublic:
8a91bc7bSHarrietAkot  /// Constructs a sparse tensor storage scheme with the given dimensions,
8a91bc7bSHarrietAkot  /// permutation, and per-dimension dense/sparse annotations, using
8a91bc7bSHarrietAkot  /// the coordinate scheme tensor for the initial contents if provided.
8d8b566fSwren romano  ///
fa6aed2aSwren romano  /// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`.
fa6aed2aSwren romano  SparseTensorStorage(const std::vector<uint64_t> &dimSizes,
fa6aed2aSwren romano                      const uint64_t *perm, const DimLevelType *sparsity,
fa6aed2aSwren romano                      SparseTensorCOO<V> *coo)
fa6aed2aSwren romano      : SparseTensorStorage(dimSizes, perm, sparsity) {
8a91bc7bSHarrietAkot    // Provide hints on capacity of pointers and indices.
175b9af4SAart Bik    // TODO: needs much fine-tuning based on actual sparsity; currently
175b9af4SAart Bik    //       we reserve pointer/index space based on all previous dense
175b9af4SAart Bik    //       dimensions, which works well up to first sparse dim; but
175b9af4SAart Bik    //       we should really use nnz and dense/sparse distribution.
f66e5769SAart Bik    bool allDense = true;
f66e5769SAart Bik    uint64_t sz = 1;
8d8b566fSwren romano    for (uint64_t r = 0, rank = getRank(); r < rank; r++) {
8d8b566fSwren romano      if (isCompressedDim(r)) {
fa6aed2aSwren romano        // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply
8d8b566fSwren romano        // `sz` by that before reserving. (For now we just use 1.)
f66e5769SAart Bik        pointers[r].reserve(sz + 1);
8d8b566fSwren romano        pointers[r].push_back(0);
f66e5769SAart Bik        indices[r].reserve(sz);
f66e5769SAart Bik        sz = 1;
f66e5769SAart Bik        allDense = false;
8d8b566fSwren romano      } else { // Dense dimension.
8d8b566fSwren romano        sz = checkedMul(sz, getDimSizes()[r]);
8a91bc7bSHarrietAkot      }
8a91bc7bSHarrietAkot    }
8a91bc7bSHarrietAkot    // Then assign contents from coordinate scheme tensor if provided.
8d8b566fSwren romano    if (coo) {
4d0a18d0Swren romano      // Ensure both preconditions of `fromCOO`.
fa6aed2aSwren romano      assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch");
8d8b566fSwren romano      coo->sort();
4d0a18d0Swren romano      // Now actually insert the `elements`.
8d8b566fSwren romano      const std::vector<Element<V>> &elements = coo->getElements();
ceda1ae9Swren romano      uint64_t nnz = elements.size();
8a91bc7bSHarrietAkot      values.reserve(nnz);
ceda1ae9Swren romano      fromCOO(elements, 0, nnz, 0);
1ce77b56SAart Bik    } else if (allDense) {
f66e5769SAart Bik      values.resize(sz, 0);
8a91bc7bSHarrietAkot    }
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot
8cb33240Swren romano  /// Constructs a sparse tensor storage scheme with the given dimensions,
8cb33240Swren romano  /// permutation, and per-dimension dense/sparse annotations, using
8cb33240Swren romano  /// the given sparse tensor for the initial contents.
8cb33240Swren romano  ///
8cb33240Swren romano  /// Preconditions:
fa6aed2aSwren romano  /// * `perm` and `sparsity` must be valid for `dimSizes.size()`.
8cb33240Swren romano  /// * The `tensor` must have the same value type `V`.
fa6aed2aSwren romano  SparseTensorStorage(const std::vector<uint64_t> &dimSizes,
fa6aed2aSwren romano                      const uint64_t *perm, const DimLevelType *sparsity,
8cb33240Swren romano                      const SparseTensorStorageBase &tensor);
8cb33240Swren romano
76944420Swren romano  ~SparseTensorStorage() final override = default;
8a91bc7bSHarrietAkot
f66e5769SAart Bik  /// Partially specialize these getter methods based on template types.
76944420Swren romano  void getPointers(std::vector<P> **out, uint64_t d) final override {
8a91bc7bSHarrietAkot    assert(d < getRank());
8a91bc7bSHarrietAkot    *out = &pointers[d];
8a91bc7bSHarrietAkot  }
76944420Swren romano  void getIndices(std::vector<I> **out, uint64_t d) final override {
8a91bc7bSHarrietAkot    assert(d < getRank());
8a91bc7bSHarrietAkot    *out = &indices[d];
8a91bc7bSHarrietAkot  }
76944420Swren romano  void getValues(std::vector<V> **out) final override { *out = &values; }
8a91bc7bSHarrietAkot
03fe15ceSAart Bik  /// Partially specialize lexicographical insertions based on template types.
76944420Swren romano  void lexInsert(const uint64_t *cursor, V val) final override {
1ce77b56SAart Bik    // First, wrap up pending insertion path.
1ce77b56SAart Bik    uint64_t diff = 0;
1ce77b56SAart Bik    uint64_t top = 0;
1ce77b56SAart Bik    if (!values.empty()) {
1ce77b56SAart Bik      diff = lexDiff(cursor);
1ce77b56SAart Bik      endPath(diff + 1);
1ce77b56SAart Bik      top = idx[diff] + 1;
1ce77b56SAart Bik    }
1ce77b56SAart Bik    // Then continue with insertion path.
1ce77b56SAart Bik    insPath(cursor, diff, top, val);
f66e5769SAart Bik  }
f66e5769SAart Bik
4f2ec7f9SAart Bik  /// Partially specialize expanded insertions based on template types.
4f2ec7f9SAart Bik  /// Note that this method resets the values/filled-switch array back
4f2ec7f9SAart Bik  /// to all-zero/false while only iterating over the nonzero elements.
4f2ec7f9SAart Bik  void expInsert(uint64_t *cursor, V *values, bool *filled, uint64_t *added,
76944420Swren romano                 uint64_t count) final override {
4f2ec7f9SAart Bik    if (count == 0)
4f2ec7f9SAart Bik      return;
4f2ec7f9SAart Bik    // Sort.
4f2ec7f9SAart Bik    std::sort(added, added + count);
4f2ec7f9SAart Bik    // Restore insertion path for first insert.
3bf2ba3bSwren romano    const uint64_t lastDim = getRank() - 1;
4f2ec7f9SAart Bik    uint64_t index = added[0];
3bf2ba3bSwren romano    cursor[lastDim] = index;
4f2ec7f9SAart Bik    lexInsert(cursor, values[index]);
4f2ec7f9SAart Bik    assert(filled[index]);
4f2ec7f9SAart Bik    values[index] = 0;
4f2ec7f9SAart Bik    filled[index] = false;
4f2ec7f9SAart Bik    // Subsequent insertions are quick.
4f2ec7f9SAart Bik    for (uint64_t i = 1; i < count; i++) {
4f2ec7f9SAart Bik      assert(index < added[i] && "non-lexicographic insertion");
4f2ec7f9SAart Bik      index = added[i];
3bf2ba3bSwren romano      cursor[lastDim] = index;
3bf2ba3bSwren romano      insPath(cursor, lastDim, added[i - 1] + 1, values[index]);
4f2ec7f9SAart Bik      assert(filled[index]);
3bf2ba3bSwren romano      values[index] = 0;
4f2ec7f9SAart Bik      filled[index] = false;
4f2ec7f9SAart Bik    }
4f2ec7f9SAart Bik  }
4f2ec7f9SAart Bik
f66e5769SAart Bik  /// Finalizes lexicographic insertions.
76944420Swren romano  void endInsert() final override {
1ce77b56SAart Bik    if (values.empty())
72ec2f76Swren romano      finalizeSegment(0);
1ce77b56SAart Bik    else
1ce77b56SAart Bik      endPath(0);
1ce77b56SAart Bik  }
f66e5769SAart Bik
8cb33240Swren romano  void newEnumerator(SparseTensorEnumeratorBase<V> **out, uint64_t rank,
76944420Swren romano                     const uint64_t *perm) const final override {
8cb33240Swren romano    *out = new SparseTensorEnumerator<P, I, V>(*this, rank, perm);
8cb33240Swren romano  }
8cb33240Swren romano
8a91bc7bSHarrietAkot  /// Returns this sparse tensor storage scheme as a new memory-resident
8a91bc7bSHarrietAkot  /// sparse tensor in coordinate scheme with the given dimension order.
8d8b566fSwren romano  ///
8d8b566fSwren romano  /// Precondition: `perm` must be valid for `getRank()`.
753fe330Swren romano  SparseTensorCOO<V> *toCOO(const uint64_t *perm) const {
8cb33240Swren romano    SparseTensorEnumeratorBase<V> *enumerator;
8cb33240Swren romano    newEnumerator(&enumerator, getRank(), perm);
753fe330Swren romano    SparseTensorCOO<V> *coo =
8cb33240Swren romano        new SparseTensorCOO<V>(enumerator->permutedSizes(), values.size());
8cb33240Swren romano    enumerator->forallElements([&coo](const std::vector<uint64_t> &ind, V val) {
753fe330Swren romano      coo->add(ind, val);
753fe330Swren romano    });
8d8b566fSwren romano    // TODO: This assertion assumes there are no stored zeros,
8d8b566fSwren romano    // or if there are then that we don't filter them out.
8d8b566fSwren romano    // Cf., <https://github.com/llvm/llvm-project/issues/54179>
8d8b566fSwren romano    assert(coo->getElements().size() == values.size());
8cb33240Swren romano    delete enumerator;
8d8b566fSwren romano    return coo;
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot  /// Factory method. Constructs a sparse tensor storage scheme with the given
8a91bc7bSHarrietAkot  /// dimensions, permutation, and per-dimension dense/sparse annotations,
8a91bc7bSHarrietAkot  /// using the coordinate scheme tensor for the initial contents if provided.
8a91bc7bSHarrietAkot  /// In the latter case, the coordinate scheme must respect the same
8a91bc7bSHarrietAkot  /// permutation as is desired for the new sparse tensor storage.
8d8b566fSwren romano  ///
8d8b566fSwren romano  /// Precondition: `shape`, `perm`, and `sparsity` must be valid for `rank`.
8a91bc7bSHarrietAkot  static SparseTensorStorage<P, I, V> *
d83a7068Swren romano  newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
8d8b566fSwren romano                  const DimLevelType *sparsity, SparseTensorCOO<V> *coo) {
8a91bc7bSHarrietAkot    SparseTensorStorage<P, I, V> *n = nullptr;
8d8b566fSwren romano    if (coo) {
fa6aed2aSwren romano      const auto &coosz = coo->getDimSizes();
8cb33240Swren romano      assertPermutedSizesMatchShape(coosz, rank, perm, shape);
8d8b566fSwren romano      n = new SparseTensorStorage<P, I, V>(coosz, perm, sparsity, coo);
8a91bc7bSHarrietAkot    } else {
8a91bc7bSHarrietAkot      std::vector<uint64_t> permsz(rank);
d83a7068Swren romano      for (uint64_t r = 0; r < rank; r++) {
d83a7068Swren romano        assert(shape[r] > 0 && "Dimension size zero has trivial storage");
d83a7068Swren romano        permsz[perm[r]] = shape[r];
d83a7068Swren romano      }
8cb33240Swren romano      // We pass the null `coo` to ensure we select the intended constructor.
8cb33240Swren romano      n = new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, coo);
8a91bc7bSHarrietAkot    }
8a91bc7bSHarrietAkot    return n;
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot
8cb33240Swren romano  /// Factory method. Constructs a sparse tensor storage scheme with
8cb33240Swren romano  /// the given dimensions, permutation, and per-dimension dense/sparse
8cb33240Swren romano  /// annotations, using the sparse tensor for the initial contents.
8cb33240Swren romano  ///
8cb33240Swren romano  /// Preconditions:
8cb33240Swren romano  /// * `shape`, `perm`, and `sparsity` must be valid for `rank`.
8cb33240Swren romano  /// * The `tensor` must have the same value type `V`.
8cb33240Swren romano  static SparseTensorStorage<P, I, V> *
8cb33240Swren romano  newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
8cb33240Swren romano                  const DimLevelType *sparsity,
8cb33240Swren romano                  const SparseTensorStorageBase *source) {
8cb33240Swren romano    assert(source && "Got nullptr for source");
8cb33240Swren romano    SparseTensorEnumeratorBase<V> *enumerator;
8cb33240Swren romano    source->newEnumerator(&enumerator, rank, perm);
8cb33240Swren romano    const auto &permsz = enumerator->permutedSizes();
8cb33240Swren romano    assertPermutedSizesMatchShape(permsz, rank, perm, shape);
8cb33240Swren romano    auto *tensor =
8cb33240Swren romano        new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, *source);
8cb33240Swren romano    delete enumerator;
8cb33240Swren romano    return tensor;
8cb33240Swren romano  }
8cb33240Swren romano
8a91bc7bSHarrietAkotprivate:
72ec2f76Swren romano  /// Appends an arbitrary new position to `pointers[d]`.  This method
72ec2f76Swren romano  /// checks that `pos` is representable in the `P` type; however, it
72ec2f76Swren romano  /// does not check that `pos` is semantically valid (i.e., larger than
72ec2f76Swren romano  /// the previous position and smaller than `indices[d].capacity()`).
8d8b566fSwren romano  void appendPointer(uint64_t d, uint64_t pos, uint64_t count = 1) {
72ec2f76Swren romano    assert(isCompressedDim(d));
72ec2f76Swren romano    assert(pos <= std::numeric_limits<P>::max() &&
4d0a18d0Swren romano           "Pointer value is too large for the P-type");
72ec2f76Swren romano    pointers[d].insert(pointers[d].end(), count, static_cast<P>(pos));
4d0a18d0Swren romano  }
4d0a18d0Swren romano
72ec2f76Swren romano  /// Appends index `i` to dimension `d`, in the semantically general
72ec2f76Swren romano  /// sense.  For non-dense dimensions, that means appending to the
72ec2f76Swren romano  /// `indices[d]` array, checking that `i` is representable in the `I`
72ec2f76Swren romano  /// type; however, we do not verify other semantic requirements (e.g.,
fa6aed2aSwren romano  /// that `i` is in bounds for `dimSizes[d]`, and not previously occurring
72ec2f76Swren romano  /// in the same segment).  For dense dimensions, this method instead
72ec2f76Swren romano  /// appends the appropriate number of zeros to the `values` array,
72ec2f76Swren romano  /// where `full` is the number of "entries" already written to `values`
72ec2f76Swren romano  /// for this segment (aka one after the highest index previously appended).
72ec2f76Swren romano  void appendIndex(uint64_t d, uint64_t full, uint64_t i) {
72ec2f76Swren romano    if (isCompressedDim(d)) {
4d0a18d0Swren romano      assert(i <= std::numeric_limits<I>::max() &&
4d0a18d0Swren romano             "Index value is too large for the I-type");
72ec2f76Swren romano      indices[d].push_back(static_cast<I>(i));
72ec2f76Swren romano    } else { // Dense dimension.
72ec2f76Swren romano      assert(i >= full && "Index was already filled");
72ec2f76Swren romano      if (i == full)
72ec2f76Swren romano        return; // Short-circuit, since it'll be a nop.
72ec2f76Swren romano      if (d + 1 == getRank())
72ec2f76Swren romano        values.insert(values.end(), i - full, 0);
72ec2f76Swren romano      else
72ec2f76Swren romano        finalizeSegment(d + 1, 0, i - full);
72ec2f76Swren romano    }
4d0a18d0Swren romano  }
4d0a18d0Swren romano
8cb33240Swren romano  /// Writes the given coordinate to `indices[d][pos]`.  This method
8cb33240Swren romano  /// checks that `i` is representable in the `I` type; however, it
8cb33240Swren romano  /// does not check that `i` is semantically valid (i.e., in bounds
fa6aed2aSwren romano  /// for `dimSizes[d]` and not elsewhere occurring in the same segment).
8cb33240Swren romano  void writeIndex(uint64_t d, uint64_t pos, uint64_t i) {
8cb33240Swren romano    assert(isCompressedDim(d));
8cb33240Swren romano    // Subscript assignment to `std::vector` requires that the `pos`-th
8cb33240Swren romano    // entry has been initialized; thus we must be sure to check `size()`
8cb33240Swren romano    // here, instead of `capacity()` as would be ideal.
8cb33240Swren romano    assert(pos < indices[d].size() && "Index position is out of bounds");
8cb33240Swren romano    assert(i <= std::numeric_limits<I>::max() &&
8cb33240Swren romano           "Index value is too large for the I-type");
8cb33240Swren romano    indices[d][pos] = static_cast<I>(i);
8cb33240Swren romano  }
8cb33240Swren romano
8cb33240Swren romano  /// Computes the assembled-size associated with the `d`-th dimension,
8cb33240Swren romano  /// given the assembled-size associated with the `(d-1)`-th dimension.
8cb33240Swren romano  /// "Assembled-sizes" correspond to the (nominal) sizes of overhead
8cb33240Swren romano  /// storage, as opposed to "dimension-sizes" which are the cardinality
8cb33240Swren romano  /// of coordinates for that dimension.
8cb33240Swren romano  ///
8cb33240Swren romano  /// Precondition: the `pointers[d]` array must be fully initialized
8cb33240Swren romano  /// before calling this method.
8cb33240Swren romano  uint64_t assembledSize(uint64_t parentSz, uint64_t d) const {
8cb33240Swren romano    if (isCompressedDim(d))
8cb33240Swren romano      return pointers[d][parentSz];
8cb33240Swren romano    // else if dense:
8cb33240Swren romano    return parentSz * getDimSizes()[d];
8cb33240Swren romano  }
8cb33240Swren romano
8a91bc7bSHarrietAkot  /// Initializes sparse tensor storage scheme from a memory-resident sparse
8a91bc7bSHarrietAkot  /// tensor in coordinate scheme. This method prepares the pointers and
8a91bc7bSHarrietAkot  /// indices arrays under the given per-dimension dense/sparse annotations.
4d0a18d0Swren romano  ///
4d0a18d0Swren romano  /// Preconditions:
4d0a18d0Swren romano  /// (1) the `elements` must be lexicographically sorted.
fa6aed2aSwren romano  /// (2) the indices of every element are valid for `dimSizes` (equal rank
4d0a18d0Swren romano  ///     and pointwise less-than).
ceda1ae9Swren romano  void fromCOO(const std::vector<Element<V>> &elements, uint64_t lo,
ceda1ae9Swren romano               uint64_t hi, uint64_t d) {
753fe330Swren romano    uint64_t rank = getRank();
753fe330Swren romano    assert(d <= rank && hi <= elements.size());
8a91bc7bSHarrietAkot    // Once dimensions are exhausted, insert the numerical values.
753fe330Swren romano    if (d == rank) {
c4017f9dSwren romano      assert(lo < hi);
1ce77b56SAart Bik      values.push_back(elements[lo].value);
8a91bc7bSHarrietAkot      return;
8a91bc7bSHarrietAkot    }
8a91bc7bSHarrietAkot    // Visit all elements in this interval.
8a91bc7bSHarrietAkot    uint64_t full = 0;
c4017f9dSwren romano    while (lo < hi) { // If `hi` is unchanged, then `lo < elements.size()`.
8a91bc7bSHarrietAkot      // Find segment in interval with same index elements in this dimension.
f66e5769SAart Bik      uint64_t i = elements[lo].indices[d];
8a91bc7bSHarrietAkot      uint64_t seg = lo + 1;
f66e5769SAart Bik      while (seg < hi && elements[seg].indices[d] == i)
8a91bc7bSHarrietAkot        seg++;
8a91bc7bSHarrietAkot      // Handle segment in interval for sparse or dense dimension.
72ec2f76Swren romano      appendIndex(d, full, i);
72ec2f76Swren romano      full = i + 1;
ceda1ae9Swren romano      fromCOO(elements, lo, seg, d + 1);
8a91bc7bSHarrietAkot      // And move on to next segment in interval.
8a91bc7bSHarrietAkot      lo = seg;
8a91bc7bSHarrietAkot    }
8a91bc7bSHarrietAkot    // Finalize the sparse pointer structure at this dimension.
72ec2f76Swren romano    finalizeSegment(d, full);
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot
72ec2f76Swren romano  /// Finalize the sparse pointer structure at this dimension.
72ec2f76Swren romano  void finalizeSegment(uint64_t d, uint64_t full = 0, uint64_t count = 1) {
72ec2f76Swren romano    if (count == 0)
72ec2f76Swren romano      return; // Short-circuit, since it'll be a nop.
72ec2f76Swren romano    if (isCompressedDim(d)) {
72ec2f76Swren romano      appendPointer(d, indices[d].size(), count);
72ec2f76Swren romano    } else { // Dense dimension.
8d8b566fSwren romano      const uint64_t sz = getDimSizes()[d];
72ec2f76Swren romano      assert(sz >= full && "Segment is overfull");
8d8b566fSwren romano      count = checkedMul(count, sz - full);
72ec2f76Swren romano      // For dense storage we must enumerate all the remaining coordinates
72ec2f76Swren romano      // in this dimension (i.e., coordinates after the last non-zero
72ec2f76Swren romano      // element), and either fill in their zero values or else recurse
72ec2f76Swren romano      // to finalize some deeper dimension.
72ec2f76Swren romano      if (d + 1 == getRank())
72ec2f76Swren romano        values.insert(values.end(), count, 0);
72ec2f76Swren romano      else
72ec2f76Swren romano        finalizeSegment(d + 1, 0, count);
1ce77b56SAart Bik    }
1ce77b56SAart Bik  }
1ce77b56SAart Bik
1ce77b56SAart Bik  /// Wraps up a single insertion path, inner to outer.
1ce77b56SAart Bik  void endPath(uint64_t diff) {
1ce77b56SAart Bik    uint64_t rank = getRank();
1ce77b56SAart Bik    assert(diff <= rank);
1ce77b56SAart Bik    for (uint64_t i = 0; i < rank - diff; i++) {
72ec2f76Swren romano      const uint64_t d = rank - i - 1;
72ec2f76Swren romano      finalizeSegment(d, idx[d] + 1);
1ce77b56SAart Bik    }
1ce77b56SAart Bik  }
1ce77b56SAart Bik
1ce77b56SAart Bik  /// Continues a single insertion path, outer to inner.
c03fd1e6Swren romano  void insPath(const uint64_t *cursor, uint64_t diff, uint64_t top, V val) {
1ce77b56SAart Bik    uint64_t rank = getRank();
1ce77b56SAart Bik    assert(diff < rank);
1ce77b56SAart Bik    for (uint64_t d = diff; d < rank; d++) {
1ce77b56SAart Bik      uint64_t i = cursor[d];
72ec2f76Swren romano      appendIndex(d, top, i);
1ce77b56SAart Bik      top = 0;
1ce77b56SAart Bik      idx[d] = i;
1ce77b56SAart Bik    }
1ce77b56SAart Bik    values.push_back(val);
1ce77b56SAart Bik  }
1ce77b56SAart Bik
1ce77b56SAart Bik  /// Finds the lexicographic differing dimension.
46bdacaaSwren romano  uint64_t lexDiff(const uint64_t *cursor) const {
1ce77b56SAart Bik    for (uint64_t r = 0, rank = getRank(); r < rank; r++)
1ce77b56SAart Bik      if (cursor[r] > idx[r])
1ce77b56SAart Bik        return r;
1ce77b56SAart Bik      else
1ce77b56SAart Bik        assert(cursor[r] == idx[r] && "non-lexicographic insertion");
1ce77b56SAart Bik    assert(0 && "duplication insertion");
1ce77b56SAart Bik    return -1u;
1ce77b56SAart Bik  }
1ce77b56SAart Bik
753fe330Swren romano  // Allow `SparseTensorEnumerator` to access the data-members (to avoid
753fe330Swren romano  // the cost of virtual-function dispatch in inner loops), without
753fe330Swren romano  // making them public to other client code.
753fe330Swren romano  friend class SparseTensorEnumerator<P, I, V>;
753fe330Swren romano
8a91bc7bSHarrietAkot  std::vector<std::vector<P>> pointers;
8a91bc7bSHarrietAkot  std::vector<std::vector<I>> indices;
8a91bc7bSHarrietAkot  std::vector<V> values;
8d8b566fSwren romano  std::vector<uint64_t> idx; // index cursor for lexicographic insertion.
8a91bc7bSHarrietAkot};
8a91bc7bSHarrietAkot
753fe330Swren romano/// A (higher-order) function object for enumerating the elements of some
753fe330Swren romano/// `SparseTensorStorage` under a permutation.  That is, the `forallElements`
753fe330Swren romano/// method encapsulates the loop-nest for enumerating the elements of
753fe330Swren romano/// the source tensor (in whatever order is best for the source tensor),
753fe330Swren romano/// and applies a permutation to the coordinates/indices before handing
753fe330Swren romano/// each element to the callback.  A single enumerator object can be
753fe330Swren romano/// freely reused for several calls to `forallElements`, just so long
753fe330Swren romano/// as each call is sequential with respect to one another.
753fe330Swren romano///
753fe330Swren romano/// N.B., this class stores a reference to the `SparseTensorStorageBase`
753fe330Swren romano/// passed to the constructor; thus, objects of this class must not
753fe330Swren romano/// outlive the sparse tensor they depend on.
753fe330Swren romano///
753fe330Swren romano/// Design Note: The reason we define this class instead of simply using
753fe330Swren romano/// `SparseTensorEnumerator<P,I,V>` is because we need to hide/generalize
753fe330Swren romano/// the `<P,I>` template parameters from MLIR client code (to simplify the
753fe330Swren romano/// type parameters used for direct sparse-to-sparse conversion).  And the
753fe330Swren romano/// reason we define the `SparseTensorEnumerator<P,I,V>` subclasses rather
753fe330Swren romano/// than simply using this class, is to avoid the cost of virtual-method
753fe330Swren romano/// dispatch within the loop-nest.
753fe330Swren romanotemplate <typename V>
753fe330Swren romanoclass SparseTensorEnumeratorBase {
753fe330Swren romanopublic:
753fe330Swren romano  /// Constructs an enumerator with the given permutation for mapping
753fe330Swren romano  /// the semantic-ordering of dimensions to the desired target-ordering.
753fe330Swren romano  ///
753fe330Swren romano  /// Preconditions:
753fe330Swren romano  /// * the `tensor` must have the same `V` value type.
753fe330Swren romano  /// * `perm` must be valid for `rank`.
753fe330Swren romano  SparseTensorEnumeratorBase(const SparseTensorStorageBase &tensor,
753fe330Swren romano                             uint64_t rank, const uint64_t *perm)
753fe330Swren romano      : src(tensor), permsz(src.getRev().size()), reord(getRank()),
753fe330Swren romano        cursor(getRank()) {
753fe330Swren romano    assert(perm && "Received nullptr for permutation");
753fe330Swren romano    assert(rank == getRank() && "Permutation rank mismatch");
fa6aed2aSwren romano    const auto &rev = src.getRev();           // source-order -> semantic-order
fa6aed2aSwren romano    const auto &dimSizes = src.getDimSizes(); // in source storage-order
753fe330Swren romano    for (uint64_t s = 0; s < rank; s++) {     // `s` source storage-order
753fe330Swren romano      uint64_t t = perm[rev[s]];              // `t` target-order
753fe330Swren romano      reord[s] = t;
fa6aed2aSwren romano      permsz[t] = dimSizes[s];
753fe330Swren romano    }
753fe330Swren romano  }
753fe330Swren romano
753fe330Swren romano  virtual ~SparseTensorEnumeratorBase() = default;
753fe330Swren romano
753fe330Swren romano  // We disallow copying to help avoid leaking the `src` reference.
753fe330Swren romano  // (In addition to avoiding the problem of slicing.)
753fe330Swren romano  SparseTensorEnumeratorBase(const SparseTensorEnumeratorBase &) = delete;
753fe330Swren romano  SparseTensorEnumeratorBase &
753fe330Swren romano  operator=(const SparseTensorEnumeratorBase &) = delete;
753fe330Swren romano
753fe330Swren romano  /// Returns the source/target tensor's rank.  (The source-rank and
753fe330Swren romano  /// target-rank are always equal since we only support permutations.
753fe330Swren romano  /// Though once we add support for other dimension mappings, this
753fe330Swren romano  /// method will have to be split in two.)
753fe330Swren romano  uint64_t getRank() const { return permsz.size(); }
753fe330Swren romano
753fe330Swren romano  /// Returns the target tensor's dimension sizes.
753fe330Swren romano  const std::vector<uint64_t> &permutedSizes() const { return permsz; }
753fe330Swren romano
753fe330Swren romano  /// Enumerates all elements of the source tensor, permutes their
753fe330Swren romano  /// indices, and passes the permuted element to the callback.
753fe330Swren romano  /// The callback must not store the cursor reference directly,
753fe330Swren romano  /// since this function reuses the storage.  Instead, the callback
753fe330Swren romano  /// must copy it if they want to keep it.
753fe330Swren romano  virtual void forallElements(ElementConsumer<V> yield) = 0;
753fe330Swren romano
753fe330Swren romanoprotected:
753fe330Swren romano  const SparseTensorStorageBase &src;
753fe330Swren romano  std::vector<uint64_t> permsz; // in target order.
753fe330Swren romano  std::vector<uint64_t> reord;  // source storage-order -> target order.
753fe330Swren romano  std::vector<uint64_t> cursor; // in target order.
753fe330Swren romano};
753fe330Swren romano
753fe330Swren romanotemplate <typename P, typename I, typename V>
753fe330Swren romanoclass SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
753fe330Swren romano  using Base = SparseTensorEnumeratorBase<V>;
753fe330Swren romano
753fe330Swren romanopublic:
753fe330Swren romano  /// Constructs an enumerator with the given permutation for mapping
753fe330Swren romano  /// the semantic-ordering of dimensions to the desired target-ordering.
753fe330Swren romano  ///
753fe330Swren romano  /// Precondition: `perm` must be valid for `rank`.
753fe330Swren romano  SparseTensorEnumerator(const SparseTensorStorage<P, I, V> &tensor,
753fe330Swren romano                         uint64_t rank, const uint64_t *perm)
753fe330Swren romano      : Base(tensor, rank, perm) {}
753fe330Swren romano
f38765a8SMehdi Amini  ~SparseTensorEnumerator() final = default;
753fe330Swren romano
f38765a8SMehdi Amini  void forallElements(ElementConsumer<V> yield) final {
753fe330Swren romano    forallElements(yield, 0, 0);
753fe330Swren romano  }
753fe330Swren romano
753fe330Swren romanoprivate:
753fe330Swren romano  /// The recursive component of the public `forallElements`.
753fe330Swren romano  void forallElements(ElementConsumer<V> yield, uint64_t parentPos,
753fe330Swren romano                      uint64_t d) {
753fe330Swren romano    // Recover the `<P,I,V>` type parameters of `src`.
753fe330Swren romano    const auto &src =
753fe330Swren romano        static_cast<const SparseTensorStorage<P, I, V> &>(this->src);
753fe330Swren romano    if (d == Base::getRank()) {
753fe330Swren romano      assert(parentPos < src.values.size() &&
753fe330Swren romano             "Value position is out of bounds");
753fe330Swren romano      // TODO: <https://github.com/llvm/llvm-project/issues/54179>
753fe330Swren romano      yield(this->cursor, src.values[parentPos]);
753fe330Swren romano    } else if (src.isCompressedDim(d)) {
753fe330Swren romano      // Look up the bounds of the `d`-level segment determined by the
753fe330Swren romano      // `d-1`-level position `parentPos`.
753fe330Swren romano      const std::vector<P> &pointers_d = src.pointers[d];
753fe330Swren romano      assert(parentPos + 1 < pointers_d.size() &&
753fe330Swren romano             "Parent pointer position is out of bounds");
753fe330Swren romano      const uint64_t pstart = static_cast<uint64_t>(pointers_d[parentPos]);
753fe330Swren romano      const uint64_t pstop = static_cast<uint64_t>(pointers_d[parentPos + 1]);
753fe330Swren romano      // Loop-invariant code for looking up the `d`-level coordinates/indices.
753fe330Swren romano      const std::vector<I> &indices_d = src.indices[d];
3b13f880SAart Bik      assert(pstop <= indices_d.size() && "Index position is out of bounds");
753fe330Swren romano      uint64_t &cursor_reord_d = this->cursor[this->reord[d]];
753fe330Swren romano      for (uint64_t pos = pstart; pos < pstop; pos++) {
753fe330Swren romano        cursor_reord_d = static_cast<uint64_t>(indices_d[pos]);
753fe330Swren romano        forallElements(yield, pos, d + 1);
753fe330Swren romano      }
753fe330Swren romano    } else { // Dense dimension.
753fe330Swren romano      const uint64_t sz = src.getDimSizes()[d];
753fe330Swren romano      const uint64_t pstart = parentPos * sz;
753fe330Swren romano      uint64_t &cursor_reord_d = this->cursor[this->reord[d]];
753fe330Swren romano      for (uint64_t i = 0; i < sz; i++) {
753fe330Swren romano        cursor_reord_d = i;
753fe330Swren romano        forallElements(yield, pstart + i, d + 1);
753fe330Swren romano      }
753fe330Swren romano    }
753fe330Swren romano  }
753fe330Swren romano};
753fe330Swren romano
8cb33240Swren romano/// Statistics regarding the number of nonzero subtensors in
8cb33240Swren romano/// a source tensor, for direct sparse=>sparse conversion a la
8cb33240Swren romano/// <https://arxiv.org/abs/2001.02609>.
8cb33240Swren romano///
8cb33240Swren romano/// N.B., this class stores references to the parameters passed to
8cb33240Swren romano/// the constructor; thus, objects of this class must not outlive
8cb33240Swren romano/// those parameters.
76944420Swren romanoclass SparseTensorNNZ final {
8cb33240Swren romanopublic:
8cb33240Swren romano  /// Allocate the statistics structure for the desired sizes and
8cb33240Swren romano  /// sparsity (in the target tensor's storage-order).  This constructor
8cb33240Swren romano  /// does not actually populate the statistics, however; for that see
8cb33240Swren romano  /// `initialize`.
8cb33240Swren romano  ///
fa6aed2aSwren romano  /// Precondition: `dimSizes` must not contain zeros.
fa6aed2aSwren romano  SparseTensorNNZ(const std::vector<uint64_t> &dimSizes,
8cb33240Swren romano                  const std::vector<DimLevelType> &sparsity)
fa6aed2aSwren romano      : dimSizes(dimSizes), dimTypes(sparsity), nnz(getRank()) {
8cb33240Swren romano    assert(dimSizes.size() == dimTypes.size() && "Rank mismatch");
8cb33240Swren romano    bool uncompressed = true;
8cb33240Swren romano    uint64_t sz = 1; // the product of all `dimSizes` strictly less than `r`.
8cb33240Swren romano    for (uint64_t rank = getRank(), r = 0; r < rank; r++) {
8cb33240Swren romano      switch (dimTypes[r]) {
8cb33240Swren romano      case DimLevelType::kCompressed:
8cb33240Swren romano        assert(uncompressed &&
8cb33240Swren romano               "Multiple compressed layers not currently supported");
8cb33240Swren romano        uncompressed = false;
8cb33240Swren romano        nnz[r].resize(sz, 0); // Both allocate and zero-initialize.
8cb33240Swren romano        break;
8cb33240Swren romano      case DimLevelType::kDense:
8cb33240Swren romano        assert(uncompressed &&
8cb33240Swren romano               "Dense after compressed not currently supported");
8cb33240Swren romano        break;
8cb33240Swren romano      case DimLevelType::kSingleton:
8cb33240Swren romano        // Singleton after Compressed causes no problems for allocating
8cb33240Swren romano        // `nnz` nor for the yieldPos loop.  This remains true even
8cb33240Swren romano        // when adding support for multiple compressed dimensions or
8cb33240Swren romano        // for dense-after-compressed.
8cb33240Swren romano        break;
8cb33240Swren romano      }
8cb33240Swren romano      sz = checkedMul(sz, dimSizes[r]);
8cb33240Swren romano    }
8cb33240Swren romano  }
8cb33240Swren romano
8cb33240Swren romano  // We disallow copying to help avoid leaking the stored references.
8cb33240Swren romano  SparseTensorNNZ(const SparseTensorNNZ &) = delete;
8cb33240Swren romano  SparseTensorNNZ &operator=(const SparseTensorNNZ &) = delete;
8cb33240Swren romano
8cb33240Swren romano  /// Returns the rank of the target tensor.
8cb33240Swren romano  uint64_t getRank() const { return dimSizes.size(); }
8cb33240Swren romano
8cb33240Swren romano  /// Enumerate the source tensor to fill in the statistics.  The
8cb33240Swren romano  /// enumerator should already incorporate the permutation (from
8cb33240Swren romano  /// semantic-order to the target storage-order).
8cb33240Swren romano  template <typename V>
8cb33240Swren romano  void initialize(SparseTensorEnumeratorBase<V> &enumerator) {
8cb33240Swren romano    assert(enumerator.getRank() == getRank() && "Tensor rank mismatch");
8cb33240Swren romano    assert(enumerator.permutedSizes() == dimSizes && "Tensor size mismatch");
8cb33240Swren romano    enumerator.forallElements(
8cb33240Swren romano        [this](const std::vector<uint64_t> &ind, V) { add(ind); });
8cb33240Swren romano  }
8cb33240Swren romano
8cb33240Swren romano  /// The type of callback functions which receive an nnz-statistic.
8cb33240Swren romano  using NNZConsumer = const std::function<void(uint64_t)> &;
8cb33240Swren romano
8cb33240Swren romano  /// Lexicographically enumerates all indicies for dimensions strictly
8cb33240Swren romano  /// less than `stopDim`, and passes their nnz statistic to the callback.
8cb33240Swren romano  /// Since our use-case only requires the statistic not the coordinates
8cb33240Swren romano  /// themselves, we do not bother to construct those coordinates.
8cb33240Swren romano  void forallIndices(uint64_t stopDim, NNZConsumer yield) const {
8cb33240Swren romano    assert(stopDim < getRank() && "Stopping-dimension is out of bounds");
8cb33240Swren romano    assert(dimTypes[stopDim] == DimLevelType::kCompressed &&
8cb33240Swren romano           "Cannot look up non-compressed dimensions");
8cb33240Swren romano    forallIndices(yield, stopDim, 0, 0);
8cb33240Swren romano  }
8cb33240Swren romano
8cb33240Swren romanoprivate:
8cb33240Swren romano  /// Adds a new element (i.e., increment its statistics).  We use
8cb33240Swren romano  /// a method rather than inlining into the lambda in `initialize`,
8cb33240Swren romano  /// to avoid spurious templating over `V`.  And this method is private
8cb33240Swren romano  /// to avoid needing to re-assert validity of `ind` (which is guaranteed
8cb33240Swren romano  /// by `forallElements`).
8cb33240Swren romano  void add(const std::vector<uint64_t> &ind) {
8cb33240Swren romano    uint64_t parentPos = 0;
8cb33240Swren romano    for (uint64_t rank = getRank(), r = 0; r < rank; r++) {
8cb33240Swren romano      if (dimTypes[r] == DimLevelType::kCompressed)
8cb33240Swren romano        nnz[r][parentPos]++;
8cb33240Swren romano      parentPos = parentPos * dimSizes[r] + ind[r];
8cb33240Swren romano    }
8cb33240Swren romano  }
8cb33240Swren romano
8cb33240Swren romano  /// Recursive component of the public `forallIndices`.
8cb33240Swren romano  void forallIndices(NNZConsumer yield, uint64_t stopDim, uint64_t parentPos,
8cb33240Swren romano                     uint64_t d) const {
8cb33240Swren romano    assert(d <= stopDim);
8cb33240Swren romano    if (d == stopDim) {
8cb33240Swren romano      assert(parentPos < nnz[d].size() && "Cursor is out of range");
8cb33240Swren romano      yield(nnz[d][parentPos]);
8cb33240Swren romano    } else {
8cb33240Swren romano      const uint64_t sz = dimSizes[d];
8cb33240Swren romano      const uint64_t pstart = parentPos * sz;
8cb33240Swren romano      for (uint64_t i = 0; i < sz; i++)
8cb33240Swren romano        forallIndices(yield, stopDim, pstart + i, d + 1);
8cb33240Swren romano    }
8cb33240Swren romano  }
8cb33240Swren romano
8cb33240Swren romano  // All of these are in the target storage-order.
8cb33240Swren romano  const std::vector<uint64_t> &dimSizes;
8cb33240Swren romano  const std::vector<DimLevelType> &dimTypes;
8cb33240Swren romano  std::vector<std::vector<uint64_t>> nnz;
8cb33240Swren romano};
8cb33240Swren romano
8cb33240Swren romanotemplate <typename P, typename I, typename V>
8cb33240Swren romanoSparseTensorStorage<P, I, V>::SparseTensorStorage(
fa6aed2aSwren romano    const std::vector<uint64_t> &dimSizes, const uint64_t *perm,
8cb33240Swren romano    const DimLevelType *sparsity, const SparseTensorStorageBase &tensor)
fa6aed2aSwren romano    : SparseTensorStorage(dimSizes, perm, sparsity) {
8cb33240Swren romano  SparseTensorEnumeratorBase<V> *enumerator;
8cb33240Swren romano  tensor.newEnumerator(&enumerator, getRank(), perm);
8cb33240Swren romano  {
8cb33240Swren romano    // Initialize the statistics structure.
8cb33240Swren romano    SparseTensorNNZ nnz(getDimSizes(), getDimTypes());
8cb33240Swren romano    nnz.initialize(*enumerator);
8cb33240Swren romano    // Initialize "pointers" overhead (and allocate "indices", "values").
8cb33240Swren romano    uint64_t parentSz = 1; // assembled-size (not dimension-size) of `r-1`.
8cb33240Swren romano    for (uint64_t rank = getRank(), r = 0; r < rank; r++) {
8cb33240Swren romano      if (isCompressedDim(r)) {
8cb33240Swren romano        pointers[r].reserve(parentSz + 1);
8cb33240Swren romano        pointers[r].push_back(0);
8cb33240Swren romano        uint64_t currentPos = 0;
8cb33240Swren romano        nnz.forallIndices(r, [this, &currentPos, r](uint64_t n) {
8cb33240Swren romano          currentPos += n;
8cb33240Swren romano          appendPointer(r, currentPos);
8cb33240Swren romano        });
8cb33240Swren romano        assert(pointers[r].size() == parentSz + 1 &&
8cb33240Swren romano               "Final pointers size doesn't match allocated size");
8cb33240Swren romano        // That assertion entails `assembledSize(parentSz, r)`
8cb33240Swren romano        // is now in a valid state.  That is, `pointers[r][parentSz]`
8cb33240Swren romano        // equals the present value of `currentPos`, which is the
8cb33240Swren romano        // correct assembled-size for `indices[r]`.
8cb33240Swren romano      }
8cb33240Swren romano      // Update assembled-size for the next iteration.
8cb33240Swren romano      parentSz = assembledSize(parentSz, r);
8cb33240Swren romano      // Ideally we need only `indices[r].reserve(parentSz)`, however
8cb33240Swren romano      // the `std::vector` implementation forces us to initialize it too.
8cb33240Swren romano      // That is, in the yieldPos loop we need random-access assignment
8cb33240Swren romano      // to `indices[r]`; however, `std::vector`'s subscript-assignment
8cb33240Swren romano      // only allows assigning to already-initialized positions.
8cb33240Swren romano      if (isCompressedDim(r))
8cb33240Swren romano        indices[r].resize(parentSz, 0);
8cb33240Swren romano    }
8cb33240Swren romano    values.resize(parentSz, 0); // Both allocate and zero-initialize.
8cb33240Swren romano  }
8cb33240Swren romano  // The yieldPos loop
8cb33240Swren romano  enumerator->forallElements([this](const std::vector<uint64_t> &ind, V val) {
8cb33240Swren romano    uint64_t parentSz = 1, parentPos = 0;
8cb33240Swren romano    for (uint64_t rank = getRank(), r = 0; r < rank; r++) {
8cb33240Swren romano      if (isCompressedDim(r)) {
8cb33240Swren romano        // If `parentPos == parentSz` then it's valid as an array-lookup;
8cb33240Swren romano        // however, it's semantically invalid here since that entry
8cb33240Swren romano        // does not represent a segment of `indices[r]`.  Moreover, that
8cb33240Swren romano        // entry must be immutable for `assembledSize` to remain valid.
8cb33240Swren romano        assert(parentPos < parentSz && "Pointers position is out of bounds");
8cb33240Swren romano        const uint64_t currentPos = pointers[r][parentPos];
8cb33240Swren romano        // This increment won't overflow the `P` type, since it can't
8cb33240Swren romano        // exceed the original value of `pointers[r][parentPos+1]`
8cb33240Swren romano        // which was already verified to be within bounds for `P`
8cb33240Swren romano        // when it was written to the array.
8cb33240Swren romano        pointers[r][parentPos]++;
8cb33240Swren romano        writeIndex(r, currentPos, ind[r]);
8cb33240Swren romano        parentPos = currentPos;
8cb33240Swren romano      } else { // Dense dimension.
8cb33240Swren romano        parentPos = parentPos * getDimSizes()[r] + ind[r];
8cb33240Swren romano      }
8cb33240Swren romano      parentSz = assembledSize(parentSz, r);
8cb33240Swren romano    }
8cb33240Swren romano    assert(parentPos < values.size() && "Value position is out of bounds");
8cb33240Swren romano    values[parentPos] = val;
8cb33240Swren romano  });
8cb33240Swren romano  // No longer need the enumerator, so we'll delete it ASAP.
8cb33240Swren romano  delete enumerator;
8cb33240Swren romano  // The finalizeYieldPos loop
8cb33240Swren romano  for (uint64_t parentSz = 1, rank = getRank(), r = 0; r < rank; r++) {
8cb33240Swren romano    if (isCompressedDim(r)) {
8cb33240Swren romano      assert(parentSz == pointers[r].size() - 1 &&
8cb33240Swren romano             "Actual pointers size doesn't match the expected size");
8cb33240Swren romano      // Can't check all of them, but at least we can check the last one.
8cb33240Swren romano      assert(pointers[r][parentSz - 1] == pointers[r][parentSz] &&
8cb33240Swren romano             "Pointers got corrupted");
8cb33240Swren romano      // TODO: optimize this by using `memmove` or similar.
8cb33240Swren romano      for (uint64_t n = 0; n < parentSz; n++) {
8cb33240Swren romano        const uint64_t parentPos = parentSz - n;
8cb33240Swren romano        pointers[r][parentPos] = pointers[r][parentPos - 1];
8cb33240Swren romano      }
8cb33240Swren romano      pointers[r][0] = 0;
8cb33240Swren romano    }
8cb33240Swren romano    parentSz = assembledSize(parentSz, r);
8cb33240Swren romano  }
8cb33240Swren romano}
8cb33240Swren romano
/// Converts the NUL-terminated string `token` to lower case in place.
///
/// Returns `token` itself so that the call can be nested inside another
/// expression (e.g. as an argument to `strcmp`).
static char *toLower(char *token) {
  for (char *c = token; *c; c++) {
    // `tolower` requires its argument to be representable as `unsigned char`
    // (or to equal EOF).  Passing a plain `char` holding a negative value
    // (any non-ASCII byte where `char` is signed) is undefined behavior, so
    // convert through `unsigned char` first.
    *c = static_cast<char>(tolower(static_cast<unsigned char>(*c)));
  }
  return token;
}
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot/// Read the MME header of a general sparse matrix of type real.
03fe15ceSAart Bikstatic void readMMEHeader(FILE *file, char *filename, char *line,
33e8ab8eSAart Bik                          uint64_t *idata, bool *isPattern, bool *isSymmetric) {
8a91bc7bSHarrietAkot  char header[64];
8a91bc7bSHarrietAkot  char object[64];
8a91bc7bSHarrietAkot  char format[64];
8a91bc7bSHarrietAkot  char field[64];
8a91bc7bSHarrietAkot  char symmetry[64];
8a91bc7bSHarrietAkot  // Read header line.
8a91bc7bSHarrietAkot  if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field,
774674ceSwren romano             symmetry) != 5)
774674ceSwren romano    FATAL("Corrupt header in %s\n", filename);
33e8ab8eSAart Bik  // Set properties
33e8ab8eSAart Bik  *isPattern = (strcmp(toLower(field), "pattern") == 0);
bb56c2b3SMehdi Amini  *isSymmetric = (strcmp(toLower(symmetry), "symmetric") == 0);
8a91bc7bSHarrietAkot  // Make sure this is a general sparse matrix.
8a91bc7bSHarrietAkot  if (strcmp(toLower(header), "%%matrixmarket") ||
8a91bc7bSHarrietAkot      strcmp(toLower(object), "matrix") ||
33e8ab8eSAart Bik      strcmp(toLower(format), "coordinate") ||
33e8ab8eSAart Bik      (strcmp(toLower(field), "real") && !(*isPattern)) ||
774674ceSwren romano      (strcmp(toLower(symmetry), "general") && !(*isSymmetric)))
774674ceSwren romano    FATAL("Cannot find a general sparse matrix in %s\n", filename);
8a91bc7bSHarrietAkot  // Skip comments.
e5639b3fSMehdi Amini  while (true) {
774674ceSwren romano    if (!fgets(line, kColWidth, file))
774674ceSwren romano      FATAL("Cannot find data in %s\n", filename);
8a91bc7bSHarrietAkot    if (line[0] != '%')
8a91bc7bSHarrietAkot      break;
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot  // Next line contains M N NNZ.
8a91bc7bSHarrietAkot  idata[0] = 2; // rank
8a91bc7bSHarrietAkot  if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3,
774674ceSwren romano             idata + 1) != 3)
774674ceSwren romano    FATAL("Cannot find size in %s\n", filename);
8a91bc7bSHarrietAkot}
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot/// Read the "extended" FROSTT header. Although not part of the documented
8a91bc7bSHarrietAkot/// format, we assume that the file starts with optional comments followed
8a91bc7bSHarrietAkot/// by two lines that define the rank, the number of nonzeros, and the
8a91bc7bSHarrietAkot/// dimensions sizes (one per rank) of the sparse tensor.
03fe15ceSAart Bikstatic void readExtFROSTTHeader(FILE *file, char *filename, char *line,
03fe15ceSAart Bik                                uint64_t *idata) {
8a91bc7bSHarrietAkot  // Skip comments.
e5639b3fSMehdi Amini  while (true) {
774674ceSwren romano    if (!fgets(line, kColWidth, file))
774674ceSwren romano      FATAL("Cannot find data in %s\n", filename);
8a91bc7bSHarrietAkot    if (line[0] != '#')
8a91bc7bSHarrietAkot      break;
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot  // Next line contains RANK and NNZ.
774674ceSwren romano  if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2)
774674ceSwren romano    FATAL("Cannot find metadata in %s\n", filename);
8a91bc7bSHarrietAkot  // Followed by a line with the dimension sizes (one per rank).
774674ceSwren romano  for (uint64_t r = 0; r < idata[0]; r++)
774674ceSwren romano    if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1)
774674ceSwren romano      FATAL("Cannot find dimension size %s\n", filename);
03fe15ceSAart Bik  fgets(line, kColWidth, file); // end of line
8a91bc7bSHarrietAkot}
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot/// Reads a sparse tensor with the given filename into a memory-resident
8a91bc7bSHarrietAkot/// sparse tensor in coordinate scheme.
8a91bc7bSHarrietAkottemplate <typename V>
8a91bc7bSHarrietAkotstatic SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t rank,
d83a7068Swren romano                                               const uint64_t *shape,
8a91bc7bSHarrietAkot                                               const uint64_t *perm) {
8a91bc7bSHarrietAkot  // Open the file.
3734c078Swren romano  assert(filename && "Received nullptr for filename");
774674ceSwren romano  FILE *file = fopen(filename, "r");
774674ceSwren romano  if (!file)
774674ceSwren romano    FATAL("Cannot find file %s\n", filename);
8a91bc7bSHarrietAkot  // Perform some file format dependent set up.
03fe15ceSAart Bik  char line[kColWidth];
8a91bc7bSHarrietAkot  uint64_t idata[512];
33e8ab8eSAart Bik  bool isPattern = false;
bb56c2b3SMehdi Amini  bool isSymmetric = false;
8a91bc7bSHarrietAkot  if (strstr(filename, ".mtx")) {
33e8ab8eSAart Bik    readMMEHeader(file, filename, line, idata, &isPattern, &isSymmetric);
8a91bc7bSHarrietAkot  } else if (strstr(filename, ".tns")) {
03fe15ceSAart Bik    readExtFROSTTHeader(file, filename, line, idata);
8a91bc7bSHarrietAkot  } else {
774674ceSwren romano    FATAL("Unknown format %s\n", filename);
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot  // Prepare sparse tensor object with per-dimension sizes
8a91bc7bSHarrietAkot  // and the number of nonzeros as initial capacity.
8a91bc7bSHarrietAkot  assert(rank == idata[0] && "rank mismatch");
8a91bc7bSHarrietAkot  uint64_t nnz = idata[1];
8a91bc7bSHarrietAkot  for (uint64_t r = 0; r < rank; r++)
d83a7068Swren romano    assert((shape[r] == 0 || shape[r] == idata[2 + r]) &&
8a91bc7bSHarrietAkot           "dimension size mismatch");
8a91bc7bSHarrietAkot  SparseTensorCOO<V> *tensor =
8a91bc7bSHarrietAkot      SparseTensorCOO<V>::newSparseTensorCOO(rank, idata + 2, perm, nnz);
8a91bc7bSHarrietAkot  // Read all nonzero elements.
8a91bc7bSHarrietAkot  std::vector<uint64_t> indices(rank);
8a91bc7bSHarrietAkot  for (uint64_t k = 0; k < nnz; k++) {
774674ceSwren romano    if (!fgets(line, kColWidth, file))
774674ceSwren romano      FATAL("Cannot find next line of data in %s\n", filename);
03fe15ceSAart Bik    char *linePtr = line;
03fe15ceSAart Bik    for (uint64_t r = 0; r < rank; r++) {
03fe15ceSAart Bik      uint64_t idx = strtoul(linePtr, &linePtr, 10);
8a91bc7bSHarrietAkot      // Add 0-based index.
8a91bc7bSHarrietAkot      indices[perm[r]] = idx - 1;
8a91bc7bSHarrietAkot    }
8a91bc7bSHarrietAkot    // The external formats always store the numerical values with the type
8a91bc7bSHarrietAkot    // double, but we cast these values to the sparse tensor object type.
33e8ab8eSAart Bik    // For a pattern tensor, we arbitrarily pick the value 1 for all entries.
33e8ab8eSAart Bik    double value = isPattern ? 1.0 : strtod(linePtr, &linePtr);
8a91bc7bSHarrietAkot    tensor->add(indices, value);
02710413SBixia Zheng    // We currently chose to deal with symmetric matrices by fully constructing
02710413SBixia Zheng    // them. In the future, we may want to make symmetry implicit for storage
02710413SBixia Zheng    // reasons.
bb56c2b3SMehdi Amini    if (isSymmetric && indices[0] != indices[1])
02710413SBixia Zheng      tensor->add({indices[1], indices[0]}, value);
8a91bc7bSHarrietAkot  }
8a91bc7bSHarrietAkot  // Close the file and return tensor.
8a91bc7bSHarrietAkot  fclose(file);
8a91bc7bSHarrietAkot  return tensor;
8a91bc7bSHarrietAkot}
8a91bc7bSHarrietAkot
2046e11aSwren romano/// Writes the sparse tensor to `dest` in extended FROSTT format.
efa15f41SAart Biktemplate <typename V>
46bdacaaSwren romanostatic void outSparseTensor(void *tensor, void *dest, bool sort) {
6438783fSAart Bik  assert(tensor && dest);
6438783fSAart Bik  auto coo = static_cast<SparseTensorCOO<V> *>(tensor);
6438783fSAart Bik  if (sort)
6438783fSAart Bik    coo->sort();
6438783fSAart Bik  char *filename = static_cast<char *>(dest);
fa6aed2aSwren romano  auto &dimSizes = coo->getDimSizes();
6438783fSAart Bik  auto &elements = coo->getElements();
6438783fSAart Bik  uint64_t rank = coo->getRank();
efa15f41SAart Bik  uint64_t nnz = elements.size();
efa15f41SAart Bik  std::fstream file;
efa15f41SAart Bik  file.open(filename, std::ios_base::out | std::ios_base::trunc);
efa15f41SAart Bik  assert(file.is_open());
efa15f41SAart Bik  file << "; extended FROSTT format\n" << rank << " " << nnz << std::endl;
efa15f41SAart Bik  for (uint64_t r = 0; r < rank - 1; r++)
fa6aed2aSwren romano    file << dimSizes[r] << " ";
fa6aed2aSwren romano  file << dimSizes[rank - 1] << std::endl;
efa15f41SAart Bik  for (uint64_t i = 0; i < nnz; i++) {
efa15f41SAart Bik    auto &idx = elements[i].indices;
efa15f41SAart Bik    for (uint64_t r = 0; r < rank; r++)
efa15f41SAart Bik      file << (idx[r] + 1) << " ";
efa15f41SAart Bik    file << elements[i].value << std::endl;
efa15f41SAart Bik  }
efa15f41SAart Bik  file.flush();
efa15f41SAart Bik  file.close();
efa15f41SAart Bik  assert(file.good());
6438783fSAart Bik}
6438783fSAart Bik
6438783fSAart Bik/// Initializes sparse tensor from an external COO-flavored format.
6438783fSAart Biktemplate <typename V>
46bdacaaSwren romanostatic SparseTensorStorage<uint64_t, uint64_t, V> *
6438783fSAart BiktoMLIRSparseTensor(uint64_t rank, uint64_t nse, uint64_t *shape, V *values,
20eaa88fSBixia Zheng                   uint64_t *indices, uint64_t *perm, uint8_t *sparse) {
20eaa88fSBixia Zheng  const DimLevelType *sparsity = (DimLevelType *)(sparse);
20eaa88fSBixia Zheng#ifndef NDEBUG
20eaa88fSBixia Zheng  // Verify that perm is a permutation of 0..(rank-1).
20eaa88fSBixia Zheng  std::vector<uint64_t> order(perm, perm + rank);
20eaa88fSBixia Zheng  std::sort(order.begin(), order.end());
774674ceSwren romano  for (uint64_t i = 0; i < rank; ++i)
774674ceSwren romano    if (i != order[i])
774674ceSwren romano      FATAL("Not a permutation of 0..%" PRIu64 "\n", rank);
20eaa88fSBixia Zheng
20eaa88fSBixia Zheng  // Verify that the sparsity values are supported.
774674ceSwren romano  for (uint64_t i = 0; i < rank; ++i)
20eaa88fSBixia Zheng    if (sparsity[i] != DimLevelType::kDense &&
774674ceSwren romano        sparsity[i] != DimLevelType::kCompressed)
774674ceSwren romano      FATAL("Unsupported sparsity value %d\n", static_cast<int>(sparsity[i]));
20eaa88fSBixia Zheng#endif
20eaa88fSBixia Zheng
6438783fSAart Bik  // Convert external format to internal COO.
63bdcaf9Swren romano  auto *coo = SparseTensorCOO<V>::newSparseTensorCOO(rank, shape, perm, nse);
6438783fSAart Bik  std::vector<uint64_t> idx(rank);
6438783fSAart Bik  for (uint64_t i = 0, base = 0; i < nse; i++) {
6438783fSAart Bik    for (uint64_t r = 0; r < rank; r++)
d8b229a1SAart Bik      idx[perm[r]] = indices[base + r];
63bdcaf9Swren romano    coo->add(idx, values[i]);
6438783fSAart Bik    base += rank;
6438783fSAart Bik  }
6438783fSAart Bik  // Return sparse tensor storage format as opaque pointer.
63bdcaf9Swren romano  auto *tensor = SparseTensorStorage<uint64_t, uint64_t, V>::newSparseTensor(
63bdcaf9Swren romano      rank, shape, perm, sparsity, coo);
63bdcaf9Swren romano  delete coo;
63bdcaf9Swren romano  return tensor;
6438783fSAart Bik}
6438783fSAart Bik
6438783fSAart Bik/// Converts a sparse tensor to an external COO-flavored format.
6438783fSAart Biktemplate <typename V>
46bdacaaSwren romanostatic void fromMLIRSparseTensor(void *tensor, uint64_t *pRank, uint64_t *pNse,
46bdacaaSwren romano                                 uint64_t **pShape, V **pValues,
46bdacaaSwren romano                                 uint64_t **pIndices) {
736c1b66SAart Bik  assert(tensor);
6438783fSAart Bik  auto sparseTensor =
6438783fSAart Bik      static_cast<SparseTensorStorage<uint64_t, uint64_t, V> *>(tensor);
6438783fSAart Bik  uint64_t rank = sparseTensor->getRank();
6438783fSAart Bik  std::vector<uint64_t> perm(rank);
6438783fSAart Bik  std::iota(perm.begin(), perm.end(), 0);
6438783fSAart Bik  SparseTensorCOO<V> *coo = sparseTensor->toCOO(perm.data());
6438783fSAart Bik
6438783fSAart Bik  const std::vector<Element<V>> &elements = coo->getElements();
6438783fSAart Bik  uint64_t nse = elements.size();
6438783fSAart Bik
6438783fSAart Bik  uint64_t *shape = new uint64_t[rank];
6438783fSAart Bik  for (uint64_t i = 0; i < rank; i++)
fa6aed2aSwren romano    shape[i] = coo->getDimSizes()[i];
6438783fSAart Bik
6438783fSAart Bik  V *values = new V[nse];
6438783fSAart Bik  uint64_t *indices = new uint64_t[rank * nse];
6438783fSAart Bik
6438783fSAart Bik  for (uint64_t i = 0, base = 0; i < nse; i++) {
6438783fSAart Bik    values[i] = elements[i].value;
6438783fSAart Bik    for (uint64_t j = 0; j < rank; j++)
6438783fSAart Bik      indices[base + j] = elements[i].indices[j];
6438783fSAart Bik    base += rank;
6438783fSAart Bik  }
6438783fSAart Bik
6438783fSAart Bik  delete coo;
6438783fSAart Bik  *pRank = rank;
6438783fSAart Bik  *pNse = nse;
6438783fSAart Bik  *pShape = shape;
6438783fSAart Bik  *pValues = values;
6438783fSAart Bik  *pIndices = indices;
efa15f41SAart Bik}
efa15f41SAart Bik
2046e11aSwren romano} // anonymous namespace
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkotextern "C" {
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot//===----------------------------------------------------------------------===//
8a91bc7bSHarrietAkot//
2046e11aSwren romano// Public functions which operate on MLIR buffers (memrefs) to interact
2046e11aSwren romano// with sparse tensors (which are only visible as opaque pointers externally).
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot//===----------------------------------------------------------------------===//
8a91bc7bSHarrietAkot
// One dispatch case of `_mlir_ciface_newSparseTensor`.  Fires when the
// runtime type tags `ptrTp`/`indTp`/`valTp` equal `p`/`i`/`v`, and then
// performs `action` using the C++ types `P`/`I`/`V`:
//   - actions up to `kFromCOO` build storage from a COO tensor that is read
//     from the file named by `ptr` (`kFromFile`; the temporary COO tensor is
//     deleted afterwards), taken from `ptr` (`kFromCOO`; not deleted), or
//     left null (`kEmpty`);
//   - `kSparseToSparse` builds storage from the storage object in `ptr`;
//   - `kEmptyCOO` returns a fresh COO tensor;
//   - otherwise the storage object in `ptr` is converted to COO, and for
//     `kToIterator` its iterator is started as well.
// The macro relies on the enclosing function's `rank`, `shape`, `perm`,
// `sparsity`, `action` and `ptr`.
#define CASE(p, i, v, P, I, V)                                                 \
  if (ptrTp == (p) && indTp == (i) && valTp == (v)) {                          \
    SparseTensorCOO<V> *coo = nullptr;                                         \
    if (action <= Action::kFromCOO) {                                          \
      const bool fromFile = (action == Action::kFromFile);                     \
      if (fromFile)                                                            \
        coo = openSparseTensorCOO<V>(static_cast<char *>(ptr), rank, shape,    \
                                     perm);                                    \
      else if (action == Action::kFromCOO)                                     \
        coo = static_cast<SparseTensorCOO<V> *>(ptr);                          \
      else                                                                     \
        assert(action == Action::kEmpty);                                      \
      auto *tensor = SparseTensorStorage<P, I, V>::newSparseTensor(            \
          rank, shape, perm, sparsity, coo);                                   \
      if (fromFile)                                                            \
        delete coo;                                                            \
      return tensor;                                                           \
    }                                                                          \
    if (action == Action::kSparseToSparse)                                     \
      return SparseTensorStorage<P, I, V>::newSparseTensor(                    \
          rank, shape, perm, sparsity,                                         \
          static_cast<SparseTensorStorageBase *>(ptr));                        \
    if (action == Action::kEmptyCOO)                                           \
      return SparseTensorCOO<V>::newSparseTensorCOO(rank, shape, perm);        \
    coo = static_cast<SparseTensorStorage<P, I, V> *>(ptr)->toCOO(perm);       \
    if (action == Action::kToIterator)                                         \
      coo->startIterator();                                                    \
    else                                                                       \
      assert(action == Action::kToCOO);                                        \
    return coo;                                                                \
  }

// Shorthand for a case whose pointer and index overhead types are the same.
#define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V)
4f2ec7f9SAart Bik
d2215e79SRainer Orth// Assume index_type is in fact uint64_t, so that _mlir_ciface_newSparseTensor
bc04a470Swren romano// can safely rewrite kIndex to kU64.  We make this assertion to guarantee
bc04a470Swren romano// that this file cannot get out of sync with its header.
d2215e79SRainer Orthstatic_assert(std::is_same<index_type, uint64_t>::value,
d2215e79SRainer Orth              "Expected index_type == uint64_t");
bc04a470Swren romano
8a91bc7bSHarrietAkotvoid *
845561ecSwren romano_mlir_ciface_newSparseTensor(StridedMemRefType<DimLevelType, 1> *aref, // NOLINT
d2215e79SRainer Orth                             StridedMemRefType<index_type, 1> *sref,
d2215e79SRainer Orth                             StridedMemRefType<index_type, 1> *pref,
845561ecSwren romano                             OverheadType ptrTp, OverheadType indTp,
845561ecSwren romano                             PrimaryType valTp, Action action, void *ptr) {
8a91bc7bSHarrietAkot  assert(aref && sref && pref);
8a91bc7bSHarrietAkot  assert(aref->strides[0] == 1 && sref->strides[0] == 1 &&
8a91bc7bSHarrietAkot         pref->strides[0] == 1);
8a91bc7bSHarrietAkot  assert(aref->sizes[0] == sref->sizes[0] && sref->sizes[0] == pref->sizes[0]);
845561ecSwren romano  const DimLevelType *sparsity = aref->data + aref->offset;
d83a7068Swren romano  const index_type *shape = sref->data + sref->offset;
d2215e79SRainer Orth  const index_type *perm = pref->data + pref->offset;
8a91bc7bSHarrietAkot  uint64_t rank = aref->sizes[0];
8a91bc7bSHarrietAkot
bc04a470Swren romano  // Rewrite kIndex to kU64, to avoid introducing a bunch of new cases.
bc04a470Swren romano  // This is safe because of the static_assert above.
bc04a470Swren romano  if (ptrTp == OverheadType::kIndex)
bc04a470Swren romano    ptrTp = OverheadType::kU64;
bc04a470Swren romano  if (indTp == OverheadType::kIndex)
bc04a470Swren romano    indTp = OverheadType::kU64;
bc04a470Swren romano
8a91bc7bSHarrietAkot  // Double matrices with all combinations of overhead storage.
845561ecSwren romano  CASE(OverheadType::kU64, OverheadType::kU64, PrimaryType::kF64, uint64_t,
845561ecSwren romano       uint64_t, double);
845561ecSwren romano  CASE(OverheadType::kU64, OverheadType::kU32, PrimaryType::kF64, uint64_t,
845561ecSwren romano       uint32_t, double);
845561ecSwren romano  CASE(OverheadType::kU64, OverheadType::kU16, PrimaryType::kF64, uint64_t,
845561ecSwren romano       uint16_t, double);
845561ecSwren romano  CASE(OverheadType::kU64, OverheadType::kU8, PrimaryType::kF64, uint64_t,
845561ecSwren romano       uint8_t, double);
845561ecSwren romano  CASE(OverheadType::kU32, OverheadType::kU64, PrimaryType::kF64, uint32_t,
845561ecSwren romano       uint64_t, double);
845561ecSwren romano  CASE(OverheadType::kU32, OverheadType::kU32, PrimaryType::kF64, uint32_t,
845561ecSwren romano       uint32_t, double);
845561ecSwren romano  CASE(OverheadType::kU32, OverheadType::kU16, PrimaryType::kF64, uint32_t,
845561ecSwren romano       uint16_t, double);
845561ecSwren romano  CASE(OverheadType::kU32, OverheadType::kU8, PrimaryType::kF64, uint32_t,
845561ecSwren romano       uint8_t, double);
845561ecSwren romano  CASE(OverheadType::kU16, OverheadType::kU64, PrimaryType::kF64, uint16_t,
845561ecSwren romano       uint64_t, double);
845561ecSwren romano  CASE(OverheadType::kU16, OverheadType::kU32, PrimaryType::kF64, uint16_t,
845561ecSwren romano       uint32_t, double);
845561ecSwren romano  CASE(OverheadType::kU16, OverheadType::kU16, PrimaryType::kF64, uint16_t,
845561ecSwren romano       uint16_t, double);
845561ecSwren romano  CASE(OverheadType::kU16, OverheadType::kU8, PrimaryType::kF64, uint16_t,
845561ecSwren romano       uint8_t, double);
845561ecSwren romano  CASE(OverheadType::kU8, OverheadType::kU64, PrimaryType::kF64, uint8_t,
845561ecSwren romano       uint64_t, double);
845561ecSwren romano  CASE(OverheadType::kU8, OverheadType::kU32, PrimaryType::kF64, uint8_t,
845561ecSwren romano       uint32_t, double);
845561ecSwren romano  CASE(OverheadType::kU8, OverheadType::kU16, PrimaryType::kF64, uint8_t,
845561ecSwren romano       uint16_t, double);
845561ecSwren romano  CASE(OverheadType::kU8, OverheadType::kU8, PrimaryType::kF64, uint8_t,
845561ecSwren romano       uint8_t, double);
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot  // Float matrices with all combinations of overhead storage.
845561ecSwren romano  CASE(OverheadType::kU64, OverheadType::kU64, PrimaryType::kF32, uint64_t,
845561ecSwren romano       uint64_t, float);
845561ecSwren romano  CASE(OverheadType::kU64, OverheadType::kU32, PrimaryType::kF32, uint64_t,
845561ecSwren romano       uint32_t, float);
845561ecSwren romano  CASE(OverheadType::kU64, OverheadType::kU16, PrimaryType::kF32, uint64_t,
845561ecSwren romano       uint16_t, float);
845561ecSwren romano  CASE(OverheadType::kU64, OverheadType::kU8, PrimaryType::kF32, uint64_t,
845561ecSwren romano       uint8_t, float);
845561ecSwren romano  CASE(OverheadType::kU32, OverheadType::kU64, PrimaryType::kF32, uint32_t,
845561ecSwren romano       uint64_t, float);
845561ecSwren romano  CASE(OverheadType::kU32, OverheadType::kU32, PrimaryType::kF32, uint32_t,
845561ecSwren romano       uint32_t, float);
845561ecSwren romano  CASE(OverheadType::kU32, OverheadType::kU16, PrimaryType::kF32, uint32_t,
845561ecSwren romano       uint16_t, float);
845561ecSwren romano  CASE(OverheadType::kU32, OverheadType::kU8, PrimaryType::kF32, uint32_t,
845561ecSwren romano       uint8_t, float);
845561ecSwren romano  CASE(OverheadType::kU16, OverheadType::kU64, PrimaryType::kF32, uint16_t,
845561ecSwren romano       uint64_t, float);
845561ecSwren romano  CASE(OverheadType::kU16, OverheadType::kU32, PrimaryType::kF32, uint16_t,
845561ecSwren romano       uint32_t, float);
845561ecSwren romano  CASE(OverheadType::kU16, OverheadType::kU16, PrimaryType::kF32, uint16_t,
845561ecSwren romano       uint16_t, float);
845561ecSwren romano  CASE(OverheadType::kU16, OverheadType::kU8, PrimaryType::kF32, uint16_t,
845561ecSwren romano       uint8_t, float);
845561ecSwren romano  CASE(OverheadType::kU8, OverheadType::kU64, PrimaryType::kF32, uint8_t,
845561ecSwren romano       uint64_t, float);
845561ecSwren romano  CASE(OverheadType::kU8, OverheadType::kU32, PrimaryType::kF32, uint8_t,
845561ecSwren romano       uint32_t, float);
845561ecSwren romano  CASE(OverheadType::kU8, OverheadType::kU16, PrimaryType::kF32, uint8_t,
845561ecSwren romano       uint16_t, float);
845561ecSwren romano  CASE(OverheadType::kU8, OverheadType::kU8, PrimaryType::kF32, uint8_t,
845561ecSwren romano       uint8_t, float);
8a91bc7bSHarrietAkot
845561ecSwren romano  // Integral matrices with both overheads of the same type.
845561ecSwren romano  CASE_SECSAME(OverheadType::kU64, PrimaryType::kI64, uint64_t, int64_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU64, PrimaryType::kI32, uint64_t, int32_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU64, PrimaryType::kI16, uint64_t, int16_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU64, PrimaryType::kI8, uint64_t, int8_t);
2046e11aSwren romano  CASE_SECSAME(OverheadType::kU32, PrimaryType::kI64, uint32_t, int64_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU32, PrimaryType::kI32, uint32_t, int32_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU32, PrimaryType::kI16, uint32_t, int16_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU32, PrimaryType::kI8, uint32_t, int8_t);
2046e11aSwren romano  CASE_SECSAME(OverheadType::kU16, PrimaryType::kI64, uint16_t, int64_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU16, PrimaryType::kI32, uint16_t, int32_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU16, PrimaryType::kI16, uint16_t, int16_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU16, PrimaryType::kI8, uint16_t, int8_t);
2046e11aSwren romano  CASE_SECSAME(OverheadType::kU8, PrimaryType::kI64, uint8_t, int64_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU8, PrimaryType::kI32, uint8_t, int32_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU8, PrimaryType::kI16, uint8_t, int16_t);
845561ecSwren romano  CASE_SECSAME(OverheadType::kU8, PrimaryType::kI8, uint8_t, int8_t);
8a91bc7bSHarrietAkot
736c1b66SAart Bik  // Complex matrices with wide overhead.
736c1b66SAart Bik  CASE_SECSAME(OverheadType::kU64, PrimaryType::kC64, uint64_t, complex64);
736c1b66SAart Bik  CASE_SECSAME(OverheadType::kU64, PrimaryType::kC32, uint64_t, complex32);
736c1b66SAart Bik
8a91bc7bSHarrietAkot  // Unsupported case (add above if needed).
774674ceSwren romano  // TODO: better pretty-printing of enum values!
774674ceSwren romano  FATAL("unsupported combination of types: <P=%d, I=%d, V=%d>\n",
774674ceSwren romano        static_cast<int>(ptrTp), static_cast<int>(indTp),
774674ceSwren romano        static_cast<int>(valTp));
8a91bc7bSHarrietAkot}
8a91bc7bSHarrietAkot#undef CASE
1313f5d3Swren romano#undef CASE_SECSAME
6438783fSAart Bik
bfadd13dSwren romano#define IMPL_SPARSEVALUES(VNAME, V)                                            \
bfadd13dSwren romano  void _mlir_ciface_sparseValues##VNAME(StridedMemRefType<V, 1> *ref,          \
bfadd13dSwren romano                                        void *tensor) {                        \
bfadd13dSwren romano    assert(ref &&tensor);                                                      \
bfadd13dSwren romano    std::vector<V> *v;                                                         \
bfadd13dSwren romano    static_cast<SparseTensorStorageBase *>(tensor)->getValues(&v);             \
bfadd13dSwren romano    ref->basePtr = ref->data = v->data();                                      \
bfadd13dSwren romano    ref->offset = 0;                                                           \
bfadd13dSwren romano    ref->sizes[0] = v->size();                                                 \
bfadd13dSwren romano    ref->strides[0] = 1;                                                       \
bfadd13dSwren romano  }
bfadd13dSwren romanoFOREVERY_V(IMPL_SPARSEVALUES)
bfadd13dSwren romano#undef IMPL_SPARSEVALUES
bfadd13dSwren romano
bfadd13dSwren romano#define IMPL_GETOVERHEAD(NAME, TYPE, LIB)                                      \
bfadd13dSwren romano  void _mlir_ciface_##NAME(StridedMemRefType<TYPE, 1> *ref, void *tensor,      \
bfadd13dSwren romano                           index_type d) {                                     \
bfadd13dSwren romano    assert(ref &&tensor);                                                      \
bfadd13dSwren romano    std::vector<TYPE> *v;                                                      \
bfadd13dSwren romano    static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v, d);                \
bfadd13dSwren romano    ref->basePtr = ref->data = v->data();                                      \
bfadd13dSwren romano    ref->offset = 0;                                                           \
bfadd13dSwren romano    ref->sizes[0] = v->size();                                                 \
bfadd13dSwren romano    ref->strides[0] = 1;                                                       \
bfadd13dSwren romano  }
a9a19f59Swren romano#define IMPL_SPARSEPOINTERS(PNAME, P)                                          \
a9a19f59Swren romano  IMPL_GETOVERHEAD(sparsePointers##PNAME, P, getPointers)
a9a19f59Swren romanoFOREVERY_O(IMPL_SPARSEPOINTERS)
a9a19f59Swren romano#undef IMPL_SPARSEPOINTERS
bfadd13dSwren romano
a9a19f59Swren romano#define IMPL_SPARSEINDICES(INAME, I)                                           \
a9a19f59Swren romano  IMPL_GETOVERHEAD(sparseIndices##INAME, I, getIndices)
a9a19f59Swren romanoFOREVERY_O(IMPL_SPARSEINDICES)
a9a19f59Swren romano#undef IMPL_SPARSEINDICES
bfadd13dSwren romano#undef IMPL_GETOVERHEAD
bfadd13dSwren romano
bfadd13dSwren romano#define IMPL_ADDELT(VNAME, V)                                                  \
bfadd13dSwren romano  void *_mlir_ciface_addElt##VNAME(void *coo, V value,                         \
bfadd13dSwren romano                                   StridedMemRefType<index_type, 1> *iref,     \
bfadd13dSwren romano                                   StridedMemRefType<index_type, 1> *pref) {   \
bfadd13dSwren romano    assert(coo &&iref &&pref);                                                 \
bfadd13dSwren romano    assert(iref->strides[0] == 1 && pref->strides[0] == 1);                    \
bfadd13dSwren romano    assert(iref->sizes[0] == pref->sizes[0]);                                  \
bfadd13dSwren romano    const index_type *indx = iref->data + iref->offset;                        \
bfadd13dSwren romano    const index_type *perm = pref->data + pref->offset;                        \
bfadd13dSwren romano    uint64_t isize = iref->sizes[0];                                           \
bfadd13dSwren romano    std::vector<index_type> indices(isize);                                    \
bfadd13dSwren romano    for (uint64_t r = 0; r < isize; r++)                                       \
bfadd13dSwren romano      indices[perm[r]] = indx[r];                                              \
bfadd13dSwren romano    static_cast<SparseTensorCOO<V> *>(coo)->add(indices, value);               \
bfadd13dSwren romano    return coo;                                                                \
bfadd13dSwren romano  }
bfadd13dSwren romanoFOREVERY_SIMPLEX_V(IMPL_ADDELT)
bfadd13dSwren romanoIMPL_ADDELT(C64, complex64)
2046e11aSwren romano// Marked static because it's not part of the public API.
*0fbe3f3fSwren romano// NOTE: the `static` keyword confuses clang-format here, causing
*0fbe3f3fSwren romano// the strange indentation of the `_mlir_ciface_addEltC32` prototype.
*0fbe3f3fSwren romano// In C++11 we can add a semicolon after the call to `IMPL_ADDELT`
*0fbe3f3fSwren romano// and that will correct clang-format.  Alas, this file is compiled
*0fbe3f3fSwren romano// in C++98 mode where that semicolon is illegal (and there's no portable
*0fbe3f3fSwren romano// macro magic to license a no-op semicolon at the top level).
*0fbe3f3fSwren romanostatic IMPL_ADDELT(C32ABI, complex32)
2046e11aSwren romano#undef IMPL_ADDELT
bfadd13dSwren romano    void *_mlir_ciface_addEltC32(void *coo, float r, float i,
bfadd13dSwren romano                                 StridedMemRefType<index_type, 1> *iref,
bfadd13dSwren romano                                 StridedMemRefType<index_type, 1> *pref) {
bfadd13dSwren romano  return _mlir_ciface_addEltC32ABI(coo, complex32(r, i), iref, pref);
bfadd13dSwren romano}
bfadd13dSwren romano
bfadd13dSwren romano#define IMPL_GETNEXT(VNAME, V)                                                 \
bfadd13dSwren romano  bool _mlir_ciface_getNext##VNAME(void *coo,                                  \
bfadd13dSwren romano                                   StridedMemRefType<index_type, 1> *iref,     \
bfadd13dSwren romano                                   StridedMemRefType<V, 0> *vref) {            \
bfadd13dSwren romano    assert(coo &&iref &&vref);                                                 \
bfadd13dSwren romano    assert(iref->strides[0] == 1);                                             \
bfadd13dSwren romano    index_type *indx = iref->data + iref->offset;                              \
bfadd13dSwren romano    V *value = vref->data + vref->offset;                                      \
bfadd13dSwren romano    const uint64_t isize = iref->sizes[0];                                     \
bfadd13dSwren romano    const Element<V> *elem =                                                   \
bfadd13dSwren romano        static_cast<SparseTensorCOO<V> *>(coo)->getNext();                     \
bfadd13dSwren romano    if (elem == nullptr)                                                       \
bfadd13dSwren romano      return false;                                                            \
bfadd13dSwren romano    for (uint64_t r = 0; r < isize; r++)                                       \
bfadd13dSwren romano      indx[r] = elem->indices[r];                                              \
bfadd13dSwren romano    *value = elem->value;                                                      \
bfadd13dSwren romano    return true;                                                               \
bfadd13dSwren romano  }
bfadd13dSwren romanoFOREVERY_V(IMPL_GETNEXT)
bfadd13dSwren romano#undef IMPL_GETNEXT
bfadd13dSwren romano
bfadd13dSwren romano#define IMPL_LEXINSERT(VNAME, V)                                               \
bfadd13dSwren romano  void _mlir_ciface_lexInsert##VNAME(                                          \
bfadd13dSwren romano      void *tensor, StridedMemRefType<index_type, 1> *cref, V val) {           \
bfadd13dSwren romano    assert(tensor &&cref);                                                     \
bfadd13dSwren romano    assert(cref->strides[0] == 1);                                             \
bfadd13dSwren romano    index_type *cursor = cref->data + cref->offset;                            \
bfadd13dSwren romano    assert(cursor);                                                            \
bfadd13dSwren romano    static_cast<SparseTensorStorageBase *>(tensor)->lexInsert(cursor, val);    \
bfadd13dSwren romano  }
bfadd13dSwren romanoFOREVERY_SIMPLEX_V(IMPL_LEXINSERT)
bfadd13dSwren romanoIMPL_LEXINSERT(C64, complex64)
2046e11aSwren romano// Marked static because it's not part of the public API.
*0fbe3f3fSwren romano// NOTE: see the note for `_mlir_ciface_addEltC32ABI`
*0fbe3f3fSwren romanostatic IMPL_LEXINSERT(C32ABI, complex32)
2046e11aSwren romano#undef IMPL_LEXINSERT
bfadd13dSwren romano    void _mlir_ciface_lexInsertC32(void *tensor,
*0fbe3f3fSwren romano                                   StridedMemRefType<index_type, 1> *cref,
*0fbe3f3fSwren romano                                   float r, float i) {
bfadd13dSwren romano  _mlir_ciface_lexInsertC32ABI(tensor, cref, complex32(r, i));
bfadd13dSwren romano}
bfadd13dSwren romano
bfadd13dSwren romano#define IMPL_EXPINSERT(VNAME, V)                                               \
bfadd13dSwren romano  void _mlir_ciface_expInsert##VNAME(                                          \
bfadd13dSwren romano      void *tensor, StridedMemRefType<index_type, 1> *cref,                    \
bfadd13dSwren romano      StridedMemRefType<V, 1> *vref, StridedMemRefType<bool, 1> *fref,         \
bfadd13dSwren romano      StridedMemRefType<index_type, 1> *aref, index_type count) {              \
bfadd13dSwren romano    assert(tensor &&cref &&vref &&fref &&aref);                                \
bfadd13dSwren romano    assert(cref->strides[0] == 1);                                             \
bfadd13dSwren romano    assert(vref->strides[0] == 1);                                             \
bfadd13dSwren romano    assert(fref->strides[0] == 1);                                             \
bfadd13dSwren romano    assert(aref->strides[0] == 1);                                             \
bfadd13dSwren romano    assert(vref->sizes[0] == fref->sizes[0]);                                  \
bfadd13dSwren romano    index_type *cursor = cref->data + cref->offset;                            \
bfadd13dSwren romano    V *values = vref->data + vref->offset;                                     \
bfadd13dSwren romano    bool *filled = fref->data + fref->offset;                                  \
bfadd13dSwren romano    index_type *added = aref->data + aref->offset;                             \
bfadd13dSwren romano    static_cast<SparseTensorStorageBase *>(tensor)->expInsert(                 \
bfadd13dSwren romano        cursor, values, filled, added, count);                                 \
bfadd13dSwren romano  }
bfadd13dSwren romanoFOREVERY_V(IMPL_EXPINSERT)
bfadd13dSwren romano#undef IMPL_EXPINSERT
bfadd13dSwren romano
8a91bc7bSHarrietAkot//===----------------------------------------------------------------------===//
8a91bc7bSHarrietAkot//
2046e11aSwren romano// Public functions which accept only C-style data structures to interact
2046e11aSwren romano// with sparse tensors (which are only visible as opaque pointers externally).
8a91bc7bSHarrietAkot//
8a91bc7bSHarrietAkot//===----------------------------------------------------------------------===//
8a91bc7bSHarrietAkot
d2215e79SRainer Orthindex_type sparseDimSize(void *tensor, index_type d) {
8a91bc7bSHarrietAkot  return static_cast<SparseTensorStorageBase *>(tensor)->getDimSize(d);
8a91bc7bSHarrietAkot}
8a91bc7bSHarrietAkot
f66e5769SAart Bikvoid endInsert(void *tensor) {
f66e5769SAart Bik  return static_cast<SparseTensorStorageBase *>(tensor)->endInsert();
f66e5769SAart Bik}
f66e5769SAart Bik
05c17bc4Swren romano#define IMPL_OUTSPARSETENSOR(VNAME, V)                                         \
05c17bc4Swren romano  void outSparseTensor##VNAME(void *coo, void *dest, bool sort) {              \
05c17bc4Swren romano    return outSparseTensor<V>(coo, dest, sort);                                \
05c17bc4Swren romano  }
05c17bc4Swren romanoFOREVERY_V(IMPL_OUTSPARSETENSOR)
05c17bc4Swren romano#undef IMPL_OUTSPARSETENSOR
05c17bc4Swren romano
8a91bc7bSHarrietAkotvoid delSparseTensor(void *tensor) {
8a91bc7bSHarrietAkot  delete static_cast<SparseTensorStorageBase *>(tensor);
8a91bc7bSHarrietAkot}
8a91bc7bSHarrietAkot
63bdcaf9Swren romano#define IMPL_DELCOO(VNAME, V)                                                  \
63bdcaf9Swren romano  void delSparseTensorCOO##VNAME(void *coo) {                                  \
63bdcaf9Swren romano    delete static_cast<SparseTensorCOO<V> *>(coo);                             \
63bdcaf9Swren romano  }
1313f5d3Swren romanoFOREVERY_V(IMPL_DELCOO)
63bdcaf9Swren romano#undef IMPL_DELCOO
63bdcaf9Swren romano
05c17bc4Swren romanochar *getTensorFilename(index_type id) {
05c17bc4Swren romano  char var[80];
05c17bc4Swren romano  sprintf(var, "TENSOR%" PRIu64, id);
05c17bc4Swren romano  char *env = getenv(var);
05c17bc4Swren romano  if (!env)
05c17bc4Swren romano    FATAL("Environment variable %s is not set\n", var);
05c17bc4Swren romano  return env;
05c17bc4Swren romano}
05c17bc4Swren romano
20eaa88fSBixia Zheng// TODO: generalize beyond 64-bit indices.
1313f5d3Swren romano#define IMPL_CONVERTTOMLIRSPARSETENSOR(VNAME, V)                               \
1313f5d3Swren romano  void *convertToMLIRSparseTensor##VNAME(                                      \
1313f5d3Swren romano      uint64_t rank, uint64_t nse, uint64_t *shape, V *values,                 \
1313f5d3Swren romano      uint64_t *indices, uint64_t *perm, uint8_t *sparse) {                    \
1313f5d3Swren romano    return toMLIRSparseTensor<V>(rank, nse, shape, values, indices, perm,      \
1313f5d3Swren romano                                 sparse);                                      \
8a91bc7bSHarrietAkot  }
1313f5d3Swren romanoFOREVERY_V(IMPL_CONVERTTOMLIRSPARSETENSOR)
1313f5d3Swren romano#undef IMPL_CONVERTTOMLIRSPARSETENSOR
8a91bc7bSHarrietAkot
2f49e6b0SBixia Zheng// TODO: Currently, values are copied from SparseTensorStorage to
2046e11aSwren romano// SparseTensorCOO, then to the output.  We may want to reduce the number
2046e11aSwren romano// of copies.
2f49e6b0SBixia Zheng//
6438783fSAart Bik// TODO: generalize beyond 64-bit indices, no dim ordering, all dimensions
6438783fSAart Bik// compressed
1313f5d3Swren romano#define IMPL_CONVERTFROMMLIRSPARSETENSOR(VNAME, V)                             \
1313f5d3Swren romano  void convertFromMLIRSparseTensor##VNAME(void *tensor, uint64_t *pRank,       \
1313f5d3Swren romano                                          uint64_t *pNse, uint64_t **pShape,   \
1313f5d3Swren romano                                          V **pValues, uint64_t **pIndices) {  \
1313f5d3Swren romano    fromMLIRSparseTensor<V>(tensor, pRank, pNse, pShape, pValues, pIndices);   \
2f49e6b0SBixia Zheng  }
1313f5d3Swren romanoFOREVERY_V(IMPL_CONVERTFROMMLIRSPARSETENSOR)
1313f5d3Swren romano#undef IMPL_CONVERTFROMMLIRSPARSETENSOR
efa15f41SAart Bik
8a91bc7bSHarrietAkot} // extern "C"
8a91bc7bSHarrietAkot
8a91bc7bSHarrietAkot#endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS