1#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
2#  See https://llvm.org/LICENSE.txt for license information.
3#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
5# This file contains the utilities to process sparse tensor outputs.
6
7from typing import Callable, Dict, Sequence, Tuple
8import ctypes
9import functools
10import numpy as np
11import os
12
13# Import MLIR related modules.
14from mlir import execution_engine
15from mlir import ir
16from mlir import runtime
17from mlir.dialects import sparse_tensor
18
19from . import mlir_sparse_compiler
20
# Type aliases for type annotation.
# A supporting routine obtained from the C shared library. It is called with
# routine-specific arguments, hence the unconstrained parameter list.
_SupportFunc = Callable[..., None]
# A function that maps a numpy data type to the pair of supporting routines
# recorded for that type, in (convert-to, convert-from) order.
_SupportFuncLocator = Callable[[np.dtype], Tuple[_SupportFunc, _SupportFunc]]

# The name for the environment variable that provides the full path for the
# supporting library.
_SUPPORTLIB_ENV_VAR = "SUPPORTLIB"
# The default supporting library if the environment variable is not provided.
_DEFAULT_SUPPORTLIB = "libmlir_c_runner_utils.so"

# The JIT compiler optimization level.
_OPT_LEVEL = 2
# The entry point to the JIT compiled program. The generated MLIR kernels use
# this as their function name and the execution engine is invoked with it.
_ENTRY_NAME = "main"
35
36
@functools.lru_cache()
def _get_support_lib_name() -> str:
  """Returns the name of the supporting C shared library.

  The value of the environment variable named by _SUPPORTLIB_ENV_VAR is used
  when that variable is set; otherwise _DEFAULT_SUPPORTLIB is returned. The
  result is cached, so the environment is only consulted on the first call.
  """
  lib_name = os.getenv(_SUPPORTLIB_ENV_VAR)
  if lib_name is None:
    return _DEFAULT_SUPPORTLIB
  return lib_name
41
42
@functools.lru_cache()
def _get_sparse_compiler() -> mlir_sparse_compiler.SparseCompiler:
  """Returns the shared MLIR sparse compiler built with the default setting.

  The compiler is created with empty options, the module-level optimization
  level and the supporting C shared library as its only shared library. The
  instance is cached, so every caller receives the same compiler object.
  """
  support_libs = [_get_support_lib_name()]
  compiler = mlir_sparse_compiler.SparseCompiler(
      options="", opt_level=_OPT_LEVEL, shared_libs=support_libs)
  return compiler
48
49
def _record_support_funcs(
    ty: np.dtype, to_func: _SupportFunc, from_func: _SupportFunc,
    ty_to_funcs: Dict[np.dtype, Tuple[_SupportFunc, _SupportFunc]]) -> None:
  """Registers the pair of supporting functions for one data type.

  Both functions are marked as returning a C void pointer, and the pair is
  then stored in the given dictionary under the data type.

  Args:
    ty: The data type used as the dictionary key.
    to_func: The routine that converts to an MLIR sparse tensor.
    from_func: The routine that converts from an MLIR sparse tensor.
    ty_to_funcs: The dictionary to update in place with the
      (to_func, from_func) tuple.
  """
  funcs = (to_func, from_func)
  for func in funcs:
    # Declare the result type so that ctypes treats the return value as a
    # pointer rather than applying its default result conversion.
    func.restype = ctypes.c_void_p
  ty_to_funcs[ty] = funcs
57
58
@functools.lru_cache()
def _get_support_func_locator() -> _SupportFuncLocator:
  """Constructs a function to locate the supporting functions for a data type.

  Loads the supporting C shared library with the needed routines. Constructs a
  dictionary from the supported data types to the routines for the data types,
  and then a function to look up the dictionary for a given data type.

  The name of the supporting C shared library is either provided by an
  environment variable or a default value.

  Returns:
    The function to look up the supporting functions for a given data type.
    It returns a (convert-to, convert-from) tuple and raises KeyError when
    the data type is not one of the supported types.

  Raises:
    OSError: If there is any problem in loading the shared library.
    ValueError: If the shared library doesn't contain the needed routines.
  """
  # This raises OSError exception if there is any problem in loading the shared
  # library.
  c_lib = ctypes.CDLL(_get_support_lib_name())

  # Each supported numpy type and the suffix shared by the names of its two
  # conversion routines in the shared library. Note that the complex suffixes
  # differ from the numpy names: complex64 uses C32 and complex128 uses C64.
  type_suffixes = (
      (np.int8, "I8"),
      (np.int16, "I16"),
      (np.int32, "I32"),
      (np.int64, "I64"),
      (np.float16, "F16"),
      (np.float32, "F32"),
      (np.float64, "F64"),
      (np.complex64, "C32"),
      (np.complex128, "C64"),
  )

  # Resolve every routine before recording any of them, so a library that
  # lacks a routine is reported without a partially configured table.
  support_types = []
  try:
    for ty, suffix in type_suffixes:
      support_types.append(
          (ty, getattr(c_lib, f"convertToMLIRSparseTensor{suffix}"),
           getattr(c_lib, f"convertFromMLIRSparseTensor{suffix}")))
  except AttributeError as e:
    # ctypes reports a symbol that is absent from the library as an
    # AttributeError on the library object.
    raise ValueError(f"Missing supporting function: {e}") from e

  type_to_funcs = {}
  for ty, to_func, from_func in support_types:
    _record_support_funcs(ty, to_func, from_func, type_to_funcs)

  def get_support_funcs(ty: np.dtype):
    # A data type that is not in the table raises KeyError.
    return type_to_funcs[ty]

  return get_support_funcs
112
113
def sparse_tensor_to_coo_tensor(
    sparse_tensor: ctypes.c_void_p,
    dtype: np.dtype,
) -> Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]:
  """Converts an MLIR sparse tensor to a COO-flavored format tensor.

  Args:
     sparse_tensor: A ctypes.c_void_p to the MLIR sparse tensor descriptor.
     dtype: The numpy data type for the tensor elements. Either a numpy scalar
       type (such as np.float64) or an equivalent np.dtype instance is
       accepted.

  Returns:
    A tuple that contains the following values for the COO-flavored format
    tensor:
    rank: An integer for the rank of the tensor.
    nse: An integer for the number of non-zero values in the tensor.
    shape: A 1D numpy array of integers, for the shape of the tensor.
    values: A 1D numpy array, for the non-zero values in the tensor.
    indices: A 2D numpy array of integers, representing the indices for the
      non-zero values in the tensor.

  Raises:
    OSError: If there is any problem in loading the shared library.
    ValueError: If the shared library doesn't contain the needed routines.
    KeyError: If the data type is not one of the supported element types.
  """
  # The support function table is keyed by numpy scalar types. An np.dtype
  # instance compares equal to its scalar type but does not find it in a
  # dictionary, so normalize to the scalar type before the lookup.
  np_dtype = np.dtype(dtype)
  convert_from = _get_support_func_locator()(np_dtype.type)[1]

  # Output slots that the conversion routine fills in through pointers.
  rank = ctypes.c_ulonglong(0)
  nse = ctypes.c_ulonglong(0)
  shape_ptr = ctypes.POINTER(ctypes.c_ulonglong)()
  values_ptr = ctypes.POINTER(runtime.as_ctype(np_dtype))()
  indices_ptr = ctypes.POINTER(ctypes.c_ulonglong)()
  convert_from(sparse_tensor, ctypes.byref(rank), ctypes.byref(nse),
               ctypes.byref(shape_ptr), ctypes.byref(values_ptr),
               ctypes.byref(indices_ptr))

  # Convert the returned values to the corresponding numpy types.
  # NOTE(review): these arrays are built directly on the buffers returned by
  # the supporting library; who owns and frees those buffers is not visible
  # here -- confirm against the library.
  shape = np.ctypeslib.as_array(shape_ptr, shape=[rank.value])
  values = runtime.to_numpy(
      np.ctypeslib.as_array(values_ptr, shape=[nse.value]))
  indices = np.ctypeslib.as_array(indices_ptr, shape=[nse.value, rank.value])
  return rank.value, nse.value, shape, values, indices
153
154
def coo_tensor_to_sparse_tensor(np_shape: np.ndarray, np_values: np.ndarray,
                                np_indices: np.ndarray, np_perm: np.ndarray,
                                np_sparse: np.ndarray) -> int:
  """Converts a COO-flavored format sparse tensor to an MLIR sparse tensor.

  The array buffers are handed to the supporting routine as raw pointers
  without any conversion, so each array must already have the element type
  described below.

  Args:
     np_shape: A 1D numpy array of integers, for the shape of the tensor.
     np_values: A 1D numpy array, for the non-zero values in the tensor.
     np_indices: A 2D numpy array of integers, representing the indices for the
       non-zero values in the tensor.
     np_perm: A 1D numpy array of integers, representing the storage ordering
       for the dimensions.
     np_sparse: A 1D numpy array of uint8, representing the sparsity values
       for the dimensions.

  Returns:
     An integer for the non-null ctypes.c_void_p to the MLIR sparse tensor
     descriptor.

  Raises:
    OSError: If there is any problem in loading the shared library.
    ValueError: If the shared library doesn't contain the needed routines.
    KeyError: If the element type of np_values is not supported.
  """

  rank = ctypes.c_ulonglong(len(np_shape))
  nse = ctypes.c_ulonglong(len(np_values))
  shape = np_shape.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
  values = np_values.ctypes.data_as(
      ctypes.POINTER(runtime.as_ctype(np.dtype(np_values.dtype))))
  indices = np_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))

  perm = np_perm.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
  sparse = np_sparse.ctypes.data_as(ctypes.POINTER(ctypes.c_uint8))

  # The routine is selected by the element type of the values, so the failure
  # message below reports that type instead of naming one fixed routine.
  convert_to = _get_support_func_locator()(np_values.dtype.type)[0]
  ptr = convert_to(rank, nse, shape, values, indices, perm, sparse)
  # With a void pointer result type, ctypes returns None for a null pointer.
  assert ptr is not None, (
      f"Problem with calling convertToMLIRSparseTensor for {np_values.dtype}")
  return ptr
194
195
def compile_and_build_engine(
    module: ir.Module) -> execution_engine.ExecutionEngine:
  """Builds a JIT execution engine for an MLIR module.

  The module is compiled and JIT-ed by the shared sparse compiler.

  Args:
    module: The MLIR module.

  Returns:
    A JIT execution engine for the MLIR module.

  """
  compiler = _get_sparse_compiler()
  return compiler.compile_and_jit(module)
208
209
class _SparseTensorDescriptor(ctypes.Structure):
  """A C structure for an MLIR sparse tensor.

  create_sparse_tensor passes an instance of this structure to the execution
  engine to receive the kernel result, then reads the storage pointer and the
  shape from it.
  """
  # NOTE(review): the field order presumably has to match the order of the
  # kernel results (the tensor first, then the shape memref) -- confirm before
  # reordering.
  _fields_ = [
      # A pointer for the MLIR sparse tensor storage.
      ("storage", ctypes.POINTER(ctypes.c_ulonglong)),
      # An MLIR MemRef descriptor for the shape of the sparse tensor. It is a
      # rank-1 descriptor with unsigned 64-bit elements.
      ("shape", runtime.make_nd_memref_descriptor(1, ctypes.c_ulonglong)),
  ]
218
219
220def _output_one_dim(dim: int, rank: int, shape: str, type: str) -> str:
221  """Produces the MLIR text code to output the size for the given dimension."""
222  return f"""
223  %c{dim} = arith.constant {dim} : index
224  %d{dim} = tensor.dim %t, %c{dim} : tensor<{shape}x{type}, #enc>
225  memref.store %d{dim}, %b[%c{dim}] : memref<{rank}xindex>
226"""
227
228
229# TODO: With better support from MLIR, we may improve the current implementation
230# by doing the following:
231# (1) Use Python code to generate the kernel instead of doing MLIR text code
232#     stitching.
233# (2) Use scf.for instead of an unrolled loop to write out the dimension sizes
234#     when tensor.dim supports non-constant dimension value.
def _get_create_sparse_tensor_kernel(
    sparsity_codes: Sequence[sparse_tensor.DimLevelType], type: str) -> str:
  """Creates an MLIR text kernel to construct a sparse tensor from a file.

  The kernel takes the file name, creates the sparse tensor from the file and
  writes the size of every dimension to a newly allocated buffer. It returns
  the tensor and the buffer, which callers receive as a
  _SparseTensorDescriptor structure.

  Args:
    sparsity_codes: The sparsity of each dimension. The number of entries
      determines the rank of the tensor.
    type: The MLIR string for the element type.

  Returns:
    The MLIR text for the kernel.
  """
  rank = len(sparsity_codes)

  # Every dimension is dynamic, written as ? in the shape string.
  shape = "x".join(["?"] * rank)

  # A code with a truthy value is emitted as compressed, otherwise as dense.
  sparsity = ", ".join(
      '"compressed"' if code.value else '"dense"' for code in sparsity_codes)

  # One unrolled snippet per dimension to write its size to the output buffer.
  output_dims = "\n".join(
      [_output_one_dim(dim, rank, shape, type) for dim in range(rank)])

  kernel = f"""
!Ptr = !llvm.ptr<i8>
#enc = #sparse_tensor.encoding<{{
  dimLevelType = [ {sparsity} ]
}}>
func.func @{_ENTRY_NAME}(%filename: !Ptr) -> (tensor<{shape}x{type}, #enc>, memref<{rank}xindex>)
attributes {{ llvm.emit_c_interface }} {{
  %t = sparse_tensor.new %filename : !Ptr to tensor<{shape}x{type}, #enc>
  %b = memref.alloc() : memref<{rank}xindex>
  {output_dims}
  return %t, %b : tensor<{shape}x{type}, #enc>, memref<{rank}xindex>
}}"""
  return kernel
267
268
def create_sparse_tensor(filename: str,
                         sparsity: Sequence[sparse_tensor.DimLevelType],
                         type: str) -> Tuple[ctypes.c_void_p, np.ndarray]:
  """Creates an MLIR sparse tensor from the input file.

  Generates the MLIR kernel that reads the tensor, JIT-compiles it and runs it
  with the file name as the argument.

  Args:
    filename: A string for the name of the file that contains the tensor data in
      a COO-flavored format.
    sparsity: A sequence of DimLevelType values, one for each dimension of the
      tensor.
    type: The MLIR string for the data type.

  Returns:
    A Tuple containing the following values:
    storage: A ctypes.c_void_p for the MLIR sparse tensor storage.
    shape: A 1D numpy array of integers, for the shape of the tensor.

  Raises:
    OSError: If there is any problem in loading the supporting C shared library.
    ValueError:  If the shared library doesn't contain the needed routine.
  """
  with ir.Context() as ctx, ir.Location.unknown():
    kernel_text = _get_create_sparse_tensor_kernel(sparsity, type)
    module = ir.Module.parse(kernel_text)
    engine = compile_and_build_engine(module)

  # The structure that the kernel fills in with its results.
  result_desc = _SparseTensorDescriptor()
  # The file name as a C string, encoded in UTF-8.
  c_filename = ctypes.c_char_p(bytes(filename, "utf-8"))

  # The result slot comes first, followed by the kernel argument. Each entry is
  # a reference to the value being passed.
  result_ref = ctypes.byref(ctypes.pointer(result_desc))
  filename_ref = ctypes.byref(c_filename)

  # Run the kernel through the execution engine.
  engine.invoke(_ENTRY_NAME, result_ref, filename_ref)
  shape = runtime.ranked_memref_to_numpy(ctypes.pointer(result_desc.shape))
  return result_desc.storage, shape
308
309
310# TODO: With better support from MLIR, we may improve the current implementation
311# by using Python code to generate the kernel instead of doing MLIR text code
312# stitching.
def _get_output_sparse_tensor_kernel(
    sparsity_codes: Sequence[sparse_tensor.DimLevelType],
    type: str) -> str:
  """Creates an MLIR text kernel to output a sparse tensor to a file.

  The kernel takes the sparse tensor and the file name, writes the tensor to
  the file and returns void.

  Args:
    sparsity_codes: The sparsity of each dimension. The number of entries
      determines the rank of the tensor.
    type: The MLIR string for the element type.

  Returns:
    The MLIR text for the kernel.
  """
  rank = len(sparsity_codes)

  # Every dimension is dynamic, written as ? in the shape string.
  shape = "x".join(["?"] * rank)

  # A code with a truthy value is emitted as compressed, otherwise as dense.
  level_names = [
      '"compressed"' if code.value else '"dense"' for code in sparsity_codes
  ]
  sparsity = ", ".join(level_names)

  kernel = f"""
!Ptr = !llvm.ptr<i8>
#enc = #sparse_tensor.encoding<{{
  dimLevelType = [ {sparsity} ]
}}>
func.func @{_ENTRY_NAME}(%t: tensor<{shape}x{type}, #enc>, %filename: !Ptr)
attributes {{ llvm.emit_c_interface }} {{
  sparse_tensor.out %t, %filename : tensor<{shape}x{type}, #enc>, !Ptr
  func.return
}}"""
  return kernel
341
342
def output_sparse_tensor(tensor: ctypes.c_void_p, filename: str,
                         sparsity: Sequence[sparse_tensor.DimLevelType],
                         type: str) -> None:
  """Outputs an MLIR sparse tensor to the given file.

  Generates the MLIR kernel that writes the tensor, JIT-compiles it and runs
  it with the tensor and the file name as the arguments.

  Args:
    tensor: A C pointer to the MLIR sparse tensor.
    filename: A string for the name of the file that receives the tensor data
      in a COO-flavored format.
    sparsity: A sequence of DimLevelType values, one for each dimension of the
      tensor.
    type: The MLIR string for the data type.

  Raises:
    OSError: If there is any problem in loading the supporting C shared library.
    ValueError:  If the shared library doesn't contain the needed routine.
  """
  with ir.Context() as ctx, ir.Location.unknown():
    kernel_text = _get_output_sparse_tensor_kernel(sparsity, type)
    module = ir.Module.parse(kernel_text)
    engine = compile_and_build_engine(module)

  # The file name as a C string, encoded in UTF-8.
  c_filename = ctypes.c_char_p(bytes(filename, "utf-8"))

  # The kernel arguments in order. Each entry is a reference to the value
  # being passed.
  tensor_ref = ctypes.byref(ctypes.cast(tensor, ctypes.c_void_p))
  filename_ref = ctypes.byref(c_filename)

  # Run the kernel through the execution engine. The kernel returns nothing.
  engine.invoke(_ENTRY_NAME, tensor_ref, filename_ref)
375