# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# This file contains the utilities to process sparse tensor outputs.

from typing import Callable, Dict, Sequence, Tuple
import ctypes
import functools
import numpy as np
import os

# Import MLIR related modules.
from mlir import execution_engine
from mlir import ir
from mlir import runtime
from mlir.dialects import sparse_tensor

from . import mlir_sparse_compiler

# Type aliases for type annotation.
_SupportFunc = Callable[..., None]
_SupportFuncLocator = Callable[[np.dtype], Tuple[_SupportFunc, _SupportFunc]]

# The name for the environment variable that provides the full path for the
# supporting library.
_SUPPORTLIB_ENV_VAR = "SUPPORTLIB"
# The default supporting library if the environment variable is not provided.
_DEFAULT_SUPPORTLIB = "libmlir_c_runner_utils.so"

# The JIT compiler optimization level.
_OPT_LEVEL = 2
# The entry point to the JIT compiled program.
_ENTRY_NAME = "main"

# The numpy scalar types with conversion routines in the supporting library,
# paired with the suffix used in the routine names
# convertToMLIRSparseTensor<suffix> / convertFromMLIRSparseTensor<suffix>.
_SUPPORTED_TYPE_SUFFIXES = (
    (np.int8, "I8"),
    (np.int16, "I16"),
    (np.int32, "I32"),
    (np.int64, "I64"),
    (np.float16, "F16"),
    (np.float32, "F32"),
    (np.float64, "F64"),
    (np.complex64, "C32"),
    (np.complex128, "C64"),
)


@functools.lru_cache()
def _get_support_lib_name() -> str:
    """Gets the string name for the supporting C shared library."""
    return os.getenv(_SUPPORTLIB_ENV_VAR, _DEFAULT_SUPPORTLIB)


@functools.lru_cache()
def _get_sparse_compiler() -> mlir_sparse_compiler.SparseCompiler:
    """Gets the MLIR sparse compiler with default setting."""
    return mlir_sparse_compiler.SparseCompiler(
        options="",
        opt_level=_OPT_LEVEL,
        shared_libs=[_get_support_lib_name()])


def _record_support_funcs(
        ty: np.dtype, to_func: _SupportFunc, from_func: _SupportFunc,
        ty_to_funcs: Dict[np.dtype, Tuple[_SupportFunc, _SupportFunc]]
) -> None:
    """Records the two supporting functions for a given data type.

    Both routines are declared to return a C void pointer before being stored
    in ty_to_funcs under the key ty.
    """
    to_func.restype = ctypes.c_void_p
    from_func.restype = ctypes.c_void_p
    ty_to_funcs[ty] = (to_func, from_func)


@functools.lru_cache()
def _get_support_func_locator() -> _SupportFuncLocator:
    """Constructs a function to locate the supporting functions for a data type.

    Loads the supporting C shared library with the needed routines. Constructs
    a dictionary from the supported data types to the routines for the data
    types, and then a function to look up the dictionary for a given data type.

    The name of the supporting C shared library is either provided by an
    environment variable or a default value.

    Returns:
      The function to look up the supporting functions for a given data type.
      The returned function accepts either a numpy scalar type (such as
      np.float64) or an np.dtype instance, and raises KeyError for a data type
      without supporting routines.

    Raises:
      OSError: If there is any problem in loading the shared library.
      ValueError: If the shared library doesn't contain the needed routines.
    """
    # This raises OSError exception if there is any problem in loading the
    # shared library.
    c_lib = ctypes.CDLL(_get_support_lib_name())

    type_to_funcs: Dict[np.dtype, Tuple[_SupportFunc, _SupportFunc]] = {}
    for ty, suffix in _SUPPORTED_TYPE_SUFFIXES:
        try:
            # ctypes resolves the symbol on attribute access and raises
            # AttributeError when the library doesn't export it.
            to_func = getattr(c_lib, f"convertToMLIRSparseTensor{suffix}")
            from_func = getattr(c_lib, f"convertFromMLIRSparseTensor{suffix}")
        except AttributeError as e:
            raise ValueError(f"Missing supporting function: {e}") from e
        _record_support_funcs(ty, to_func, from_func, type_to_funcs)

    def get_support_funcs(ty: np.dtype):
        # The table is keyed on numpy scalar types. An np.dtype instance
        # compares equal to its scalar type but is a different dictionary key,
        # so normalize it before the lookup.
        key = ty.type if isinstance(ty, np.dtype) else ty
        try:
            return type_to_funcs[key]
        except KeyError:
            raise KeyError(f"Unsupported data type: {ty}") from None

    return get_support_funcs


def sparse_tensor_to_coo_tensor(
        sparse_tensor: ctypes.c_void_p,
        dtype: np.dtype,
) -> Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]:
    """Converts an MLIR sparse tensor to a COO-flavored format tensor.

    Args:
      sparse_tensor: A ctypes.c_void_p to the MLIR sparse tensor descriptor.
      dtype: The numpy data type for the tensor elements.

    Returns:
      A tuple that contains the following values for the COO-flavored format
      tensor:
      rank: An integer for the rank of the tensor.
      nse: An integer for the number of non-zero values in the tensor.
      shape: A 1D numpy array of integers, for the shape of the tensor.
      values: A 1D numpy array, for the non-zero values in the tensor.
      indices: A 2D numpy array of integers, representing the indices for the
        non-zero values in the tensor.

    Raises:
      OSError: If there is any problem in loading the shared library.
      ValueError: If the shared library doesn't contain the needed routines.
    """
    convert_from = _get_support_func_locator()(dtype)[1]

    # Output parameters, filled in by the conversion routine through the
    # references passed below.
    rank = ctypes.c_ulonglong(0)
    nse = ctypes.c_ulonglong(0)
    shape = ctypes.POINTER(ctypes.c_ulonglong)()
    values = ctypes.POINTER(runtime.as_ctype(np.dtype(dtype)))()
    indices = ctypes.POINTER(ctypes.c_ulonglong)()
    convert_from(sparse_tensor, ctypes.byref(rank), ctypes.byref(nse),
                 ctypes.byref(shape), ctypes.byref(values),
                 ctypes.byref(indices))

    # Convert the returned values to the corresponding numpy types.
    shape = np.ctypeslib.as_array(shape, shape=[rank.value])
    values = runtime.to_numpy(
        np.ctypeslib.as_array(values, shape=[nse.value]))
    indices = np.ctypeslib.as_array(indices, shape=[nse.value, rank.value])
    return rank.value, nse.value, shape, values, indices


def coo_tensor_to_sparse_tensor(np_shape: np.ndarray, np_values: np.ndarray,
                                np_indices: np.ndarray, np_perm: np.ndarray,
                                np_sparse: np.ndarray) -> int:
    """Converts a COO-flavored format sparse tensor to an MLIR sparse tensor.

    Args:
      np_shape: A 1D numpy array of integers, for the shape of the tensor.
      np_values: A 1D numpy array, for the non-zero values in the tensor.
      np_indices: A 2D numpy array of integers, representing the indices for
        the non-zero values in the tensor.
      np_perm: A 1D numpy array of integers, representing the storage ordering
        for the dimensions.
      np_sparse: A 1D numpy array of uint8, representing the sparsity values
        for the dimensions.

    Returns:
      An integer for the non-null ctypes.c_void_p to the MLIR sparse tensor
      descriptor.

    Raises:
      OSError: If there is any problem in loading the shared library.
      ValueError: If the shared library doesn't contain the needed routines.
    """
    rank = ctypes.c_ulonglong(len(np_shape))
    nse = ctypes.c_ulonglong(len(np_values))

    # The numpy buffers are handed to the C routine as raw pointers; the
    # integer buffers are reinterpreted as unsigned 64-bit values.
    shape = np_shape.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
    values = np_values.ctypes.data_as(
        ctypes.POINTER(runtime.as_ctype(np.dtype(np_values.dtype))))
    indices = np_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
    perm = np_perm.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
    sparse = np_sparse.ctypes.data_as(ctypes.POINTER(ctypes.c_uint8))

    convert_to = _get_support_func_locator()(np_values.dtype.type)[0]
    ptr = convert_to(rank, nse, shape, values, indices, perm, sparse)
    # With a c_void_p restype, ctypes reports a null pointer as None.
    assert ptr is not None, (
        "Problem with calling the convertToMLIRSparseTensor routine for "
        f"data type {np_values.dtype}")
    return ptr


def compile_and_build_engine(
        module: ir.Module) -> execution_engine.ExecutionEngine:
    """Compiles an MLIR module and builds a JIT execution engine.

    Args:
      module: The MLIR module.

    Returns:
      A JIT execution engine for the MLIR module.
    """
    return _get_sparse_compiler().compile_and_jit(module)


def _build_engine_from_kernel(
        kernel: str) -> execution_engine.ExecutionEngine:
    """Parses an MLIR text kernel and builds a JIT execution engine for it."""
    with ir.Context(), ir.Location.unknown():
        return compile_and_build_engine(ir.Module.parse(kernel))


class _SparseTensorDescriptor(ctypes.Structure):
    """A C structure for an MLIR sparse tensor."""
    _fields_ = [
        # A pointer for the MLIR sparse tensor storage.
        ("storage", ctypes.POINTER(ctypes.c_ulonglong)),
        # An MLIR MemRef descriptor for the shape of the sparse tensor.
        ("shape", runtime.make_nd_memref_descriptor(1, ctypes.c_ulonglong)),
    ]


def _dynamic_shape_str(rank: int) -> str:
    """Returns the dynamic shape string for the rank, using ? per dimension."""
    return "x".join(["?"] * rank)


def _sparsity_str(sparsity_codes: Sequence[sparse_tensor.DimLevelType]) -> str:
    """Converts the encoded sparsity values to a string representation."""
    return ", ".join(
        '"compressed"' if code.value else '"dense"' for code in sparsity_codes)


def _output_one_dim(dim: int, rank: int, shape: str, type: str) -> str:
    """Produces the MLIR text code to output the size for the given dimension."""
    return f"""
  %c{dim} = arith.constant {dim} : index
  %d{dim} = tensor.dim %t, %c{dim} : tensor<{shape}x{type}, #enc>
  memref.store %d{dim}, %b[%c{dim}] : memref<{rank}xindex>
"""


# TODO: With better support from MLIR, we may improve the current implementation
# by doing the following:
# (1) Use Python code to generate the kernel instead of doing MLIR text code
#     stitching.
# (2) Use scf.for instead of an unrolled loop to write out the dimension sizes
#     when tensor.dim supports non-constant dimension value.
def _get_create_sparse_tensor_kernel(
        sparsity_codes: Sequence[sparse_tensor.DimLevelType],
        type: str) -> str:
    """Creates an MLIR text kernel to construct a sparse tensor from a file.

    The kernel returns a _SparseTensorDescriptor structure.
    """
    rank = len(sparsity_codes)

    # Use ? to represent a dimension in the dynamic shape string representation.
    shape = _dynamic_shape_str(rank)

    # Convert the encoded sparsity values to a string representation.
    sparsity = _sparsity_str(sparsity_codes)

    # Get the MLIR text code to write the dimension sizes to the output buffer.
    output_dims = "\n".join(
        _output_one_dim(dim, rank, shape, type) for dim in range(rank))

    # Return the MLIR text kernel.
    return f"""
!Ptr = !llvm.ptr<i8>
#enc = #sparse_tensor.encoding<{{
  dimLevelType = [ {sparsity} ]
}}>
func.func @{_ENTRY_NAME}(%filename: !Ptr) -> (tensor<{shape}x{type}, #enc>, memref<{rank}xindex>)
attributes {{ llvm.emit_c_interface }} {{
  %t = sparse_tensor.new %filename : !Ptr to tensor<{shape}x{type}, #enc>
  %b = memref.alloc() : memref<{rank}xindex>
  {output_dims}
  return %t, %b : tensor<{shape}x{type}, #enc>, memref<{rank}xindex>
}}"""


def create_sparse_tensor(filename: str,
                         sparsity: Sequence[sparse_tensor.DimLevelType],
                         type: str) -> Tuple[ctypes.c_void_p, np.ndarray]:
    """Creates an MLIR sparse tensor from the input file.

    Args:
      filename: A string for the name of the file that contains the tensor data
        in a COO-flavored format.
      sparsity: A sequence of DimLevelType values, one for each dimension of
        the tensor.
      type: The MLIR string for the data type.

    Returns:
      A Tuple containing the following values:
      storage: A ctypes.c_void_p for the MLIR sparse tensor storage.
      shape: A 1D numpy array of integers, for the shape of the tensor.

    Raises:
      OSError: If there is any problem in loading the supporting C shared
        library.
      ValueError: If the shared library doesn't contain the needed routine.
    """
    engine = _build_engine_from_kernel(
        _get_create_sparse_tensor_kernel(sparsity, type))

    # A sparse tensor descriptor to receive the kernel result.
    c_tensor_desc = _SparseTensorDescriptor()
    # Convert the filename to a byte stream.
    c_filename = ctypes.c_char_p(filename.encode("utf-8"))

    arg_pointers = [
        ctypes.byref(ctypes.pointer(c_tensor_desc)),
        ctypes.byref(c_filename),
    ]

    # Invoke the execution engine to run the module and return the result.
    engine.invoke(_ENTRY_NAME, *arg_pointers)
    shape = runtime.ranked_memref_to_numpy(
        ctypes.pointer(c_tensor_desc.shape))
    return c_tensor_desc.storage, shape


# TODO: With better support from MLIR, we may improve the current implementation
# by using Python code to generate the kernel instead of doing MLIR text code
# stitching.
def _get_output_sparse_tensor_kernel(
        sparsity_codes: Sequence[sparse_tensor.DimLevelType],
        type: str) -> str:
    """Creates an MLIR text kernel to output a sparse tensor to a file.

    The kernel returns void.
    """
    rank = len(sparsity_codes)

    # Use ? to represent a dimension in the dynamic shape string representation.
    shape = _dynamic_shape_str(rank)

    # Convert the encoded sparsity values to a string representation.
    sparsity = _sparsity_str(sparsity_codes)

    # Return the MLIR text kernel.
    return f"""
!Ptr = !llvm.ptr<i8>
#enc = #sparse_tensor.encoding<{{
  dimLevelType = [ {sparsity} ]
}}>
func.func @{_ENTRY_NAME}(%t: tensor<{shape}x{type}, #enc>, %filename: !Ptr)
attributes {{ llvm.emit_c_interface }} {{
  sparse_tensor.out %t, %filename : tensor<{shape}x{type}, #enc>, !Ptr
  func.return
}}"""


def output_sparse_tensor(tensor: ctypes.c_void_p, filename: str,
                         sparsity: Sequence[sparse_tensor.DimLevelType],
                         type: str) -> None:
    """Outputs an MLIR sparse tensor to the given file.

    Args:
      tensor: A C pointer to the MLIR sparse tensor.
      filename: A string for the name of the file that contains the tensor data
        in a COO-flavored format.
      sparsity: A sequence of DimLevelType values, one for each dimension of
        the tensor.
      type: The MLIR string for the data type.

    Raises:
      OSError: If there is any problem in loading the supporting C shared
        library.
      ValueError: If the shared library doesn't contain the needed routine.
    """
    engine = _build_engine_from_kernel(
        _get_output_sparse_tensor_kernel(sparsity, type))

    # Convert the filename to a byte stream.
    c_filename = ctypes.c_char_p(filename.encode("utf-8"))

    arg_pointers = [
        ctypes.byref(ctypes.cast(tensor, ctypes.c_void_p)),
        ctypes.byref(c_filename),
    ]

    # Invoke the execution engine to run the module and return the result.
    engine.invoke(_ENTRY_NAME, *arg_pointers)