// RUN: mlir-opt %s --sparse-compiler | \
// RUN: TENSOR0="%mlir_integration_test_dir/data/mttkrp_b.tns" \
// RUN: mlir-cpu-runner \
// RUN:  -e entry -entry-point-result=void \
// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Second pipeline: identical, except that the sparse compiler is also asked
// to vectorize (strategy 2, vector length 4). The printed result must be the
// same as for the scalar pipeline above.
//
// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=4" | \
// RUN: TENSOR0="%mlir_integration_test_dir/data/mttkrp_b.tns" \
// RUN: mlir-cpu-runner \
// RUN:  -e entry -entry-point-result=void \
// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// Opaque C-string handle for the name of the tensor data file.
!Filename = type !llvm.ptr<i8>

// Rank-3 sparse encoding in which every dimension is stored compressed.
#SparseTensor = #sparse_tensor.encoding<{
  dimLevelType = [ "compressed", "compressed", "compressed" ]
}>

// Trait of the MTTKRP kernel: i and j are parallel, k and l are reduced.
#mttkrp = {
  indexing_maps = [
    affine_map<(i,j,k,l) -> (i,k,l)>, // B
    affine_map<(i,j,k,l) -> (k,j)>,   // C
    affine_map<(i,j,k,l) -> (l,j)>,   // D
    affine_map<(i,j,k,l) -> (i,j)>    // A (out)
  ],
  iterator_types = ["parallel", "parallel", "reduction", "reduction"],
  doc = "A(i,j) += B(i,k,l) * D(l,j) * C(k,j)"
}

//
// End-to-end test: a kernel whose tensor operand is annotated as sparse is
// lowered to actual sparse code, the sparse operand is populated from a
// file, and the resulting program is executed by the JIT runner.
//
module {
  //
  // Matricized Tensor Times Khatri-Rao Product (MTTKRP). See
  // http://tensor-compiler.org/docs/data_analytics/index.html.
  //
  // Operands: %argb is the sparse rank-3 tensor B, %argc and %argd are the
  // dense matrices C and D, and %arga is the dense matrix A that is both
  // accumulated into and returned.
  //
  func.func @kernel_mttkrp(%argb: tensor<?x?x?xf64, #SparseTensor>,
                           %argc: tensor<?x?xf64>,
                           %argd: tensor<?x?xf64>,
                           %arga: tensor<?x?xf64> {linalg.inplaceable = true})
                      -> tensor<?x?xf64> {
    %result = linalg.generic #mttkrp
      ins(%argb, %argc, %argd:
            tensor<?x?x?xf64, #SparseTensor>, tensor<?x?xf64>, tensor<?x?xf64>)
      outs(%arga: tensor<?x?xf64>) {
      ^bb(%bval: f64, %cval: f64, %dval: f64, %acc: f64):
        // acc + d * (b * c), evaluated in exactly this operand order.
        %bc = arith.mulf %bval, %cval : f64
        %bcd = arith.mulf %dval, %bc : f64
        %sum = arith.addf %acc, %bcd : f64
        linalg.yield %sum : f64
    } -> tensor<?x?xf64>
    return %result : tensor<?x?xf64>
  }

  // External helper that maps an index to a tensor file name. Presumably
  // provided by the runner utils library loaded via -shared-libs and driven
  // by the TENSOR0 environment variable set in the run lines (to confirm).
  func.func private @getTensorFilename(index) -> (!Filename)

  //
  // Test driver: loads the sparse tensor B from file, builds the dense
  // operands, invokes the kernel, and prints the result.
  //
  func.func @entry() {
    %zero = arith.constant 0.0 : f64
    %idx0 = arith.constant 0 : index
    %idx1 = arith.constant 1 : index
    %idx2 = arith.constant 2 : index

    // Load the sparse input tensor B from the file named by slot 0.
    %path = call @getTensorFilename(%idx0) : (index) -> (!Filename)
    %tensorB = sparse_tensor.new %path
          : !Filename to tensor<?x?x?xf64, #SparseTensor>

    // The i, k and l extents are taken from B; the j extent (the second
    // dimension of A, C and D) is fixed at 5.
    %extI = tensor.dim %tensorB, %idx0 : tensor<?x?x?xf64, #SparseTensor>
    %extJ = arith.constant 5 : index
    %extK = tensor.dim %tensorB, %idx1 : tensor<?x?x?xf64, #SparseTensor>
    %extL = tensor.dim %tensorB, %idx2 : tensor<?x?x?xf64, #SparseTensor>

    // Dense input matrix C: element (k, j) holds k * extJ + j as f64.
    %bufC = memref.alloc(%extK, %extJ) : memref<?x?xf64>
    scf.for %kc = %idx0 to %extK step %idx1 {
      scf.for %jc = %idx0 to %extJ step %idx1 {
        %rowOffC = arith.muli %kc, %extJ : index
        %linC = arith.addi %rowOffC, %jc : index
        %linC32 = arith.index_cast %linC : index to i32
        %valC = arith.sitofp %linC32 : i32 to f64
        memref.store %valC, %bufC[%kc, %jc] : memref<?x?xf64>
      }
    }
    %matC = bufferization.to_tensor %bufC : memref<?x?xf64>

    // Dense input matrix D: element (l, j) holds l * extJ + j as f64.
    %bufD = memref.alloc(%extL, %extJ) : memref<?x?xf64>
    scf.for %ld = %idx0 to %extL step %idx1 {
      scf.for %jd = %idx0 to %extJ step %idx1 {
        %rowOffD = arith.muli %ld, %extJ : index
        %linD = arith.addi %rowOffD, %jd : index
        %linD32 = arith.index_cast %linD : index to i32
        %valD = arith.sitofp %linD32 : i32 to f64
        memref.store %valD, %bufD[%ld, %jd] : memref<?x?xf64>
      }
    }
    %matD = bufferization.to_tensor %bufD : memref<?x?xf64>

    // Dense output matrix A, zero-filled so that the kernel's accumulation
    // starts from 0.
    %bufA = memref.alloc(%extI, %extJ) : memref<?x?xf64>
    scf.for %ia = %idx0 to %extI step %idx1 {
      scf.for %ja = %idx0 to %extJ step %idx1 {
        memref.store %zero, %bufA[%ia, %ja] : memref<?x?xf64>
      }
    }
    %matA = bufferization.to_tensor %bufA : memref<?x?xf64>

    // Run the kernel.
    %out = call @kernel_mttkrp(%tensorB, %matC, %matD, %matA)
      : (tensor<?x?x?xf64, #SparseTensor>,
         tensor<?x?xf64>, tensor<?x?xf64>, tensor<?x?xf64>) -> tensor<?x?xf64>

    // Print the leading 2x5 window of the result for verification.
    //
    // CHECK: ( ( 16075, 21930, 28505, 35800, 43815 ),
    // CHECK:   ( 10000, 14225, 19180, 24865, 31280 ) )
    //
    %outBuf = bufferization.to_memref %out : memref<?x?xf64>
    %window = vector.transfer_read %outBuf[%idx0, %idx0], %zero
          : memref<?x?xf64>, vector<2x5xf64>
    vector.print %window : vector<2x5xf64>

    // Free the dense buffers and the sparse tensor storage.
    memref.dealloc %bufA : memref<?x?xf64>
    memref.dealloc %bufC : memref<?x?xf64>
    memref.dealloc %bufD : memref<?x?xf64>
    sparse_tensor.release %tensorB : tensor<?x?x?xf64, #SparseTensor>

    return
  }
}