// RUN: mlir-opt %s --sparse-compiler | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// First dimension stored dense, second dimension compressed; identity
// dimension ordering.
#CSR = #sparse_tensor.encoding<{
  dimLevelType = [ "dense", "compressed" ],
  dimOrdering = affine_map<(i,j) -> (i,j)>
}>

// Both dimensions stored compressed; identity dimension ordering.
#DCSR = #sparse_tensor.encoding<{
  dimLevelType = [ "compressed", "compressed" ],
  dimOrdering = affine_map<(i,j) -> (i,j)>
}>

module {
  //
  // Dense kernel: multiplies a 4x8 matrix by an 8x4 matrix, accumulating
  // into a zero-filled 4x4 constant. No operand carries a sparse encoding.
  //
  func.func @matmul1(%lhs: tensor<4x8xf64>,
                     %rhs: tensor<8x4xf64>) -> tensor<4x4xf64> {
    %zeros = arith.constant dense<0.0> : tensor<4x4xf64>
    %product = linalg.matmul
        ins(%lhs, %rhs : tensor<4x8xf64>, tensor<8x4xf64>)
        outs(%zeros : tensor<4x4xf64>) -> tensor<4x4xf64>
    return %product : tensor<4x4xf64>
  }

  //
  // SpMSpM kernel where both inputs and the result use the #CSR encoding.
  // The 4x4 output starts from a freshly initialized sparse tensor.
  //
  func.func @matmul2(%lhs: tensor<4x8xf64, #CSR>,
                     %rhs: tensor<8x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> {
    %n = arith.constant 4 : index
    %empty = sparse_tensor.init [%n, %n] : tensor<4x4xf64, #CSR>
    %product = linalg.matmul
        ins(%lhs, %rhs : tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>)
        outs(%empty : tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
    return %product : tensor<4x4xf64, #CSR>
  }

  //
  // SpMSpM kernel where both inputs and the result use the #DCSR encoding.
  // The 4x4 output starts from a freshly initialized sparse tensor.
  //
  func.func @matmul3(%lhs: tensor<4x8xf64, #DCSR>,
                     %rhs: tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %n = arith.constant 4 : index
    %empty = sparse_tensor.init [%n, %n] : tensor<4x4xf64, #DCSR>
    %product = linalg.matmul
        ins(%lhs, %rhs : tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>)
        outs(%empty : tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    return %product : tensor<4x4xf64, #DCSR>
  }

  //
  // Test driver: builds the inputs, runs every kernel on three operand
  // combinations, prints each result for verification, then frees everything.
  //
  func.func @entry() {
    // Start index for all reads, and the padding value used by the reads.
    %i0 = arith.constant 0 : index
    %pad = arith.constant -1.0 : f64

    // Fully populated operands (stress the kernels) ...
    %dense_a = arith.constant dense<[
      [ 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1 ],
      [ 1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2 ],
      [ 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3, 8.3 ],
      [ 1.4, 2.4, 3.4, 4.4, 5.4, 6.4, 7.4, 8.4 ]
    ]> : tensor<4x8xf64>
    %dense_b = arith.constant dense<[
      [ 10.1, 11.1, 12.1, 13.1 ],
      [ 10.2, 11.2, 12.2, 13.2 ],
      [ 10.3, 11.3, 12.3, 13.3 ],
      [ 10.4, 11.4, 12.4, 13.4 ],
      [ 10.5, 11.5, 12.5, 13.5 ],
      [ 10.6, 11.6, 12.6, 13.6 ],
      [ 10.7, 11.7, 12.7, 13.7 ],
      [ 10.8, 11.8, 12.8, 13.8 ]
    ]> : tensor<8x4xf64>

    // ... and mostly-zero operands (exercise the nonzero structure).
    %sparse_a = arith.constant dense<[
      [ 0.0, 2.1, 0.0, 0.0, 0.0, 6.1, 0.0, 0.0 ],
      [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
      [ 0.0, 2.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ],
      [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 ]
    ]> : tensor<4x8xf64>
    %sparse_b = arith.constant dense<[
      [ 0.0, 0.0, 0.0, 1.0 ],
      [ 0.0, 0.0, 2.0, 0.0 ],
      [ 0.0, 3.0, 0.0, 0.0 ],
      [ 4.0, 0.0, 0.0, 0.0 ],
      [ 0.0, 0.0, 0.0, 0.0 ],
      [ 0.0, 5.0, 0.0, 0.0 ],
      [ 0.0, 0.0, 6.0, 0.0 ],
      [ 0.0, 0.0, 7.0, 8.0 ]
    ]> : tensor<8x4xf64>

    // Produce a #CSR and a #DCSR copy of each of the four operands.
    %da_csr = sparse_tensor.convert %dense_a
        : tensor<4x8xf64> to tensor<4x8xf64, #CSR>
    %da_dcsr = sparse_tensor.convert %dense_a
        : tensor<4x8xf64> to tensor<4x8xf64, #DCSR>
    %sa_csr = sparse_tensor.convert %sparse_a
        : tensor<4x8xf64> to tensor<4x8xf64, #CSR>
    %sa_dcsr = sparse_tensor.convert %sparse_a
        : tensor<4x8xf64> to tensor<4x8xf64, #DCSR>
    %db_csr = sparse_tensor.convert %dense_b
        : tensor<8x4xf64> to tensor<8x4xf64, #CSR>
    %db_dcsr = sparse_tensor.convert %dense_b
        : tensor<8x4xf64> to tensor<8x4xf64, #DCSR>
    %sb_csr = sparse_tensor.convert %sparse_b
        : tensor<8x4xf64> to tensor<8x4xf64, #CSR>
    %sb_dcsr = sparse_tensor.convert %sparse_b
        : tensor<8x4xf64> to tensor<8x4xf64, #DCSR>

    // Scenario 1: fully populated A times fully populated B.
    %dd_ref = call @matmul1(%dense_a, %dense_b)
        : (tensor<4x8xf64>, tensor<8x4xf64>) -> tensor<4x4xf64>
    %dd_csr = call @matmul2(%da_csr, %db_csr)
        : (tensor<4x8xf64, #CSR>,
           tensor<8x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
    %dd_dcsr = call @matmul3(%da_dcsr, %db_dcsr)
        : (tensor<4x8xf64, #DCSR>,
           tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>

    // Scenario 2: mostly-zero A times fully populated B.
    %sd_ref = call @matmul1(%sparse_a, %dense_b)
        : (tensor<4x8xf64>, tensor<8x4xf64>) -> tensor<4x4xf64>
    %sd_csr = call @matmul2(%sa_csr, %db_csr)
        : (tensor<4x8xf64, #CSR>,
           tensor<8x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
    %sd_dcsr = call @matmul3(%sa_dcsr, %db_dcsr)
        : (tensor<4x8xf64, #DCSR>,
           tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>

    // Scenario 3: mostly-zero A times mostly-zero B.
    %ss_ref = call @matmul1(%sparse_a, %sparse_b)
        : (tensor<4x8xf64>, tensor<8x4xf64>) -> tensor<4x4xf64>
    %ss_csr = call @matmul2(%sa_csr, %sb_csr)
        : (tensor<4x8xf64, #CSR>,
           tensor<8x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
    %ss_dcsr = call @matmul3(%sa_dcsr, %sb_dcsr)
        : (tensor<4x8xf64, #DCSR>,
           tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>

    // Scenario 1 result from the dense kernel.
    //
    // CHECK: ( ( 388.76, 425.56, 462.36, 499.16 ),
    // CHECK-SAME: ( 397.12, 434.72, 472.32, 509.92 ),
    // CHECK-SAME: ( 405.48, 443.88, 482.28, 520.68 ),
    // CHECK-SAME: ( 413.84, 453.04, 492.24, 531.44 ) )
    //
    %dd_ref_buf = bufferization.to_memref %dd_ref : memref<4x4xf64>
    %dd_ref_vec = vector.transfer_read %dd_ref_buf[%i0, %i0], %pad
        : memref<4x4xf64>, vector<4x4xf64>
    vector.print %dd_ref_vec : vector<4x4xf64>

    // Scenario 1 result from the #CSR kernel, converted back to dense.
    //
    // CHECK: ( ( 388.76, 425.56, 462.36, 499.16 ),
    // CHECK-SAME: ( 397.12, 434.72, 472.32, 509.92 ),
    // CHECK-SAME: ( 405.48, 443.88, 482.28, 520.68 ),
    // CHECK-SAME: ( 413.84, 453.04, 492.24, 531.44 ) )
    //
    %dd_csr_dense = sparse_tensor.convert %dd_csr
        : tensor<4x4xf64, #CSR> to tensor<4x4xf64>
    %dd_csr_buf = bufferization.to_memref %dd_csr_dense : memref<4x4xf64>
    %dd_csr_vec = vector.transfer_read %dd_csr_buf[%i0, %i0], %pad
        : memref<4x4xf64>, vector<4x4xf64>
    vector.print %dd_csr_vec : vector<4x4xf64>

    // Scenario 1 result from the #DCSR kernel, converted back to dense.
    //
    // CHECK: ( ( 388.76, 425.56, 462.36, 499.16 ),
    // CHECK-SAME: ( 397.12, 434.72, 472.32, 509.92 ),
    // CHECK-SAME: ( 405.48, 443.88, 482.28, 520.68 ),
    // CHECK-SAME: ( 413.84, 453.04, 492.24, 531.44 ) )
    //
    %dd_dcsr_dense = sparse_tensor.convert %dd_dcsr
        : tensor<4x4xf64, #DCSR> to tensor<4x4xf64>
    %dd_dcsr_buf = bufferization.to_memref %dd_dcsr_dense : memref<4x4xf64>
    %dd_dcsr_vec = vector.transfer_read %dd_dcsr_buf[%i0, %i0], %pad
        : memref<4x4xf64>, vector<4x4xf64>
    vector.print %dd_dcsr_vec : vector<4x4xf64>

    // Scenario 2 result from the dense kernel.
    //
    // CHECK: ( ( 86.08, 94.28, 102.48, 110.68 ),
    // CHECK-SAME: ( 0, 0, 0, 0 ),
    // CHECK-SAME: ( 23.46, 25.76, 28.06, 30.36 ),
    // CHECK-SAME: ( 10.8, 11.8, 12.8, 13.8 ) )
    //
    %sd_ref_buf = bufferization.to_memref %sd_ref : memref<4x4xf64>
    %sd_ref_vec = vector.transfer_read %sd_ref_buf[%i0, %i0], %pad
        : memref<4x4xf64>, vector<4x4xf64>
    vector.print %sd_ref_vec : vector<4x4xf64>

    // Scenario 2 result from the #CSR kernel, converted back to dense.
    //
    // CHECK: ( ( 86.08, 94.28, 102.48, 110.68 ),
    // CHECK-SAME: ( 0, 0, 0, 0 ),
    // CHECK-SAME: ( 23.46, 25.76, 28.06, 30.36 ),
    // CHECK-SAME: ( 10.8, 11.8, 12.8, 13.8 ) )
    //
    %sd_csr_dense = sparse_tensor.convert %sd_csr
        : tensor<4x4xf64, #CSR> to tensor<4x4xf64>
    %sd_csr_buf = bufferization.to_memref %sd_csr_dense : memref<4x4xf64>
    %sd_csr_vec = vector.transfer_read %sd_csr_buf[%i0, %i0], %pad
        : memref<4x4xf64>, vector<4x4xf64>
    vector.print %sd_csr_vec : vector<4x4xf64>

    // Scenario 2 result from the #DCSR kernel, converted back to dense.
    //
    // CHECK: ( ( 86.08, 94.28, 102.48, 110.68 ),
    // CHECK-SAME: ( 0, 0, 0, 0 ),
    // CHECK-SAME: ( 23.46, 25.76, 28.06, 30.36 ),
    // CHECK-SAME: ( 10.8, 11.8, 12.8, 13.8 ) )
    //
    %sd_dcsr_dense = sparse_tensor.convert %sd_dcsr
        : tensor<4x4xf64, #DCSR> to tensor<4x4xf64>
    %sd_dcsr_buf = bufferization.to_memref %sd_dcsr_dense : memref<4x4xf64>
    %sd_dcsr_vec = vector.transfer_read %sd_dcsr_buf[%i0, %i0], %pad
        : memref<4x4xf64>, vector<4x4xf64>
    vector.print %sd_dcsr_vec : vector<4x4xf64>

    // Scenario 3 result from the dense kernel.
    //
    // CHECK: ( ( 0, 30.5, 4.2, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 4.6, 0 ), ( 0, 0, 7, 8 ) )
    //
    %ss_ref_buf = bufferization.to_memref %ss_ref : memref<4x4xf64>
    %ss_ref_vec = vector.transfer_read %ss_ref_buf[%i0, %i0], %pad
        : memref<4x4xf64>, vector<4x4xf64>
    vector.print %ss_ref_vec : vector<4x4xf64>

    // Scenario 3 result from the #CSR kernel, converted back to dense.
    //
    // CHECK: ( ( 0, 30.5, 4.2, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 4.6, 0 ), ( 0, 0, 7, 8 ) )
    //
    %ss_csr_dense = sparse_tensor.convert %ss_csr
        : tensor<4x4xf64, #CSR> to tensor<4x4xf64>
    %ss_csr_buf = bufferization.to_memref %ss_csr_dense : memref<4x4xf64>
    %ss_csr_vec = vector.transfer_read %ss_csr_buf[%i0, %i0], %pad
        : memref<4x4xf64>, vector<4x4xf64>
    vector.print %ss_csr_vec : vector<4x4xf64>

    // Scenario 3 result from the #DCSR kernel, converted back to dense.
    //
    // CHECK: ( ( 0, 30.5, 4.2, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 4.6, 0 ), ( 0, 0, 7, 8 ) )
    //
    %ss_dcsr_dense = sparse_tensor.convert %ss_dcsr
        : tensor<4x4xf64, #DCSR> to tensor<4x4xf64>
    %ss_dcsr_buf = bufferization.to_memref %ss_dcsr_dense : memref<4x4xf64>
    %ss_dcsr_vec = vector.transfer_read %ss_dcsr_buf[%i0, %i0], %pad
        : memref<4x4xf64>, vector<4x4xf64>
    vector.print %ss_dcsr_vec : vector<4x4xf64>

    //
    // Inspect the stored values of the two sparse scenario 3 results
    // directly. Eight entries are read from each values buffer; the expected
    // output is five stored values followed by the -1 padding value.
    //
    // CHECK: ( 30.5, 4.2, 4.6, 7, 8, -1, -1, -1 )
    // CHECK: ( 30.5, 4.2, 4.6, 7, 8, -1, -1, -1 )
    //
    %vals_csr = sparse_tensor.values %ss_csr
        : tensor<4x4xf64, #CSR> to memref<?xf64>
    %vals_dcsr = sparse_tensor.values %ss_dcsr
        : tensor<4x4xf64, #DCSR> to memref<?xf64>
    %nz_csr = vector.transfer_read %vals_csr[%i0], %pad
        : memref<?xf64>, vector<8xf64>
    %nz_dcsr = vector.transfer_read %vals_dcsr[%i0], %pad
        : memref<?xf64>, vector<8xf64>
    vector.print %nz_csr : vector<8xf64>
    vector.print %nz_dcsr : vector<8xf64>

    // Free the sparse inputs.
    sparse_tensor.release %da_csr : tensor<4x8xf64, #CSR>
    sparse_tensor.release %da_dcsr : tensor<4x8xf64, #DCSR>
    sparse_tensor.release %sa_csr : tensor<4x8xf64, #CSR>
    sparse_tensor.release %sa_dcsr : tensor<4x8xf64, #DCSR>
    sparse_tensor.release %db_csr : tensor<8x4xf64, #CSR>
    sparse_tensor.release %db_dcsr : tensor<8x4xf64, #DCSR>
    sparse_tensor.release %sb_csr : tensor<8x4xf64, #CSR>
    sparse_tensor.release %sb_dcsr : tensor<8x4xf64, #DCSR>

    // Free the sparse kernel results.
    sparse_tensor.release %dd_csr : tensor<4x4xf64, #CSR>
    sparse_tensor.release %dd_dcsr : tensor<4x4xf64, #DCSR>
    sparse_tensor.release %sd_csr : tensor<4x4xf64, #CSR>
    sparse_tensor.release %sd_dcsr : tensor<4x4xf64, #DCSR>
    sparse_tensor.release %ss_csr : tensor<4x4xf64, #CSR>
    sparse_tensor.release %ss_dcsr : tensor<4x4xf64, #DCSR>

    // Free the nine 4x4 buffers that were printed.
    memref.dealloc %dd_ref_buf : memref<4x4xf64>
    memref.dealloc %dd_csr_buf : memref<4x4xf64>
    memref.dealloc %dd_dcsr_buf : memref<4x4xf64>
    memref.dealloc %sd_ref_buf : memref<4x4xf64>
    memref.dealloc %sd_csr_buf : memref<4x4xf64>
    memref.dealloc %sd_dcsr_buf : memref<4x4xf64>
    memref.dealloc %ss_ref_buf : memref<4x4xf64>
    memref.dealloc %ss_csr_buf : memref<4x4xf64>
    memref.dealloc %ss_dcsr_buf : memref<4x4xf64>

    return
  }
}