// RUN: mlir-opt %s --sparse-compiler | \
// RUN: mlir-cpu-runner \
// RUN:  -e entry -entry-point-result=void \
// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}>
#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>

//
// Traits for tensor operations.
//
#trait_vec_scale = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a (in)
    affine_map<(i) -> (i)>   // x (out)
  ],
  iterator_types = ["parallel"]
}
#trait_vec_op = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a (in)
    affine_map<(i) -> (i)>,  // b (in)
    affine_map<(i) -> (i)>   // x (out)
  ],
  iterator_types = ["parallel"]
}
#trait_mat_op = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>,  // A (in)
    affine_map<(i,j) -> (i,j)>,  // B (in)
    affine_map<(i,j) -> (i,j)>   // X (out)
  ],
  iterator_types = ["parallel", "parallel"],
  doc = "X(i,j) = A(i,j) OP B(i,j)"
}

//
// Contains test cases for the sparse_tensor.binary operator (covering the cases
// where the left/right/overlap regions are empty, identity, or custom blocks).
//

module {
  // Creates a new sparse vector using the minimum values from two input sparse vectors.
  // When there is no overlap, include the present value in the output.
  func.func @vector_min(%arga: tensor<?xf64, #SparseVector>,
                        %argb: tensor<?xf64, #SparseVector>) -> tensor<?xf64, #SparseVector> {
    %c = arith.constant 0 : index
    %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
    %xv = bufferization.alloc_tensor(%d) : tensor<?xf64, #SparseVector>
    %0 = linalg.generic #trait_vec_op
      ins(%arga, %argb: tensor<?xf64, #SparseVector>, tensor<?xf64, #SparseVector>)
      outs(%xv: tensor<?xf64, #SparseVector>) {
      ^bb(%a: f64, %b: f64, %x: f64):
        %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%a0: f64, %b0: f64):
              %cmp = arith.cmpf "olt", %a0, %b0 : f64
              %2 = arith.select %cmp, %a0, %b0 : f64
              sparse_tensor.yield %2 : f64
          }
          left=identity
          right=identity
        linalg.yield %1 : f64
    } -> tensor<?xf64, #SparseVector>
    return %0 : tensor<?xf64, #SparseVector>
  }

  // Creates a new sparse vector by multiplying a sparse vector with a dense vector.
  // When there is no overlap, leave the result empty.
  func.func @vector_mul(%arga: tensor<?xf64, #SparseVector>,
                        %argb: tensor<?xf64>) -> tensor<?xf64, #SparseVector> {
    %c = arith.constant 0 : index
    %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
    %xv = bufferization.alloc_tensor(%d) : tensor<?xf64, #SparseVector>
    %0 = linalg.generic #trait_vec_op
      ins(%arga, %argb: tensor<?xf64, #SparseVector>, tensor<?xf64>)
      outs(%xv: tensor<?xf64, #SparseVector>) {
      ^bb(%a: f64, %b: f64, %x: f64):
        %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%a0: f64, %b0: f64):
              %ret = arith.mulf %a0, %b0 : f64
              sparse_tensor.yield %ret : f64
          }
          left={}
          right={}
        linalg.yield %1 : f64
    } -> tensor<?xf64, #SparseVector>
    return %0 : tensor<?xf64, #SparseVector>
  }

  // Takes the set difference of two sparse vectors. The result includes only those
  // sparse elements present in the first vector but not in the second.
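  // The set difference falls out of the block configuration below: overlap={}
  // emits nothing for indices present in both vectors, left=identity passes
  // through values present only in the first vector, and right={} discards
  // values present only in the second vector.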
  func.func @vector_setdiff(%arga: tensor<?xf64, #SparseVector>,
                            %argb: tensor<?xf64, #SparseVector>) -> tensor<?xf64, #SparseVector> {
    %c = arith.constant 0 : index
    %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
    %xv = bufferization.alloc_tensor(%d) : tensor<?xf64, #SparseVector>
    %0 = linalg.generic #trait_vec_op
      ins(%arga, %argb: tensor<?xf64, #SparseVector>, tensor<?xf64, #SparseVector>)
      outs(%xv: tensor<?xf64, #SparseVector>) {
      ^bb(%a: f64, %b: f64, %x: f64):
        %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={}
          left=identity
          right={}
        linalg.yield %1 : f64
    } -> tensor<?xf64, #SparseVector>
    return %0 : tensor<?xf64, #SparseVector>
  }

  // Returns the index of each stored entry.
  func.func @vector_index(%arga: tensor<?xf64, #SparseVector>) -> tensor<?xi32, #SparseVector> {
    %c = arith.constant 0 : index
    %d = tensor.dim %arga, %c : tensor<?xf64, #SparseVector>
    %xv = bufferization.alloc_tensor(%d) : tensor<?xi32, #SparseVector>
    %0 = linalg.generic #trait_vec_scale
      ins(%arga: tensor<?xf64, #SparseVector>)
      outs(%xv: tensor<?xi32, #SparseVector>) {
      ^bb(%a: f64, %x: i32):
        %idx = linalg.index 0 : index
        %1 = sparse_tensor.binary %a, %idx : f64, index to i32
          overlap={
            ^bb0(%x0: f64, %i: index):
              %ret = arith.index_cast %i : index to i32
              sparse_tensor.yield %ret : i32
          }
          left={}
          right={}
        linalg.yield %1 : i32
    } -> tensor<?xi32, #SparseVector>
    return %0 : tensor<?xi32, #SparseVector>
  }

  // Adds two sparse matrices where they intersect. Where they don't intersect,
  // negate the 2nd argument's values; ignore 1st argument-only values.
  func.func @matrix_intersect(%arga: tensor<?x?xf64, #DCSR>,
                              %argb: tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR> {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %d0 = tensor.dim %arga, %c0 : tensor<?x?xf64, #DCSR>
    %d1 = tensor.dim %arga, %c1 : tensor<?x?xf64, #DCSR>
    %xv = bufferization.alloc_tensor(%d0, %d1) : tensor<?x?xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%arga, %argb: tensor<?x?xf64, #DCSR>, tensor<?x?xf64, #DCSR>)
      outs(%xv: tensor<?x?xf64, #DCSR>) {
      ^bb(%a: f64, %b: f64, %x: f64):
        %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%x0: f64, %y0: f64):
              %ret = arith.addf %x0, %y0 : f64
              sparse_tensor.yield %ret : f64
          }
          left={}
          right={
            ^bb0(%x1: f64):
              %lret = arith.negf %x1 : f64
              sparse_tensor.yield %lret : f64
          }
        linalg.yield %1 : f64
    } -> tensor<?x?xf64, #DCSR>
    return %0 : tensor<?x?xf64, #DCSR>
  }

  // Tensor addition (use semi-ring binary operation).
  func.func @add_tensor_1(%A: tensor<4x4xf64, #DCSR>,
                          %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%x: f64, %y: f64):
              %ret = arith.addf %x, %y : f64
              sparse_tensor.yield %ret : f64
          }
          left=identity
          right=identity
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  // Same as @add_tensor_1, but uses explicit sparse_tensor.yield blocks instead of
  // the identity shorthand to pass values through.
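  // Yielding the block argument unchanged, as done here, behaves the same as
  // specifying left=identity and right=identity.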
  func.func @add_tensor_2(%A: tensor<4x4xf64, #DCSR>,
                          %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%x: f64, %y: f64):
              %ret = arith.addf %x, %y : f64
              sparse_tensor.yield %ret : f64
          }
          left={
            ^bb0(%x: f64):
              sparse_tensor.yield %x : f64
          }
          right={
            ^bb0(%y: f64):
              sparse_tensor.yield %y : f64
          }
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  // Performs a triangular add/sub operation (using semi-ring binary op).
  func.func @triangular(%A: tensor<4x4xf64, #DCSR>,
                        %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %row = linalg.index 0 : index
        %col = linalg.index 1 : index
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%x: f64, %y: f64):
              %cmp = arith.cmpi "uge", %col, %row : index
              %upperTriangleResult = arith.addf %x, %y : f64
              %lowerTriangleResult = arith.subf %x, %y : f64
              %ret = arith.select %cmp, %upperTriangleResult, %lowerTriangleResult : f64
              sparse_tensor.yield %ret : f64
          }
          left=identity
          right={
            ^bb0(%y: f64):
              %cmp = arith.cmpi "uge", %col, %row : index
              %lowerTriangleResult = arith.negf %y : f64
              %ret = arith.select %cmp, %y, %lowerTriangleResult : f64
              sparse_tensor.yield %ret : f64
          }
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  // Performs a sub operation (using semi-ring binary op) clamped by constant thresholds.
  func.func @sub_with_thres(%A: tensor<4x4xf64, #DCSR>,
                            %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    // Defines out-block constant bounds.
    %thres_out_up = arith.constant 2.0 : f64
    %thres_out_lo = arith.constant -2.0 : f64

    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={
            ^bb0(%x: f64, %y: f64):
              // Defines in-block constant bounds.
              %thres_up = arith.constant 1.0 : f64
              %thres_lo = arith.constant -1.0 : f64
              %result = arith.subf %x, %y : f64
              %cmp = arith.cmpf "oge", %result, %thres_up : f64
              %tmp = arith.select %cmp, %thres_up, %result : f64
              %cmp1 = arith.cmpf "ole", %tmp, %thres_lo : f64
              %ret = arith.select %cmp1, %thres_lo, %tmp : f64
              sparse_tensor.yield %ret : f64
          }
          left={
            ^bb0(%x: f64):
              // Uses out-block constant bounds.
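              // Clamps the A-only value to the wider range [-2.0, 2.0].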
              %cmp = arith.cmpf "oge", %x, %thres_out_up : f64
              %tmp = arith.select %cmp, %thres_out_up, %x : f64
              %cmp1 = arith.cmpf "ole", %tmp, %thres_out_lo : f64
              %ret = arith.select %cmp1, %thres_out_lo, %tmp : f64
              sparse_tensor.yield %ret : f64
          }
          right={
            ^bb0(%y: f64):
              %ny = arith.negf %y : f64
              %cmp = arith.cmpf "oge", %ny, %thres_out_up : f64
              %tmp = arith.select %cmp, %thres_out_up, %ny : f64
              %cmp1 = arith.cmpf "ole", %tmp, %thres_out_lo : f64
              %ret = arith.select %cmp1, %thres_out_lo, %tmp : f64
              sparse_tensor.yield %ret : f64
          }
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  // Performs isEqual only on intersecting elements.
  func.func @intersect_equal(%A: tensor<4x4xf64, #DCSR>,
                             %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xi8, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xi8, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: i8) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to i8
          overlap={
            ^bb0(%x: f64, %y: f64):
              %cmp = arith.cmpf "oeq", %x, %y : f64
              %ret = arith.extui %cmp : i1 to i8
              sparse_tensor.yield %ret : i8
          }
          left={}
          right={}
        linalg.yield %result : i8
    } -> tensor<4x4xi8, #DCSR>
    return %0 : tensor<4x4xi8, #DCSR>
  }

  // Keeps values present only on the left, negates values present only on the right,
  // and ignores values where both overlap.
  func.func @only_left_right(%A: tensor<4x4xf64, #DCSR>,
                             %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
    %0 = linalg.generic #trait_mat_op
      ins(%A, %B: tensor<4x4xf64, #DCSR>,
                  tensor<4x4xf64, #DCSR>)
      outs(%C: tensor<4x4xf64, #DCSR>) {
      ^bb0(%a: f64, %b: f64, %c: f64) :
        %result = sparse_tensor.binary %a, %b : f64, f64 to f64
          overlap={}
          left=identity
          right={
            ^bb0(%y: f64):
              %ret = arith.negf %y : f64
              sparse_tensor.yield %ret : f64
          }
        linalg.yield %result : f64
    } -> tensor<4x4xf64, #DCSR>
    return %0 : tensor<4x4xf64, #DCSR>
  }

  //
  // Utility functions to dump the values of a tensor.
  //

  func.func @dump_vec(%arg0: tensor<?xf64, #SparseVector>) {
    // Dump the values array to verify only sparse contents are stored.
    %c0 = arith.constant 0 : index
    %d0 = arith.constant -1.0 : f64
    %0 = sparse_tensor.values %arg0 : tensor<?xf64, #SparseVector> to memref<?xf64>
    %1 = vector.transfer_read %0[%c0], %d0: memref<?xf64>, vector<16xf64>
    vector.print %1 : vector<16xf64>
    // Dump the dense vector to verify structure is correct.
    %dv = sparse_tensor.convert %arg0 : tensor<?xf64, #SparseVector> to tensor<?xf64>
    %3 = vector.transfer_read %dv[%c0], %d0: tensor<?xf64>, vector<32xf64>
    vector.print %3 : vector<32xf64>
    return
  }

  func.func @dump_vec_i32(%arg0: tensor<?xi32, #SparseVector>) {
    // Dump the values array to verify only sparse contents are stored.
    %c0 = arith.constant 0 : index
    %d0 = arith.constant -1 : i32
    %0 = sparse_tensor.values %arg0 : tensor<?xi32, #SparseVector> to memref<?xi32>
    %1 = vector.transfer_read %0[%c0], %d0: memref<?xi32>, vector<24xi32>
    vector.print %1 : vector<24xi32>
    // Dump the dense vector to verify structure is correct.
    %dv = sparse_tensor.convert %arg0 : tensor<?xi32, #SparseVector> to tensor<?xi32>
    %3 = vector.transfer_read %dv[%c0], %d0: tensor<?xi32>, vector<32xi32>
    vector.print %3 : vector<32xi32>
    return
  }

  func.func @dump_mat(%arg0: tensor<?x?xf64, #DCSR>) {
    %d0 = arith.constant 0.0 : f64
    %c0 = arith.constant 0 : index
    %dm = sparse_tensor.convert %arg0 : tensor<?x?xf64, #DCSR> to tensor<?x?xf64>
    %1 = vector.transfer_read %dm[%c0, %c0], %d0: tensor<?x?xf64>, vector<4x8xf64>
    vector.print %1 : vector<4x8xf64>
    return
  }

  func.func @dump_mat_4x4(%A: tensor<4x4xf64, #DCSR>) {
    %c0 = arith.constant 0 : index
    %du = arith.constant -1.0 : f64

    %c = sparse_tensor.convert %A : tensor<4x4xf64, #DCSR> to tensor<4x4xf64>
    %v = vector.transfer_read %c[%c0, %c0], %du: tensor<4x4xf64>, vector<4x4xf64>
    vector.print %v : vector<4x4xf64>

    %1 = sparse_tensor.values %A : tensor<4x4xf64, #DCSR> to memref<?xf64>
    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<16xf64>
    vector.print %2 : vector<16xf64>

    return
  }

  func.func @dump_mat_4x4_i8(%A: tensor<4x4xi8, #DCSR>) {
    %c0 = arith.constant 0 : index
    %du = arith.constant -1 : i8

    %c = sparse_tensor.convert %A : tensor<4x4xi8, #DCSR> to tensor<4x4xi8>
    %v = vector.transfer_read %c[%c0, %c0], %du: tensor<4x4xi8>, vector<4x4xi8>
    vector.print %v : vector<4x4xi8>

    %1 = sparse_tensor.values %A : tensor<4x4xi8, #DCSR> to memref<?xi8>
    %2 = vector.transfer_read %1[%c0], %du: memref<?xi8>, vector<16xi8>
    vector.print %2 : vector<16xi8>

    return
  }

  // Driver method to call and verify kernels.
  func.func @entry() {
    %c0 = arith.constant 0 : index

    // Setup sparse vectors.
    %v1 = arith.constant sparse<
        [ [0], [3], [11], [17], [20], [21], [28], [29], [31] ],
        [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 ]
    > : tensor<32xf64>
    %v2 = arith.constant sparse<
        [ [1], [3], [4], [10], [16], [18], [21], [28], [29], [31] ],
        [11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0 ]
    > : tensor<32xf64>
    %v3 = arith.constant dense<
        [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.,
         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 0., 1.]
    > : tensor<32xf64>
    %sv1 = sparse_tensor.convert %v1 : tensor<32xf64> to tensor<?xf64, #SparseVector>
    %sv2 = sparse_tensor.convert %v2 : tensor<32xf64> to tensor<?xf64, #SparseVector>
    %dv3 = tensor.cast %v3 : tensor<32xf64> to tensor<?xf64>

    // Setup sparse matrices.
    %m1 = arith.constant sparse<
        [ [0,0], [0,1], [1,7], [2,2], [2,4], [2,7], [3,0], [3,2], [3,3] ],
        [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 ]
    > : tensor<4x8xf64>
    %m2 = arith.constant sparse<
        [ [0,0], [0,7], [1,0], [1,6], [2,1], [2,7] ],
        [6.0, 5.0, 4.0, 3.0, 2.0, 1.0 ]
    > : tensor<4x8xf64>
    %sm1 = sparse_tensor.convert %m1 : tensor<4x8xf64> to tensor<?x?xf64, #DCSR>
    %sm2 = sparse_tensor.convert %m2 : tensor<4x8xf64> to tensor<?x?xf64, #DCSR>

    %m3 = arith.constant dense<
      [ [ 1.0, 0.0, 3.0, 0.0],
        [ 0.0, 2.0, 0.0, 0.0],
        [ 0.0, 0.0, 0.0, 4.0],
        [ 3.0, 4.0, 0.0, 0.0] ]> : tensor<4x4xf64>
    %m4 = arith.constant dense<
      [ [ 1.0, 0.0, 1.0, 1.0],
        [ 0.0, 0.5, 0.0, 0.0],
        [ 1.0, 5.0, 2.0, 0.0],
        [ 2.0, 0.0, 0.0, 0.0] ]> : tensor<4x4xf64>

    %sm3 = sparse_tensor.convert %m3 : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>
    %sm4 = sparse_tensor.convert %m4 : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>

    // Call sparse vector kernels.
    %0 = call @vector_min(%sv1, %sv2)
       : (tensor<?xf64, #SparseVector>,
          tensor<?xf64, #SparseVector>) -> tensor<?xf64, #SparseVector>
    %1 = call @vector_mul(%sv1, %dv3)
       : (tensor<?xf64, #SparseVector>,
          tensor<?xf64>) -> tensor<?xf64, #SparseVector>
    %2 = call @vector_setdiff(%sv1, %sv2)
       : (tensor<?xf64, #SparseVector>,
          tensor<?xf64, #SparseVector>) -> tensor<?xf64, #SparseVector>
    %3 = call @vector_index(%sv1)
       : (tensor<?xf64, #SparseVector>) -> tensor<?xi32, #SparseVector>

    // Call sparse matrix kernels.
    %5 = call @matrix_intersect(%sm1, %sm2)
      : (tensor<?x?xf64, #DCSR>, tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR>
    %6 = call @add_tensor_1(%sm3, %sm4)
      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    %7 = call @add_tensor_2(%sm3, %sm4)
      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    %8 = call @triangular(%sm3, %sm4)
      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    %9 = call @sub_with_thres(%sm3, %sm4)
      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
    %10 = call @intersect_equal(%sm3, %sm4)
      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR>
    %11 = call @only_left_right(%sm3, %sm4)
      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>

    //
    // Verify the results.
    //
    // CHECK:      ( 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 5, 6, 0, 0, 0, 0, 0, 0, 7, 8, 0, 9 )
    // CHECK-NEXT: ( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( 0, 11, 0, 12, 13, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 16, 0, 0, 17, 0, 0, 0, 0, 0, 0, 18, 19, 0, 20 )
    // CHECK-NEXT: ( 1, 11, 2, 13, 14, 3, 15, 4, 16, 5, 6, 7, 8, 9, -1, -1 )
    // CHECK-NEXT: ( 1, 11, 0, 2, 13, 0, 0, 0, 0, 0, 14, 3, 0, 0, 0, 0, 15, 4, 16, 0, 5, 6, 0, 0, 0, 0, 0, 0, 7, 8, 0, 9 )
    // CHECK-NEXT: ( 0, 6, 3, 28, 0, 6, 56, 72, 9, -1, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 28, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 56, 72, 0, 9 )
    // CHECK-NEXT: ( 1, 3, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
    // CHECK-NEXT: ( 0, 3, 11, 17, 20, 21, 28, 29, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 17, 0, 0, 20, 21, 0, 0, 0, 0, 0, 0, 28, 29, 0, 31 )
    // CHECK-NEXT: ( ( 7, 0, 0, 0, 0, 0, 0, -5 ), ( -4, 0, 0, 0, 0, 0, -3, 0 ), ( 0, -2, 0, 0, 0, 0, 0, 7 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ) )
    // CHECK-NEXT: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( 1, 5, 2, 4 ), ( 5, 4, 0, 0 ) )
    // CHECK-NEXT: ( 2, 4, 1, 2.5, 1, 5, 2, 4, 5, 4, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( 1, 5, 2, 4 ), ( 5, 4, 0, 0 ) )
    // CHECK-NEXT: ( 2, 4, 1, 2.5, 1, 5, 2, 4, 5, 4, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( -1, -5, 2, 4 ), ( 1, 4, 0, 0 ) )
    // CHECK-NEXT: ( 2, 4, 1, 2.5, -1, -5, 2, 4, 1, 4, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( ( 0, 0, 1, -1 ), ( 0, 1, 0, 0 ), ( -1, -2, -2, 2 ), ( 1, 2, 0, 0 ) )
    // CHECK-NEXT: ( 0, 1, -1, 1, -1, -2, -2, 2, 1, 2, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( ( 1, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ) )
    // CHECK-NEXT: ( 1, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
    // CHECK-NEXT: ( ( 0, 0, 0, -1 ), ( 0, 0, 0, 0 ), ( -1, -5, -2, 4 ), ( 0, 4, 0, 0 ) )
    // CHECK-NEXT: ( -1, -1, -5, -2, 4, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
    //
    call @dump_vec(%sv1) : (tensor<?xf64, #SparseVector>) -> ()
    call @dump_vec(%sv2) : (tensor<?xf64, #SparseVector>) -> ()
    call @dump_vec(%0) : (tensor<?xf64, #SparseVector>) -> ()
    call @dump_vec(%1) : (tensor<?xf64, #SparseVector>) -> ()
    call @dump_vec(%2) : (tensor<?xf64, #SparseVector>) -> ()
    call @dump_vec_i32(%3) : (tensor<?xi32, #SparseVector>) -> ()
    call @dump_mat(%5) : (tensor<?x?xf64, #DCSR>) -> ()
    call @dump_mat_4x4(%6) : (tensor<4x4xf64, #DCSR>) -> ()
    call @dump_mat_4x4(%7) : (tensor<4x4xf64, #DCSR>) -> ()
    call @dump_mat_4x4(%8) : (tensor<4x4xf64, #DCSR>) -> ()
    call @dump_mat_4x4(%9) : (tensor<4x4xf64, #DCSR>) -> ()
    call @dump_mat_4x4_i8(%10) : (tensor<4x4xi8, #DCSR>) -> ()
    call @dump_mat_4x4(%11) : (tensor<4x4xf64, #DCSR>) -> ()

    // Release the resources.
    bufferization.dealloc_tensor %sv1 : tensor<?xf64, #SparseVector>
    bufferization.dealloc_tensor %sv2 : tensor<?xf64, #SparseVector>
    bufferization.dealloc_tensor %sm1 : tensor<?x?xf64, #DCSR>
    bufferization.dealloc_tensor %sm2 : tensor<?x?xf64, #DCSR>
    bufferization.dealloc_tensor %sm3 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %sm4 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %0 : tensor<?xf64, #SparseVector>
    bufferization.dealloc_tensor %1 : tensor<?xf64, #SparseVector>
    bufferization.dealloc_tensor %2 : tensor<?xf64, #SparseVector>
    bufferization.dealloc_tensor %3 : tensor<?xi32, #SparseVector>
    bufferization.dealloc_tensor %5 : tensor<?x?xf64, #DCSR>
    bufferization.dealloc_tensor %6 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %7 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %8 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %9 : tensor<4x4xf64, #DCSR>
    bufferization.dealloc_tensor %10 : tensor<4x4xi8, #DCSR>
    bufferization.dealloc_tensor %11 : tensor<4x4xf64, #DCSR>
    return
  }
}