// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// and were then tightened by hand: every use of a region-op result (scf.if,
// scf.while, scf.for) refers back to the capture made at its definition
// instead of opening a fresh wildcard capture. Without this, the test would
// still pass if the reductions were not chained at all.

// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=8" -canonicalize | \
// RUN:   FileCheck %s

// Matrix encoding: first dimension dense, second dimension compressed.
#SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["dense","compressed"]}>

// Both loops are reductions: two rank-2 inputs are folded into one rank-0
// output.
#trait = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>, // a (in)
    affine_map<(i,j) -> (i,j)>, // b (in)
    affine_map<(i,j) -> ()>     // x (out)
  ],
  iterator_types = ["reduction", "reduction"]
}

// Verifies that the SIMD reductions in the two for-loops after the
// while-loop are chained before horizontally reducing these back to scalar.
//
// The chain that is pinned down by the captures below:
//   scalar result of the while-loop (VAL_26#2)
//     -> inserted into lane 0 of a zero vector (VAL_65)
//     -> accumulator of the first vector loop (VAL_67)
//     -> accumulator of the second vector loop (VAL_75)
//     -> single vector.reduction back to f64 (VAL_84)
//
// CHECK-LABEL: func @sparse_matrix_sum(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<f64>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<f64> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant dense<0.000000e+00> : vector<8xf64>
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 8 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 64 : index
// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 1 : index
// CHECK:           %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK:           %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK:           %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK:           %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK:           %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK:           %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK:           %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_0]] : memref<f64>
// CHECK:           %[[VAL_16:.*]] = tensor.extract %[[VAL_0]][] : tensor<f64>
//
// Outer loop over the 64 rows, carrying the scalar sum.
//
// CHECK:           %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_8]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f64) {
// CHECK:             %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref<?xindex>
// CHECK:             %[[VAL_21:.*]] = arith.addi %[[VAL_18]], %[[VAL_8]] : index
// CHECK:             %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref<?xindex>
// CHECK:             %[[VAL_23:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_18]]] : memref<?xindex>
// CHECK:             %[[VAL_24:.*]] = arith.addi %[[VAL_18]], %[[VAL_8]] : index
// CHECK:             %[[VAL_25:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_24]]] : memref<?xindex>
//
// Scalar co-iteration: runs while both operands still have entries in the
// current row.
//
// CHECK:             %[[VAL_26:.*]]:3 = scf.while (%[[VAL_27:.*]] = %[[VAL_20]], %[[VAL_28:.*]] = %[[VAL_23]], %[[VAL_29:.*]] = %[[VAL_19]]) : (index, index, f64) -> (index, index, f64) {
// CHECK:               %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_27]], %[[VAL_22]] : index
// CHECK:               %[[VAL_31:.*]] = arith.cmpi ult, %[[VAL_28]], %[[VAL_25]] : index
// CHECK:               %[[VAL_32:.*]] = arith.andi %[[VAL_30]], %[[VAL_31]] : i1
// CHECK:               scf.condition(%[[VAL_32]]) %[[VAL_27]], %[[VAL_28]], %[[VAL_29]] : index, index, f64
// CHECK:             } do {
// CHECK:             ^bb0(%[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index, %[[VAL_35:.*]]: f64):
// CHECK:               %[[VAL_36:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_33]]] : memref<?xindex>
// CHECK:               %[[VAL_37:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_34]]] : memref<?xindex>
// CHECK:               %[[VAL_38:.*]] = arith.cmpi ult, %[[VAL_37]], %[[VAL_36]] : index
// CHECK:               %[[VAL_39:.*]] = arith.select %[[VAL_38]], %[[VAL_37]], %[[VAL_36]] : index
// CHECK:               %[[VAL_40:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index
// CHECK:               %[[VAL_41:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index
// CHECK:               %[[VAL_42:.*]] = arith.andi %[[VAL_40]], %[[VAL_41]] : i1
// CHECK:               %[[VAL_43:.*]] = scf.if %[[VAL_42]] -> (f64) {
// CHECK:                 %[[VAL_44:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref<?xf64>
// CHECK:                 %[[VAL_45:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref<?xf64>
// CHECK:                 %[[VAL_46:.*]] = arith.addf %[[VAL_44]], %[[VAL_45]] : f64
// CHECK:                 %[[VAL_47:.*]] = arith.addf %[[VAL_35]], %[[VAL_46]] : f64
// CHECK:                 scf.yield %[[VAL_47]] : f64
// CHECK:               } else {
// CHECK:                 %[[VAL_48:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index
// CHECK:                 %[[VAL_49:.*]] = scf.if %[[VAL_48]] -> (f64) {
// CHECK:                   %[[VAL_50:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref<?xf64>
// CHECK:                   %[[VAL_51:.*]] = arith.addf %[[VAL_35]], %[[VAL_50]] : f64
// CHECK:                   scf.yield %[[VAL_51]] : f64
// CHECK:                 } else {
// CHECK:                   %[[VAL_52:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index
// CHECK:                   %[[VAL_53:.*]] = scf.if %[[VAL_52]] -> (f64) {
// CHECK:                     %[[VAL_54:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref<?xf64>
// CHECK:                     %[[VAL_55:.*]] = arith.addf %[[VAL_35]], %[[VAL_54]] : f64
// CHECK:                     scf.yield %[[VAL_55]] : f64
// CHECK:                   } else {
// CHECK:                     scf.yield %[[VAL_35]] : f64
// CHECK:                   }
// CHECK:                   scf.yield %[[VAL_53]] : f64
// CHECK:                 }
// CHECK:                 scf.yield %[[VAL_49]] : f64
// CHECK:               }
// CHECK:               %[[VAL_58:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index
// CHECK:               %[[VAL_59:.*]] = arith.addi %[[VAL_33]], %[[VAL_8]] : index
// CHECK:               %[[VAL_60:.*]] = arith.select %[[VAL_58]], %[[VAL_59]], %[[VAL_33]] : index
// CHECK:               %[[VAL_61:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index
// CHECK:               %[[VAL_62:.*]] = arith.addi %[[VAL_34]], %[[VAL_8]] : index
// CHECK:               %[[VAL_63:.*]] = arith.select %[[VAL_61]], %[[VAL_62]], %[[VAL_34]] : index
// CHECK:               scf.yield %[[VAL_60]], %[[VAL_63]], %[[VAL_43]] : index, index, f64
// CHECK:             }
//
// The scalar sum produced by the while-loop seeds lane 0 of the vector
// accumulator; the first vector loop consumes the remaining entries of the
// first operand.
//
// CHECK:             %[[VAL_65:.*]] = vector.insertelement %[[VAL_26]]#2, %[[VAL_3]]{{\[}}%[[VAL_6]] : index] : vector<8xf64>
// CHECK:             %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_26]]#0 to %[[VAL_22]] step %[[VAL_4]] iter_args(%[[VAL_69:.*]] = %[[VAL_65]]) -> (vector<8xf64>) {
// CHECK:               %[[VAL_70:.*]] = affine.min #map(%[[VAL_22]], %[[VAL_68]])
// CHECK:               %[[VAL_71:.*]] = vector.create_mask %[[VAL_70]] : vector<8xi1>
// CHECK:               %[[VAL_72:.*]] = vector.maskedload %[[VAL_11]]{{\[}}%[[VAL_68]]], %[[VAL_71]], %[[VAL_3]] : memref<?xf64>, vector<8xi1>, vector<8xf64> into vector<8xf64>
// CHECK:               %[[VAL_73:.*]] = arith.addf %[[VAL_69]], %[[VAL_72]] : vector<8xf64>
// CHECK:               %[[VAL_74:.*]] = arith.select %[[VAL_71]], %[[VAL_73]], %[[VAL_69]] : vector<8xi1>, vector<8xf64>
// CHECK:               scf.yield %[[VAL_74]] : vector<8xf64>
// CHECK:             }
//
// The second vector loop must start from the result of the first one
// (VAL_67); this is the chaining that the test is about.
//
// CHECK:             %[[VAL_75:.*]] = scf.for %[[VAL_76:.*]] = %[[VAL_26]]#1 to %[[VAL_25]] step %[[VAL_4]] iter_args(%[[VAL_77:.*]] = %[[VAL_67]]) -> (vector<8xf64>) {
// CHECK:               %[[VAL_79:.*]] = affine.min #map(%[[VAL_25]], %[[VAL_76]])
// CHECK:               %[[VAL_80:.*]] = vector.create_mask %[[VAL_79]] : vector<8xi1>
// CHECK:               %[[VAL_81:.*]] = vector.maskedload %[[VAL_14]]{{\[}}%[[VAL_76]]], %[[VAL_80]], %[[VAL_3]] : memref<?xf64>, vector<8xi1>, vector<8xf64> into vector<8xf64>
// CHECK:               %[[VAL_82:.*]] = arith.addf %[[VAL_77]], %[[VAL_81]] : vector<8xf64>
// CHECK:               %[[VAL_83:.*]] = arith.select %[[VAL_80]], %[[VAL_82]], %[[VAL_77]] : vector<8xi1>, vector<8xf64>
// CHECK:               scf.yield %[[VAL_83]] : vector<8xf64>
// CHECK:             }
//
// One horizontal reduction, applied to the result of the second loop.
//
// CHECK:             %[[VAL_84:.*]] = vector.reduction <add>, %[[VAL_75]] : vector<8xf64> into f64
// CHECK:             scf.yield %[[VAL_84]] : f64
// CHECK:           }
// CHECK:           memref.store %[[VAL_17]], %[[VAL_15]][] : memref<f64>
// CHECK:           %[[VAL_87:.*]] = bufferization.to_tensor %[[VAL_15]] : memref<f64>
// CHECK:           return %[[VAL_87]] : tensor<f64>
// CHECK:         }
func.func @sparse_matrix_sum(%argx: tensor<f64>,
                             %arga: tensor<64x32xf64, #SparseMatrix>,
                             %argb: tensor<64x32xf64, #SparseMatrix>) -> tensor<f64> {
  // x += a(i,j) + b(i,j), reduced over both i and j into the rank-0 output.
  %0 = linalg.generic #trait
    ins(%arga, %argb: tensor<64x32xf64, #SparseMatrix>,
                      tensor<64x32xf64, #SparseMatrix>)
    outs(%argx: tensor<f64>) {
      ^bb(%a: f64, %b: f64, %x: f64):
        %m = arith.addf %a, %b : f64
        %t = arith.addf %x, %m : f64
        linalg.yield %t : f64
  } -> tensor<f64>
  return %0 : tensor<f64>
}