// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// RUN: mlir-opt %s -sparsification | FileCheck %s

// Test to demonstrate the difference between non-annotated dense tensors
// and all-dense-annotated "sparse" tensors. The former class remains as
// two-dimensional tensors that are bufferized by subsequent passes. The
// latter class is linearized into one-dimensional buffers that are backed
// by the runtime support library.

// Encoding that annotates both dimensions of a matrix as "dense".
#DenseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>

// Element-wise 2-d kernel: one input A and one output X, both indexed (i,j).
#trait_2d = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>, // A
    affine_map<(i,j) -> (i,j)>  // X (out)
  ],
  iterator_types = ["parallel", "parallel"],
  doc = "X(i,j) = A(i,j) + 1"
}

// 3-d kernel that reduces the innermost "k" dimension of A into X(i,j).
#trait_3d = {
  indexing_maps = [
    affine_map<(i,j,k) -> (i,j,k)>, // A
    affine_map<(i,j,k) -> (i,j)>    // X (out)
  ],
  iterator_types = ["parallel", "parallel", "reduction"],
  doc = "X(i,j) += A(i,j,k)"
}

//
// Test with an all-dense-annotated "sparse" matrix as input and
// a non-annotated dense matrix as output that is not inplaceable.
// This results in an explicit allocation to facilitate output.
//
// CHECK-LABEL: func @dense1(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32> {linalg.inplaceable = false}) -> tensor<32x16xf32> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG:       %[[VAL_9:.*]] = memref.alloc() : memref<32x16xf32>
// CHECK:           memref.copy %[[VAL_8]], %[[VAL_9]] : memref<32x16xf32> to memref<32x16xf32>
// CHECK:           scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_12:.*]] = arith.muli %[[VAL_10]], %[[VAL_4]] : index
// CHECK:               %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[VAL_11]] : index
// CHECK:               %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xf32>
// CHECK:               %[[VAL_15:.*]] = arith.addf %[[VAL_14]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_10]], %[[VAL_11]]] : memref<32x16xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_16:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32x16xf32>
// CHECK:           return %[[VAL_16]] : tensor<32x16xf32>
// CHECK:         }
func.func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>,
                  %argx: tensor<32x16xf32> {linalg.inplaceable = false})
    -> tensor<32x16xf32> {
  %c = arith.constant 1.0 : f32
  %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32, #DenseMatrix>)
    outs(%argx: tensor<32x16xf32>) {
      ^bb(%a: f32, %x: f32):
        %1 = arith.addf %a, %c : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32>
  return %0 : tensor<32x16xf32>
}

//
// Test with an all-dense-annotated "sparse" matrix as input and
// a non-annotated dense matrix as output that is inplaceable.
// This allows updating the dense output in place.
//
// CHECK-LABEL: func @dense2(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32> {linalg.inplaceable = true}) -> tensor<32x16xf32> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK:           %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK:               %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK:               %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:               %[[VAL_14:.*]] = arith.addf %[[VAL_13]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_9]], %[[VAL_10]]] : memref<32x16xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32x16xf32>
// CHECK:           return %[[VAL_15]] : tensor<32x16xf32>
// CHECK:         }
func.func @dense2(%arga: tensor<32x16xf32, #DenseMatrix>,
                  %argx: tensor<32x16xf32> {linalg.inplaceable = true})
    -> tensor<32x16xf32> {
  %c = arith.constant 1.0 : f32
  %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32, #DenseMatrix>)
    outs(%argx: tensor<32x16xf32>) {
      ^bb(%a: f32, %x: f32):
        %1 = arith.addf %a, %c : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32>
  return %0 : tensor<32x16xf32>
}

//
// Test with a non-annotated dense matrix as input and
// an all-dense annotated "sparse" matrix as output.
// The rewriting would fail if argx was not in-placeable.
//
// CHECK-LABEL: func @dense3(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16xf32>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {linalg.inplaceable = true}) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK:           %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16xf32>
// CHECK:           %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK:               %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK:               %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_9]], %[[VAL_10]]] : memref<32x16xf32>
// CHECK:               %[[VAL_14:.*]] = arith.addf %[[VAL_13]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_15:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:           return %[[VAL_15]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:         }
func.func @dense3(%arga: tensor<32x16xf32>,
                  %argx: tensor<32x16xf32, #DenseMatrix> {linalg.inplaceable = true})
    -> tensor<32x16xf32, #DenseMatrix> {
  %c = arith.constant 1.0 : f32
  %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32>)
    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
      ^bb(%a: f32, %x: f32):
        %1 = arith.addf %a, %c : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32, #DenseMatrix>
  return %0 : tensor<32x16xf32, #DenseMatrix>
}


//
// Test with a non-annotated dense matrix as input and
// an all-dense annotated "sparse" matrix as output.
// The rewriting would fail if argx was not in-placeable.
// The missing innermost "k" index (due to a reduction) is accounted
// for by scalarizing the reduction operation for the output tensor.
//
// Hand-tightened relative to the generated assertions: the store below
// reuses the capture of the reduction loop result instead of defining a
// fresh wildcard capture, so the test fails if any other value is stored.
//
// CHECK-LABEL: func @dense4(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {linalg.inplaceable = true}) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK:           %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16x8xf32>
// CHECK:           %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK:               %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK:               %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:               %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_6]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) {
// CHECK:                 %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_9]], %[[VAL_10]], %[[VAL_15]]] : memref<32x16x8xf32>
// CHECK:                 %[[VAL_18:.*]] = arith.addf %[[VAL_16]], %[[VAL_17]] : f32
// CHECK:                 scf.yield %[[VAL_18]] : f32
// CHECK:               }
// CHECK:               memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_20:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:           return %[[VAL_20]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:         }
func.func @dense4(%arga: tensor<32x16x8xf32>,
                  %argx: tensor<32x16xf32, #DenseMatrix> {linalg.inplaceable = true})
    -> tensor<32x16xf32, #DenseMatrix> {
  %0 = linalg.generic #trait_3d
     ins(%arga: tensor<32x16x8xf32>)
    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
      ^bb(%a: f32, %x: f32):
        %1 = arith.addf %x, %a : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32, #DenseMatrix>
  return %0 : tensor<32x16xf32, #DenseMatrix>
}