// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// RUN: mlir-opt %s -sparsification | FileCheck %s

// Test to demonstrate the difference between non-annotated dense tensors
// and all-dense-annotated "sparse" tensors. The former class remains as
// two-dimensional tensors that are bufferized by subsequent passes. The
// latter class is linearized into one-dimensional buffers that are backed
// by the runtime support library.

// Sparse tensor encoding in which both dimensions use the "dense" level
// type, i.e. an all-dense-annotated "sparse" matrix.
#DenseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense"  ] }>

// Element-wise 2-d kernel trait: input and output are both indexed by (i,j)
// and both loops are parallel.
#trait_2d = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>,  // A
    affine_map<(i,j) -> (i,j)>   // X (out)
  ],
  iterator_types = ["parallel", "parallel"],
  doc = "X(i,j) = A(i,j) + 1"
}

// Reduction kernel trait: the 3-d input is indexed by (i,j,k) while the 2-d
// output is indexed by (i,j) only; "k" is the reduction dimension.
#trait_3d = {
  indexing_maps = [
    affine_map<(i,j,k) -> (i,j,k)>,  // A
    affine_map<(i,j,k) -> (i,j)>     // X (out)
  ],
  iterator_types = ["parallel", "parallel", "reduction"],
  doc = "X(i,j) += A(i,j,k)"
}

//
// Test with an all-dense-annotated "sparse" matrix as input and
// a non-annotated dense matrix as output that is not inplaceable.
// This results in an explicit allocation to facilitate output.
//
// CHECK-LABEL:   func @dense1(
// CHECK-SAME:                 %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
// CHECK-SAME:                 %[[VAL_1:.*]]: tensor<32x16xf32> {linalg.inplaceable = false}) -> tensor<32x16xf32> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK-DAG:       %[[VAL_9:.*]] = memref.alloc() : memref<32x16xf32>
// CHECK:           memref.copy %[[VAL_8]], %[[VAL_9]] : memref<32x16xf32> to memref<32x16xf32>
// CHECK:           scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_12:.*]] = arith.muli %[[VAL_10]], %[[VAL_4]] : index
// CHECK:               %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[VAL_11]] : index
// CHECK:               %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xf32>
// CHECK:               %[[VAL_15:.*]] = arith.addf %[[VAL_14]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_10]], %[[VAL_11]]] : memref<32x16xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_16:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32x16xf32>
// CHECK:           return %[[VAL_16]] : tensor<32x16xf32>
// CHECK:         }
func.func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>,
             %argx: tensor<32x16xf32> {linalg.inplaceable = false})
             -> tensor<32x16xf32> {
  %c = arith.constant 1.0 : f32
  %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32, #DenseMatrix>)
    outs(%argx: tensor<32x16xf32>) {
      // Only the input element %a is used; the incoming output value %x
      // is ignored, so the output is overwritten rather than accumulated.
      ^bb(%a: f32, %x: f32):
        %1 = arith.addf %a, %c : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32>
  return %0 : tensor<32x16xf32>
}

//
// Test with an all-dense-annotated "sparse" matrix as input and
// a non-annotated dense matrix as output that is inplaceable.
// This allows updating the dense output in place.
//
// CHECK-LABEL:   func @dense2(
// CHECK-SAME:                 %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>,
// CHECK-SAME:                 %[[VAL_1:.*]]: tensor<32x16xf32> {linalg.inplaceable = true}) -> tensor<32x16xf32> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK:           %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK:               %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK:               %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:               %[[VAL_14:.*]] = arith.addf %[[VAL_13]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_9]], %[[VAL_10]]] : memref<32x16xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32x16xf32>
// CHECK:           return %[[VAL_15]] : tensor<32x16xf32>
// CHECK:         }
func.func @dense2(%arga: tensor<32x16xf32, #DenseMatrix>,
             %argx: tensor<32x16xf32> {linalg.inplaceable = true})
             -> tensor<32x16xf32> {
  %c = arith.constant 1.0 : f32
  %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32, #DenseMatrix>)
    outs(%argx: tensor<32x16xf32>) {
      // Only the input element %a is used; the incoming output value %x
      // is ignored, so the output is overwritten rather than accumulated.
      ^bb(%a: f32, %x: f32):
        %1 = arith.addf %a, %c : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32>
  return %0 : tensor<32x16xf32>
}

//
// Test with a non-annotated dense matrix as input and
// an all-dense annotated "sparse" matrix as output.
// The rewriting would fail if argx was not inplaceable.
//
// CHECK-LABEL:   func @dense3(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16xf32>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {linalg.inplaceable = true}) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK:           %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16xf32>
// CHECK:           %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK:               %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK:               %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_9]], %[[VAL_10]]] : memref<32x16xf32>
// CHECK:               %[[VAL_14:.*]] = arith.addf %[[VAL_13]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_15:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:           return %[[VAL_15]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:         }
func.func @dense3(%arga: tensor<32x16xf32>,
             %argx: tensor<32x16xf32, #DenseMatrix> {linalg.inplaceable = true})
             -> tensor<32x16xf32, #DenseMatrix> {
  %c = arith.constant 1.0 : f32
  %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32>)
    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
      // Only the input element %a is used; the incoming output value %x
      // is ignored, so the output is overwritten rather than accumulated.
      ^bb(%a: f32, %x: f32):
        %1 = arith.addf %a, %c : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32, #DenseMatrix>
  return %0 : tensor<32x16xf32, #DenseMatrix>
}


//
// Test with a non-annotated dense matrix as input and
// an all-dense annotated "sparse" matrix as output.
// The rewriting would fail if argx was not inplaceable.
// The missing innermost "k" index (due to a reduction) is accounted
// for by scalarizing the reduction operation for the output tensor:
// the output element is loaded once, carried through the innermost
// loop as an iteration argument, and the loop result is stored back.
//
// CHECK-LABEL:   func @dense4(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32x16x8xf32>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {linalg.inplaceable = true}) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 16 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK:           %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16x8xf32>
// CHECK:           %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
// CHECK:               %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
// CHECK:               %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_10]] : index
// CHECK:               %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:               %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_6]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) {
// CHECK:                 %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_9]], %[[VAL_10]], %[[VAL_15]]] : memref<32x16x8xf32>
// CHECK:                 %[[VAL_18:.*]] = arith.addf %[[VAL_16]], %[[VAL_17]] : f32
// CHECK:                 scf.yield %[[VAL_18]] : f32
// CHECK:               }
// CHECK:               memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:             }
// CHECK:           }
// CHECK:           %[[VAL_19:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:           return %[[VAL_19]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>
// CHECK:         }
func.func @dense4(%arga: tensor<32x16x8xf32>,
             %argx: tensor<32x16xf32, #DenseMatrix> {linalg.inplaceable = true})
             -> tensor<32x16xf32, #DenseMatrix> {
  %0 = linalg.generic #trait_3d
     ins(%arga: tensor<32x16x8xf32>)
    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
      // Accumulates into the output: %x is the running value for X(i,j)
      // and %a is the current A(i,j,k) element.
      ^bb(%a: f32, %x: f32):
        %1 = arith.addf %x, %a : f32
        linalg.yield %1 : f32
  } -> tensor<32x16xf32, #DenseMatrix>
  return %0 : tensor<32x16xf32, #DenseMatrix>
}
