1// RUN: mlir-opt -linalg-bufferize -canonicalize -cse -split-input-file %s | FileCheck %s
2
3#map0 = affine_map<(d0) -> (d0)>
4
5// In-depth checking of a basic case, this is testing
6// - bufferization.to_memref / bufferization.to_tensor materializations are
7//   properly inserted
8// - payload is correctly carried over
9// - affine maps are correctly carried over
10// Later tests will not check all these details.
11
// The affine map alias is captured instead of being spelled out, so the test
// does not depend on the alias name the printer happens to pick.
// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL:   func @basic(
// CHECK-SAME:                %[[TENSOR:.*]]: tensor<4xf32>) -> tensor<4xf32> {
// CHECK-DAG:       %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<4xf32>
// CHECK-DAG:       %[[RESULT_MEMREF:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK:           linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel"]}
// CHECK-SAME:      ins(%[[MEMREF]] : memref<4xf32>)
// CHECK-SAME:      outs(%[[RESULT_MEMREF]] : memref<4xf32>) {
// CHECK:           ^bb0(%[[IN_ELEM:.*]]: f32, %[[UNUSED:.*]]: f32):
// CHECK:             %[[EXP:.*]] = math.exp %[[IN_ELEM]] : f32
// CHECK:             linalg.yield %[[EXP]] : f32
// CHECK:           }
// CHECK:           %[[RESULT:.*]] = bufferization.to_tensor %[[RESULT_MEMREF]] : memref<4xf32>
// CHECK:           return %[[RESULT]] : tensor<4xf32>
func.func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> {
    // Element-wise exp over a statically shaped 1-D tensor. The same tensor is
    // passed as input and as output operand; the payload ignores %out.
    %0 = linalg.generic {
      indexing_maps = [#map0, #map0],
      iterator_types = ["parallel"]
    } ins(%arg0 : tensor<4xf32>)
      outs(%arg0 : tensor<4xf32>) {
      ^bb0(%gen_arg1: f32, %out: f32):
        %tmp1 = math.exp %gen_arg1 : f32
        linalg.yield %tmp1 : f32
    } -> tensor<4xf32>
    return %0 : tensor<4xf32>
}
38
39
40// -----
41
42#map0 = affine_map<(d0) -> (d0)>
43
44// Same as above but with linalg.init_tensor op.
45
// The affine map alias is captured instead of being spelled out, so the test
// does not depend on the alias name the printer happens to pick.
// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @init_tensor(
// CHECK-SAME:      %[[IN:.*]]: tensor<?xf32>, %[[SIZE:.*]]: index)
// CHECK-DAG:     %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : memref<?xf32>
// CHECK-DAG:     %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref<?xf32>
// CHECK:         linalg.generic
// CHECK-SAME:    indexing_maps = [#[[$MAP]], #[[$MAP]]]
// CHECK-SAME:    ins(%[[MEMREF]] : memref<?xf32>)
// CHECK-SAME:    outs(%[[OUT_BUF]] : memref<?xf32>) {
func.func @init_tensor(%in : tensor<?xf32>, %size: index) -> tensor<?xf32> {
  // The output operand comes from linalg.init_tensor with a dynamic size; the
  // checks above expect an alloc that uses %size directly.
  %init = linalg.init_tensor [%size] : tensor<?xf32>
  %0 = linalg.generic {
    indexing_maps = [#map0, #map0],
    iterator_types = ["parallel"]
  } ins(%in : tensor<?xf32>)
    outs(%init : tensor<?xf32>) {
    ^bb0(%gen_arg1: f32, %out: f32):
      %tmp1 = math.exp %gen_arg1 : f32
      linalg.yield %tmp1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
67
68
69// -----
70
71#map0 = affine_map<(d0) -> (d0)>
72
73// CHECK-LABEL:   func @multiple_results
74// CHECK:           %[[RESULT0:.*]] = memref.alloc() {{.*}} : memref<4xf32>
75// CHECK:           %[[RESULT1:.*]] = memref.alloc() {{.*}} : memref<4xf32>
76// CHECK:           linalg.generic
77// CHECK-SAME:      ins(%{{.*}} : memref<4xf32>)
78// CHECK-SAME:      outs(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>)
79// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32):
func.func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
  // One input, two output operands; the payload yields the same exp value for
  // both results.
  %res:2 = linalg.generic {
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel"]}
      ins(%arg0 : tensor<4xf32>)
      outs(%arg0, %arg0 : tensor<4xf32>, tensor<4xf32>) {
    ^bb0(%in_elem: f32, %out_elem0: f32, %out_elem1: f32):
      %exp = math.exp %in_elem : f32
      linalg.yield %exp, %exp : f32, f32
  } -> (tensor<4xf32>, tensor<4xf32>)
  return %res#0, %res#1 : tensor<4xf32>, tensor<4xf32>
}
92
93// -----
94
95#map_2d = affine_map<(d0, d1) -> (d0, d1)>
96
// Check that the allocs for dynamically shaped results are sized from the
// dynamic dimensions of the output operands. All indexing maps in this test
// are the identity, so both result buffers have the same shape.
99
100// CHECK-LABEL:   func @dynamic_results(
101// CHECK-SAME:                          %[[ARG:.*]]: tensor<?x?xf32>
102// CHECK-DAG:       %[[C0:.*]] = arith.constant 0 : index
103// CHECK-DAG:       %[[C1:.*]] = arith.constant 1 : index
104// CHECK-DAG:       %[[DIM0:.*]] = tensor.dim %[[ARG]], %[[C0]] : tensor<?x?xf32>
105// CHECK-DAG:       %[[DIM1:.*]] = tensor.dim %[[ARG]], %[[C1]] : tensor<?x?xf32>
106// CHECK-DAG:       %[[RESULT0:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {{.*}} : memref<?x?xf32>
107// CHECK-DAG:       %[[RESULT1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {{.*}} : memref<?x?xf32>
108// CHECK-DAG:       %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : memref<?x?xf32>
109// CHECK:           linalg.generic
110// CHECK-SAME:      ins(%[[MEMREF_ARG]] : memref<?x?xf32>)
111// CHECK-SAME:      outs(%[[RESULT0]], %[[RESULT1]] : memref<?x?xf32>, memref<?x?xf32>)
func.func @dynamic_results(%arg0: tensor<?x?xf32>)
    -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  // 2-D variant of the multi-result case with fully dynamic shapes; %arg0 is
  // used as the input and as both output operands.
  %res:2 = linalg.generic {
      indexing_maps = [#map_2d, #map_2d, #map_2d],
      iterator_types = ["parallel", "parallel"]}
      ins(%arg0 : tensor<?x?xf32>)
      outs(%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>) {
    ^bb0(%in_elem: f32, %out_elem0: f32, %out_elem1: f32):
      %exp = math.exp %in_elem : f32
      linalg.yield %exp, %exp : f32, f32
  } -> (tensor<?x?xf32>, tensor<?x?xf32>)
  return %res#0, %res#1 : tensor<?x?xf32>, tensor<?x?xf32>
}
125
126// -----
127
128#accesses = [
129  affine_map<(i, j, k) -> (j, i, k)>,
130  affine_map<(i, j, k) -> (i, j)>
131]
132
133#trait = {
134  indexing_maps = #accesses,
135  iterator_types = ["parallel", "parallel", "reduction"]
136}
137
138// Check the bufferization of init tensors.
139
140// CHECK-LABEL:   func @generic_with_init_tensor(
141// CHECK-SAME:                                   %[[ARG0_TENSOR:.*]]: tensor<2x3x4xvector<3x4xi4>>,
142// CHECK-SAME:                                   %[[ARG1_TENSOR:.*]]: tensor<3x2xf32>) -> tensor<3x2xf32> {
143// CHECK-DAG:       %[[INIT_BUFFER:.*]] = memref.alloc() {{.*}} : memref<3x2xf32>
144// CHECK-DAG:       %[[ARG0_MEMREF:.*]] = bufferization.to_memref %[[ARG0_TENSOR]] : memref<2x3x4xvector<3x4xi4>>
145// CHECK-DAG:       %[[ARG1_MEMREF:.*]] = bufferization.to_memref %[[ARG1_TENSOR]] : memref<3x2xf32>
146// CHECK:           memref.copy %[[ARG1_MEMREF]], %[[INIT_BUFFER]] : memref<3x2xf32> to memref<3x2xf32>
147// CHECK:           linalg.generic
148// CHECK-SAME:      ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>)
149// CHECK-SAME:      outs(%[[INIT_BUFFER]] : memref<3x2xf32>) {
func.func @generic_with_init_tensor(
    %arg0: tensor<2x3x4xvector<3x4xi4>>,
    %arg1: tensor<3x2xf32>) -> tensor<3x2xf32> {
  // The payload yields the current output element, so the value of %arg1 is
  // read by the op; the checks above expect it to be copied into the new
  // buffer before the generic runs.
  %res = linalg.generic #trait
      ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
      outs(%arg1 : tensor<3x2xf32>) {
    ^bb0(%in_elem: vector<3x4xi4>, %out_elem: f32):
      linalg.yield %out_elem : f32
  } -> tensor<3x2xf32>
  return %res : tensor<3x2xf32>
}
162
163// -----
164
165// CHECK-LABEL: func @bufferize_fill(
166// CHECK-SAME:    %[[IN:.*]]: tensor<?xf32>
func.func @bufferize_fill(%arg0: tensor<?xf32>) -> tensor<?xf32> {
  %cst = arith.constant 0.0 : f32
  // The fill value is matched with a wildcard: SSA names in the output are
  // chosen by the printer and must not be hard-coded in the expected text.
  // CHECK: %[[ALLOC:.*]] = memref.alloc
  // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[ALLOC]] : memref<?xf32>)
  // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref<?xf32>
  // CHECK: return %[[TENSOR]]
  %0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
176
177// -----
178
179// CHECK-LABEL:   func @bufferize_dot
func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor<f32>) -> tensor<f32> {
  %dot = linalg.dot ins(%in, %in : tensor<4xf32>, tensor<4xf32>)
                    outs(%out : tensor<f32>) -> tensor<f32>
  return %dot : tensor<f32>
  // CHECK: %[[ALLOC:.*]] = memref.alloc
  // TODO: The copy is not necessary.
  // CHECK: memref.copy {{.*}}, %[[ALLOC]]
  // CHECK: linalg.dot ins(%{{.*}}, %{{.*}} : memref<4xf32>, memref<4xf32>)
  // The output operand must be the buffer allocated and copied into above, so
  // the captured value is used here rather than re-defined.
  // CHECK-SAME:       outs(%[[ALLOC]] : memref<f32>)
  // CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref<f32>
  // CHECK: return %[[OUT_TENSOR]]
}
192
193// -----
194
195// This is a regression test. The linalg-bufferize pass should ignore all func
196// dialect ops.
197
198// CHECK-LABEL: func private @csum(tensor<6xi64>) -> tensor<6xi64>
// External declaration (no body); only the signature matters for this test.
func.func private @csum(tensor<6xi64>) -> tensor<6xi64>
200
201// CHECK: func public @main(%[[arg0:.*]]: tensor<2x3xi1>)
202// CHECK:   %[[collapse:.*]] = tensor.collapse_shape %[[arg0]]
203// CHECK:   %[[collapse_m:.*]] = bufferization.to_memref %[[collapse]]
204// CHECK:   %[[alloc:.*]] = memref.alloc()
205// CHECK:   linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) outs(%[[alloc]] : memref<6xi64>)
206// CHECK:   %[[generic_t:.*]] = bufferization.to_tensor %[[alloc]]
207// CHECK:   %[[call:.*]] = call @csum(%[[generic_t]])
208// CHECK:   return %[[call]]
func.func public @main(%arg0: tensor<2x3xi1>) -> tensor<6xi64> {
  // Collapse the 2x3 input into a 1-D tensor of 6 elements.
  %flat = tensor.collapse_shape %arg0 [[0, 1]]
      : tensor<2x3xi1> into tensor<6xi1>
  %init = linalg.init_tensor [6] : tensor<6xi64>
  // Zero-extend every i1 element to i64.
  %ext = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
      iterator_types = ["parallel"]}
      ins(%flat : tensor<6xi1>)
      outs(%init : tensor<6xi64>) {
    ^bb0(%in_bit: i1, %out_elem: i64):
      %wide = arith.extui %in_bit : i1 to i64
      linalg.yield %wide : i64
  } -> tensor<6xi64>
  // The call operates on tensors and is expected to stay that way.
  %call_res = func.call @csum(%ext) : (tensor<6xi64>) -> tensor<6xi64>
  return %call_res : tensor<6xi64>
}
220