// Note: Default is function-boundary-type-conversion=infer-layout-map
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -drop-equivalent-buffer-results -split-input-file | FileCheck %s
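// Note: -drop-equivalent-buffer-results removes returned buffers that are
// equivalent to a function argument, and allow-return-allocs permits functions
// to return newly allocated buffers.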

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
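// Note: The fuzzer invocations above only verify that the analysis succeeds
// when ops are analyzed in different (seeded) random orders; the output is
// discarded via -o /dev/null.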

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP

// Test bufferization using memref types that have fully dynamic layout maps.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs function-boundary-type-conversion=fully-dynamic-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-FULLY-DYNAMIC-LAYOUT-MAP
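// Note: The three check prefixes correspond to the three layout-map modes of
// function boundary bufferization: the default infers layout maps from the
// function bodies, identity-layout-map produces memrefs without a layout map,
// and fully-dynamic-layout-map produces memrefs with fully dynamic strides and
// offset.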


// Bufferization of a bodiless function with no tensor return value.

// CHECK: #[[$map0:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK: #[[$map1:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
// CHECK-LABEL: func private @private_func(memref<?xf32,
//  CHECK-SAME:                                          #[[$map0]]>)
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func(memref<?xf32>)
func.func private @private_func(tensor<?xf32>) -> ()

// CHECK-LABEL: func private @private_func_2d(memref<?x?xf32,
//  CHECK-SAME:                                               #[[$map1]]>)
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func_2d(memref<?x?xf32>)
func.func private @private_func_2d(tensor<?x?xf32>) -> ()
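// Note: The bodies of private/external function declarations cannot be
// analyzed, so their tensor arguments are bufferized with fully dynamic layout
// maps (the affine maps captured above).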

// CHECK-LABEL: func @empty_func() {
// CHECK-NO-LAYOUT-MAP-LABEL: func @empty_func() {
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func @empty_func() {
func.func @empty_func() -> () {
  return
}

// -----

// A bodiless function that returns something that is not a tensor.

// CHECK: func private @external_func_with_return_val(memref<4xi32, #{{.*}}>) -> f32
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func private @external_func_with_return_val(memref<4xi32,
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: #[[$map1]]>
func.func private @external_func_with_return_val(tensor<4xi32>) -> f32

// -----

// A function that returns a non-equivalent tensor with a layout map.

// CHECK: #[[$map2:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>
// CHECK-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32,
//  CHECK-SAME:     #[[$map2]]> {
//       CHECK:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
//       CHECK:   %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, #[[$map2]]>
//       CHECK:   return %[[subview]]

// CHECK-NO-LAYOUT-MAP: #[[$map2:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>
// CHECK-NO-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32>
//       CHECK-NO-LAYOUT-MAP:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
//       CHECK-NO-LAYOUT-MAP:   %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, #[[$map2]]>
//       CHECK-NO-LAYOUT-MAP:   %[[alloc_no_layout:.*]] = memref.alloc(%{{.*}}) : memref<2x?xf32>
//       CHECK-NO-LAYOUT-MAP:   memref.copy %[[subview]], %[[alloc_no_layout]]
//       CHECK-NO-LAYOUT-MAP:   memref.dealloc %[[alloc]]
//       CHECK-NO-LAYOUT-MAP:   return %[[alloc_no_layout]]
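// Note: With identity-layout-map results, the strided subview cannot be
// returned directly; the checks above show it being copied into a fresh
// buffer with an identity layout before returning.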

// CHECK-FULLY-DYNAMIC-LAYOUT-MAP: #[[$map2a:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP: #[[$map2b:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32,
//  CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: #[[$map2a]]> {
func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>)
{
  %t = bufferization.alloc_tensor() : tensor<20x10xf32>
  %0 = tensor.extract_slice %t[%idx, %idx][2, %sz][1, 1]
      : tensor<20x10xf32> to tensor<2x?xf32>
  return %0 : tensor<2x?xf32>
}

// -----

// CHECK-LABEL: func private @private_func
func.func private @private_func(tensor<?xf32>) -> (f32)

// private_func may modify the buffer arg, but that's OK because %t is writable.
// No alloc/copy should be inserted.

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
//   CHECK-NOT: alloc
//   CHECK-NOT: copy
//       CHECK: call @private_func(%[[t]])
func.func @main(%t: tensor<?xf32> {bufferization.writable = true}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// CHECK-LABEL: func private @private_func
func.func private @private_func(tensor<?xf32>) -> (f32)

// private_func may modify the buffer arg, but %t is not writable. A copy is needed.

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
//       CHECK: %[[alloc:.*]] = memref.alloc
//   CHECK-DAG: memref.copy %[[t]], %[[alloc]]
//   CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
//       CHECK: call @private_func(%[[casted]])
//       CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// Test bufferization of a function without tensor args.

// CHECK-LABEL: func @func_without_tensor_args
func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
  // CHECK: %[[alloc:.*]] = memref.alloc()
  %0 = bufferization.alloc_tensor() : tensor<10xf32>

  %c0 = arith.constant 0 : index
  // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
  %1 = vector.transfer_write %v, %0[%c0] : vector<10xf32>, tensor<10xf32>

  %cst = arith.constant 0.0 : f32
  // CHECK: vector.transfer_read %[[alloc]]
  %r = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<11xf32>
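  // Note: The read extends one element past the 10-element buffer;
  // out-of-bounds lanes take the %cst padding value of the transfer_read.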

  vector.print %r : vector<11xf32>
  return
}

// -----

// Bufferization of a function that is reading and writing. %t0 is writable, so
// no copy should be inserted.

// CHECK-LABEL: func @inner_func(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
  // CHECK: return %[[call]] : f32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// Bufferization of a function that is reading and writing. %t0 is not writable,
// so a copy is needed.

// CHECK-LABEL: func @inner_func(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = false}) -> (f32, tensor<?xf32>) {
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
  // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
  // CHECK: %[[call:.*]] = call @inner_func(%[[casted]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)

  // Note: The tensor return value cannot fold away because the CallOp
  // bufferized out-of-place.
  // CHECK: return %[[call]], %[[casted]] : f32, memref<?xf32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// A chain of function calls. The last function, f0, may write to the buffer.
// This becomes a problem when bufferizing main, where a copy must be inserted.
// (No copies are needed in the other functions.)

// CHECK-LABEL: func private @f0(
func.func private @f0(tensor<?xf32>) -> (f32)

// CHECK-LABEL: func @f1(
//  CHECK-SAME:     %[[t1:.*]]: memref<?xf32
//       CHECK:   %[[r1:.*]] = call @f0(%[[t1]])
//       CHECK:   return %[[r1]]
func.func @f1(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f0(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @f2(
//  CHECK-SAME:     %[[t2:.*]]: memref<?xf32
//       CHECK:   %[[r2:.*]] = call @f1(%[[t2]])
//       CHECK:   return %[[r2]]
func.func @f2(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f1(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t3:.*]]: memref<?xf32
//       CHECK: %[[alloc:.*]] = memref.alloc
//   CHECK-DAG: memref.copy %[[t3]], %[[alloc]]
//   CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
//       CHECK: call @f2(%[[casted]])
//       CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @f2(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// This function does not read from the tensor; it only writes to it. An alloc
// is needed, but no copy.

// CHECK-LABEL: func @does_not_read(
//   CHECK-NOT:   alloc
//   CHECK-NOT:   copy
func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f0 = arith.constant 0.0 : f32
  %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
  return %r : tensor<?xf32>
}

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
//       CHECK:   %[[alloc:.*]] = memref.alloc
//   CHECK-NOT:   copy
//       CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
//   CHECK-NOT:   copy
//       CHECK:   call @does_not_read(%[[casted]])
//       CHECK:   %[[r:.*]] = memref.load %[[casted]]
//       CHECK:   memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> f32 {
  %0 = call @does_not_read(%t) : (tensor<?xf32>) -> (tensor<?xf32>)
  %idx = arith.constant 4 : index
  %r = tensor.extract %0[%idx] : tensor<?xf32>
  return %r : f32
}

// -----

// Alloc and copy must be inserted because the arith.constant is read-only.

//      CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

//      CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
//      CHECK: func private @some_external_func(memref<4xi32, #[[$DYN_1D_MAP]]>)
func.func private @some_external_func(tensor<4xi32>)

//      CHECK: func @main()
func.func @main() {
//  CHECK-DAG:   %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

//  CHECK-DAG:   %[[alloc:.*]] = memref.alloc
//  CHECK-DAG:   %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
//  CHECK-DAG:   memref.copy %[[A]], %[[alloc]]
//      CHECK:   call @some_external_func(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
  call @some_external_func(%A) : (tensor<4xi32>) -> ()

//      CHECK: memref.dealloc %[[alloc]]
  return
}

// -----

// Alloc and copy must be inserted because the arith.constant is read-only. The
// function call is inside an scf.execute_region.

//      CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

//      CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
//      CHECK: func private @some_external_func_within_scf_execute(memref<4xi32, #[[$DYN_1D_MAP]]>)
func.func private @some_external_func_within_scf_execute(tensor<4xi32>)

//      CHECK: func @main()
func.func @main() {
//  CHECK-DAG:   %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

// Note: The scf.execute_region canonicalizes away.

//  CHECK-DAG:   %[[alloc:.*]] = memref.alloc
//  CHECK-DAG:   %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
//  CHECK-DAG:   memref.copy %[[A]], %[[alloc]]
//      CHECK:   call @some_external_func_within_scf_execute(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
  scf.execute_region {
    func.call @some_external_func_within_scf_execute(%A) : (tensor<4xi32>) -> ()
    scf.yield
  }

//      CHECK:   memref.dealloc %[[alloc]]
  return
}

// -----

// A write inside an scf.execute_region. An equivalent tensor is yielded.

// CHECK-LABEL: func @execute_region_test(
//  CHECK-SAME:     %[[m1:.*]]: memref<?xf32
func.func @execute_region_test(%t1 : tensor<?xf32>)
    -> (f32, tensor<?xf32>, f32)
{
  %f1 = arith.constant 0.0 : f32
  %f2 = arith.constant 1.0 : f32
  %idx = arith.constant 7 : index

  // The scf.execute_region is canonicalized away after bufferization, so only
  // the memref.store remains.

  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: memref.store %{{.*}}, %[[m1]][%{{.*}}]
  %0, %1, %2 = scf.execute_region -> (f32, tensor<?xf32>, f32) {
    %t2 = tensor.insert %f2 into %t1[%idx] : tensor<?xf32>
    scf.yield %f1, %t2, %f2 : f32, tensor<?xf32>, f32
  }

  // CHECK: return %{{.*}}, %{{.*}} : f32, f32
  return %0, %1, %2 : f32, tensor<?xf32>, f32
}

// -----

//      CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

//      CHECK:  func private @some_external_func(memref<?xf32, #[[$DYN_1D_MAP]]>)
func.func private @some_external_func(tensor<?xf32>)

//      CHECK:  func @scf_for_with_tensor_insert_slice(
// CHECK-SAME:    %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME:    %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME:    %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
func.func @scf_for_with_tensor_insert_slice(
    %A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>,
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK-NEXT: scf.for
  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
      -> (tensor<?xf32>, tensor<?xf32>)
  {
    // CHECK-NEXT:   %[[SVA:.*]] = memref.subview %[[A]]
    // CHECK-NEXT:   memref.copy %[[C]], %[[SVA]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
    %ttA = tensor.insert_slice %C into %tA[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // CHECK-NEXT:   %[[SVB:.*]] = memref.subview %[[B]]
    // CHECK-NEXT:   memref.copy %[[C]], %[[SVB]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
    %ttB = tensor.insert_slice %C into %tB[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // scf.yield is empty and is elided
    //  CHECK-NOT:   scf.yield
    scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
  }

  // Swapping the results requires bufferizing the whole function to figure out
  // which buffer is which.
  return %r0#1, %r0#0: tensor<?xf32>, tensor<?xf32>
}

//      CHECK:  func @bar(
// CHECK-SAME:    %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME:    %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME:    %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
func.func @bar(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = true},
    %C : tensor<4xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
//  CHECK-DAG:   call @scf_for_with_tensor_insert_slice(%[[A]], %[[B]], %[[C]]
  %r0:2 = call @scf_for_with_tensor_insert_slice(%A, %B, %C, %lb, %ub, %step) :
      (tensor<?xf32>, tensor<?xf32>, tensor<4xf32>, index, index, index)
        -> (tensor<?xf32>, tensor<?xf32>)

  // %r0#0 requires a copy because nothing is known about what the external
  // function does with the buffer.
//  CHECK-DAG:   %[[alloc:.*]] = memref.alloc
//  CHECK-DAG:   %[[casted:.*]] = memref.cast %[[alloc]]
//  CHECK-DAG:   memref.copy %[[B]], %[[alloc]]
// CHECK-NEXT:   call @some_external_func(%[[casted]]) : (memref<?xf32, #[[$DYN_1D_MAP]]>) -> ()
  call @some_external_func(%r0#0) : (tensor<?xf32>) -> ()

//      CHECK:   return
  return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
}

// -----

//  CHECK-DAG: #[[$DYN_0D_MAP:.*]] = affine_map<()[s0] -> (s0)>
//  CHECK-DAG: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

//      CHECK:  func @init_and_dot(
// CHECK-SAME:    %[[A:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME:    %[[B:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME:    %[[C:[a-zA-Z0-9]*]]: memref<f32, #[[$DYN_0D_MAP]]>
func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
  // CHECK-NEXT:   %[[C0:.*]] = arith.constant 0{{.*}} : f32
  %v0 = arith.constant 0.0 : f32

  // CHECK-NEXT:   linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32, #[[$DYN_0D_MAP]]>)
  %d = linalg.fill ins(%v0 : f32) outs(%c : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT:   linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, #[[$DYN_1D_MAP]]>, memref<64xf32, #[[$DYN_1D_MAP]]>) outs(%[[C]] : memref<f32, #[[$DYN_0D_MAP]]>)
  %e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>)
    outs(%d: tensor<f32>) -> tensor<f32>

  // CHECK-NEXT:   return
  return %e : tensor<f32>
}

//      CHECK:  func @main()
func.func @main() {
  //  CHECK-DAG:   %[[C0:.*]] = arith.constant 0{{.*}} : f32
  //  CHECK-DAG:   %[[C1:.*]] = arith.constant 1{{.*}} : f32
  //  CHECK-DAG:   %[[C2:.*]] = arith.constant 2{{.*}} : f32
  %v0 = arith.constant 0.0 : f32
  %v1 = arith.constant 1.0 : f32
  %v2 = arith.constant 2.0 : f32

  // CHECK-NEXT:   %[[A:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
  // CHECK-NEXT:   %[[B:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
  // CHECK-NEXT:   %[[C:.*]] = memref.alloc() {alignment = 128 : i64} : memref<f32>
  //  CHECK-DAG:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
  //  CHECK-DAG:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
  //  CHECK-DAG:   %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
  %A = bufferization.alloc_tensor() : tensor<64xf32>
  %B = bufferization.alloc_tensor() : tensor<64xf32>
  %C = bufferization.alloc_tensor() : tensor<f32>

  //  CHECK-DAG:   linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>)
  //  CHECK-DAG:   linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>)
  //  CHECK-DAG:   linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32>)
  %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
  %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
  %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT:   call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
  %res = call @init_and_dot(%AA, %BB, %CC) :
    (tensor<64xf32>, tensor<64xf32>, tensor<f32>) -> tensor<f32>

  // CHECK-NEXT:   %[[dC:.*]] = memref.cast %[[cC]] : memref<f32, {{.*}}> to memref<*xf32>
  %res2 = tensor.cast %res: tensor<f32> to tensor<*xf32>

  // CHECK-NEXT:   call @printMemrefF32(%[[dC]]) : (memref<*xf32>) -> ()
  call @printMemrefF32(%res2) : (tensor<*xf32>) -> ()

  // CHECK-DAG:   memref.dealloc %[[A]] : memref<64xf32>
  // CHECK-DAG:   memref.dealloc %[[B]] : memref<64xf32>
  // CHECK-DAG:   memref.dealloc %[[C]] : memref<f32>
  // CHECK-NEXT:   return
  return
}

//     CHECK:   func private @printMemrefF32(memref<*xf32>)
func.func private @printMemrefF32(tensor<*xf32>)

// -----

// CHECK: #[[$DYNAMIC:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: func private @external_func(memref<?xf32, #[[$DYNAMIC]]>)
func.func private @external_func(tensor<?xf32>)

//      CHECK: func @callee(
// CHECK-SAME:   %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME:   %[[B:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
// CHECK-SAME:   %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
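// Note: The bufferization.buffer_layout attribute on %A requests an identity
// layout for the bufferized argument, so the value is cast to the fully
// dynamic layout expected by @external_func (see CASTED below).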
func.func @callee(
    %A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
    %B : tensor<?xf32>,
    %C : tensor<?xf32>) {
// CHECK-NEXT: %[[CASTED:.*]] = memref.cast %[[A]] : memref<?xf32> to memref<?xf32, #[[$DYNAMIC]]>
// CHECK-NEXT: call @external_func(%[[CASTED]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
  call @external_func(%A) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[B]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
  call @external_func(%B) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[C]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
  call @external_func(%C) : (tensor<?xf32>) -> ()

  return
}

//      CHECK: func @entry(
// CHECK-SAME:   %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME:   %[[B:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME:   %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %B : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %C : tensor<?xf32> {bufferization.writable = false}) {
// Note: `callee` does not write to its bbArg directly, but `external_func`
// does. Inside `callee`, the writes via `external_func` do not cause a
// conflict. However, inside `entry`, the writes do cause a conflict because
// %A, %B and %C are not inplaceable. This test case shows that this kind of
// conflict detection has a "transitive" nature.
//  CHECK-DAG: %[[ALLOC_A:.*]] = memref.alloc
//  CHECK-DAG: %[[CASTED_A:.*]] = memref.cast %[[ALLOC_A]]
//  CHECK-DAG: %[[ALLOC_B:.*]] = memref.alloc
//  CHECK-DAG: %[[CASTED_B:.*]] = memref.cast %[[ALLOC_B]]
//  CHECK-DAG: %[[ALLOC_C:.*]] = memref.alloc
//  CHECK-DAG: %[[CASTED_C:.*]] = memref.cast %[[ALLOC_C]]
//  CHECK-DAG: memref.copy %[[A]], %[[ALLOC_A]]
//  CHECK-DAG: memref.copy %[[B]], %[[ALLOC_B]]
//  CHECK-DAG: memref.copy %[[C]], %[[ALLOC_C]]
// CHECK-NEXT: call @callee(%[[CASTED_A]], %[[CASTED_B]], %[[CASTED_C]])
  call @callee(%A, %B, %C) : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> ()
  return
}

// -----

// No alloc or copy inside the loop.

// CHECK-LABEL: func @inner_func(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg(%t0: tensor<?xf32> {bufferization.writable = true},
                               %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: scf.for {{.*}} iter_args(%[[t1:.*]] = %[[arg0]])
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: call @inner_func(%[[t1]])
    %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    // CHECK: scf.yield %[[t1]]
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// inner_func_2 modifies the bbArg, but the loop yields the original value. A
// buffer copy must be inserted inside the loop.

// CHECK-LABEL: func @inner_func_2(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg_2(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {bufferization.writable = true},
                                 %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK: scf.for {{.*}} {
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: %[[alloc:.*]] = memref.alloc
    // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
    // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
    // CHECK: call @inner_func_2(%[[casted]])
    // CHECK: memref.dealloc %[[alloc]]
    // CHECK-NOT: scf.yield
    %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %t1 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// Bufferize without fully dynamic layout maps.

// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, #map>) -> vector<4xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
func.func @transfer_read(
    %A : tensor<?xf32> {bufferization.writable = false})
  -> (vector<4xf32>)
{
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

//       CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, #{{.*}}>, vector<4xf32>
  %0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>

//       CHECK: return %[[RES]] : vector<4xf32>
  return %0 : vector<4xf32>
}
