1// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1" -split-input-file | FileCheck %s
2
3// Run fuzzer with different seeds.
4// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
5// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
6// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
7
8// Test bufferization using memref types that have no layout map.
9// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP-LABEL
10
11// Bufferization of bodiless function with no tensor return value.
12
13// CHECK-LABEL: func private @private_func
// Bodiless private function: one tensor<?xf32> operand, no results.
14func.func private @private_func(tensor<?xf32>) -> ()
15
16// CHECK-LABEL: func @empty_func()
// Function with no arguments and no results whose body is only a `return`.
17func.func @empty_func() -> () {
18  return
19}
20
21// -----
22
23// A bodiless function that returns something that is not a tensor.
24
25// CHECK: func private @external_func_with_return_val(memref<4xi32, #{{.*}}>) -> f32
// Bodiless private function: tensor<4xi32> operand, non-tensor (f32) result.
26func.func private @external_func_with_return_val(tensor<4xi32>) -> f32
27
28// -----
29
30// CHECK-LABEL: func private @private_func
// Bodiless private callee of @main below; its body is not visible here.
31func.func private @private_func(tensor<?xf32>) -> (f32)
32
33// private_func may modify the buffer arg, but that's OK because %t is writable.
34// No alloc/copy should be inserted.
35
36// CHECK-LABEL: func @main(
37//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
38//   CHECK-NOT: alloc
39//   CHECK-NOT: copy
40//       CHECK: call @private_func(%[[t]])
// Forwards the writable argument %t to @private_func and returns the f32
// result of that call.
41func.func @main(%t: tensor<?xf32> {bufferization.writable = true}) -> (f32) {
42  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
43  return %0 : f32
44}
45
46// -----
47
48// CHECK-LABEL: func private @private_func
// Bodiless private callee of @main below; its body is not visible here.
49func.func private @private_func(tensor<?xf32>) -> (f32)
50
51// private_func may modify the buffer arg and %t is not writable, so a copy is needed.
52
53// CHECK-LABEL: func @main(
54//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
55//       CHECK: %[[alloc:.*]] = memref.alloc
56//   CHECK-DAG: memref.copy %[[t]], %[[alloc]]
57//   CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
58//       CHECK: call @private_func(%[[casted]])
59//       CHECK: memref.dealloc %[[alloc]]
// Forwards the non-writable argument %t to @private_func and returns the f32
// result of that call.
60func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
61  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
62  return %0 : f32
63}
64
65// -----
66
67// Test bufferization of a function without tensor args.
68
69// CHECK-LABEL: func @func_without_tensor_args
// Takes only a vector argument. Writes %v into a fresh 10-element init_tensor,
// reads the written tensor back as a vector<11xf32> (one element wider than the
// tensor), and prints that vector.
70func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
71  // CHECK: %[[alloc:.*]] = memref.alloc()
72  %0 = linalg.init_tensor[10] : tensor<10xf32>
73
74  %c0 = arith.constant 0 : index
75  // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
76  %1 = vector.transfer_write %v, %0[%c0] : vector<10xf32>, tensor<10xf32>
77
78  %cst = arith.constant 0.0 : f32
79  // CHECK: vector.transfer_read %[[alloc]]
80  %r = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<11xf32>
81
82  vector.print %r : vector<11xf32>
83  return
84}
85
86// -----
87
88// Bufferization of a function that is reading and writing. %t0 is writable, so
89// no copy should be inserted.
90
91// CHECK-LABEL: func @inner_func(
92//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
// Inserts 1.0 at index 0 of %t, extracts element 1 of the updated tensor, and
// returns both the updated tensor and the extracted f32.
93func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
94  // CHECK-NOT: copy
95  %f = arith.constant 1.0 : f32
96  %c0 = arith.constant 0 : index
97  %c1 = arith.constant 1 : index
98  // CHECK: memref.store %{{.*}}, %[[arg0]]
99  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
100  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
101  %1 = tensor.extract %0[%c1] : tensor<?xf32>
102  // CHECK: return %[[load]] : f32
103  return %0, %1 : tensor<?xf32>, f32
104}
105
106// CHECK-LABEL: func @call_func_with_non_tensor_return(
107//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
// Calls @inner_func on the writable argument %t0 and returns the two call
// results in swapped order (f32 first, tensor second).
108func.func @call_func_with_non_tensor_return(
109    %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
110  // CHECK-NOT: alloc
111  // CHECK-NOT: copy
112  // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
113  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
114  // CHECK: return %[[call]] : f32
115  return %1, %0 : f32, tensor<?xf32>
116}
117
118// -----
119
120// Bufferization of a function that is reading and writing. %t0 is not writable,
121// so a copy is needed.
122
123// CHECK-LABEL: func @inner_func(
124//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
// Inserts 1.0 at index 0 of %t, extracts element 1 of the updated tensor, and
// returns both the updated tensor and the extracted f32.
125func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
126  // CHECK-NOT: copy
127  %f = arith.constant 1.0 : f32
128  %c0 = arith.constant 0 : index
129  %c1 = arith.constant 1 : index
130  // CHECK: memref.store %{{.*}}, %[[arg0]]
131  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
132  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
133  %1 = tensor.extract %0[%c1] : tensor<?xf32>
134  // CHECK: return %[[load]] : f32
135  return %0, %1 : tensor<?xf32>, f32
136}
137
138// CHECK-LABEL: func @call_func_with_non_tensor_return(
139//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
// Calls @inner_func on the non-writable argument %t0 and returns the two call
// results in swapped order (f32 first, tensor second).
140func.func @call_func_with_non_tensor_return(
141    %t0: tensor<?xf32> {bufferization.writable = false}) -> (f32, tensor<?xf32>) {
142  // CHECK: %[[alloc:.*]] = memref.alloc
143  // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
144  // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
145  // CHECK: %[[call:.*]] = call @inner_func(%[[casted]])
146  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
147
148  // Note: The tensor return value has folded away.
149  // CHECK: return %[[call]] : f32
150  return %1, %0 : f32, tensor<?xf32>
151}
152
153// -----
154
155// A chain of function calls. The last function f0 is potentially writing to the
156// buffer. This matters only when bufferizing main, whose argument is not
157// writable: a copy must be inserted there. (No copies in the other functions.)
158
159// CHECK-LABEL: func private @f0(
// Bodiless private function at the end of the call chain (called by @f1).
160func.func private @f0(tensor<?xf32>) -> (f32)
161
162// CHECK-LABEL: func @f1(
163//  CHECK-SAME:     %[[t1:.*]]: memref<?xf32
164//       CHECK:   %[[r1:.*]] = call @f0(%[[t1]])
165//       CHECK:   return %[[r1]]
// Forwards %t to @f0 and returns its f32 result.
166func.func @f1(%t: tensor<?xf32>) -> (f32) {
167  %0 = call @f0(%t) : (tensor<?xf32>) -> (f32)
168  return %0 : f32
169}
170
171// CHECK-LABEL: func @f2(
172//  CHECK-SAME:     %[[t2:.*]]: memref<?xf32
173//       CHECK:   %[[r2:.*]] = call @f1(%[[t2]])
174//       CHECK:   return %[[r2]]
// Forwards %t to @f1 and returns its f32 result.
175func.func @f2(%t: tensor<?xf32>) -> (f32) {
176  %0 = call @f1(%t) : (tensor<?xf32>) -> (f32)
177  return %0 : f32
178}
179
180// CHECK-LABEL: func @main(
181//  CHECK-SAME:     %[[t3:.*]]: memref<?xf32
182//       CHECK: %[[alloc:.*]] = memref.alloc
183//   CHECK-DAG: memref.copy %[[t3]], %[[alloc]]
184//   CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
185//       CHECK: call @f2(%[[casted]])
186//       CHECK: memref.dealloc %[[alloc]]
// Head of the call chain: passes the non-writable argument %t to @f2 and
// returns its f32 result.
187func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
188  %0 = call @f2(%t) : (tensor<?xf32>) -> (f32)
189  return %0 : f32
190}
191
192// -----
193
194// does_not_read only writes, never reads. main needs an alloc, but no copy.
195
196// CHECK-LABEL: func @does_not_read(
197//   CHECK-NOT:   alloc
198//   CHECK-NOT:   copy
// Overwrites %t with 0.0 via linalg.fill (%t is only used as the `outs`
// operand) and returns the filled tensor.
199func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
200  %f0 = arith.constant 0.0 : f32
201  %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
202  return %r : tensor<?xf32>
203}
204
205// CHECK-LABEL: func @main(
206//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
207//       CHECK:   %[[alloc:.*]] = memref.alloc
208//   CHECK-NOT:   copy
209//       CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
210//   CHECK-NOT:   copy
211//       CHECK:   call @does_not_read(%[[casted]])
212//       CHECK:   %[[r:.*]] = memref.load %[[alloc]]
213//       CHECK:   memref.dealloc %[[alloc]]
// Calls @does_not_read on the non-writable argument %t, then extracts element 4
// of the returned tensor and returns it.
214func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> f32 {
215  %0 = call @does_not_read(%t) : (tensor<?xf32>) -> (tensor<?xf32>)
216  %idx = arith.constant 4 : index
217  %r = tensor.extract %0[%idx] : tensor<?xf32>
218  return %r : f32
219}
220
221// -----
222
223// Alloc and copy must be inserted because the arith.constant is read-only.
224
225//      CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
226
227//      CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
228//      CHECK: func private @some_external_func(memref<4xi32, #[[$DYN_1D_MAP]]>)
// Bodiless private function taking a tensor<4xi32>; no results.
229func.func private @some_external_func(tensor<4xi32>)
230
231//      CHECK: func @main()
// Materializes a constant tensor<4xi32> and passes it directly to
// @some_external_func.
232func.func @main() {
233//  CHECK-DAG:   %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
234  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
235
236//  CHECK-DAG:   %[[alloc:.*]] = memref.alloc
237//  CHECK-DAG:   %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
238//  CHECK-DAG:   memref.copy %[[A]], %[[alloc]]
239//      CHECK:   call @some_external_func(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
240  call @some_external_func(%A) : (tensor<4xi32>) -> ()
241
242//      CHECK: memref.dealloc %[[alloc]]
243  return
244}
245
246// -----
247
248// Alloc and copy must be inserted because the arith.constant is read-only. The
249// function call is inside of an scf.execute_region.
250
251//      CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
252
253//      CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
254//      CHECK: func private @some_external_func_within_scf_execute(memref<4xi32, #[[$DYN_1D_MAP]]>)
// Bodiless private function taking a tensor<4xi32>; no results.
255func.func private @some_external_func_within_scf_execute(tensor<4xi32>)
256
257//      CHECK: func @main()
// Materializes a constant tensor<4xi32> and passes it to
// @some_external_func_within_scf_execute from inside an scf.execute_region.
258func.func @main() {
259//  CHECK-DAG:   %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
260  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
261
262// Note: The scf.execute_region canonicalizes away.
263
264//  CHECK-DAG:   %[[alloc:.*]] = memref.alloc
265//  CHECK-DAG:   %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
266//  CHECK-DAG:   memref.copy %[[A]], %[[alloc]]
267//      CHECK:   call @some_external_func_within_scf_execute(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
268  scf.execute_region {
269    func.call @some_external_func_within_scf_execute(%A) : (tensor<4xi32>) -> ()
270    scf.yield
271  }
272
273//      CHECK:   memref.dealloc %[[alloc]]
274  return
275}
276
277// -----
278
279// A write inside an scf.execute_region. An equivalent tensor is yielded.
280
281// CHECK-LABEL: func @execute_region_test(
282//  CHECK-SAME:     %[[m1:.*]]: memref<?xf32
// Inside an scf.execute_region, inserts %f2 into %t1 at index 7 and yields
// (%f1, updated tensor, %f2). All three region results are returned.
283func.func @execute_region_test(%t1 : tensor<?xf32>)
284    -> (f32, tensor<?xf32>, f32)
285{
286  %f1 = arith.constant 0.0 : f32
287  %f2 = arith.constant 1.0 : f32
288  %idx = arith.constant 7 : index
289
290  // scf.execute_region is canonicalized away after bufferization. So just the
291  // memref.store is left over.
292
293  // CHECK-NOT: alloc
294  // CHECK-NOT: copy
295  // CHECK: memref.store %{{.*}}, %[[m1]][%{{.*}}]
296  %0, %1, %2 = scf.execute_region -> (f32, tensor<?xf32>, f32) {
297    %t2 = tensor.insert %f2 into %t1[%idx] : tensor<?xf32>
298    scf.yield %f1, %t2, %f2 : f32, tensor<?xf32>, f32
299  }
300
301  // CHECK: return %{{.*}}, %{{.*}} : f32, f32
302  return %0, %1, %2 : f32, tensor<?xf32>, f32
303}
304
305// -----
306
307//      CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
308
309//      CHECK:  func private @some_external_func(memref<?xf32, #[[$DYN_1D_MAP]]>)
// Bodiless private function taking a tensor<?xf32>; no results.
310func.func private @some_external_func(tensor<?xf32>)
311
312//      CHECK:  func @scf_for_with_tensor_insert_slice(
313// CHECK-SAME:    %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
314// CHECK-SAME:    %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
315// CHECK-SAME:    %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
// Runs an scf.for over [%lb, %ub) with step %step, carrying %A and %B as
// iter_args. Each iteration inserts the 4-element tensor %C at offset %i into
// both iter_args. The two loop results are returned in swapped order.
316func.func @scf_for_with_tensor_insert_slice(
317    %A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>,
318    %lb : index, %ub : index, %step : index)
319  -> (tensor<?xf32>, tensor<?xf32>)
320{
321  // CHECK-NEXT: scf.for
322  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
323      -> (tensor<?xf32>, tensor<?xf32>)
324  {
325    // CHECK-NEXT:   %[[SVA:.*]] = memref.subview %[[A]]
326    // CHECK-NEXT:   memref.copy %[[C]], %[[SVA]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
327    %ttA = tensor.insert_slice %C into %tA[%i][4][1] : tensor<4xf32> into tensor<?xf32>
328
329    // CHECK-NEXT:   %[[SVB:.*]] = memref.subview %[[B]]
330    // CHECK-NEXT:   memref.copy %[[C]], %[[SVB]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
331    %ttB = tensor.insert_slice %C into %tB[%i][4][1] : tensor<4xf32> into tensor<?xf32>
332
333    // scf.yield is empty and is elided
334    //  CHECK-NOT:   scf.yield
335    scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
336  }
337
338  // Swaparoo requires bufferizing the whole function to figure out who's who.
339  return %r0#1, %r0#0: tensor<?xf32>, tensor<?xf32>
340}
341
342//      CHECK:  func @bar(
343// CHECK-SAME:    %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
344// CHECK-SAME:    %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
345// CHECK-SAME:    %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
// Calls @scf_for_with_tensor_insert_slice with three writable tensors, passes
// the first call result to @some_external_func, and returns both call results
// in their original order.
346func.func @bar(
347    %A : tensor<?xf32> {bufferization.writable = true},
348    %B : tensor<?xf32> {bufferization.writable = true},
349    %C : tensor<4xf32> {bufferization.writable = true},
350    %lb : index, %ub : index, %step : index)
351  -> (tensor<?xf32>, tensor<?xf32>)
352{
353//  CHECK-DAG:   call @scf_for_with_tensor_insert_slice(%[[A]], %[[B]], %[[C]]
354  %r0:2 = call @scf_for_with_tensor_insert_slice(%A, %B, %C, %lb, %ub, %step) :
355      (tensor<?xf32>, tensor<?xf32>, tensor<4xf32>, index, index, index)
356        -> (tensor<?xf32>, tensor<?xf32>)
357
358  // %r0#0 requires a copy because we have no idea what the function is doing.
  // The copy source expected below is %B: the callee returns its loop results
  // swapped, so its first result corresponds to %B.
359//  CHECK-DAG:   %[[alloc:.*]] = memref.alloc
360//  CHECK-DAG:   %[[casted:.*]] = memref.cast %[[alloc]]
361//  CHECK-DAG:   memref.copy %[[B]], %[[alloc]]
362// CHECK-NEXT:   call @some_external_func(%[[casted]]) : (memref<?xf32, #[[$DYN_1D_MAP]]>) -> ()
363  call @some_external_func(%r0#0) : (tensor<?xf32>) -> ()
364
365//      CHECK:   return
366  return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
367}
368
369// -----
370
371//  CHECK-DAG: #[[$DYN_0D_MAP:.*]] = affine_map<()[s0] -> (s0)>
372//  CHECK-DAG: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
373
374//      CHECK:  func @init_and_dot(
375// CHECK-SAME:    %[[A:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]>
376// CHECK-SAME:    %[[B:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]>
377// CHECK-SAME:    %[[C:[a-zA-Z0-9]*]]: memref<f32, #[[$DYN_0D_MAP]]>
// Fills the 0-d tensor %c with 0.0, then runs linalg.dot on %a and %b with the
// filled tensor as the output operand, and returns the linalg.dot result.
378func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
379  // CHECK-NEXT:   %[[C0:.*]] = arith.constant 0{{.*}} : f32
380  %v0 = arith.constant 0.0 : f32
381
382  // CHECK-NEXT:   linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32, #[[$DYN_0D_MAP]]>)
383  %d = linalg.fill ins(%v0 : f32) outs(%c : tensor<f32>) -> tensor<f32>
384
385  // CHECK-NEXT:   linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, #[[$DYN_1D_MAP]]>, memref<64xf32, #[[$DYN_1D_MAP]]>) outs(%[[C]] : memref<f32, #[[$DYN_0D_MAP]]>)
386  %e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>)
387    outs(%d: tensor<f32>) -> tensor<f32>
388
389  // CHECK-NEXT:   return
390  return %e : tensor<f32>
391}
392
393//      CHECK:  func @main()
// Creates three init_tensors (two of 64 elements, one 0-d), fills them with
// 1.0, 2.0 and 0.0 respectively, calls @init_and_dot on them, casts the result
// to an unranked tensor and passes it to @print_memref_f32.
394func.func @main() {
395  //  CHECK-DAG:   %[[C0:.*]] = arith.constant 0{{.*}} : f32
396  //  CHECK-DAG:   %[[C1:.*]] = arith.constant 1{{.*}} : f32
397  //  CHECK-DAG:   %[[C2:.*]] = arith.constant 2{{.*}} : f32
398  %v0 = arith.constant 0.0 : f32
399  %v1 = arith.constant 1.0 : f32
400  %v2 = arith.constant 2.0 : f32
401
402  // CHECK-NEXT:   %[[A:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
403  // CHECK-NEXT:   %[[B:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
404  // CHECK-NEXT:   %[[C:.*]] = memref.alloc() {alignment = 128 : i64} : memref<f32>
405  //  CHECK-DAG:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
406  //  CHECK-DAG:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
407  //  CHECK-DAG:   %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
408  %A = linalg.init_tensor [64] : tensor<64xf32>
409  %B = linalg.init_tensor [64] : tensor<64xf32>
410  %C = linalg.init_tensor [] : tensor<f32>
411
412  //  CHECK-DAG:   linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>)
413  //  CHECK-DAG:   linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>)
414  //  CHECK-DAG:   linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32>)
415  %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
416  %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
417  %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor<f32>) -> tensor<f32>
418
419  // CHECK-NEXT:   call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
420  %res = call @init_and_dot(%AA, %BB, %CC) :
421    (tensor<64xf32>, tensor<64xf32>, tensor<f32>) -> tensor<f32>
422
423  // CHECK-NEXT:   %[[dC:.*]] = memref.cast %[[C]] : memref<f32> to memref<*xf32>
424  %res2 = tensor.cast %res: tensor<f32> to tensor<*xf32>
425
426  // CHECK-NEXT:   call @print_memref_f32(%[[dC]]) : (memref<*xf32>) -> ()
427  call @print_memref_f32(%res2) : (tensor<*xf32>) -> ()
428
429  // CHECK-DAG:   memref.dealloc %[[A]] : memref<64xf32>
430  // CHECK-DAG:   memref.dealloc %[[B]] : memref<64xf32>
431  // CHECK-DAG:   memref.dealloc %[[C]] : memref<f32>
432  // CHECK-NEXT:   return
433  return
434}
435
436//     CHECK:   func private @print_memref_f32(memref<*xf32>)
// Bodiless private function taking an unranked f32 tensor; no results.
437func.func private @print_memref_f32(tensor<*xf32>)
438
439// -----
440
441// CHECK: #[[$DYNAMIC:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
442
443// CHECK: func private @external_func(memref<?xf32, #[[$DYNAMIC]]>)
// Bodiless private function taking a tensor<?xf32>; no results.
444func.func private @external_func(tensor<?xf32>)
445
446//      CHECK: func @callee(
447// CHECK-SAME:   %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
448// CHECK-SAME:   %[[B:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
449// CHECK-SAME:   %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
// Passes each of its three tensor arguments to @external_func in turn. Only %A
// carries an explicit bufferization.buffer_layout attribute; %B and %C do not.
450func.func @callee(
451    %A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
452    %B : tensor<?xf32>,
453    %C : tensor<?xf32>) {
454// CHECK-NEXT: %[[CASTED:.*]] = memref.cast %[[A]] : memref<?xf32> to memref<?xf32, #[[$DYNAMIC]]>
455// CHECK-NEXT: call @external_func(%[[CASTED]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
456  call @external_func(%A) : (tensor<?xf32>) -> ()
457
458// CHECK-NEXT: call @external_func(%[[B]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
459  call @external_func(%B) : (tensor<?xf32>) -> ()
460
461// CHECK-NEXT: call @external_func(%[[C]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
462  call @external_func(%C) : (tensor<?xf32>) -> ()
463
464  return
465}
466
467//      CHECK: func @entry(
468// CHECK-SAME:   %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
469// CHECK-SAME:   %[[B:[0-9a-zA-Z]*]]: memref<?xf32>
470// CHECK-SAME:   %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
// Forwards three non-writable tensors to @callee. %A and %B carry an explicit
// bufferization.buffer_layout attribute; %C does not.
471func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
472                 %B : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
473                 %C : tensor<?xf32> {bufferization.writable = false}) {
474// Note: `callee` does not write to its bbArg directly, but `external_func`
475// does. Inside `callee`, the writes via `external_func` do not cause a
476// conflict. However, inside `entry`, the writes do cause a conflict because
477// %A, %B and %C are not inplaceable. This test case shows that this kind of
478// conflict detection has a "transitive" nature.
479//  CHECK-DAG: %[[ALLOC_C:.*]] = memref.alloc
480//  CHECK-DAG: %[[CASTED_C:.*]] = memref.cast %[[ALLOC_C]]
481//  CHECK-DAG: %[[ALLOC_B:.*]] = memref.alloc
482//  CHECK-DAG: %[[CASTED_B:.*]] = memref.cast %[[ALLOC_B]]
483//  CHECK-DAG: %[[ALLOC_A:.*]] = memref.alloc
484//  CHECK-DAG: %[[CASTED_A:.*]] = memref.cast %[[ALLOC_A]]
485//  CHECK-DAG: memref.copy %[[A]], %[[ALLOC_A]]
486//  CHECK-DAG: memref.copy %[[B]], %[[ALLOC_B]]
487//  CHECK-DAG: memref.copy %[[C]], %[[ALLOC_C]]
488// CHECK-NEXT: call @callee(%[[CASTED_A]], %[[CASTED_B]], %[[CASTED_C]])
489  call @callee(%A, %B, %C) : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> ()
490  return
491}
492
493// -----
494
495// No alloc or copy inside of the loop.
496
497// CHECK-LABEL: func @inner_func(
498//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
// Inserts 1.0 at index 0 of %t and returns the updated tensor.
499func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
500  %f = arith.constant 1.0 : f32
501  %c0 = arith.constant 0 : index
502  // CHECK: memref.store %{{.*}}, %[[arg0]]
503  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
504  return %0 : tensor<?xf32>
505}
506
507// CHECK-LABEL: func @equivalent_func_arg(
508//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
// Runs an scf.for with the writable %t0 as iter_arg. Each iteration calls
// @inner_func on the iter_arg and yields the call result; the loop result is
// returned.
509func.func @equivalent_func_arg(%t0: tensor<?xf32> {bufferization.writable = true},
510                               %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
511  // CHECK-NOT: alloc
512  // CHECK-NOT: copy
513  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
514    // CHECK: call @inner_func(%[[arg0]])
515    %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
516    scf.yield %3 : tensor<?xf32>
517  }
518  return %1: tensor<?xf32>
519}
520
521// -----
522
523// inner_func_2 modifies the bbArg, but the loop yields the original value. A
524// buffer copy must be inserted inside the loop.
525
526// CHECK-LABEL: func @inner_func_2(
527//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
// Inserts 1.0 at index 0 of %t and returns the updated tensor.
528func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
529  %f = arith.constant 1.0 : f32
530  %c0 = arith.constant 0 : index
531  // CHECK: memref.store %{{.*}}, %[[arg0]]
532  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
533  return %0 : tensor<?xf32>
534}
535
536// CHECK-LABEL: func @equivalent_func_arg_2(
537//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
// Runs an scf.for with the writable %t0 as iter_arg. Each iteration calls
// @inner_func_2 on the iter_arg but yields the original iter_arg %t1; the call
// result %3 is not used.
538func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {bufferization.writable = true},
539                                 %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
540  // CHECK: scf.for {{.*}} {
541  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
542    // CHECK: %[[alloc:.*]] = memref.alloc
543    // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
544    // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
545    // CHECK: call @inner_func_2(%[[casted]])
546    // CHECK: memref.dealloc %[[alloc]]
547    // CHECK-NOT: scf.yield
548    %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
549    scf.yield %t1 : tensor<?xf32>
550  }
551  return %1: tensor<?xf32>
552}
553
554// -----
555
556// Bufferize without fully dynamic layout maps.
557
558// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, #map>) -> vector<4xf32> {
559// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
// Reads a vector<4xf32> from the non-writable tensor %A starting at index 0,
// with %f0 (0.0) as the padding operand, and returns the vector.
560func.func @transfer_read(
561    %A : tensor<?xf32> {bufferization.writable = false})
562  -> (vector<4xf32>)
563{
564  %c0 = arith.constant 0 : index
565  %f0 = arith.constant 0.0 : f32
566
567//       CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, #{{.*}}>, vector<4xf32>
568  %0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>
569
570//       CHECK: return %[[RES]] : vector<4xf32>
571  return %0 : vector<4xf32>
572}
573