// All invocations below use the same base options (function-boundary
// bufferization with returned allocations allowed) so that the verified output
// and the analysis-only fuzzer runs exercise the same configuration.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// Note: the prefix handed to FileCheck must not itself end in a directive
// suffix such as -LABEL; otherwise the label directive used further down in
// this file would be parsed as a plain match instead of a label match.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP

// Bufferization of bodiless function with no tensor return value.

// CHECK-LABEL: func private @private_func
func.func private @private_func(tensor<?xf32>) -> ()

// CHECK-LABEL: func @empty_func()
func.func @empty_func() -> () {
  return
}

// -----

// A bodiless function that returns something that is not a tensor.

// CHECK: func private @external_func_with_return_val(memref<4xi32, #{{.*}}>) -> f32
func.func private @external_func_with_return_val(tensor<4xi32>) -> f32

// -----

// CHECK-LABEL: func private @private_func
func.func private @private_func(tensor<?xf32>) -> (f32)

// private_func may modify the buffer arg, but that's OK because %t is writable.
// No alloc/copy should be inserted.

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
//   CHECK-NOT:   alloc
//   CHECK-NOT:   copy
//       CHECK:   call @private_func(%[[t]])
func.func @main(%t: tensor<?xf32> {bufferization.writable = true}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// CHECK-LABEL: func private @private_func
func.func private @private_func(tensor<?xf32>) -> (f32)

// Here %t is not writable while private_func may still modify its buffer
// argument, so the caller has to pass a copy.

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
//       CHECK:   %[[alloc:.*]] = memref.alloc
//   CHECK-DAG:   memref.copy %[[t]], %[[alloc]]
//   CHECK-DAG:   %[[casted:.*]] = memref.cast %[[alloc]]
//       CHECK:   call @private_func(%[[casted]])
//       CHECK:   memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// A function whose signature contains no tensors at all.

// CHECK-LABEL: func @func_without_tensor_args
func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
  // CHECK: %[[alloc:.*]] = memref.alloc()
  %0 = linalg.init_tensor[10] : tensor<10xf32>

  %c0 = arith.constant 0 : index
  // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
  %1 = vector.transfer_write %v, %0[%c0] : vector<10xf32>, tensor<10xf32>

  %cst = arith.constant 0.0 : f32
  // CHECK: vector.transfer_read %[[alloc]]
  %r = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<11xf32>

  vector.print %r : vector<11xf32>
  return
}

// -----

// Bufferization of a function that is reading and writing. %t0 is writable, so
// no copy should be inserted.

// CHECK-LABEL: func @inner_func(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
  // CHECK: return %[[call]] : f32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// Bufferization of a function that is reading and writing. %t0 is not writable,
// so a copy is needed.

// CHECK-LABEL: func @inner_func(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = false}) -> (f32, tensor<?xf32>) {
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
  // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
  // CHECK: %[[call:.*]] = call @inner_func(%[[casted]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)

  // Note: The tensor return value has folded away.
  // CHECK: return %[[call]] : f32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// A chain of function calls. The last function f0 is potentially writing to the
// buffer. This becomes a problem when bufferizing main and a copy must be
// inserted then. (No copies in the other functions.)

// CHECK-LABEL: func private @f0(
func.func private @f0(tensor<?xf32>) -> (f32)

// CHECK-LABEL: func @f1(
//  CHECK-SAME:     %[[t1:.*]]: memref<?xf32
//       CHECK:   %[[r1:.*]] = call @f0(%[[t1]])
//       CHECK:   return %[[r1]]
func.func @f1(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f0(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @f2(
//  CHECK-SAME:     %[[t2:.*]]: memref<?xf32
//       CHECK:   %[[r2:.*]] = call @f1(%[[t2]])
//       CHECK:   return %[[r2]]
func.func @f2(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f1(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t3:.*]]: memref<?xf32
//       CHECK:   %[[alloc:.*]] = memref.alloc
//   CHECK-DAG:   memref.copy %[[t3]], %[[alloc]]
//   CHECK-DAG:   %[[casted:.*]] = memref.cast %[[alloc]]
//       CHECK:   call @f2(%[[casted]])
//       CHECK:   memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @f2(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// This function does not read, just write. We need an alloc, but no copy.

// CHECK-LABEL: func @does_not_read(
//   CHECK-NOT:   alloc
//   CHECK-NOT:   copy
func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f0 = arith.constant 0.0 : f32
  %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
  return %r : tensor<?xf32>
}

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
//       CHECK:   %[[alloc:.*]] = memref.alloc
//   CHECK-NOT:   copy
//       CHECK:   %[[casted:.*]] = memref.cast %[[alloc]]
//   CHECK-NOT:   copy
//       CHECK:   call @does_not_read(%[[casted]])
//       CHECK:   %[[r:.*]] = memref.load %[[alloc]]
//       CHECK:   memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> f32 {
  %0 = call @does_not_read(%t) : (tensor<?xf32>) -> (tensor<?xf32>)
  %idx = arith.constant 4 : index
  %r = tensor.extract %0[%idx] : tensor<?xf32>
  return %r : f32
}

// -----

// Alloc and copy must be inserted because the arith.constant is read-only.

// CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
// CHECK: func private @some_external_func(memref<4xi32, #[[$DYN_1D_MAP]]>)
func.func private @some_external_func(tensor<4xi32>)

// CHECK: func @main()
func.func @main() {
// CHECK-DAG: %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

// CHECK-DAG: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
// CHECK-DAG: memref.copy %[[A]], %[[alloc]]
// CHECK: call @some_external_func(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
  call @some_external_func(%A) : (tensor<4xi32>) -> ()

// CHECK: memref.dealloc %[[alloc]]
  return
}

// -----

// Same as the previous case (the read-only arith.constant forces an alloc and
// a copy), except that the call is nested in an scf.execute_region.

// CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
// CHECK: func private @some_external_func_within_scf_execute(memref<4xi32, #[[$DYN_1D_MAP]]>)
func.func private @some_external_func_within_scf_execute(tensor<4xi32>)

// CHECK: func @main()
func.func @main() {
// CHECK-DAG: %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

// Note: The scf.execute_region canonicalizes away.

// CHECK-DAG: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
// CHECK-DAG: memref.copy %[[A]], %[[alloc]]
// CHECK: call @some_external_func_within_scf_execute(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
  scf.execute_region {
    func.call @some_external_func_within_scf_execute(%A) : (tensor<4xi32>) -> ()
    scf.yield
  }

// CHECK: memref.dealloc %[[alloc]]
  return
}

// -----

// A write inside an scf.execute_region. An equivalent tensor is yielded.

// CHECK-LABEL: func @execute_region_test(
//  CHECK-SAME:     %[[m1:.*]]: memref<?xf32
func.func @execute_region_test(%t1 : tensor<?xf32>)
    -> (f32, tensor<?xf32>, f32)
{
  %f1 = arith.constant 0.0 : f32
  %f2 = arith.constant 1.0 : f32
  %idx = arith.constant 7 : index

  // scf.execute_region is canonicalized away after bufferization. So just the
  // memref.store is left over.

  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: memref.store %{{.*}}, %[[m1]][%{{.*}}]
  %0, %1, %2 = scf.execute_region -> (f32, tensor<?xf32>, f32) {
    %t2 = tensor.insert %f2 into %t1[%idx] : tensor<?xf32>
    scf.yield %f1, %t2, %f2 : f32, tensor<?xf32>, f32
  }

  // CHECK: return %{{.*}}, %{{.*}} : f32, f32
  return %0, %1, %2 : f32, tensor<?xf32>, f32
}

// -----

// CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: func private @some_external_func(memref<?xf32, #[[$DYN_1D_MAP]]>)
func.func private @some_external_func(tensor<?xf32>)

// CHECK: func @scf_for_with_tensor_insert_slice(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
func.func @scf_for_with_tensor_insert_slice(
    %A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>,
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK-NEXT: scf.for
  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
      -> (tensor<?xf32>, tensor<?xf32>)
  {
    // CHECK-NEXT: %[[SVA:.*]] = memref.subview %[[A]]
    // CHECK-NEXT: memref.copy %[[C]], %[[SVA]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
    %ttA = tensor.insert_slice %C into %tA[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // CHECK-NEXT: %[[SVB:.*]] = memref.subview %[[B]]
    // CHECK-NEXT: memref.copy %[[C]], %[[SVB]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
    %ttB = tensor.insert_slice %C into %tB[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // scf.yield is empty and is elided
    // CHECK-NOT: scf.yield
    scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
  }

  // The loop results are returned in swapped order, so the whole function has
  // to be bufferized to figure out which result corresponds to which buffer.
  return %r0#1, %r0#0: tensor<?xf32>, tensor<?xf32>
}

// CHECK: func @bar(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
func.func @bar(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = true},
    %C : tensor<4xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK-DAG: call @scf_for_with_tensor_insert_slice(%[[A]], %[[B]], %[[C]]
  %r0:2 = call @scf_for_with_tensor_insert_slice(%A, %B, %C, %lb, %ub, %step) :
      (tensor<?xf32>, tensor<?xf32>, tensor<4xf32>, index, index, index)
        -> (tensor<?xf32>, tensor<?xf32>)

  // %r0#0 requires a copy because we have no idea what the function is doing.
// CHECK-DAG: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK-DAG: memref.copy %[[B]], %[[alloc]]
// CHECK-NEXT: call @some_external_func(%[[casted]]) : (memref<?xf32, #[[$DYN_1D_MAP]]>) -> ()
  call @some_external_func(%r0#0) : (tensor<?xf32>) -> ()

// CHECK: return
  return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-DAG: #[[$DYN_0D_MAP:.*]] = affine_map<()[s0] -> (s0)>
// CHECK-DAG: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: func @init_and_dot(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<f32, #[[$DYN_0D_MAP]]>
func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
  // CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32
  %v0 = arith.constant 0.0 : f32

  // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32, #[[$DYN_0D_MAP]]>)
  %d = linalg.fill ins(%v0 : f32) outs(%c : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, #[[$DYN_1D_MAP]]>, memref<64xf32, #[[$DYN_1D_MAP]]>) outs(%[[C]] : memref<f32, #[[$DYN_0D_MAP]]>)
  %e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>)
    outs(%d: tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: return
  return %e : tensor<f32>
}

// CHECK: func @main()
func.func @main() {
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0{{.*}} : f32
  // CHECK-DAG: %[[C1:.*]] = arith.constant 1{{.*}} : f32
  // CHECK-DAG: %[[C2:.*]] = arith.constant 2{{.*}} : f32
  %v0 = arith.constant 0.0 : f32
  %v1 = arith.constant 1.0 : f32
  %v2 = arith.constant 2.0 : f32

  // CHECK-NEXT: %[[A:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
  // CHECK-NEXT: %[[B:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
  // CHECK-NEXT: %[[C:.*]] = memref.alloc() {alignment = 128 : i64} : memref<f32>
  // CHECK-DAG: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
  // CHECK-DAG: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
  // CHECK-DAG: %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
  %A = linalg.init_tensor [64] : tensor<64xf32>
  %B = linalg.init_tensor [64] : tensor<64xf32>
  %C = linalg.init_tensor [] : tensor<f32>

  // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>)
  // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>)
  // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32>)
  %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
  %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
  %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
  %res = call @init_and_dot(%AA, %BB, %CC) :
    (tensor<64xf32>, tensor<64xf32>, tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: %[[dC:.*]] = memref.cast %[[C]] : memref<f32> to memref<*xf32>
  %res2 = tensor.cast %res: tensor<f32> to tensor<*xf32>

  // CHECK-NEXT: call @print_memref_f32(%[[dC]]) : (memref<*xf32>) -> ()
  call @print_memref_f32(%res2) : (tensor<*xf32>) -> ()

  // CHECK-DAG: memref.dealloc %[[A]] : memref<64xf32>
  // CHECK-DAG: memref.dealloc %[[B]] : memref<64xf32>
  // CHECK-DAG: memref.dealloc %[[C]] : memref<f32>
  // CHECK-NEXT: return
  return
}

// CHECK: func private @print_memref_f32(memref<*xf32>)
func.func private @print_memref_f32(tensor<*xf32>)

// -----

// CHECK: #[[$DYNAMIC:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: func private @external_func(memref<?xf32, #[[$DYNAMIC]]>)
func.func private @external_func(tensor<?xf32>)

// CHECK: func @callee(
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
func.func @callee(
    %A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
    %B : tensor<?xf32>,
    %C : tensor<?xf32>) {
// CHECK-NEXT: %[[CASTED:.*]] = memref.cast %[[A]] : memref<?xf32> to memref<?xf32, #[[$DYNAMIC]]>
// CHECK-NEXT: call @external_func(%[[CASTED]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
  call @external_func(%A) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[B]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
  call @external_func(%B) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[C]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
  call @external_func(%C) : (tensor<?xf32>) -> ()

  return
}

// CHECK: func @entry(
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %B : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %C : tensor<?xf32> {bufferization.writable = false}) {
// Note: `callee` does not write to its bbArg directly, but `external_func`
// does. Inside `callee`, the writes via `external_func` do not cause a
// conflict. However, inside `entry`, the writes do cause a conflict because
// %A, %B and %C are not inplaceable. This test case shows that this kind of
// conflict detection has a "transitive" nature.
// CHECK-DAG: %[[ALLOC_C:.*]] = memref.alloc
// CHECK-DAG: %[[CASTED_C:.*]] = memref.cast %[[ALLOC_C]]
// CHECK-DAG: %[[ALLOC_B:.*]] = memref.alloc
// CHECK-DAG: %[[CASTED_B:.*]] = memref.cast %[[ALLOC_B]]
// CHECK-DAG: %[[ALLOC_A:.*]] = memref.alloc
// CHECK-DAG: %[[CASTED_A:.*]] = memref.cast %[[ALLOC_A]]
// CHECK-DAG: memref.copy %[[A]], %[[ALLOC_A]]
// CHECK-DAG: memref.copy %[[B]], %[[ALLOC_B]]
// CHECK-DAG: memref.copy %[[C]], %[[ALLOC_C]]
// CHECK-NEXT: call @callee(%[[CASTED_A]], %[[CASTED_B]], %[[CASTED_C]])
  call @callee(%A, %B, %C) : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> ()
  return
}

// -----

// No alloc or copy inside of the loop.

// CHECK-LABEL: func @inner_func(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg(%t0: tensor<?xf32> {bufferization.writable = true},
                               %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: call @inner_func(%[[arg0]])
    %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// inner_func_2 modifies the bbArg, but the loop yields the original value. A
// buffer copy must be inserted inside the loop.

// CHECK-LABEL: func @inner_func_2(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg_2(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {bufferization.writable = true},
                                 %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK: scf.for {{.*}} {
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: %[[alloc:.*]] = memref.alloc
    // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
    // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
    // CHECK: call @inner_func_2(%[[casted]])
    // CHECK: memref.dealloc %[[alloc]]
    // CHECK-NOT: scf.yield
    %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %t1 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// Bufferize without fully dynamic layout maps.

// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, #map>) -> vector<4xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
func.func @transfer_read(
    %A : tensor<?xf32> {bufferization.writable = false})
  -> (vector<4xf32>)
{
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

// CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, #{{.*}}>, vector<4xf32>
  %0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>

// CHECK: return %[[RES]] : vector<4xf32>
  return %0 : vector<4xf32>
}