// Note: Default is function-boundary-type-conversion=infer-layout-map
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -drop-equivalent-buffer-results -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP

// Test bufferization using memref types that have fully dynamic layout maps.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs function-boundary-type-conversion=fully-dynamic-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-FULLY-DYNAMIC-LAYOUT-MAP


// Bufferization of a bodiless function with no tensor return value.

// CHECK: #[[$map0:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK: #[[$map1:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
// CHECK-LABEL: func private @private_func(memref<?xf32,
// CHECK-SAME: #[[$map0]]>)
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func(memref<?xf32>)
func.func private @private_func(tensor<?xf32>) -> ()

// CHECK-LABEL: func private @private_func_2d(memref<?x?xf32,
// CHECK-SAME: #[[$map1]]>)
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func_2d(memref<?x?xf32>)
func.func private @private_func_2d(tensor<?x?xf32>) -> ()

// CHECK-LABEL: func @empty_func() {
// CHECK-NO-LAYOUT-MAP-LABEL: func @empty_func() {
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func @empty_func() {
func.func @empty_func() -> () {
  return
}

// -----

// A bodiless function that returns something that is not a tensor.

// CHECK: func private @external_func_with_return_val(memref<4xi32, #{{.*}}>) -> f32
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func private @external_func_with_return_val(memref<4xi32,
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: #[[$map1]]>
func.func private @external_func_with_return_val(tensor<4xi32>) -> f32

// -----
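
// For contrast with the next test case, a hedged sketch (added for
// illustration; not verified by FileCheck, and the function name is made up):
// a function whose returned tensor is equivalent to its bbArg. The
// -drop-equivalent-buffer-results flag used in the first RUN line is expected
// to drop such a result from the bufferized signature, so no new allocation
// is needed.
func.func @return_equivalent_arg(%t: tensor<?xf32>) -> tensor<?xf32> {
  return %t : tensor<?xf32>
}

// -----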

// A function that returns a non-equivalent tensor with a layout map.

// CHECK: #[[$map2:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>
// CHECK-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32,
// CHECK-SAME: #[[$map2]]> {
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
// CHECK: %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, #[[$map2]]>
// CHECK: return %[[subview]]

// CHECK-NO-LAYOUT-MAP: #[[$map2:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>
// CHECK-NO-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32>
// CHECK-NO-LAYOUT-MAP: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
// CHECK-NO-LAYOUT-MAP: %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, #[[$map2]]>
// CHECK-NO-LAYOUT-MAP: %[[alloc_no_layout:.*]] = memref.alloc(%{{.*}}) : memref<2x?xf32>
// CHECK-NO-LAYOUT-MAP: memref.copy %[[subview]], %[[alloc_no_layout]]
// CHECK-NO-LAYOUT-MAP: memref.dealloc %[[alloc]]
// CHECK-NO-LAYOUT-MAP: return %[[alloc_no_layout]]

// CHECK-FULLY-DYNAMIC-LAYOUT-MAP: #[[$map2a:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP: #[[$map2b:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 10 + s0 + d1)>
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32,
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: #[[$map2a]]> {
func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>)
{
  %t = bufferization.alloc_tensor() : tensor<20x10xf32>
  %0 = tensor.extract_slice %t[%idx, %idx][2, %sz][1, 1]
      : tensor<20x10xf32> to tensor<2x?xf32>
  return %0 : tensor<2x?xf32>
}

// -----

// CHECK-LABEL: func private @private_func
func.func private @private_func(tensor<?xf32>) -> (f32)

// private_func may modify the buffer arg, but that's OK because %t is writable.
// No alloc/copy should be inserted.

// CHECK-LABEL: func @main(
// CHECK-SAME: %[[t:.*]]: memref<?xf32
// CHECK-NOT: alloc
// CHECK-NOT: copy
// CHECK: call @private_func(%[[t]])
func.func @main(%t: tensor<?xf32> {bufferization.writable = true}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// CHECK-LABEL: func private @private_func
func.func private @private_func(tensor<?xf32>) -> (f32)

// private_func may modify the buffer arg, but %t is not writable. A copy is
// needed.

// CHECK-LABEL: func @main(
// CHECK-SAME: %[[t:.*]]: memref<?xf32
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: memref.copy %[[t]], %[[alloc]]
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: call @private_func(%[[casted]])
// CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// Test bufferization of a function without tensor args.

// CHECK-LABEL: func @func_without_tensor_args
func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
  // CHECK: %[[alloc:.*]] = memref.alloc()
  %0 = bufferization.alloc_tensor() : tensor<10xf32>

  %c0 = arith.constant 0 : index
  // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
  %1 = vector.transfer_write %v, %0[%c0] : vector<10xf32>, tensor<10xf32>

  %cst = arith.constant 0.0 : f32
  // CHECK: vector.transfer_read %[[alloc]]
  %r = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<11xf32>

  vector.print %r : vector<11xf32>
  return
}

// -----

// Bufferization of a function that reads and writes. %t0 is writable, so
// no copy should be inserted.

// CHECK-LABEL: func @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
  // CHECK: return %[[call]] : f32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// Bufferization of a function that reads and writes. %t0 is not writable,
// so a copy is needed.

// CHECK-LABEL: func @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = false}) -> (f32, tensor<?xf32>) {
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
  // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
  // CHECK: %[[call:.*]] = call @inner_func(%[[casted]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)

  // Note: The tensor return value cannot fold away because the CallOp
  // bufferized out-of-place.
  // CHECK: return %[[call]], %[[casted]] : f32, memref<?xf32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// A chain of function calls. The last function, @f0, potentially writes to
// the buffer. This becomes a problem when bufferizing @main, where a copy
// must then be inserted. (No copies are needed in the other functions.)

// CHECK-LABEL: func private @f0(
func.func private @f0(tensor<?xf32>) -> (f32)

// CHECK-LABEL: func @f1(
// CHECK-SAME: %[[t1:.*]]: memref<?xf32
// CHECK: %[[r1:.*]] = call @f0(%[[t1]])
// CHECK: return %[[r1]]
func.func @f1(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f0(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @f2(
// CHECK-SAME: %[[t2:.*]]: memref<?xf32
// CHECK: %[[r2:.*]] = call @f1(%[[t2]])
// CHECK: return %[[r2]]
func.func @f2(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f1(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @main(
// CHECK-SAME: %[[t3:.*]]: memref<?xf32
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: memref.copy %[[t3]], %[[alloc]]
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: call @f2(%[[casted]])
// CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @f2(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// This function does not read, it only writes. An alloc is needed, but no
// copy.

// CHECK-LABEL: func @does_not_read(
// CHECK-NOT: alloc
// CHECK-NOT: copy
func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f0 = arith.constant 0.0 : f32
  %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
  return %r : tensor<?xf32>
}

// CHECK-LABEL: func @main(
// CHECK-SAME: %[[t:.*]]: memref<?xf32
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK-NOT: copy
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK-NOT: copy
// CHECK: call @does_not_read(%[[casted]])
// CHECK: %[[r:.*]] = memref.load %[[casted]]
// CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> f32 {
  %0 = call @does_not_read(%t) : (tensor<?xf32>) -> (tensor<?xf32>)
  %idx = arith.constant 4 : index
  %r = tensor.extract %0[%idx] : tensor<?xf32>
  return %r : f32
}

// -----

// Alloc and copy must be inserted because the arith.constant is read-only.

// CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
// CHECK: func private @some_external_func(memref<4xi32, #[[$DYN_1D_MAP]]>)
func.func private @some_external_func(tensor<4xi32>)

// CHECK: func @main()
func.func @main() {
// CHECK-DAG: %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

// CHECK-DAG: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
// CHECK-DAG: memref.copy %[[A]], %[[alloc]]
// CHECK: call @some_external_func(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
  call @some_external_func(%A) : (tensor<4xi32>) -> ()

// CHECK: memref.dealloc %[[alloc]]
  return
}

// -----
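
// A hedged companion sketch to the test above (added for illustration; not
// verified by FileCheck, and the function names are made up): a function-local
// bufferization.alloc_tensor is writable, so passing it to an external
// function should not require an extra copy, only a cast to the layout the
// callee expects.
func.func private @some_other_external_func(tensor<4xi32>)

func.func @pass_local_alloc_to_external_func() {
  // The local allocation may be written by the callee without a conflict.
  %A = bufferization.alloc_tensor() : tensor<4xi32>
  call @some_other_external_func(%A) : (tensor<4xi32>) -> ()
  return
}

// -----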

// Alloc and copy must be inserted because the arith.constant is read-only.
// The function call is inside an scf.execute_region.

// CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
// CHECK: func private @some_external_func_within_scf_execute(memref<4xi32, #[[$DYN_1D_MAP]]>)
func.func private @some_external_func_within_scf_execute(tensor<4xi32>)

// CHECK: func @main()
func.func @main() {
// CHECK-DAG: %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

// Note: The scf.execute_region canonicalizes away.

// CHECK-DAG: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]>
// CHECK-DAG: memref.copy %[[A]], %[[alloc]]
// CHECK: call @some_external_func_within_scf_execute(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
  scf.execute_region {
    func.call @some_external_func_within_scf_execute(%A) : (tensor<4xi32>) -> ()
    scf.yield
  }

// CHECK: memref.dealloc %[[alloc]]
  return
}

// -----

// A write inside an scf.execute_region. An equivalent tensor is yielded.

// CHECK-LABEL: func @execute_region_test(
// CHECK-SAME: %[[m1:.*]]: memref<?xf32
func.func @execute_region_test(%t1 : tensor<?xf32>)
    -> (f32, tensor<?xf32>, f32)
{
  %f1 = arith.constant 0.0 : f32
  %f2 = arith.constant 1.0 : f32
  %idx = arith.constant 7 : index

  // The scf.execute_region is canonicalized away after bufferization, so only
  // the memref.store is left over.

  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: memref.store %{{.*}}, %[[m1]][%{{.*}}]
  %0, %1, %2 = scf.execute_region -> (f32, tensor<?xf32>, f32) {
    %t2 = tensor.insert %f2 into %t1[%idx] : tensor<?xf32>
    scf.yield %f1, %t2, %f2 : f32, tensor<?xf32>, f32
  }

  // CHECK: return %{{.*}}, %{{.*}} : f32, f32
  return %0, %1, %2 : f32, tensor<?xf32>, f32
}

// -----

// CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: func private @some_external_func(memref<?xf32, #[[$DYN_1D_MAP]]>)
func.func private @some_external_func(tensor<?xf32>)

// CHECK: func @scf_for_with_tensor_insert_slice(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
func.func @scf_for_with_tensor_insert_slice(
    %A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>,
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK-NEXT: scf.for
  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
      -> (tensor<?xf32>, tensor<?xf32>)
  {
    // CHECK-NEXT: %[[SVA:.*]] = memref.subview %[[A]]
    // CHECK-NEXT: memref.copy %[[C]], %[[SVA]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
    %ttA = tensor.insert_slice %C into %tA[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // CHECK-NEXT: %[[SVB:.*]] = memref.subview %[[B]]
    // CHECK-NEXT: memref.copy %[[C]], %[[SVB]] : memref<4xf32, #[[$DYN_1D_MAP]]> to memref<4xf32, #[[$DYN_1D_MAP]]>
    %ttB = tensor.insert_slice %C into %tB[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // scf.yield is empty and is elided
    // CHECK-NOT: scf.yield
    scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
  }

  // Swapping the two results requires bufferizing the whole function to
  // figure out which buffer each result maps to.
  return %r0#1, %r0#0: tensor<?xf32>, tensor<?xf32>
}

// CHECK: func @bar(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
func.func @bar(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = true},
    %C : tensor<4xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK-DAG: call @scf_for_with_tensor_insert_slice(%[[A]], %[[B]], %[[C]]
  %r0:2 = call @scf_for_with_tensor_insert_slice(%A, %B, %C, %lb, %ub, %step) :
      (tensor<?xf32>, tensor<?xf32>, tensor<4xf32>, index, index, index)
        -> (tensor<?xf32>, tensor<?xf32>)

  // %r0#0 requires a copy because we do not know what the external function
  // does with the buffer.
// CHECK-DAG: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK-DAG: memref.copy %[[B]], %[[alloc]]
// CHECK-NEXT: call @some_external_func(%[[casted]]) : (memref<?xf32, #[[$DYN_1D_MAP]]>) -> ()
  call @some_external_func(%r0#0) : (tensor<?xf32>) -> ()

// CHECK: return
  return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-DAG: #[[$DYN_0D_MAP:.*]] = affine_map<()[s0] -> (s0)>
// CHECK-DAG: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: func @init_and_dot(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<f32, #[[$DYN_0D_MAP]]>
func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
  // CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32
  %v0 = arith.constant 0.0 : f32

  // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32, #[[$DYN_0D_MAP]]>)
  %d = linalg.fill ins(%v0 : f32) outs(%c : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, #[[$DYN_1D_MAP]]>, memref<64xf32, #[[$DYN_1D_MAP]]>) outs(%[[C]] : memref<f32, #[[$DYN_0D_MAP]]>)
  %e = linalg.dot ins(%a, %b : tensor<64xf32>, tensor<64xf32>)
      outs(%d: tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: return
  return %e : tensor<f32>
}

// CHECK: func @main()
func.func @main() {
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0{{.*}} : f32
  // CHECK-DAG: %[[C1:.*]] = arith.constant 1{{.*}} : f32
  // CHECK-DAG: %[[C2:.*]] = arith.constant 2{{.*}} : f32
  %v0 = arith.constant 0.0 : f32
  %v1 = arith.constant 1.0 : f32
  %v2 = arith.constant 2.0 : f32

  // CHECK-NEXT: %[[A:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
  // CHECK-NEXT: %[[B:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
  // CHECK-NEXT: %[[C:.*]] = memref.alloc() {alignment = 128 : i64} : memref<f32>
  // CHECK-DAG: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
  // CHECK-DAG: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
  // CHECK-DAG: %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
  %A = bufferization.alloc_tensor() : tensor<64xf32>
  %B = bufferization.alloc_tensor() : tensor<64xf32>
  %C = bufferization.alloc_tensor() : tensor<f32>

  // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>)
  // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>)
  // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32>)
  %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
  %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
  %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
  %res = call @init_and_dot(%AA, %BB, %CC) :
      (tensor<64xf32>, tensor<64xf32>, tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: %[[dC:.*]] = memref.cast %[[cC]] : memref<f32, {{.*}}> to memref<*xf32>
  %res2 = tensor.cast %res: tensor<f32> to tensor<*xf32>

  // CHECK-NEXT: call @printMemrefF32(%[[dC]]) : (memref<*xf32>) -> ()
  call @printMemrefF32(%res2) : (tensor<*xf32>) -> ()

  // CHECK-DAG: memref.dealloc %[[A]] : memref<64xf32>
  // CHECK-DAG: memref.dealloc %[[B]] : memref<64xf32>
  // CHECK-DAG: memref.dealloc %[[C]] : memref<f32>
  // CHECK-NEXT: return
  return
}

// CHECK: func private @printMemrefF32(memref<*xf32>)
func.func private @printMemrefF32(tensor<*xf32>)

// -----

// CHECK: #[[$DYNAMIC:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

// CHECK: func private @external_func(memref<?xf32, #[[$DYNAMIC]]>)
func.func private @external_func(tensor<?xf32>)

// CHECK: func @callee(
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
func.func @callee(
    %A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
    %B : tensor<?xf32>,
    %C : tensor<?xf32>) {
// CHECK-NEXT: %[[CASTED:.*]] = memref.cast %[[A]] : memref<?xf32> to memref<?xf32, #[[$DYNAMIC]]>
// CHECK-NEXT: call @external_func(%[[CASTED]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
  call @external_func(%A) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[B]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
  call @external_func(%B) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[C]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
  call @external_func(%C) : (tensor<?xf32>) -> ()

  return
}

// CHECK: func @entry(
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %B : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %C : tensor<?xf32> {bufferization.writable = false}) {
// Note: `callee` does not write to its bbArg directly, but `external_func`
// does. Inside `callee`, the writes via `external_func` do not cause a
// conflict. However, inside `entry`, the writes do cause a conflict because
// %A, %B and %C are not inplaceable. This test case shows that this kind of
// conflict detection has a "transitive" nature.
// CHECK-DAG: %[[ALLOC_A:.*]] = memref.alloc
// CHECK-DAG: %[[CASTED_A:.*]] = memref.cast %[[ALLOC_A]]
// CHECK-DAG: %[[ALLOC_B:.*]] = memref.alloc
// CHECK-DAG: %[[CASTED_B:.*]] = memref.cast %[[ALLOC_B]]
// CHECK-DAG: %[[ALLOC_C:.*]] = memref.alloc
// CHECK-DAG: %[[CASTED_C:.*]] = memref.cast %[[ALLOC_C]]
// CHECK-DAG: memref.copy %[[A]], %[[ALLOC_A]]
// CHECK-DAG: memref.copy %[[B]], %[[ALLOC_B]]
// CHECK-DAG: memref.copy %[[C]], %[[ALLOC_C]]
// CHECK-NEXT: call @callee(%[[CASTED_A]], %[[CASTED_B]], %[[CASTED_C]])
  call @callee(%A, %B, %C) : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> ()
  return
}

// -----

// No alloc or copy inside the loop.

// CHECK-LABEL: func @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg(%t0: tensor<?xf32> {bufferization.writable = true},
                               %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: scf.for {{.*}} iter_args(%[[t1:.*]] = %[[arg0]])
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: call @inner_func(%[[t1]])
    %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    // CHECK: scf.yield %[[t1]]
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// inner_func_2 modifies the bbArg, but the loop yields the original value. A
// buffer copy must be inserted inside the loop.

// CHECK-LABEL: func @inner_func_2(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg_2(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {bufferization.writable = true},
                                 %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK: scf.for {{.*}} {
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: %[[alloc:.*]] = memref.alloc
    // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
    // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
    // CHECK: call @inner_func_2(%[[casted]])
    // CHECK: memref.dealloc %[[alloc]]
    // CHECK-NOT: scf.yield
    %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %t1 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// Bufferize without fully dynamic layout maps.

// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, #map>) -> vector<4xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
func.func @transfer_read(
    %A : tensor<?xf32> {bufferization.writable = false})
  -> (vector<4xf32>)
{
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

// CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, #{{.*}}>, vector<4xf32>
  %0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>

// CHECK: return %[[RES]] : vector<4xf32>
  return %0 : vector<4xf32>
}
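
// -----

// A hedged follow-up sketch (added for illustration; not verified by
// FileCheck, and the function name is made up): the writing counterpart of
// @transfer_read. Because %A is not writable, bufferization is expected to
// copy it into a fresh allocation before performing the write, and the new
// buffer is returned.
func.func @transfer_write_to_non_writable_arg(
    %A : tensor<?xf32> {bufferization.writable = false},
    %v : vector<4xf32>) -> tensor<?xf32>
{
  %c0 = arith.constant 0 : index
  %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor<?xf32>
  return %0 : tensor<?xf32>
}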