// RUN: mlir-opt -verify-diagnostics -buffer-deallocation -split-input-file %s | FileCheck %s

// This file checks the behavior of the BufferDeallocation pass for moving and
// inserting missing DeallocOps in their correct positions. Furthermore,
// copies and their corresponding AllocOps are inserted.

// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation expected behavior: bb2 contains an AllocOp which is
// passed to bb3. In the latter block, there should be a deallocation.
// Since bb1 does not contain an adequate alloc and the alloc in bb2 is not
// moved to bb0, we need to insert allocs and copies.

// CHECK-LABEL: func @condBranch
func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cond_br %arg0, ^bb1, ^bb2
^bb1:
  br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cond_br
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
// CHECK: %[[ALLOC1:.*]] = memref.alloc
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb3(%[[ALLOC2]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation expected behavior: The existing AllocOp has a dynamic
// dependency to block argument %0 in bb2. Since the dynamic type is passed
// to bb3 via the block argument %2, it is currently required to allocate a
// temporary buffer for %2 that gets copies of %arg1 and %1 with their
// appropriate shape dimensions. The copy buffer deallocation will be applied
// to %2 in block bb3.

// CHECK-LABEL: func @condBranchDynamicType
func @condBranchDynamicType(
  %arg0: i1,
  %arg1: memref<?xf32>,
  %arg2: memref<?xf32>,
  %arg3: index) {
  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
  br ^bb3(%arg1 : memref<?xf32>)
^bb2(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
  br ^bb3(%1 : memref<?xf32>)
^bb3(%2: memref<?xf32>):
  test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
  return
}

// CHECK-NEXT: cond_br
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb3
// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC3]],
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: return

// -----

// Test Case: Same as above, but the buffer crosses the block boundary as an
// unranked memref.
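// A minimal sketch of the expected rewrite in bb2 (illustrative only, not
// verified by FileCheck; %clone is a placeholder name): the pass clones the
// unranked view and frees the underlying ranked allocation before branching.
//
//   %2 = memref.cast %1 : memref<?xf32> to memref<*xf32>
//   %clone = bufferization.clone %2 : memref<*xf32> to memref<*xf32>
//   memref.dealloc %1 : memref<?xf32>
//   br ^bb3(%clone : memref<*xf32>)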

// CHECK-LABEL: func @condBranchUnrankedType
func @condBranchUnrankedType(
  %arg0: i1,
  %arg1: memref<*xf32>,
  %arg2: memref<*xf32>,
  %arg3: index) {
  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
  br ^bb3(%arg1 : memref<*xf32>)
^bb2(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  %2 = memref.cast %1 : memref<?xf32> to memref<*xf32>
  test.buffer_based in(%arg1: memref<*xf32>) out(%2: memref<*xf32>)
  br ^bb3(%2 : memref<*xf32>)
^bb3(%3: memref<*xf32>):
  test.copy(%3, %arg2) : (memref<*xf32>, memref<*xf32>)
  return
}

// CHECK-NEXT: cond_br
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb3
// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC3]],
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: return

// -----

// Test Case:
//      bb0
//     /    \
//   bb1    bb2 <- Initial position of AllocOp
//    |     /  \
//    |   bb3  bb4
//    |     \  /
//    \     bb5
//     \    /
//      bb6
//       |
//      bb7
// BufferDeallocation expected behavior: The existing AllocOp has a dynamic
// dependency to block argument %0 in bb2. Since the dynamic type is passed to
// bb5 via the block argument %2 and to bb6 via block argument %3, it is
// currently required to allocate temporary buffers for %2 and %3 that get
// copies of %1 and %arg1 with their appropriate shape dimensions. The copy
// buffer deallocations will be applied to %2 in block bb5 and to %3 in block
// bb6. Furthermore, there should be no copy inserted for %4.

// CHECK-LABEL: func @condBranchDynamicTypeNested
func @condBranchDynamicTypeNested(
  %arg0: i1,
  %arg1: memref<?xf32>,
  %arg2: memref<?xf32>,
  %arg3: index) {
  cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
  br ^bb6(%arg1 : memref<?xf32>)
^bb2(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
  cond_br %arg0, ^bb3, ^bb4
^bb3:
  br ^bb5(%1 : memref<?xf32>)
^bb4:
  br ^bb5(%1 : memref<?xf32>)
^bb5(%2: memref<?xf32>):
  br ^bb6(%2 : memref<?xf32>)
^bb6(%3: memref<?xf32>):
  br ^bb7(%3 : memref<?xf32>)
^bb7(%4: memref<?xf32>):
  test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>)
  return
}

// CHECK-NEXT: cond_br{{.*}}
// CHECK-NEXT: ^bb1
// CHECK-NEXT: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: br ^bb6(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK: cond_br
// CHECK: ^bb3:
// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}})
// CHECK: ^bb4:
// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}})
// CHECK-NEXT: ^bb5(%[[ALLOC2:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC2]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb6(%[[ALLOC3]]{{.*}})
// CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}})
// CHECK-NEXT: br ^bb7(%[[ALLOC4]]{{.*}})
// CHECK-NEXT: ^bb7(%[[ALLOC5:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC5]],
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: return

// -----

// Test Case: Existing AllocOp with no users.
// BufferDeallocation expected behavior: It should insert a DeallocOp right
// before ReturnOp.

// CHECK-LABEL: func @emptyUsesValue
func @emptyUsesValue(%arg0: memref<4xf32>) {
  %0 = memref.alloc() : memref<4xf32>
  return
}
// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0
//   /   \
//  |    bb1 <- Initial position of AllocOp
//   \   /
//    bb2
// BufferDeallocation expected behavior: It should insert a DeallocOp in the
// exit block after CopyOp since %1 is an alias for %0 and %arg1. Furthermore,
// we have to insert a copy and an alloc at the beginning of the function.

// CHECK-LABEL: func @criticalEdge
func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cond_br
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0 <- Initial position of AllocOp
//   /   \
//  |    bb1
//   \   /
//    bb2
// BufferDeallocation expected behavior: It only inserts a DeallocOp in the
// exit block after CopyOp since %1 is an alias for %0 and %arg1.

// CHECK-LABEL: func @invCriticalEdge
func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: dealloc
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0 <- Initial position of the first AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: It only inserts two missing
// DeallocOps in the exit block. %5 is an alias for %0. Therefore, the
// DeallocOp for %0 should occur after the last BufferBasedOp. The Dealloc for
// %7 should happen after CopyOp.
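// A minimal sketch of the expected exit block (illustrative only, not
// verified by FileCheck); since %5 aliases %0, %0 is freed right after its
// last transitive use:
//
//   %7 = memref.alloc() : memref<2xf32>
//   test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
//   memref.dealloc %0 : memref<2xf32>
//   test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
//   memref.dealloc %7 : memref<2xf32>
//   return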

// CHECK-LABEL: func @ifElse
func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  %7 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
  test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: memref.dealloc %[[FIRST_ALLOC]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[SECOND_ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case: No users for buffer in if-else CFG
//    bb0 <- Initial position of AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3
// BufferDeallocation expected behavior: It only inserts a missing DeallocOp
// in the exit block since %5 and %6 are the latest aliases of %0.

// CHECK-LABEL: func @ifElseNoUsers
func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[FIRST_ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case:
//      bb0 <- Initial position of the first AllocOp
//     /    \
//   bb1    bb2
//    |     /  \
//    |   bb3  bb4
//    \     \  /
//     \     /
//       bb5 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: Two missing DeallocOps should be
// inserted in the exit block.
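// As in @ifElse, both deallocs are expected to land in the exit block bb5
// (illustrative sketch only, not verified by FileCheck):
//
//   memref.dealloc %0 : memref<2xf32>   // after the last use through %7
//   test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
//   memref.dealloc %9 : memref<2xf32>
//   return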

// CHECK-LABEL: func @ifElseNested
func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
^bb3(%5: memref<2xf32>):
  br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
^bb4(%6: memref<2xf32>):
  br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
  %9 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
  test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: memref.dealloc %[[FIRST_ALLOC]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[SECOND_ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case: Dead operations in a single block.
// BufferDeallocation expected behavior: It only inserts the two missing
// DeallocOps after the last BufferBasedOp.

// CHECK-LABEL: func @redundantOperations
func @redundantOperations(%arg0: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  %1 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>)
  return
}

// CHECK: (%[[ARG0:.*]]: {{.*}})
// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}}out(%[[FIRST_ALLOC]]
// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based in(%[[FIRST_ALLOC]]{{.*}}out(%[[SECOND_ALLOC]]
// CHECK: dealloc
// CHECK-NEXT: dealloc
// CHECK-NEXT: return

// -----

// Test Case:
//                                     bb0
//                                    /   \
// Initial pos of the 1st AllocOp -> bb1  bb2 <- Initial pos of the 2nd AllocOp
//                                    \   /
//                                     bb3
// BufferDeallocation expected behavior: We need to introduce a copy for each
// buffer since the buffers are passed to bb3. Both missing DeallocOps are
// inserted in the respective block of the allocs. The copy is freed in the
// exit block.
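// A minimal sketch of the rewrite in one branch (illustrative only, not
// verified by FileCheck; %copy is a placeholder name); the other branch is
// symmetric:
//
//   %0 = memref.alloc() : memref<2xf32>
//   test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
//   %copy = bufferization.clone %0 : memref<2xf32> to memref<2xf32>
//   memref.dealloc %0 : memref<2xf32>
//   br ^exit(%copy : memref<2xf32>)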

// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
func @moving_alloc_and_inserting_missing_dealloc(
  %cond: i1,
  %arg0: memref<2xf32>,
  %arg1: memref<2xf32>) {
  cond_br %cond, ^bb1, ^bb2
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  br ^exit(%0 : memref<2xf32>)
^bb2:
  %1 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
  br ^exit(%1 : memref<2xf32>)
^exit(%arg2: memref<2xf32>):
  test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cond_br{{.*}}
// CHECK-NEXT: ^bb1
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %[[ALLOC0]]
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: br ^bb3(%[[ALLOC1]]
// CHECK-NEXT: ^bb2
// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC2]]
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: br ^bb3(%[[ALLOC3]]
// CHECK-NEXT: ^bb3(%[[ALLOC4:.*]]:{{.*}})
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: return

// -----

// Test Case: Invalid position of the DeallocOp. There is a user after
// deallocation.
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation expected behavior: The existing DeallocOp should be
// moved to the exit block.

// CHECK-LABEL: func @moving_invalid_dealloc_op_complex
func @moving_invalid_dealloc_op_complex(
  %cond: i1,
  %arg0: memref<2xf32>,
  %arg1: memref<2xf32>) {
  %1 = memref.alloc() : memref<2xf32>
  cond_br %cond, ^bb1, ^bb2
^bb1:
  br ^exit(%arg0 : memref<2xf32>)
^bb2:
  test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
  memref.dealloc %1 : memref<2xf32>
  br ^exit(%1 : memref<2xf32>)
^exit(%arg2: memref<2xf32>):
  test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: cond_br
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: return

// -----

// Test Case: Inserting missing DeallocOp in a single block.

// CHECK-LABEL: func @inserting_missing_dealloc_simple
func @inserting_missing_dealloc_simple(
  %arg0 : memref<2xf32>,
  %arg1: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]

// -----

// Test Case: Moving an invalid DeallocOp (there is a user after the
// deallocation) in a single block.
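// The expected rewrite simply moves the dealloc behind the last user
// (illustrative sketch only, not verified by FileCheck):
//
//   %0 = memref.alloc() : memref<2xf32>
//   test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
//   test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>)
//   memref.dealloc %0 : memref<2xf32>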

// CHECK-LABEL: func @moving_invalid_dealloc_op
func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  memref.dealloc %0 : memref<2xf32>
  test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]

// -----

// Test Case: Nested regions - This test defines a BufferBasedOp inside the
// region of a RegionBufferBasedOp.
// BufferDeallocation expected behavior: The AllocOp for the BufferBasedOp
// should remain inside the region of the RegionBufferBasedOp, and the pass
// should insert the missing DeallocOp in the same region. The missing
// DeallocOp should be inserted after CopyOp.

// CHECK-LABEL: func @nested_regions_and_cond_branch
func @nested_regions_and_cond_branch(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cond_br %arg0, ^bb1, ^bb2
^bb1:
  br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloc() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}
// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
// CHECK: %[[ALLOC0:.*]] = bufferization.clone %[[ARG1]]
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
// CHECK: %[[ALLOC2:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC2]]
// CHECK: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: %{{.*}} = math.exp
// CHECK: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc

// -----

// Test Case: buffer deallocation escaping
// BufferDeallocation expected behavior: It must not dealloc %arg1 and %x
// since they are operands of the return operation and thus escape the
// function. It should dealloc %y after CopyOp.

// CHECK-LABEL: func @memref_in_function_results
func @memref_in_function_results(
  %arg0: memref<5xf32>,
  %arg1: memref<10xf32>,
  %arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) {
  %x = memref.alloc() : memref<15xf32>
  %y = memref.alloc() : memref<5xf32>
  test.buffer_based in(%arg0: memref<5xf32>) out(%y: memref<5xf32>)
  test.copy(%y, %arg2) : (memref<5xf32>, memref<5xf32>)
  return %arg1, %x : memref<10xf32>, memref<15xf32>
}
// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
// CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>)
// CHECK: %[[X:.*]] = memref.alloc()
// CHECK: %[[Y:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK: memref.dealloc %[[Y]]
// CHECK: return %[[ARG1]], %[[X]]

// -----

// Test Case: nested region control flow
// The alloc %1 flows through both if branches until it is finally returned.
// Hence, it does not require a specific dealloc operation. However, %3
// requires a dealloc.

// CHECK-LABEL: func @nested_region_control_flow
func @nested_region_control_flow(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: %[[ALLOC2:.*]] = memref.alloc(%arg0, %arg1)
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: scf.yield %[[ALLOC0]]
// CHECK: return %[[ALLOC1]]

// -----

// Test Case: nested region control flow with a nested buffer allocation in a
// divergent branch.
// BufferDeallocation inserts a copy (clone) of both %1 and %3, since both
// are returned in the end.

// CHECK-LABEL: func @nested_region_control_flow_div
func @nested_region_control_flow_div(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %3 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC0]]
// CHECK: scf.yield %[[ALLOC2]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc(%arg0, %arg1)
// CHECK-NEXT: %[[ALLOC4:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: return %[[ALLOC1]]

// -----

// Test Case: nested region control flow within a region interface.
// No copies are required in this case since the allocation finally escapes
// the method.
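// Conceptually the buffer is forwarded unchanged through every region of
// test.region_if, so the only remaining lifetime event is the escaping
// return (illustrative sketch only, not verified by FileCheck):
//
//   %1 = test.region_if %0 ... // each region yields its block argument
//   return %1 : memref<?x?xf32> // %1 aliases %0; no dealloc is inserted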

// CHECK-LABEL: func @inner_region_control_flow
func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
  %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %1 = test.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>) then {
  ^bb0(%arg1 : memref<?x?xf32>):
    test.region_if_yield %arg1 : memref<?x?xf32>
  } else {
  ^bb0(%arg1 : memref<?x?xf32>):
    test.region_if_yield %arg1 : memref<?x?xf32>
  } join {
  ^bb0(%arg1 : memref<?x?xf32>):
    test.region_if_yield %arg1 : memref<?x?xf32>
  }
  return %1 : memref<?x?xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = test.region_if
// CHECK-NEXT: ^bb0(%[[ALLOC2:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC2]]
// CHECK: ^bb0(%[[ALLOC3:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC3]]
// CHECK: ^bb0(%[[ALLOC4:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC4]]
// CHECK: return %[[ALLOC1]]

// -----

// CHECK-LABEL: func @subview
func @subview(%arg0 : index, %arg1 : index, %arg2 : memref<?x?xf32>) {
  %0 = memref.alloc() : memref<64x4xf32, offset: 0, strides: [4, 1]>
  %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
    memref<64x4xf32, offset: 0, strides: [4, 1]>
    to memref<?x?xf32, offset: ?, strides: [?, ?]>
  test.copy(%1, %arg2) :
    (memref<?x?xf32, offset: ?, strides: [?, ?]>, memref<?x?xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: memref.subview
// CHECK-NEXT: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return

// -----

// Test Case: In the presence of AllocaOps, only the AllocOps have to be
// freed. Therefore, the AllocaOps are not handled.

// CHECK-LABEL: func @condBranchAlloca
func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  cond_br %arg0, ^bb1, ^bb2
^bb1:
  br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloca() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: cond_br
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK: br ^bb3(%[[ALLOCA]]
// CHECK-NEXT: ^bb3
// CHECK-NEXT: test.copy
// CHECK-NEXT: return

// -----

// Test Case: In the presence of AllocaOps, only the AllocOps have to be
// freed. Therefore, the AllocaOps are not handled. In this case, only the
// alloc %0 gets a dealloc.
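// A minimal sketch of the expected exit block (illustrative only, not
// verified by FileCheck); the alloca needs no dealloc:
//
//   %7 = memref.alloca() : memref<2xf32>
//   test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
//   memref.dealloc %0 : memref<2xf32>
//   test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
//   return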

// CHECK-LABEL: func @ifElseAlloca
func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  %7 = memref.alloca() : memref<2xf32>
  test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
  test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based
// CHECK: memref.dealloc %[[ALLOC]]
// CHECK: test.copy
// CHECK-NEXT: return

// -----

// CHECK-LABEL: func @ifElseNestedAlloca
func @ifElseNestedAlloca(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  %0 = memref.alloca() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
^bb3(%5: memref<2xf32>):
  br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
^bb4(%6: memref<2xf32>):
  br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
  %9 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
  test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return

// -----

// CHECK-LABEL: func @nestedRegionsAndCondBranchAlloca
func @nestedRegionsAndCondBranchAlloca(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cond_br %arg0, ^bb1, ^bb2
^bb1:
  br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloca() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}
// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
// CHECK: ^[[BB1]]:
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOCA]]
// CHECK: %{{.*}} = math.exp
// CHECK: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc

// -----

// CHECK-LABEL: func @nestedRegionControlFlowAlloca
func @nestedRegionControlFlowAlloca(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloca(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: %[[ALLOCA:.*]] = memref.alloca(%arg0, %arg1)
// CHECK-NEXT: scf.yield %[[ALLOC0]]
// CHECK: return %[[ALLOC1]]

// -----

// Test Case: structured control-flow loop using a nested alloc.
// The iteration argument %iterBuf has to be freed before yielding %3 to avoid
// memory leaks.

// CHECK-LABEL: func @loop_alloc
func @loop_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = memref.alloc() : memref<2xf32>
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %arg3
// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: arith.cmpi
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc()
// CHECK: %[[ALLOC4:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: }
// CHECK: test.copy(%[[ALLOC2]], %arg4)
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]

// -----

// Test Case: structured control-flow loop with a nested if operation.
// The loop yields buffers that have been defined outside of the loop, and the
// backedges only use the iteration arguments (or one of their aliases).
// Therefore, we do not have to (and are not allowed to) free any buffers
// that are passed via the backedges.
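// Hence the rewritten loop body is expected to contain neither clones nor
// deallocs (illustrative sketch only, not verified by FileCheck):
//
//   %3 = scf.if %2 -> (memref<2xf32>) {
//     scf.yield %0 : memref<2xf32>
//   } else {
//     scf.yield %iterBuf : memref<2xf32>
//   }
//   scf.yield %3 : memref<2xf32>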

// CHECK-LABEL: func @loop_nested_if_no_alloc
func @loop_nested_if_no_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      scf.yield %0 : memref<2xf32>
    } else {
      scf.yield %iterBuf : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] =
// CHECK: %[[ALLOC2:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: scf.yield %[[IALLOC]]
// CHECK: scf.yield %[[ALLOC2]]
// CHECK: test.copy(%[[ALLOC1]], %arg4)
// CHECK: memref.dealloc %[[ALLOC0]]

// -----

// Test Case: structured control-flow loop with a nested if operation using
// a deeply nested buffer allocation.
// Since the innermost allocation happens in a divergent branch, we have to
// introduce additional copies for the nested if operation. Since the loop's
// yield operation "returns" %3, it will return a newly allocated buffer.
// Therefore, we have to free the iteration argument %iterBuf before
// "returning" %3.

// CHECK-LABEL: func @loop_nested_if_alloc
func @loop_nested_if_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>) -> memref<2xf32> {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      %4 = memref.alloc() : memref<2xf32>
      scf.yield %4 : memref<2xf32>
    } else {
      scf.yield %0 : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  return %1 : memref<2xf32>
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %arg3
// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = scf.if

// CHECK: %[[ALLOC4:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC5:.*]] = bufferization.clone %[[ALLOC4]]
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: scf.yield %[[ALLOC5]]

// CHECK: %[[ALLOC6:.*]] = bufferization.clone %[[ALLOC0]]
// CHECK-NEXT: scf.yield %[[ALLOC6]]

// CHECK: %[[ALLOC7:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]

// CHECK: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: return %[[ALLOC2]]

// -----

// Test Case: several nested structured control-flow loops with a deeply
// nested buffer allocation inside an if operation.
// Same behavior as in loop_nested_if_alloc: deallocations have to be
// inserted before each yield in all loops recursively.
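// The per-iteration pattern repeats at every loop level (illustrative sketch
// only, not verified by FileCheck; %local, %iterArg, %result, and %next are
// placeholder names):
//
//   %local = bufferization.clone %iterArg : memref<2xf32> to memref<2xf32>
//   memref.dealloc %iterArg : memref<2xf32>
//   ...
//   %next = bufferization.clone %result : memref<2xf32> to memref<2xf32>
//   memref.dealloc %result : memref<2xf32>
//   scf.yield %next : memref<2xf32>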

// CHECK-LABEL: func @loop_nested_alloc
func @loop_nested_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = scf.for %i2 = %lb to %ub step %step
      iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> {
      %3 = scf.for %i3 = %lb to %ub step %step
        iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> {
        %4 = memref.alloc() : memref<2xf32>
        %5 = arith.cmpi eq, %i, %ub : index
        %6 = scf.if %5 -> (memref<2xf32>) {
          %7 = memref.alloc() : memref<2xf32>
          scf.yield %7 : memref<2xf32>
        } else {
          scf.yield %iterBuf3 : memref<2xf32>
        }
        scf.yield %6 : memref<2xf32>
      }
      scf.yield %3 : memref<2xf32>
    }
    scf.yield %2 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %arg3
// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC0:.*]] = %[[ALLOC1]])
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[IALLOC0]]
// CHECK-NEXT: memref.dealloc %[[IALLOC0]]
// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC1:.*]] = %[[ALLOC2]])
// CHECK-NEXT: %[[ALLOC5:.*]] = bufferization.clone %[[IALLOC1]]
// CHECK-NEXT: memref.dealloc %[[IALLOC1]]

// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC2:.*]] = %[[ALLOC5]])
// CHECK: %[[ALLOC8:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC8]]
// CHECK: %[[ALLOC9:.*]] = scf.if

// CHECK: %[[ALLOC11:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC12:.*]] = bufferization.clone %[[ALLOC11]]
// CHECK-NEXT: memref.dealloc %[[ALLOC11]]
// CHECK-NEXT: scf.yield %[[ALLOC12]]

// CHECK: %[[ALLOC13:.*]] = bufferization.clone %[[IALLOC2]]
// CHECK-NEXT: scf.yield %[[ALLOC13]]

// CHECK: memref.dealloc %[[IALLOC2]]
// CHECK-NEXT: %[[ALLOC10:.*]] = bufferization.clone %[[ALLOC9]]
// CHECK-NEXT: memref.dealloc %[[ALLOC9]]
// CHECK-NEXT: scf.yield %[[ALLOC10]]

// CHECK: %[[ALLOC7:.*]] = bufferization.clone %[[ALLOC6]]
// CHECK-NEXT: memref.dealloc %[[ALLOC6]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]

// CHECK: %[[ALLOC4:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC4]]

// CHECK: test.copy(%[[VAL_7]], %arg4)
// CHECK-NEXT: memref.dealloc %[[VAL_7]]

// -----

// Test Case: explicit control-flow loop with a dynamically allocated buffer.
// The BufferDeallocation transformation should fail on this explicit
// control-flow loop since such loops are not supported.
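// The rejected shape is a CFG cycle whose backedge carries a fresh
// allocation instead of the original iteration buffer (illustrative sketch
// only):
//
//   ^loopBody(%val : i32, %buff2 : memref<?xf32>):
//     ...
//     %alloc1 = memref.alloc(%size) : memref<?xf32>
//     br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)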

// expected-error@+1 {{Only structured control-flow loops are supported}}
func @loop_dynalloc(
  %arg0 : i32,
  %arg1 : i32,
  %arg2: memref<?xf32>,
  %arg3: memref<?xf32>) {
  %const0 = arith.constant 0 : i32
  br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)

^loopHeader(%i : i32, %buff : memref<?xf32>):
  %lessThan = arith.cmpi slt, %i, %arg1 : i32
  cond_br %lessThan,
    ^loopBody(%i, %buff : i32, memref<?xf32>),
    ^exit(%buff : memref<?xf32>)

^loopBody(%val : i32, %buff2: memref<?xf32>):
  %const1 = arith.constant 1 : i32
  %inc = arith.addi %val, %const1 : i32
  %size = arith.index_cast %inc : i32 to index
  %alloc1 = memref.alloc(%size) : memref<?xf32>
  br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)

^exit(%buff3 : memref<?xf32>):
  test.copy(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>)
  return
}

// -----

// Test Case: explicit control-flow loop, do-while style, with a buffer
// allocated in the loop body. The BufferDeallocation transformation should
// fail on this explicit control-flow loop since such loops are not supported.

// expected-error@+1 {{Only structured control-flow loops are supported}}
func @do_loop_alloc(
  %arg0 : i32,
  %arg1 : i32,
  %arg2: memref<2xf32>,
  %arg3: memref<2xf32>) {
  %const0 = arith.constant 0 : i32
  br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)

^loopBody(%val : i32, %buff2: memref<2xf32>):
  %const1 = arith.constant 1 : i32
  %inc = arith.addi %val, %const1 : i32
  %alloc1 = memref.alloc() : memref<2xf32>
  br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)

^loopHeader(%i : i32, %buff : memref<2xf32>):
  %lessThan = arith.cmpi slt, %i, %arg1 : i32
  cond_br %lessThan,
    ^loopBody(%i, %buff : i32, memref<2xf32>),
    ^exit(%buff : memref<2xf32>)

^exit(%buff3 : memref<2xf32>):
  test.copy(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>)
  return
}

// -----

// CHECK-LABEL: func @assumingOp(
func @assumingOp(
  %arg0: !shape.witness,
  %arg2: memref<2xf32>,
  %arg3: memref<2xf32>) {
  // Confirm the alloc will be dealloc'ed in the block.
  %1 = shape.assuming %arg0 -> memref<2xf32> {
    %0 = memref.alloc() : memref<2xf32>
    shape.assuming_yield %arg2 : memref<2xf32>
  }
  // Confirm the alloc will be returned and dealloc'ed after its use.
  %3 = shape.assuming %arg0 -> memref<2xf32> {
    %2 = memref.alloc() : memref<2xf32>
    shape.assuming_yield %2 : memref<2xf32>
  }
  test.copy(%3, %arg3) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-SAME: %[[ARG0:.*]]: !shape.witness,
// CHECK-SAME: %[[ARG1:.*]]: {{.*}},
// CHECK-SAME: %[[ARG2:.*]]: {{.*}}
// CHECK: %[[UNUSED_RESULT:.*]] = shape.assuming %[[ARG0]]
// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: shape.assuming_yield %[[ARG1]]
// CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[ARG0]]
// CHECK-NEXT: %[[TMP_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = bufferization.clone %[[TMP_ALLOC]]
// CHECK-NEXT: memref.dealloc %[[TMP_ALLOC]]
// CHECK-NEXT: shape.assuming_yield %[[RETURNING_ALLOC]]
// CHECK: test.copy(%[[ASSUMING_RESULT:.*]], %[[ARG2]])
// CHECK-NEXT: memref.dealloc %[[ASSUMING_RESULT]]

// -----

// Test Case: The op "test.bar" does not implement the RegionBranchOpInterface.
// This is not allowed during buffer deallocation.

func @noRegionBranchOpInterface() {
// expected-error@+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
  %0 = "test.bar"() ({
// expected-error@+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
    %1 = "test.bar"() ({
      "test.yield"() : () -> ()
    }) : () -> (i32)
    "test.yield"() : () -> ()
  }) : () -> (i32)
  "test.terminator"() : () -> ()
}

// -----

// CHECK-LABEL: func @dealloc_existing_clones
// CHECK: (%[[ARG0:.*]]: memref<?x?xf64>, %[[ARG1:.*]]: memref<?x?xf64>)
// CHECK: %[[RES0:.*]] = bufferization.clone %[[ARG0]]
// CHECK: %[[RES1:.*]] = bufferization.clone %[[ARG1]]
// CHECK-NOT: memref.dealloc %[[RES0]]
// CHECK: memref.dealloc %[[RES1]]
// CHECK: return %[[RES0]]
func @dealloc_existing_clones(%arg0: memref<?x?xf64>, %arg1: memref<?x?xf64>) -> memref<?x?xf64> {
  %0 = bufferization.clone %arg0 : memref<?x?xf64> to memref<?x?xf64>
  %1 = bufferization.clone %arg1 : memref<?x?xf64> to memref<?x?xf64>
  return %0 : memref<?x?xf64>
}