1 //===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements logic for computing correct alloc and dealloc positions. 10 // Furthermore, buffer deallocation also adds required new clone operations to 11 // ensure that all buffers are deallocated. The main class is the 12 // BufferDeallocationPass class that implements the underlying algorithm. In 13 // order to put allocations and deallocations at safe positions, it is 14 // significantly important to put them into the correct blocks. However, the 15 // liveness analysis does not pay attention to aliases, which can occur due to 16 // branches (and their associated block arguments) in general. For this purpose, 17 // BufferDeallocation firstly finds all possible aliases for a single value 18 // (using the BufferViewFlowAnalysis class). Consider the following example: 19 // 20 // ^bb0(%arg0): 21 // cf.cond_br %cond, ^bb1, ^bb2 22 // ^bb1: 23 // cf.br ^exit(%arg0) 24 // ^bb2: 25 // %new_value = ... 26 // cf.br ^exit(%new_value) 27 // ^exit(%arg1): 28 // return %arg1; 29 // 30 // We should place the dealloc for %new_value in exit. However, we have to free 31 // the buffer in the same block, because it cannot be freed in the post 32 // dominator. However, this requires a new clone buffer for %arg1 that will 33 // contain the actual contents. Using the class BufferViewFlowAnalysis, we 34 // will find out that %new_value has a potential alias %arg1. 
// In order to find
// the dealloc position we have to find all potential aliases, iterate over
// their uses and find the common post-dominator block (note that additional
// clones and buffers remove potential aliases and will influence the placement
// of the deallocs). In all cases, the computed block can be safely used to free
// the %new_value buffer (may be exit or bb2) as it will die and we can use
// liveness information to determine the exact operation after which we have to
// insert the dealloc. However, the algorithm supports introducing clone buffers
// and placing deallocs in safe locations to ensure that all buffers will be
// freed in the end.
//
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported. Furthermore, it
// doesn't accept functions which return buffers already.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"

#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "llvm/ADT/SetOperations.h"

using namespace mlir;
using namespace mlir::bufferization;

/// Walks over all immediate return-like terminators in the given region and
/// applies `func` to each of them. Returns failure as soon as `func` fails.
static LogicalResult
walkReturnOperations(Region *region,
                     llvm::function_ref<LogicalResult(Operation *)> func) {
  for (Block &block : *region) {
    Operation *terminator = block.getTerminator();
    // Skip non region-return-like terminators.
    if (isRegionReturnLike(terminator)) {
      if (failed(func(terminator)))
        return failure();
    }
  }
  return success();
}

/// Checks if all operations in a given region that have at least one attached
/// region implement the RegionBranchOpInterface. This is not required in edge
/// cases, where we have a single attached region and the parent operation has
/// no results.
static bool validateSupportedControlFlow(Region &region) {
  bool success = true;
  region.walk([&success](Operation *operation) {
    auto regions = operation->getRegions();
    // Walk over all operations in a region and check if the operation has at
    // least one region and implements the RegionBranchOpInterface. If there
    // is an operation that does not fulfill this condition, we cannot apply
    // the deallocation steps. Furthermore, we accept cases, where we have a
    // region that returns no results, since, in that case, the intra-region
    // control flow does not affect the transformation.
    size_t size = regions.size();
    if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
        !dyn_cast<RegionBranchOpInterface>(operation)) {
      // Emit a diagnostic and remember the failure; the walk continues so
      // that all offending operations are reported in one run.
      operation->emitError("All operations with attached regions need to "
                           "implement the RegionBranchOpInterface.");
      success = false;
    }
  });
  return success;
}

namespace {

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

/// A straight-forward program analysis which detects loop backedges induced by
/// explicit control flow.
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    // `visited` acts as the DFS processing stack: a re-entered block that is
    // still on the stack closes a cycle, i.e. forms a backedge.
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
/// introduce clones that in turn leads to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  /// Maps an (aliasing) value to the AllocationOpInterface implementation of
  /// its underlying allocation.
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  /// Constructs the transformation for the given operation, computing the
  /// dominance and post-dominance information it relies on up front.
  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation of
      // the AllocationOpInterface, we cannot apply the BufferDeallocation pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      llvm::for_each(aliases.resolve(alloc), [&](Value alias) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
        aliasToAllocations[alias] = allocationInterface;
      });
    }
    return success();
  }

  /// Performs the actual placement/creation of all temporary clone and dealloc
  /// nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SmallPtrSet<Value, 8> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone in
    // order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fix point.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      if (failed(value.isa<BlockArgument>()
                     ? introduceBlockArgCopy(value.cast<BlockArgument>())
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not
      // have to assign a block here since we do not want to move the
      // allocation node to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      // Query the associated source value.
      Value sourceValue =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex())
              .getValue()[blockArg.getArgNumber()];
      // Wire new clone and successor operand.
      auto mutableOperands =
          branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex());
      if (!mutableOperands) {
        // NOTE(review): the error is emitted but the loop continues and the
        // method may still return success() — confirm that treating this as
        // non-fatal (rather than `return failure()`) is intended.
        terminator->emitError() << "terminators with immutable successor "
                                   "operands are not supported";
        continue;
      }
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      mutableOperands.getValue()
          .slice(blockArg.getArgNumber(), 1)
          .assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors via
    // the RegionBranchOpInterface. This can be the case if the current block
    // argument belongs to the first block in a region and the parent operation
    // implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional clone
    // for buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*index=*/llvm::None, successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    auto operands =
        regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber());
    size_t operandIndex =
        llvm::find(it->getSuccessorInputs(), blockArg).getIndex() +
        operands.getBeginOperandIndex();
    Value operand = parentOp->getOperand(operandIndex);
    assert(operand ==
               operands[operandIndex - operands.getBeginOperandIndex()] &&
           "region interface operands don't match parentOp operands");
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    parentOp->setOperand(operandIndex, *clone);
    return success();
  }

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated buffers.
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to restrict
  /// the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region.getRegionNumber(),
                                          successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(&region, [&](Operation *terminator) {
            // Get the actual mutable operands for this terminator op.
            auto terminatorOperands = *getMutableRegionBranchSuccessorOperands(
                terminator, region.getRegionNumber());
            // Extract the source value from the current terminator.
            // This conversion needs to exist on a separate line due to a bug in
            // GCC conversion analysis.
            OperandRange immutableTerminatorOperands = terminatorOperands;
            Value sourceValue = immutableTerminatorOperands[operandIndex];
            // Create a new clone at the current location of the terminator.
            auto clone = introduceCloneBuffers(sourceValue, terminator);
            if (failed(clone))
              return failure();
            // Wire clone and terminator operand.
            terminatorOperands.slice(operandIndex, 1).assign(*clone);
            return success();
          })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note: that
    // copying copied buffers can introduce memory leaks since the invariant of
    // BufferDeallocation assumes that a buffer will be only cloned once into a
    // temporary buffer. Hence, the construction of clone chains introduces
    // additional allocations that are not tracked automatically by the
    // algorithm.
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation of
        // the current alias to avoid invalid placement of deallocs for aliases
        // without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation behind which we can safely store
      // the dealloc taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the Dealloc position is at the terminator operation of the
        // block, then the value should escape from a deallocation.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation for
  /// the given value (e.g. in the case of a function parameter), this method
  /// builds a memref::DeallocOp.
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the given
  /// value (e.g. in the case of a function parameter), this method builds a
  /// bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator info to find the appropriate start operation to move the
  /// allocs.
  DominanceInfo dominators;

  /// The post dominator info to move the dependent allocs in the right
  /// position.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};

//===----------------------------------------------------------------------===//
// BufferDeallocationPass
//===----------------------------------------------------------------------===//

/// Fallback AllocationOpInterface implementation for memref::AllocOp that
/// deallocates with memref::DeallocOp and clones with bufferization::CloneOp.
struct DefaultAllocationInterface
    : public bufferization::AllocationOpInterface::ExternalModel<
          DefaultAllocationInterface, memref::AllocOp> {
  static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) {
    return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
        .getOperation();
  }
  static Optional<Value> buildClone(OpBuilder &builder, Value alloc) {
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }
};

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
  /// Registers the dialects this pass may create operations from, and attaches
  /// the default allocation interface to memref::AllocOp.
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<bufferization::BufferizationDialect>();
    registry.insert<memref::MemRefDialect>();
    registry.addOpInterface<memref::AllocOp, DefaultAllocationInterface>();
  }

  /// Runs the deallocation placement on the current function, rejecting
  /// unsupported control flow up front.
  void runOnOperation() override {
    FuncOp func = getOperation();
    // External functions have no body to transform.
    if (func.isExternal())
      return;

    // Ensure that there are supported loops only.
    Backedges backedges(func);
    if (backedges.size()) {
      func.emitError("Only structured control-flow loops are supported.");
      return signalPassFailure();
    }

    // Check that the control flow structures are supported.
    if (!validateSupportedControlFlow(func.getRegion()))
      return signalPassFailure();

    // Gather all required allocation nodes and prepare the deallocation phase.
    BufferDeallocation deallocation(func);

    // Check for supported AllocationOpInterface implementations and prepare the
    // internal deallocation pass.
    if (failed(deallocation.prepare()))
      return signalPassFailure();

    // Place all required temporary clone and dealloc nodes.
    if (failed(deallocation.deallocate()))
      return signalPassFailure();
  }
};

} // namespace

//===----------------------------------------------------------------------===//
// BufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
  return std::make_unique<BufferDeallocationPass>();
}