//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc positions.
// Furthermore, buffer deallocation also adds required new clone operations to
// ensure that all buffers are deallocated. The main class is the
// BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is
// important to put them into the correct blocks. However, the liveness
// analysis does not pay attention to aliases, which can occur due to branches
// (and their associated block arguments) in general. For this purpose,
// BufferDeallocation first finds all possible aliases for a single value
// (using the BufferViewFlowAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   cf.br ^exit(%arg0)
// ^bb2:
//   %new_value = ...
//   cf.br ^exit(%new_value)
// ^exit(%arg1):
//   return %arg1;
//
// We should place the dealloc for %new_value in exit. However, we have to
// free the buffer in the same block, because it cannot be freed in the post
// dominator. This, in turn, requires a new clone buffer for %arg1 that will
// contain the actual contents. Using the class BufferViewFlowAnalysis, we
// will find out that %new_value has a potential alias %arg1. In order to find
// the dealloc position we have to find all potential aliases, iterate over
// their uses and find the common post-dominator block (note that additional
// clones and buffers remove potential aliases and will influence the
// placement of the deallocs). In all cases, the computed block can be safely
// used to free the %new_value buffer (may be exit or bb2) as it will die and
// we can use liveness information to determine the exact operation after
// which we have to insert the dealloc. Importantly, the algorithm supports
// introducing clone buffers and placing deallocs in safe locations to ensure
// that all buffers will be freed in the end.
//
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported. Furthermore, it
// doesn't accept functions which return buffers already.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"

#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "llvm/ADT/SetOperations.h"

using namespace mlir;
using namespace mlir::bufferization;
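
// For illustration only: a sketch (not verbatim pass output) of the header
// example after buffer deallocation, assuming %new_value originates from a
// memref.alloc and %arg1 is consumed in ^exit rather than returned (the pass
// rejects functions that return buffers, see the TODO above). Clones are
// introduced in both predecessors of ^exit, and deallocs are placed after
// the last uses:
//
//   ^bb0(%arg0):
//     cf.cond_br %cond, ^bb1, ^bb2
//   ^bb1:
//     %0 = bufferization.clone %arg0
//     cf.br ^exit(%0)
//   ^bb2:
//     %new_value = memref.alloc() : ...
//     %1 = bufferization.clone %new_value
//     memref.dealloc %new_value
//     cf.br ^exit(%1)
//   ^exit(%arg1):
//     ... use %arg1 ...
//     memref.dealloc %arg1
//     return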

/// Walks over all immediate return-like terminators in the given region.
static LogicalResult
walkReturnOperations(Region *region,
                     llvm::function_ref<LogicalResult(Operation *)> func) {
  for (Block &block : *region) {
    Operation *terminator = block.getTerminator();
    // Skip non region-return-like terminators.
    if (isRegionReturnLike(terminator)) {
      if (failed(func(terminator)))
        return failure();
    }
  }
  return success();
}

/// Checks if all operations that have at least one attached region implement
/// the RegionBranchOpInterface. This is not required in edge cases, where we
/// have a single attached region and the parent operation has no results.
static bool validateSupportedControlFlow(Operation *op) {
  WalkResult result = op->walk([&](Operation *operation) {
    // Only check ops that are inside a function.
    if (!operation->getParentOfType<func::FuncOp>())
      return WalkResult::advance();

    auto regions = operation->getRegions();
    // Walk over all operations in a region and check if the operation has at
    // least one region and implements the RegionBranchOpInterface. If there
    // is an operation that does not fulfill this condition, we cannot apply
    // the deallocation steps. Furthermore, we accept cases where we have a
    // region that returns no results, since, in that case, the intra-region
    // control flow does not affect the transformation.
    size_t size = regions.size();
    if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
        !dyn_cast<RegionBranchOpInterface>(operation)) {
      operation->emitError("All operations with attached regions need to "
                           "implement the RegionBranchOpInterface.");
      // Interrupt the walk so that the emitted error actually fails the
      // validation; otherwise the result below would always signal success.
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });
  return !result.wasInterrupted();
}

namespace {

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

/// A straight-forward program analysis which detects loop backedges induced by
/// explicit control flow.
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }
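
  // Example: for a block that branches back to itself ("^bb1: ... cf.br
  // ^bb1"), the recursive traversal below fails to re-enter ^bb1 while it is
  // still on the processing stack and records the backedge (^bb1, ^bb1).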

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
/// introduce clones that in turn lead to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation
      // of the AllocationOpInterface, we cannot apply the BufferDeallocation
      // pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      llvm::for_each(aliases.resolve(alloc), [&](Value alias) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
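        // For now, assume every alias shares the interface of its defining
        // allocation so that deallocs and clones built for the alias use a
        // compatible builder.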
        aliasToAllocations[alias] = allocationInterface;
      });
    }
    return success();
  }

  /// Performs the actual placement/creation of all temporary clone and dealloc
  /// nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SmallPtrSet<Value, 8> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone in
    // order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fix point.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      if (failed(value.isa<BlockArgument>()
                     ? introduceBlockArgCopy(value.cast<BlockArgument>())
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not
      // have to assign a block here since we do not want to move the
      // allocation node to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }
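
  // In the running example from the file header, introduceClones() marks
  // %arg1 in ^exit as a value to free: ^bb2 (the block defining %new_value)
  // does not dominate ^exit, so each predecessor of ^exit passes a freshly
  // cloned buffer instead and %arg1 is registered as an allocation entry of
  // its own.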

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      SuccessorOperands operands =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex());

      // Query the associated source value.
      Value sourceValue = operands[blockArg.getArgNumber()];
      if (!sourceValue) {
        return failure();
      }
      // Wire new clone and successor operand.
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      operands.slice(blockArg.getArgNumber(), 1).assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors via
    // the RegionBranchOpInterface. This can be the case if the current block
    // argument belongs to the first block in a region and the parent operation
    // implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional clone
    // for the buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*index=*/llvm::None,
                                        successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    auto operands =
        regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber());
    size_t operandIndex =
        llvm::find(it->getSuccessorInputs(), blockArg).getIndex() +
        operands.getBeginOperandIndex();
    Value operand = parentOp->getOperand(operandIndex);
    assert(operand ==
               operands[operandIndex - operands.getBeginOperandIndex()] &&
           "region interface operands don't match parentOp operands");
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    parentOp->setOperand(operandIndex, *clone);
    return success();
  }
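
  // For illustration, a hedged sketch of the effect on a region "result":
  // a yielded buffer that aliases the parent's result is replaced by a clone
  // in front of the region terminator, so the result owns a fresh buffer
  // (shown for scf.if; any RegionBranchOpInterface op behaves alike):
  //
  //   %r = scf.if %cond -> (memref<2xf32>) {
  //     %a = memref.alloc() : memref<2xf32>
  //     ...
  //     %c = bufferization.clone %a : memref<2xf32> to memref<2xf32>
  //     memref.dealloc %a : memref<2xf32>
  //     scf.yield %c : memref<2xf32>
  //   } else { ... }
  //   ...
  //   memref.dealloc %r : memref<2xf32>  // after the last use of %r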

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated
  /// buffers.
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to restrict
  /// the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region.getRegionNumber(),
                                          successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(&region, [&](Operation *terminator) {
            // Get the actual mutable operands for this terminator op.
            auto terminatorOperands = *getMutableRegionBranchSuccessorOperands(
                terminator, region.getRegionNumber());
            // Extract the source value from the current terminator.
            // This conversion needs to exist on a separate line due to a bug
            // in GCC conversion analysis.
            OperandRange immutableTerminatorOperands = terminatorOperands;
            Value sourceValue = immutableTerminatorOperands[operandIndex];
            // Create a new clone at the current location of the terminator.
            auto clone = introduceCloneBuffers(sourceValue, terminator);
            if (failed(clone))
              return failure();
            // Wire clone and terminator operand.
            terminatorOperands.slice(operandIndex, 1).assign(*clone);
            return success();
          })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note that
    // copying copied buffers can introduce memory leaks since the invariant of
    // BufferDeallocation assumes that a buffer will be only cloned once into a
    // temporary buffer. Hence, the construction of clone chains introduces
    // additional allocations that are not tracked automatically by the
    // algorithm.
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation
        // of the current alias to avoid invalid placement of deallocs for
        // aliases without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation behind which we can safely store
      // the dealloc taking all potential aliases into account.
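
      // Example: if the alloc has two aliases in placementBlock whose last
      // uses are operations X and Y, with X before Y, the loop above settles
      // on Y and the dealloc is inserted directly after Y below.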

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the dealloc position would be past the terminator of the block,
        // the value escapes the block and must not be deallocated here.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation for
  /// the given value (e.g. in the case of a function parameter), this method
  /// builds a memref::DeallocOp.
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the given
  /// value (e.g. in the case of a function parameter), this method builds a
  /// bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator info to find the appropriate start operation to move the
  /// allocs.
  DominanceInfo dominators;

  /// The post dominator info to move the dependent allocs in the right
  /// position.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};

//===----------------------------------------------------------------------===//
// BufferDeallocationPass
//===----------------------------------------------------------------------===//

struct DefaultAllocationInterface
    : public bufferization::AllocationOpInterface::ExternalModel<
          DefaultAllocationInterface, memref::AllocOp> {
  static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) {
    return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
        .getOperation();
  }
  static Optional<Value> buildClone(OpBuilder &builder, Value alloc) {
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }
};

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<bufferization::BufferizationDialect>();
    registry.insert<memref::MemRefDialect>();
    registerAllocationOpInterfaceExternalModels(registry);
  }

  void runOnOperation() override {
    func::FuncOp func = getOperation();
    if (func.isExternal())
      return;

    if (failed(deallocateBuffers(func)))
      signalPassFailure();
  }
};

} // namespace

LogicalResult bufferization::deallocateBuffers(Operation *op) {
  if (isa<ModuleOp>(op)) {
    WalkResult result = op->walk([&](func::FuncOp funcOp) {
      if (failed(deallocateBuffers(funcOp)))
        return WalkResult::interrupt();
      return WalkResult::advance();
    });
    return success(!result.wasInterrupted());
  }

  // Ensure that there are only supported (structured) control-flow loops.
  Backedges backedges(op);
  if (backedges.size()) {
    op->emitError("Only structured control-flow loops are supported.");
    return failure();
  }

  // Check that the control flow structures are supported.
  if (!validateSupportedControlFlow(op))
    return failure();

  // Gather all required allocation nodes and prepare the deallocation phase.
  BufferDeallocation deallocation(op);

  // Check for supported AllocationOpInterface implementations and prepare the
  // internal deallocation pass.
  if (failed(deallocation.prepare()))
    return failure();

  // Place all required temporary clone and dealloc nodes.
  if (failed(deallocation.deallocate()))
    return failure();

  return success();
}

void bufferization::registerAllocationOpInterfaceExternalModels(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, memref::MemRefDialect *dialect) {
    memref::AllocOp::attachInterface<DefaultAllocationInterface>(*ctx);
  });
}

//===----------------------------------------------------------------------===//
// BufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
  return std::make_unique<BufferDeallocationPass>();
}
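
// A minimal usage sketch (assuming the standard pass registration): the pass
// runs on individual functions and can be exercised from mlir-opt, e.g.
//
//   mlir-opt --buffer-deallocation input.mlir
//
// or added to a pipeline programmatically:
//
//   mlir::PassManager pm(&context);
//   pm.addNestedPass<mlir::func::FuncOp>(
//       mlir::bufferization::createBufferDeallocationPass());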