//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc
// positions. Furthermore, buffer deallocation also adds the required new clone
// operations to ensure that all buffers are deallocated. The main class is the
// BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is crucial
// to place them in the correct blocks. However, the liveness analysis does not
// pay attention to aliases, which can occur due to branches (and their
// associated block arguments) in general. For this purpose, BufferDeallocation
// first finds all possible aliases of a single value (using the
// BufferViewFlowAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   cf.br ^exit(%arg0)
// ^bb2:
//   %new_value = ...
//   cf.br ^exit(%new_value)
// ^exit(%arg1):
//   return %arg1
//
// We would like to place the dealloc for %new_value in ^exit. However, the
// buffer has to be freed in its defining block ^bb2, because it cannot be
// freed in the post dominator: along the ^bb1 path, %arg1 aliases %arg0,
// which must not be freed. This in turn requires a new clone buffer for %arg1
// that will contain the actual contents. Using the BufferViewFlowAnalysis
// class, we find out that %new_value has the potential alias %arg1. In order
// to find the dealloc position, we have to find all potential aliases,
// iterate over their uses and find the common post-dominator block (note that
// additional clones and buffers remove potential aliases and will influence
// the placement of the deallocs). In all cases, the computed block can be
// safely used to free the %new_value buffer (here ^bb2 or ^exit), as the
// buffer dies there, and we can use liveness information to determine the
// exact operation after which the dealloc has to be inserted. In summary, the
// algorithm introduces clone buffers and places deallocs in safe locations to
// ensure that all buffers are eventually freed.
//
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported. Furthermore, it
// does not yet accept functions that return buffers.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"

#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "llvm/ADT/SetOperations.h"

using namespace mlir;
using namespace mlir::bufferization;
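
// As an illustration (a sketch only, not normative output): after running
// this pass on the example from the header, with the default builders
// producing bufferization.clone and memref.dealloc ops, one would obtain IR
// along these lines (the trailing `return %arg1` is replaced by a generic use
// here, since returning buffers is not yet supported, see the TODO above):
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   %0 = bufferization.clone %arg0
//   cf.br ^exit(%0)
// ^bb2:
//   %new_value = ...
//   %1 = bufferization.clone %new_value
//   memref.dealloc %new_value
//   cf.br ^exit(%1)
// ^exit(%arg1):
//   ... use %arg1 ...
//   memref.dealloc %arg1
//   return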

/// Walks over all immediate return-like terminators in the given region.
static LogicalResult
walkReturnOperations(Region *region,
                     llvm::function_ref<LogicalResult(Operation *)> func) {
  for (Block &block : *region) {
    Operation *terminator = block.getTerminator();
    // Skip terminators that are not region-return-like.
    if (isRegionReturnLike(terminator)) {
      if (failed(func(terminator)))
        return failure();
    }
  }
  return success();
}

/// Checks if all operations in a given region that have at least one attached
/// region implement the RegionBranchOpInterface. This is not required in edge
/// cases, where we have a single attached region and the parent operation has
/// no results.
static bool validateSupportedControlFlow(Region &region) {
  bool success = true;
  region.walk([&success](Operation *operation) {
    auto regions = operation->getRegions();
    // Walk over all operations in the region and check if the operation has
    // at least one region and implements the RegionBranchOpInterface. If
    // there is an operation that does not fulfill this condition, we cannot
    // apply the deallocation steps. Furthermore, we accept the case of a
    // single attached region without results, since, in that case, the
    // intra-region control flow does not affect the transformation.
    size_t size = regions.size();
    if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
        !dyn_cast<RegionBranchOpInterface>(operation)) {
      operation->emitError("All operations with attached regions need to "
                           "implement the RegionBranchOpInterface.");
      success = false;
    }
  });
  return success;
}
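
// For illustration (a sketch with hypothetical ops): an `scf.if` with results
// passes this check since it implements the RegionBranchOpInterface, and so
// does a single-region op without results; an op with both an attached region
// and results that lacks the interface is rejected:
//
//   "foo.wrapper"() ({ ... }) : () -> ()                   // accepted
//   %0 = "foo.region_op"() ({ ... }) : () -> memref<2xf32> // rejected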

namespace {

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

/// A straightforward program analysis which detects loop backedges induced by
/// explicit control flow.
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
/// introduce clones that, in turn, lead to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation
      // of the AllocationOpInterface, we cannot apply the BufferDeallocation
      // pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      llvm::for_each(aliases.resolve(alloc), [&](Value alias) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
        aliasToAllocations[alias] = allocationInterface;
      });
    }
    return success();
  }
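
  // For example (a sketch with a hypothetical dialect): an allocation such as
  //
  //   %0 = "foo.alloc"() : () -> memref<2xf32>
  //
  // that is never deallocated and whose defining op does not implement the
  // AllocationOpInterface makes prepare() fail with the error above. A plain
  // memref.alloc is accepted, since this pass registers an external
  // AllocationOpInterface model for it (see DefaultAllocationInterface
  // below).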

  /// Performs the actual placement/creation of all temporary clone and
  /// dealloc nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SmallPtrSet<Value, 8> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone
    // in order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fixpoint.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      if (failed(value.isa<BlockArgument>()
                     ? introduceBlockArgCopy(value.cast<BlockArgument>())
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not
      // have to assign a block here since we do not want to move the
      // allocation node to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }
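
  // Tracing this on the example from the file header (a sketch): starting
  // from %new_value, findUnsafeValues visits its alias %arg1. Since ^bb2 does
  // not dominate ^exit, %arg1 ends up in valuesToFree and is removed from the
  // alias sets; being a block argument, it is handled by
  // introduceBlockArgCopy(), which clones the incoming value in each
  // predecessor of ^exit, and %arg1 is then registered as an additional alloc
  // that requires its own dealloc.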

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      // Query the associated source value.
      Value sourceValue =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex())
              .getValue()[blockArg.getArgNumber()];
      // Wire new clone and successor operand.
      auto mutableOperands =
          branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex());
      if (!mutableOperands) {
        terminator->emitError() << "terminators with immutable successor "
                                   "operands are not supported";
        continue;
      }
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      mutableOperands.getValue()
          .slice(blockArg.getArgNumber(), 1)
          .assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors
    // via the RegionBranchOpInterface. This can be the case if the current
    // block argument belongs to the first block in a region and the parent
    // operation implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional
    // clone for the buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*index=*/llvm::None,
                                        successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    Value operand =
        regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber())
            [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()];
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    auto op = llvm::find(parentOp->getOperands(), operand);
    assert(op != parentOp->getOperands().end() &&
           "parentOp does not contain operand");
    parentOp->setOperand(op.getIndex(), *clone);
    return success();
  }
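
  // The next two methods handle clones for region "results". As a sketch,
  // assuming an scf.if that yields a buffer,
  //
  //   %0 = scf.if %cond -> (memref<2xf32>) {
  //     scf.yield %some_buffer : memref<2xf32>
  //   } else {
  //     scf.yield %other_buffer : memref<2xf32>
  //   }
  //
  // a clone of the yielded buffer is created in front of each scf.yield and
  // wired into the terminator, so that %0 owns a fresh buffer and the
  // original buffers can be deallocated independently of it.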

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated
  /// buffers.
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to
  /// restrict the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region.getRegionNumber(),
                                          successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(&region, [&](Operation *terminator) {
            // Get the actual mutable operands for this terminator op.
            auto terminatorOperands = *getMutableRegionBranchSuccessorOperands(
                terminator, region.getRegionNumber());
            // Extract the source value from the current terminator.
            // This conversion needs to exist on a separate line due to a bug
            // in GCC conversion analysis.
            OperandRange immutableTerminatorOperands = terminatorOperands;
            Value sourceValue = immutableTerminatorOperands[operandIndex];
            // Create a new clone at the current location of the terminator.
            auto clone = introduceCloneBuffers(sourceValue, terminator);
            if (failed(clone))
              return failure();
            // Wire clone and terminator operand.
            terminatorOperands.slice(operandIndex, 1).assign(*clone);
            return success();
          })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note that
    // copying already-copied buffers can introduce memory leaks, since the
    // invariant of BufferDeallocation assumes that a buffer is cloned at most
    // once into a temporary buffer. Hence, the construction of clone chains
    // introduces additional allocations that are not tracked automatically by
    // the algorithm.
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of the original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation
        // of the current alias to avoid invalid placement of deallocs for
        // aliases without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it comes after the current endOperation. If so, it becomes
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation after which we can safely insert
      // the dealloc, taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the dealloc position would be past the block terminator, the
        // value escapes the block via the terminator and must not be
        // deallocated here.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation
  /// for the given value (e.g. in the case of a function parameter), this
  /// method builds a memref::DeallocOp.
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the
  /// given value (e.g. in the case of a function parameter), this method
  /// builds a bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator analysis, used to check whether allocated values dominate
  /// their aliases.
  DominanceInfo dominators;

  /// The post-dominator analysis, used to find the common post-dominator
  /// block in which a dealloc can safely be placed.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};

//===----------------------------------------------------------------------===//
// BufferDeallocationPass
//===----------------------------------------------------------------------===//

struct DefaultAllocationInterface
    : public bufferization::AllocationOpInterface::ExternalModel<
          DefaultAllocationInterface, memref::AllocOp> {
  static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) {
    return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
        .getOperation();
  }
  static Optional<Value> buildClone(OpBuilder &builder, Value alloc) {
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }
};

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass
    : BufferDeallocationBase<BufferDeallocationPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<bufferization::BufferizationDialect>();
    registry.insert<memref::MemRefDialect>();
    registry.addOpInterface<memref::AllocOp, DefaultAllocationInterface>();
  }

  void runOnOperation() override {
    FuncOp func = getOperation();
    if (func.isExternal())
      return;

    // Ensure that only supported (structured) loops are present.
    Backedges backedges(func);
    if (backedges.size()) {
      func.emitError("Only structured control-flow loops are supported.");
      return signalPassFailure();
    }

    // Check that the control flow structures are supported.
    if (!validateSupportedControlFlow(func.getRegion()))
      return signalPassFailure();

    // Gather all required allocation nodes and prepare the deallocation
    // phase.
    BufferDeallocation deallocation(func);

    // Check for supported AllocationOpInterface implementations and prepare
    // the internal deallocation pass.
    if (failed(deallocation.prepare()))
      return signalPassFailure();

    // Place all required temporary clone and dealloc nodes.
    if (failed(deallocation.deallocate()))
      return signalPassFailure();
  }
};

} // namespace

//===----------------------------------------------------------------------===//
// BufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
  return std::make_unique<BufferDeallocationPass>();
}
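
// A minimal usage sketch (assuming a module containing func ops and a
// standard PassManager setup; not part of the pass itself): since the pass
// operates on individual functions, it is typically added as a nested pass:
//
//   PassManager pm(module.getContext());
//   pm.addNestedPass<FuncOp>(
//       bufferization::createBufferDeallocationPass());
//   if (failed(pm.run(module)))
//     /* ...handle the error... */;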