//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc positions.
// Furthermore, buffer deallocation also adds required new clone operations to
// ensure that all buffers are deallocated. The main class is the
// BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is
// significantly important to put them into the correct blocks. However, the
// liveness analysis does not pay attention to aliases, which can occur due to
// branches (and their associated block arguments) in general. For this purpose,
// BufferDeallocation firstly finds all possible aliases for a single value
// (using the BufferViewFlowAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
//   cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   br ^exit(%arg0)
// ^bb2:
//   %new_value = ...
//   br ^exit(%new_value)
// ^exit(%arg1):
//   return %arg1;
//
// We should place the dealloc for %new_value in exit. However, we have to free
// the buffer in the same block, because it cannot be freed in the post
// dominator. However, this requires a new clone buffer for %arg1 that will
// contain the actual contents. Using the class BufferViewFlowAnalysis, we
// will find out that %new_value has a potential alias %arg1.
In order to find 35 // the dealloc position we have to find all potential aliases, iterate over 36 // their uses and find the common post-dominator block (note that additional 37 // clones and buffers remove potential aliases and will influence the placement 38 // of the deallocs). In all cases, the computed block can be safely used to free 39 // the %new_value buffer (may be exit or bb2) as it will die and we can use 40 // liveness information to determine the exact operation after which we have to 41 // insert the dealloc. However, the algorithm supports introducing clone buffers 42 // and placing deallocs in safe locations to ensure that all buffers will be 43 // freed in the end. 44 // 45 // TODO: 46 // The current implementation does not support explicit-control-flow loops and 47 // the resulting code will be invalid with respect to program semantics. 48 // However, structured control-flow loops are fully supported. Furthermore, it 49 // doesn't accept functions which return buffers already. 50 // 51 //===----------------------------------------------------------------------===// 52 53 #include "PassDetail.h" 54 55 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h" 56 #include "mlir/Dialect/Bufferization/IR/Bufferization.h" 57 #include "mlir/Dialect/Bufferization/Transforms/Passes.h" 58 #include "mlir/Dialect/MemRef/IR/MemRef.h" 59 #include "mlir/Transforms/BufferUtils.h" 60 #include "llvm/ADT/SetOperations.h" 61 62 using namespace mlir; 63 64 /// Walks over all immediate return-like terminators in the given region. 65 static LogicalResult 66 walkReturnOperations(Region *region, 67 llvm::function_ref<LogicalResult(Operation *)> func) { 68 for (Block &block : *region) { 69 Operation *terminator = block.getTerminator(); 70 // Skip non region-return-like terminators. 
71 if (isRegionReturnLike(terminator)) { 72 if (failed(func(terminator))) 73 return failure(); 74 } 75 } 76 return success(); 77 } 78 79 /// Checks if all operations in a given region that have at least one attached 80 /// region implement the RegionBranchOpInterface. This is not required in edge 81 /// cases, where we have a single attached region and the parent operation has 82 /// no results. 83 static bool validateSupportedControlFlow(Region ®ion) { 84 bool success = true; 85 region.walk([&success](Operation *operation) { 86 auto regions = operation->getRegions(); 87 // Walk over all operations in a region and check if the operation has at 88 // least one region and implements the RegionBranchOpInterface. If there 89 // is an operation that does not fulfill this condition, we cannot apply 90 // the deallocation steps. Furthermore, we accept cases, where we have a 91 // region that returns no results, since, in that case, the intra-region 92 // control flow does not affect the transformation. 93 size_t size = regions.size(); 94 if (((size == 1 && !operation->getResults().empty()) || size > 1) && 95 !dyn_cast<RegionBranchOpInterface>(operation)) { 96 operation->emitError("All operations with attached regions need to " 97 "implement the RegionBranchOpInterface."); 98 success = false; 99 } 100 }); 101 return success; 102 } 103 104 namespace { 105 106 //===----------------------------------------------------------------------===// 107 // Backedges analysis 108 //===----------------------------------------------------------------------===// 109 110 /// A straight-forward program analysis which detects loop backedges induced by 111 /// explicit control flow. 112 class Backedges { 113 public: 114 using BlockSetT = SmallPtrSet<Block *, 16>; 115 using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>; 116 117 public: 118 /// Constructs a new backedges analysis using the op provided. 
119 Backedges(Operation *op) { recurse(op); } 120 121 /// Returns the number of backedges formed by explicit control flow. 122 size_t size() const { return edgeSet.size(); } 123 124 /// Returns the start iterator to loop over all backedges. 125 BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); } 126 127 /// Returns the end iterator to loop over all backedges. 128 BackedgeSetT::const_iterator end() const { return edgeSet.end(); } 129 130 private: 131 /// Enters the current block and inserts a backedge into the `edgeSet` if we 132 /// have already visited the current block. The inserted edge links the given 133 /// `predecessor` with the `current` block. 134 bool enter(Block ¤t, Block *predecessor) { 135 bool inserted = visited.insert(¤t).second; 136 if (!inserted) 137 edgeSet.insert(std::make_pair(predecessor, ¤t)); 138 return inserted; 139 } 140 141 /// Leaves the current block. 142 void exit(Block ¤t) { visited.erase(¤t); } 143 144 /// Recurses into the given operation while taking all attached regions into 145 /// account. 146 void recurse(Operation *op) { 147 Block *current = op->getBlock(); 148 // If the current op implements the `BranchOpInterface`, there can be 149 // cycles in the scope of all successor blocks. 150 if (isa<BranchOpInterface>(op)) { 151 for (Block *succ : current->getSuccessors()) 152 recurse(*succ, current); 153 } 154 // Recurse into all distinct regions and check for explicit control-flow 155 // loops. 156 for (Region ®ion : op->getRegions()) { 157 if (!region.empty()) 158 recurse(region.front(), current); 159 } 160 } 161 162 /// Recurses into explicit control-flow structures that are given by 163 /// the successor relation defined on the block level. 164 void recurse(Block &block, Block *predecessor) { 165 // Try to enter the current block. If this is not possible, we are 166 // currently processing this block and can safely return here. 
167 if (!enter(block, predecessor)) 168 return; 169 170 // Recurse into all operations and successor blocks. 171 for (Operation &op : block.getOperations()) 172 recurse(&op); 173 174 // Leave the current block. 175 exit(block); 176 } 177 178 /// Stores all blocks that are currently visited and on the processing stack. 179 BlockSetT visited; 180 181 /// Stores all backedges in the format (source, target). 182 BackedgeSetT edgeSet; 183 }; 184 185 //===----------------------------------------------------------------------===// 186 // BufferDeallocation 187 //===----------------------------------------------------------------------===// 188 189 /// The buffer deallocation transformation which ensures that all allocs in the 190 /// program have a corresponding de-allocation. As a side-effect, it might also 191 /// introduce clones that in turn leads to additional deallocations. 192 class BufferDeallocation : public BufferPlacementTransformationBase { 193 public: 194 using AliasAllocationMapT = 195 llvm::DenseMap<Value, bufferization::AllocationOpInterface>; 196 197 BufferDeallocation(Operation *op) 198 : BufferPlacementTransformationBase(op), dominators(op), 199 postDominators(op) {} 200 201 /// Checks if all allocation operations either provide an already existing 202 /// deallocation operation or implement the AllocationOpInterface. In 203 /// addition, this method initializes the internal alias to 204 /// AllocationOpInterface mapping in order to get compatible 205 /// AllocationOpInterface implementations for aliases. 206 LogicalResult prepare() { 207 for (const BufferPlacementAllocs::AllocEntry &entry : allocs) { 208 // Get the defining allocation operation. 209 Value alloc = std::get<0>(entry); 210 auto allocationInterface = 211 alloc.getDefiningOp<bufferization::AllocationOpInterface>(); 212 // If there is no existing deallocation operation and no implementation of 213 // the AllocationOpInterface, we cannot apply the BufferDeallocation pass. 
214 if (!std::get<1>(entry) && !allocationInterface) { 215 return alloc.getDefiningOp()->emitError( 216 "Allocation is not deallocated explicitly nor does the operation " 217 "implement the AllocationOpInterface."); 218 } 219 220 // Register the current allocation interface implementation. 221 aliasToAllocations[alloc] = allocationInterface; 222 223 // Get the alias information for the current allocation node. 224 llvm::for_each(aliases.resolve(alloc), [&](Value alias) { 225 // TODO: check for incompatible implementations of the 226 // AllocationOpInterface. This could be realized by promoting the 227 // AllocationOpInterface to a DialectInterface. 228 aliasToAllocations[alias] = allocationInterface; 229 }); 230 } 231 return success(); 232 } 233 234 /// Performs the actual placement/creation of all temporary clone and dealloc 235 /// nodes. 236 LogicalResult deallocate() { 237 // Add additional clones that are required. 238 if (failed(introduceClones())) 239 return failure(); 240 241 // Place deallocations for all allocation entries. 242 return placeDeallocs(); 243 } 244 245 private: 246 /// Introduces required clone operations to avoid memory leaks. 247 LogicalResult introduceClones() { 248 // Initialize the set of values that require a dedicated memory free 249 // operation since their operands cannot be safely deallocated in a post 250 // dominator. 251 SmallPtrSet<Value, 8> valuesToFree; 252 llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues; 253 SmallVector<std::tuple<Value, Block *>, 8> toProcess; 254 255 // Check dominance relation for proper dominance properties. If the given 256 // value node does not dominate an alias, we will have to create a clone in 257 // order to free all buffers that can potentially leak into a post 258 // dominator. 
259 auto findUnsafeValues = [&](Value source, Block *definingBlock) { 260 auto it = aliases.find(source); 261 if (it == aliases.end()) 262 return; 263 for (Value value : it->second) { 264 if (valuesToFree.count(value) > 0) 265 continue; 266 Block *parentBlock = value.getParentBlock(); 267 // Check whether we have to free this particular block argument or 268 // generic value. We have to free the current alias if it is either 269 // defined in a non-dominated block or it is defined in the same block 270 // but the current value is not dominated by the source value. 271 if (!dominators.dominates(definingBlock, parentBlock) || 272 (definingBlock == parentBlock && value.isa<BlockArgument>())) { 273 toProcess.emplace_back(value, parentBlock); 274 valuesToFree.insert(value); 275 } else if (visitedValues.insert(std::make_tuple(value, definingBlock)) 276 .second) 277 toProcess.emplace_back(value, definingBlock); 278 } 279 }; 280 281 // Detect possibly unsafe aliases starting from all allocations. 282 for (BufferPlacementAllocs::AllocEntry &entry : allocs) { 283 Value allocValue = std::get<0>(entry); 284 findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock()); 285 } 286 // Try to find block arguments that require an explicit free operation 287 // until we reach a fix point. 288 while (!toProcess.empty()) { 289 auto current = toProcess.pop_back_val(); 290 findUnsafeValues(std::get<0>(current), std::get<1>(current)); 291 } 292 293 // Update buffer aliases to ensure that we free all buffers and block 294 // arguments at the correct locations. 295 aliases.remove(valuesToFree); 296 297 // Add new allocs and additional clone operations. 298 for (Value value : valuesToFree) { 299 if (failed(value.isa<BlockArgument>() 300 ? introduceBlockArgCopy(value.cast<BlockArgument>()) 301 : introduceValueCopyForRegionResult(value))) 302 return failure(); 303 304 // Register the value to require a final dealloc. 
Note that we do not have 305 // to assign a block here since we do not want to move the allocation node 306 // to another location. 307 allocs.registerAlloc(std::make_tuple(value, nullptr)); 308 } 309 return success(); 310 } 311 312 /// Introduces temporary clones in all predecessors and copies the source 313 /// values into the newly allocated buffers. 314 LogicalResult introduceBlockArgCopy(BlockArgument blockArg) { 315 // Allocate a buffer for the current block argument in the block of 316 // the associated value (which will be a predecessor block by 317 // definition). 318 Block *block = blockArg.getOwner(); 319 for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) { 320 // Get the terminator and the value that will be passed to our 321 // argument. 322 Operation *terminator = (*it)->getTerminator(); 323 auto branchInterface = cast<BranchOpInterface>(terminator); 324 // Query the associated source value. 325 Value sourceValue = 326 branchInterface.getSuccessorOperands(it.getSuccessorIndex()) 327 .getValue()[blockArg.getArgNumber()]; 328 // Wire new clone and successor operand. 329 auto mutableOperands = 330 branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex()); 331 if (!mutableOperands) { 332 terminator->emitError() << "terminators with immutable successor " 333 "operands are not supported"; 334 continue; 335 } 336 // Create a new clone at the current location of the terminator. 337 auto clone = introduceCloneBuffers(sourceValue, terminator); 338 if (failed(clone)) 339 return failure(); 340 mutableOperands.getValue() 341 .slice(blockArg.getArgNumber(), 1) 342 .assign(*clone); 343 } 344 345 // Check whether the block argument has implicitly defined predecessors via 346 // the RegionBranchOpInterface. This can be the case if the current block 347 // argument belongs to the first block in a region and the parent operation 348 // implements the RegionBranchOpInterface. 
349 Region *argRegion = block->getParent(); 350 Operation *parentOp = argRegion->getParentOp(); 351 RegionBranchOpInterface regionInterface; 352 if (!argRegion || &argRegion->front() != block || 353 !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp))) 354 return success(); 355 356 if (failed(introduceClonesForRegionSuccessors( 357 regionInterface, argRegion->getParentOp()->getRegions(), blockArg, 358 [&](RegionSuccessor &successorRegion) { 359 // Find a predecessor of our argRegion. 360 return successorRegion.getSuccessor() == argRegion; 361 }))) 362 return failure(); 363 364 // Check whether the block argument belongs to an entry region of the 365 // parent operation. In this case, we have to introduce an additional clone 366 // for buffer that is passed to the argument. 367 SmallVector<RegionSuccessor, 2> successorRegions; 368 regionInterface.getSuccessorRegions(/*index=*/llvm::None, successorRegions); 369 auto *it = 370 llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) { 371 return successorRegion.getSuccessor() == argRegion; 372 }); 373 if (it == successorRegions.end()) 374 return success(); 375 376 // Determine the actual operand to introduce a clone for and rewire the 377 // operand to point to the clone instead. 378 Value operand = 379 regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber()) 380 [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()]; 381 auto clone = introduceCloneBuffers(operand, parentOp); 382 if (failed(clone)) 383 return failure(); 384 385 auto op = llvm::find(parentOp->getOperands(), operand); 386 assert(op != parentOp->getOperands().end() && 387 "parentOp does not contain operand"); 388 parentOp->setOperand(op.getIndex(), *clone); 389 return success(); 390 } 391 392 /// Introduces temporary clones in front of all associated nested-region 393 /// terminators and copies the source values into the newly allocated buffers. 
394 LogicalResult introduceValueCopyForRegionResult(Value value) { 395 // Get the actual result index in the scope of the parent terminator. 396 Operation *operation = value.getDefiningOp(); 397 auto regionInterface = cast<RegionBranchOpInterface>(operation); 398 // Filter successors that return to the parent operation. 399 auto regionPredicate = [&](RegionSuccessor &successorRegion) { 400 // If the RegionSuccessor has no associated successor, it will return to 401 // its parent operation. 402 return !successorRegion.getSuccessor(); 403 }; 404 // Introduce a clone for all region "results" that are returned to the 405 // parent operation. This is required since the parent's result value has 406 // been considered critical. Therefore, the algorithm assumes that a clone 407 // of a previously allocated buffer is returned by the operation (like in 408 // the case of a block argument). 409 return introduceClonesForRegionSuccessors( 410 regionInterface, operation->getRegions(), value, regionPredicate); 411 } 412 413 /// Introduces buffer clones for all terminators in the given regions. The 414 /// regionPredicate is applied to every successor region in order to restrict 415 /// the clones to specific regions. 416 template <typename TPredicate> 417 LogicalResult introduceClonesForRegionSuccessors( 418 RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions, 419 Value argValue, const TPredicate ®ionPredicate) { 420 for (Region ®ion : regions) { 421 // Query the regionInterface to get all successor regions of the current 422 // one. 423 SmallVector<RegionSuccessor, 2> successorRegions; 424 regionInterface.getSuccessorRegions(region.getRegionNumber(), 425 successorRegions); 426 // Try to find a matching region successor. 
427 RegionSuccessor *regionSuccessor = 428 llvm::find_if(successorRegions, regionPredicate); 429 if (regionSuccessor == successorRegions.end()) 430 continue; 431 // Get the operand index in the context of the current successor input 432 // bindings. 433 size_t operandIndex = 434 llvm::find(regionSuccessor->getSuccessorInputs(), argValue) 435 .getIndex(); 436 437 // Iterate over all immediate terminator operations to introduce 438 // new buffer allocations. Thereby, the appropriate terminator operand 439 // will be adjusted to point to the newly allocated buffer instead. 440 if (failed(walkReturnOperations(®ion, [&](Operation *terminator) { 441 // Get the actual mutable operands for this terminator op. 442 auto terminatorOperands = *getMutableRegionBranchSuccessorOperands( 443 terminator, region.getRegionNumber()); 444 // Extract the source value from the current terminator. 445 // This conversion needs to exist on a separate line due to a bug in 446 // GCC conversion analysis. 447 OperandRange immutableTerminatorOperands = terminatorOperands; 448 Value sourceValue = immutableTerminatorOperands[operandIndex]; 449 // Create a new clone at the current location of the terminator. 450 auto clone = introduceCloneBuffers(sourceValue, terminator); 451 if (failed(clone)) 452 return failure(); 453 // Wire clone and terminator operand. 454 terminatorOperands.slice(operandIndex, 1).assign(*clone); 455 return success(); 456 }))) 457 return failure(); 458 } 459 return success(); 460 } 461 462 /// Creates a new memory allocation for the given source value and clones 463 /// its content into the newly allocated buffer. The terminator operation is 464 /// used to insert the clone operation at the right place. 465 FailureOr<Value> introduceCloneBuffers(Value sourceValue, 466 Operation *terminator) { 467 // Avoid multiple clones of the same source value. 
This can happen in the 468 // presence of loops when a branch acts as a backedge while also having 469 // another successor that returns to its parent operation. Note: that 470 // copying copied buffers can introduce memory leaks since the invariant of 471 // BufferDeallocation assumes that a buffer will be only cloned once into a 472 // temporary buffer. Hence, the construction of clone chains introduces 473 // additional allocations that are not tracked automatically by the 474 // algorithm. 475 if (clonedValues.contains(sourceValue)) 476 return sourceValue; 477 // Create a new clone operation that copies the contents of the old 478 // buffer to the new one. 479 auto clone = buildClone(terminator, sourceValue); 480 if (succeeded(clone)) { 481 // Remember the clone of original source value. 482 clonedValues.insert(*clone); 483 } 484 return clone; 485 } 486 487 /// Finds correct dealloc positions according to the algorithm described at 488 /// the top of the file for all alloc nodes and block arguments that can be 489 /// handled by this analysis. 490 LogicalResult placeDeallocs() { 491 // Move or insert deallocs using the previously computed information. 492 // These deallocations will be linked to their associated allocation nodes 493 // since they don't have any aliases that can (potentially) increase their 494 // liveness. 495 for (const BufferPlacementAllocs::AllocEntry &entry : allocs) { 496 Value alloc = std::get<0>(entry); 497 auto aliasesSet = aliases.resolve(alloc); 498 assert(!aliasesSet.empty() && "must contain at least one alias"); 499 500 // Determine the actual block to place the dealloc and get liveness 501 // information. 502 Block *placementBlock = 503 findCommonDominator(alloc, aliasesSet, postDominators); 504 const LivenessBlockInfo *livenessInfo = 505 liveness.getLiveness(placementBlock); 506 507 // We have to ensure that the dealloc will be after the last use of all 508 // aliases of the given value. 
We first assume that there are no uses in 509 // the placementBlock and that we can safely place the dealloc at the 510 // beginning. 511 Operation *endOperation = &placementBlock->front(); 512 513 // Iterate over all aliases and ensure that the endOperation will point 514 // to the last operation of all potential aliases in the placementBlock. 515 for (Value alias : aliasesSet) { 516 // Ensure that the start operation is at least the defining operation of 517 // the current alias to avoid invalid placement of deallocs for aliases 518 // without any uses. 519 Operation *beforeOp = endOperation; 520 if (alias.getDefiningOp() && 521 !(beforeOp = placementBlock->findAncestorOpInBlock( 522 *alias.getDefiningOp()))) 523 continue; 524 525 Operation *aliasEndOperation = 526 livenessInfo->getEndOperation(alias, beforeOp); 527 // Check whether the aliasEndOperation lies in the desired block and 528 // whether it is behind the current endOperation. If yes, this will be 529 // the new endOperation. 530 if (aliasEndOperation->getBlock() == placementBlock && 531 endOperation->isBeforeInBlock(aliasEndOperation)) 532 endOperation = aliasEndOperation; 533 } 534 // endOperation is the last operation behind which we can safely store 535 // the dealloc taking all potential aliases into account. 536 537 // If there is an existing dealloc, move it to the right place. 538 Operation *deallocOperation = std::get<1>(entry); 539 if (deallocOperation) { 540 deallocOperation->moveAfter(endOperation); 541 } else { 542 // If the Dealloc position is at the terminator operation of the 543 // block, then the value should escape from a deallocation. 544 Operation *nextOp = endOperation->getNextNode(); 545 if (!nextOp) 546 continue; 547 // If there is no dealloc node, insert one in the right place. 548 if (failed(buildDealloc(nextOp, alloc))) 549 return failure(); 550 } 551 } 552 return success(); 553 } 554 555 /// Builds a deallocation operation compatible with the given allocation 556 /// value. 
If there is no registered AllocationOpInterface implementation for 557 /// the given value (e.g. in the case of a function parameter), this method 558 /// builds a memref::DeallocOp. 559 LogicalResult buildDealloc(Operation *op, Value alloc) { 560 OpBuilder builder(op); 561 auto it = aliasToAllocations.find(alloc); 562 if (it != aliasToAllocations.end()) { 563 // Call the allocation op interface to build a supported and 564 // compatible deallocation operation. 565 auto dealloc = it->second.buildDealloc(builder, alloc); 566 if (!dealloc) 567 return op->emitError() 568 << "allocations without compatible deallocations are " 569 "not supported"; 570 } else { 571 // Build a "default" DeallocOp for unknown allocation sources. 572 builder.create<memref::DeallocOp>(alloc.getLoc(), alloc); 573 } 574 return success(); 575 } 576 577 /// Builds a clone operation compatible with the given allocation value. If 578 /// there is no registered AllocationOpInterface implementation for the given 579 /// value (e.g. in the case of a function parameter), this method builds a 580 /// bufferization::CloneOp. 581 FailureOr<Value> buildClone(Operation *op, Value alloc) { 582 OpBuilder builder(op); 583 auto it = aliasToAllocations.find(alloc); 584 if (it != aliasToAllocations.end()) { 585 // Call the allocation op interface to build a supported and 586 // compatible clone operation. 587 auto clone = it->second.buildClone(builder, alloc); 588 if (clone) 589 return *clone; 590 return (LogicalResult)(op->emitError() 591 << "allocations without compatible clone ops " 592 "are not supported"); 593 } 594 // Build a "default" CloneOp for unknown allocation sources. 595 return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc) 596 .getResult(); 597 } 598 599 /// The dominator info to find the appropriate start operation to move the 600 /// allocs. 601 DominanceInfo dominators; 602 603 /// The post dominator info to move the dependent allocs in the right 604 /// position. 
605 PostDominanceInfo postDominators; 606 607 /// Stores already cloned buffers to avoid additional clones of clones. 608 ValueSetT clonedValues; 609 610 /// Maps aliases to their source allocation interfaces (inverse mapping). 611 AliasAllocationMapT aliasToAllocations; 612 }; 613 614 //===----------------------------------------------------------------------===// 615 // BufferDeallocationPass 616 //===----------------------------------------------------------------------===// 617 618 struct DefaultAllocationInterface 619 : public bufferization::AllocationOpInterface::ExternalModel< 620 DefaultAllocationInterface, memref::AllocOp> { 621 static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) { 622 return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc) 623 .getOperation(); 624 } 625 static Optional<Value> buildClone(OpBuilder &builder, Value alloc) { 626 return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc) 627 .getResult(); 628 } 629 }; 630 631 /// The actual buffer deallocation pass that inserts and moves dealloc nodes 632 /// into the right positions. Furthermore, it inserts additional clones if 633 /// necessary. It uses the algorithm described at the top of the file. 634 struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> { 635 void getDependentDialects(DialectRegistry ®istry) const override { 636 registry.insert<bufferization::BufferizationDialect>(); 637 registry.insert<memref::MemRefDialect>(); 638 registry.addOpInterface<memref::AllocOp, DefaultAllocationInterface>(); 639 } 640 641 void runOnFunction() override { 642 // Ensure that there are supported loops only. 643 FuncOp func = getFunction(); 644 Backedges backedges(func); 645 if (backedges.size()) { 646 func.emitError("Only structured control-flow loops are supported."); 647 return signalPassFailure(); 648 } 649 650 // Check that the control flow structures are supported. 
651 if (!validateSupportedControlFlow(func.getRegion())) 652 return signalPassFailure(); 653 654 // Gather all required allocation nodes and prepare the deallocation phase. 655 BufferDeallocation deallocation(func); 656 657 // Check for supported AllocationOpInterface implementations and prepare the 658 // internal deallocation pass. 659 if (failed(deallocation.prepare())) 660 return signalPassFailure(); 661 662 // Place all required temporary clone and dealloc nodes. 663 if (failed(deallocation.deallocate())) 664 return signalPassFailure(); 665 } 666 }; 667 668 } // namespace 669 670 //===----------------------------------------------------------------------===// 671 // BufferDeallocationPass construction 672 //===----------------------------------------------------------------------===// 673 674 std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() { 675 return std::make_unique<BufferDeallocationPass>(); 676 } 677