//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc
// positions. Furthermore, buffer deallocation also adds required new clone
// operations to ensure that all buffers are deallocated. The main class is
// the BufferDeallocationPass class that implements the underlying algorithm.
// In order to put allocations and deallocations at safe positions, it is
// crucial to put them into the correct blocks. However, the liveness analysis
// does not pay attention to aliases, which can occur due to branches (and
// their associated block arguments) in general. For this purpose,
// BufferDeallocation first finds all possible aliases for a single value
// (using the BufferViewFlowAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   cf.br ^exit(%arg0)
// ^bb2:
//   %new_value = ...
//   cf.br ^exit(%new_value)
// ^exit(%arg1):
//   return %arg1;
//
// We should place the dealloc for %new_value in exit. However, we have to
// free the buffer in the same block, because it cannot be safely freed in the
// post-dominator (on the path through bb1, %arg1 aliases %arg0 instead). This
// requires a new clone buffer for %arg1 that will contain the actual
// contents. Using the class BufferViewFlowAnalysis, we will find out that
// %new_value has a potential alias %arg1. In order to find the dealloc
// position, we have to find all potential aliases, iterate over their uses
// and find the common post-dominator block (note that additional clones and
// buffers remove potential aliases and will influence the placement of the
// deallocs). In all cases, the computed block can be safely used to free the
// %new_value buffer (it may be exit or bb2), and liveness information
// determines the exact operation after which the dealloc has to be inserted.
// Overall, the algorithm introduces clone buffers and places deallocs in safe
// locations to ensure that all buffers are eventually freed.
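//
// For illustration, the example above is rewritten roughly as sketched below,
// assuming %arg1 is consumed instead of returned (functions that return
// buffers are not yet accepted, see the TODO below); use(%arg1) stands for an
// arbitrary last use:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   %0 = bufferization.clone %arg0
//   cf.br ^exit(%0)
// ^bb2:
//   %new_value = ...
//   %1 = bufferization.clone %new_value
//   memref.dealloc %new_value
//   cf.br ^exit(%1)
// ^exit(%arg1):
//   use(%arg1)
//   memref.dealloc %arg1
//   return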
//
// TODO:
// The current implementation does not support explicit-control-flow loops;
// such inputs are rejected by the Backedges analysis below. Structured
// control-flow loops are fully supported. Furthermore, the pass does not yet
// accept functions that return buffers.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"

#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "llvm/ADT/SetOperations.h"

using namespace mlir;
using namespace mlir::bufferization;

/// Walks over all immediate return-like terminators in the given region.
static LogicalResult
walkReturnOperations(Region *region,
                     llvm::function_ref<LogicalResult(Operation *)> func) {
  for (Block &block : *region) {
    Operation *terminator = block.getTerminator();
    // Skip non region-return-like terminators.
    if (isRegionReturnLike(terminator)) {
      if (failed(func(terminator)))
        return failure();
    }
  }
  return success();
}

/// Checks if all operations that have at least one attached region implement
/// the RegionBranchOpInterface. This is not required in edge cases, where we
/// have a single attached region and the parent operation has no results.
static bool validateSupportedControlFlow(Operation *op) {
  WalkResult result = op->walk([&](Operation *operation) {
    // Only check ops that are inside a function.
    if (!operation->getParentOfType<FuncOp>())
      return WalkResult::advance();

    auto regions = operation->getRegions();
    // Walk over all operations in a region and check if the operation has at
    // least one region and implements the RegionBranchOpInterface. If there
    // is an operation that does not fulfill this condition, we cannot apply
    // the deallocation steps. Furthermore, we accept cases where a region
    // returns no results, since, in that case, the intra-region control flow
    // does not affect the transformation.
    size_t size = regions.size();
    if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
        !dyn_cast<RegionBranchOpInterface>(operation)) {
      operation->emitError("All operations with attached regions need to "
                           "implement the RegionBranchOpInterface.");
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });
  return !result.wasInterrupted();
}

namespace {

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

/// A straightforward program analysis which detects loop backedges induced by
/// explicit control flow.
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the
  /// given `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }
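
  // For illustration (hypothetical CFG): given bb0 -> bb1 -> bb2 -> bb1, the
  // DFS enters bb1 and then bb2; when bb1 is reached again from bb2 while it
  // is still on the processing stack, `enter` fails and the pair (bb2, bb1)
  // is recorded as a backedge.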

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing
  /// stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

/// The buffer deallocation transformation which ensures that all allocs in
/// the program have a corresponding de-allocation. As a side-effect, it might
/// also introduce clones that, in turn, lead to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation
      // of the AllocationOpInterface, we cannot apply the BufferDeallocation
      // pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      llvm::for_each(aliases.resolve(alloc), [&](Value alias) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
        aliasToAllocations[alias] = allocationInterface;
      });
    }
    return success();
  }

  /// Performs the actual placement/creation of all temporary clone and
  /// dealloc nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
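  /// For the example in the file header, %arg1 aliases %new_value but is not
  /// defined in a block dominated by it; it is therefore registered in
  /// `valuesToFree` below and later materialized via clones in all
  /// predecessors of ^exit.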
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SmallPtrSet<Value, 8> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone
    // in order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fixed point.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      if (failed(value.isa<BlockArgument>()
                     ? introduceBlockArgCopy(value.cast<BlockArgument>())
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not
      // have to assign a block here since we do not want to move the
      // allocation node to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      // Query the associated source value.
      Value sourceValue =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex())
              .getValue()[blockArg.getArgNumber()];
      // Wire new clone and successor operand.
      auto mutableOperands =
          branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex());
      if (!mutableOperands) {
        terminator->emitError() << "terminators with immutable successor "
                                   "operands are not supported";
        continue;
      }
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      mutableOperands.getValue()
          .slice(blockArg.getArgNumber(), 1)
          .assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors
    // via the RegionBranchOpInterface. This can be the case if the current
    // block argument belongs to the first block in a region and the parent
    // operation implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional
    // clone for the buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*index=*/llvm::None, successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    auto operands = regionInterface.getSuccessorEntryOperands(
        argRegion->getRegionNumber());
    size_t operandIndex =
        llvm::find(it->getSuccessorInputs(), blockArg).getIndex() +
        operands.getBeginOperandIndex();
    Value operand = parentOp->getOperand(operandIndex);
    assert(operand ==
               operands[operandIndex - operands.getBeginOperandIndex()] &&
           "region interface operands don't match parentOp operands");
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    parentOp->setOperand(operandIndex, *clone);
    return success();
  }

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated
  /// buffers.
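  /// For illustration, the result of an scf.if that yields a buffer which was
  /// allocated inside one of its branches is such a case: a clone of the
  /// yielded buffer is created in front of each scf.yield so that the
  /// original allocation can be deallocated within its own region.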
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to
  /// restrict the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region.getRegionNumber(),
                                          successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(&region, [&](Operation *terminator) {
            // Get the actual mutable operands for this terminator op.
            auto terminatorOperands = *getMutableRegionBranchSuccessorOperands(
                terminator, region.getRegionNumber());
            // Extract the source value from the current terminator. This
            // conversion needs to exist on a separate line due to a bug in
            // GCC conversion analysis.
            OperandRange immutableTerminatorOperands = terminatorOperands;
            Value sourceValue = immutableTerminatorOperands[operandIndex];
            // Create a new clone at the current location of the terminator.
            auto clone = introduceCloneBuffers(sourceValue, terminator);
            if (failed(clone))
              return failure();
            // Wire clone and terminator operand.
            terminatorOperands.slice(operandIndex, 1).assign(*clone);
            return success();
          })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
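  /// For illustration (hypothetical IR): a conditional branch that serves as
  /// a loop backedge while also feeding a region exit passes the same buffer
  /// to two successors; without the `clonedValues` bookkeeping below, that
  /// buffer would be cloned twice and the clone of the clone would leak.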
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note that
    // copying copied buffers can introduce memory leaks since the invariant
    // of BufferDeallocation assumes that a buffer will be only cloned once
    // into a temporary buffer. Hence, the construction of clone chains
    // introduces additional allocations that are not tracked automatically by
    // the algorithm.
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation
        // of the current alias to avoid invalid placement of deallocs for
        // aliases without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation behind which we can safely insert
      // the dealloc taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the dealloc position would be behind the block terminator, the
        // value escapes the block and must not be deallocated here.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation
  /// for the given value (e.g. in the case of a function parameter), this
  /// method builds a memref::DeallocOp.
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the
  /// given value (e.g. in the case of a function parameter), this method
  /// builds a bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator info to find the appropriate start operation to move the
  /// allocs.
  DominanceInfo dominators;

  /// The post dominator info to move the dependent allocs in the right
  /// position.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};

//===----------------------------------------------------------------------===//
// BufferDeallocationPass
//===----------------------------------------------------------------------===//

struct DefaultAllocationInterface
    : public bufferization::AllocationOpInterface::ExternalModel<
          DefaultAllocationInterface, memref::AllocOp> {
  static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) {
    return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
        .getOperation();
  }
  static Optional<Value> buildClone(OpBuilder &builder, Value alloc) {
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }
};

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<bufferization::BufferizationDialect>();
    registry.insert<memref::MemRefDialect>();
    registerAllocationOpInterfaceExternalModels(registry);
  }

  void runOnOperation() override {
    FuncOp func = getOperation();
    if (func.isExternal())
      return;

    if (failed(deallocateBuffers(func)))
      signalPassFailure();
  }
};

} // namespace

LogicalResult bufferization::deallocateBuffers(Operation *op) {
  if (isa<ModuleOp>(op)) {
    WalkResult result = op->walk([&](FuncOp funcOp) {
      if (failed(deallocateBuffers(funcOp)))
        return WalkResult::interrupt();
      return WalkResult::advance();
    });
    return success(!result.wasInterrupted());
  }

  // Ensure that only supported control-flow loops are present.
  Backedges backedges(op);
  if (backedges.size()) {
    op->emitError("Only structured control-flow loops are supported.");
    return failure();
  }

  // Check that the control flow structures are supported.
  if (!validateSupportedControlFlow(op))
    return failure();

  // Gather all required allocation nodes and prepare the deallocation phase.
  BufferDeallocation deallocation(op);

  // Check for supported AllocationOpInterface implementations and prepare the
  // internal deallocation pass.
  if (failed(deallocation.prepare()))
    return failure();

  // Place all required temporary clone and dealloc nodes.
  if (failed(deallocation.deallocate()))
    return failure();

  return success();
}

void bufferization::registerAllocationOpInterfaceExternalModels(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, memref::MemRefDialect *dialect) {
    memref::AllocOp::attachInterface<DefaultAllocationInterface>(*ctx);
  });
}

//===----------------------------------------------------------------------===//
// BufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
  return std::make_unique<BufferDeallocationPass>();
}
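
// A minimal usage sketch (hypothetical driver code, not part of this file):
// the pass is registered as -buffer-deallocation, so it can be run via
//   mlir-opt input.mlir -buffer-deallocation
// or scheduled programmatically on a module:
//   PassManager pm(&context);
//   pm.addNestedPass<FuncOp>(
//       bufferization::createBufferDeallocationPass());
//   (void)pm.run(module);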