1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements logic for three optimization passes. The first two 10 // passes try to move alloc nodes out of blocks to reduce the number of 11 // allocations and copies during buffer deallocation. The third pass tries to 12 // convert heap-based allocations to stack-based allocations, if possible. 13 14 #include "PassDetail.h" 15 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h" 16 #include "mlir/Dialect/Bufferization/Transforms/Passes.h" 17 #include "mlir/Dialect/MemRef/IR/MemRef.h" 18 #include "mlir/IR/Operation.h" 19 #include "mlir/Interfaces/LoopLikeInterface.h" 20 #include "mlir/Pass/Pass.h" 21 22 using namespace mlir; 23 using namespace mlir::bufferization; 24 25 /// Returns true if the given operation implements a known high-level region- 26 /// based control-flow interface. 27 static bool isKnownControlFlowInterface(Operation *op) { 28 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op); 29 } 30 31 /// Check if the size of the allocation is less than the given size. The 32 /// transformation is only applied to small buffers since large buffers could 33 /// exceed the stack space. 34 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, 35 unsigned bitwidthOfIndexType, 36 unsigned maxRankOfAllocatedMemRef) { 37 auto type = alloc.getType().dyn_cast<ShapedType>(); 38 if (!type || !alloc.getDefiningOp<memref::AllocOp>()) 39 return false; 40 if (!type.hasStaticShape()) { 41 // Check if the dynamic shape dimension of the alloc is produced by 42 // `memref.rank`. If this is the case, it is likely to be small. 43 // Furthermore, the dimension is limited to the maximum rank of the 44 // allocated memref to avoid large values by multiplying several small 45 // values. 46 if (type.getRank() <= maxRankOfAllocatedMemRef) { 47 return llvm::all_of(alloc.getDefiningOp()->getOperands(), 48 [&](Value operand) { 49 return operand.getDefiningOp<memref::RankOp>(); 50 }); 51 } 52 return false; 53 } 54 // For index types, use the provided size, as the type does not know. 55 unsigned int bitwidth = type.getElementType().isIndex() 56 ? bitwidthOfIndexType 57 : type.getElementTypeBitWidth(); 58 return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8; 59 } 60 61 /// Checks whether the given aliases leave the allocation scope. 62 static bool 63 leavesAllocationScope(Region *parentRegion, 64 const BufferViewFlowAnalysis::ValueSetT &aliases) { 65 for (Value alias : aliases) { 66 for (auto *use : alias.getUsers()) { 67 // If there is at least one alias that leaves the parent region, we know 68 // that this alias escapes the whole region and hence the associated 69 // allocation leaves allocation scope. 70 if (isRegionReturnLike(use) && use->getParentRegion() == parentRegion) 71 return true; 72 } 73 } 74 return false; 75 } 76 77 /// Checks, if an automated allocation scope for a given alloc value exists. 78 static bool hasAllocationScope(Value alloc, 79 const BufferViewFlowAnalysis &aliasAnalysis) { 80 Region *region = alloc.getParentRegion(); 81 do { 82 if (Operation *parentOp = region->getParentOp()) { 83 // Check if the operation is an automatic allocation scope and whether an 84 // alias leaves the scope. This means, an allocation yields out of 85 // this scope and can not be transformed in a stack-based allocation. 86 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() && 87 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc))) 88 return true; 89 // Check if the operation is a known control flow interface and break the 90 // loop to avoid transformation in loops. Furthermore skip transformation 91 // if the operation does not implement a RegionBeanchOpInterface. 92 if (BufferPlacementTransformationBase::isLoop(parentOp) || 93 !isKnownControlFlowInterface(parentOp)) 94 break; 95 } 96 } while ((region = region->getParentRegion())); 97 return false; 98 } 99 100 namespace { 101 102 //===----------------------------------------------------------------------===// 103 // BufferAllocationHoisting 104 //===----------------------------------------------------------------------===// 105 106 /// A base implementation compatible with the `BufferAllocationHoisting` class. 107 struct BufferAllocationHoistingStateBase { 108 /// A pointer to the current dominance info. 109 DominanceInfo *dominators; 110 111 /// The current allocation value. 112 Value allocValue; 113 114 /// The current placement block (if any). 115 Block *placementBlock; 116 117 /// Initializes the state base. 118 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue, 119 Block *placementBlock) 120 : dominators(dominators), allocValue(allocValue), 121 placementBlock(placementBlock) {} 122 }; 123 124 /// Implements the actual hoisting logic for allocation nodes. 125 template <typename StateT> 126 class BufferAllocationHoisting : public BufferPlacementTransformationBase { 127 public: 128 BufferAllocationHoisting(Operation *op) 129 : BufferPlacementTransformationBase(op), dominators(op), 130 postDominators(op), scopeOp(op) {} 131 132 /// Moves allocations upwards. 133 void hoist() { 134 SmallVector<Value> allocsAndAllocas; 135 for (BufferPlacementAllocs::AllocEntry &entry : allocs) 136 allocsAndAllocas.push_back(std::get<0>(entry)); 137 scopeOp->walk( 138 [&](memref::AllocaOp op) { allocsAndAllocas.push_back(op.memref()); }); 139 140 for (auto allocValue : allocsAndAllocas) { 141 if (!StateT::shouldHoistOpType(allocValue.getDefiningOp())) 142 continue; 143 Operation *definingOp = allocValue.getDefiningOp(); 144 assert(definingOp && "No defining op"); 145 auto operands = definingOp->getOperands(); 146 auto resultAliases = aliases.resolve(allocValue); 147 // Determine the common dominator block of all aliases. 148 Block *dominatorBlock = 149 findCommonDominator(allocValue, resultAliases, dominators); 150 // Init the initial hoisting state. 151 StateT state(&dominators, allocValue, allocValue.getParentBlock()); 152 // Check for additional allocation dependencies to compute an upper bound 153 // for hoisting. 154 Block *dependencyBlock = nullptr; 155 // If this node has dependencies, check all dependent nodes. This ensures 156 // that all dependency values have been computed before allocating the 157 // buffer. 158 for (Value depValue : operands) { 159 Block *depBlock = depValue.getParentBlock(); 160 if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock)) 161 dependencyBlock = depBlock; 162 } 163 164 // Find the actual placement block and determine the start operation using 165 // an upper placement-block boundary. The idea is that placement block 166 // cannot be moved any further upwards than the given upper bound. 167 Block *placementBlock = findPlacementBlock( 168 state, state.computeUpperBound(dominatorBlock, dependencyBlock)); 169 Operation *startOperation = BufferPlacementAllocs::getStartOperation( 170 allocValue, placementBlock, liveness); 171 172 // Move the alloc in front of the start operation. 173 Operation *allocOperation = allocValue.getDefiningOp(); 174 allocOperation->moveBefore(startOperation); 175 } 176 } 177 178 private: 179 /// Finds a valid placement block by walking upwards in the CFG until we 180 /// either cannot continue our walk due to constraints (given by the StateT 181 /// implementation) or we have reached the upper-most dominator block. 182 Block *findPlacementBlock(StateT &state, Block *upperBound) { 183 Block *currentBlock = state.placementBlock; 184 // Walk from the innermost regions/loops to the outermost regions/loops and 185 // find an appropriate placement block that satisfies the constraint of the 186 // current StateT implementation. Walk until we reach the upperBound block 187 // (if any). 188 189 // If we are not able to find a valid parent operation or an associated 190 // parent block, break the walk loop. 191 Operation *parentOp; 192 Block *parentBlock; 193 while ((parentOp = currentBlock->getParentOp()) && 194 (parentBlock = parentOp->getBlock()) && 195 (!upperBound || 196 dominators.properlyDominates(upperBound, currentBlock))) { 197 // Try to find an immediate dominator and check whether the parent block 198 // is above the immediate dominator (if any). 199 DominanceInfoNode *idom = nullptr; 200 201 // DominanceInfo doesn't support getNode queries for single-block regions. 202 if (!currentBlock->isEntryBlock()) 203 idom = dominators.getNode(currentBlock)->getIDom(); 204 205 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) { 206 // If the current immediate dominator is below the placement block, move 207 // to the immediate dominator block. 208 currentBlock = idom->getBlock(); 209 state.recordMoveToDominator(currentBlock); 210 } else { 211 // We have to move to our parent block since an immediate dominator does 212 // either not exist or is above our parent block. If we cannot move to 213 // our parent operation due to constraints given by the StateT 214 // implementation, break the walk loop. Furthermore, we should not move 215 // allocations out of unknown region-based control-flow operations. 216 if (!isKnownControlFlowInterface(parentOp) || 217 !state.isLegalPlacement(parentOp)) 218 break; 219 // Move to our parent block by notifying the current StateT 220 // implementation. 221 currentBlock = parentBlock; 222 state.recordMoveToParent(currentBlock); 223 } 224 } 225 // Return the finally determined placement block. 226 return state.placementBlock; 227 } 228 229 /// The dominator info to find the appropriate start operation to move the 230 /// allocs. 231 DominanceInfo dominators; 232 233 /// The post dominator info to move the dependent allocs in the right 234 /// position. 235 PostDominanceInfo postDominators; 236 237 /// The map storing the final placement blocks of a given alloc value. 238 llvm::DenseMap<Value, Block *> placementBlocks; 239 240 /// The operation that this transformation is working on. It is used to also 241 /// gather allocas. 242 Operation *scopeOp; 243 }; 244 245 /// A state implementation compatible with the `BufferAllocationHoisting` class 246 /// that hoists allocations into dominator blocks while keeping them inside of 247 /// loops. 248 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase { 249 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; 250 251 /// Computes the upper bound for the placement block search. 252 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { 253 // If we do not have a dependency block, the upper bound is given by the 254 // dominator block. 255 if (!dependencyBlock) 256 return dominatorBlock; 257 258 // Find the "lower" block of the dominator and the dependency block to 259 // ensure that we do not move allocations above this block. 260 return dominators->properlyDominates(dominatorBlock, dependencyBlock) 261 ? dependencyBlock 262 : dominatorBlock; 263 } 264 265 /// Returns true if the given operation does not represent a loop. 266 bool isLegalPlacement(Operation *op) { 267 return !BufferPlacementTransformationBase::isLoop(op); 268 } 269 270 /// Returns true if the given operation should be considered for hoisting. 271 static bool shouldHoistOpType(Operation *op) { 272 return llvm::isa<memref::AllocOp>(op); 273 } 274 275 /// Sets the current placement block to the given block. 276 void recordMoveToDominator(Block *block) { placementBlock = block; } 277 278 /// Sets the current placement block to the given block. 279 void recordMoveToParent(Block *block) { recordMoveToDominator(block); } 280 }; 281 282 /// A state implementation compatible with the `BufferAllocationHoisting` class 283 /// that hoists allocations out of loops. 284 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase { 285 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; 286 287 /// Remembers the dominator block of all aliases. 288 Block *aliasDominatorBlock = nullptr; 289 290 /// Computes the upper bound for the placement block search. 291 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { 292 aliasDominatorBlock = dominatorBlock; 293 // If there is a dependency block, we have to use this block as an upper 294 // bound to satisfy all allocation value dependencies. 295 return dependencyBlock ? dependencyBlock : nullptr; 296 } 297 298 /// Returns true if the given operation represents a loop and one of the 299 /// aliases caused the `aliasDominatorBlock` to be "above" the block of the 300 /// given loop operation. If this is the case, it indicates that the 301 /// allocation is passed via a back edge. 302 bool isLegalPlacement(Operation *op) { 303 return BufferPlacementTransformationBase::isLoop(op) && 304 !dominators->dominates(aliasDominatorBlock, op->getBlock()); 305 } 306 307 /// Returns true if the given operation should be considered for hoisting. 308 static bool shouldHoistOpType(Operation *op) { 309 return llvm::isa<memref::AllocOp, memref::AllocaOp>(op); 310 } 311 312 /// Does not change the internal placement block, as we want to move 313 /// operations out of loops only. 314 void recordMoveToDominator(Block *block) {} 315 316 /// Sets the current placement block to the given block. 317 void recordMoveToParent(Block *block) { placementBlock = block; } 318 }; 319 320 //===----------------------------------------------------------------------===// 321 // BufferPlacementPromotion 322 //===----------------------------------------------------------------------===// 323 324 /// Promotes heap-based allocations to stack-based allocations (if possible). 325 class BufferPlacementPromotion : BufferPlacementTransformationBase { 326 public: 327 BufferPlacementPromotion(Operation *op) 328 : BufferPlacementTransformationBase(op) {} 329 330 /// Promote buffers to stack-based allocations. 331 void promote(function_ref<bool(Value)> isSmallAlloc) { 332 for (BufferPlacementAllocs::AllocEntry &entry : allocs) { 333 Value alloc = std::get<0>(entry); 334 Operation *dealloc = std::get<1>(entry); 335 // Checking several requirements to transform an AllocOp into an AllocaOp. 336 // The transformation is done if the allocation is limited to a given 337 // size. Furthermore, a deallocation must not be defined for this 338 // allocation entry and a parent allocation scope must exist. 339 if (!isSmallAlloc(alloc) || dealloc || 340 !hasAllocationScope(alloc, aliases)) 341 continue; 342 343 Operation *startOperation = BufferPlacementAllocs::getStartOperation( 344 alloc, alloc.getParentBlock(), liveness); 345 // Build a new alloca that is associated with its parent 346 // `AutomaticAllocationScope` determined during the initialization phase. 347 OpBuilder builder(startOperation); 348 Operation *allocOp = alloc.getDefiningOp(); 349 Operation *alloca = builder.create<memref::AllocaOp>( 350 alloc.getLoc(), alloc.getType().cast<MemRefType>(), 351 allocOp->getOperands()); 352 353 // Replace the original alloc by a newly created alloca. 354 allocOp->replaceAllUsesWith(alloca); 355 allocOp->erase(); 356 } 357 } 358 }; 359 360 //===----------------------------------------------------------------------===// 361 // BufferOptimizationPasses 362 //===----------------------------------------------------------------------===// 363 364 /// The buffer hoisting pass that hoists allocation nodes into dominating 365 /// blocks. 366 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> { 367 368 void runOnOperation() override { 369 // Hoist all allocations into dominator blocks. 370 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer( 371 getOperation()); 372 optimizer.hoist(); 373 } 374 }; 375 376 /// The buffer loop hoisting pass that hoists allocation nodes out of loops. 377 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> { 378 379 void runOnOperation() override { 380 // Hoist all allocations out of loops. 381 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer( 382 getOperation()); 383 optimizer.hoist(); 384 } 385 }; 386 387 /// The promote buffer to stack pass that tries to convert alloc nodes into 388 /// alloca nodes. 389 class PromoteBuffersToStackPass 390 : public PromoteBuffersToStackBase<PromoteBuffersToStackPass> { 391 public: 392 PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes, 393 unsigned bitwidthOfIndexType, 394 unsigned maxRankOfAllocatedMemRef) { 395 this->maxAllocSizeInBytes = maxAllocSizeInBytes; 396 this->bitwidthOfIndexType = bitwidthOfIndexType; 397 this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef; 398 } 399 400 explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc) 401 : isSmallAlloc(std::move(isSmallAlloc)) {} 402 403 LogicalResult initialize(MLIRContext *context) override { 404 if (isSmallAlloc == nullptr) { 405 isSmallAlloc = [=](Value alloc) { 406 return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes, 407 bitwidthOfIndexType, 408 maxRankOfAllocatedMemRef); 409 }; 410 } 411 return success(); 412 } 413 414 void runOnOperation() override { 415 // Move all allocation nodes and convert candidates into allocas. 416 BufferPlacementPromotion optimizer(getOperation()); 417 optimizer.promote(isSmallAlloc); 418 } 419 420 private: 421 std::function<bool(Value)> isSmallAlloc; 422 }; 423 424 } // namespace 425 426 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() { 427 return std::make_unique<BufferHoistingPass>(); 428 } 429 430 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() { 431 return std::make_unique<BufferLoopHoistingPass>(); 432 } 433 434 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( 435 unsigned maxAllocSizeInBytes, unsigned bitwidthOfIndexType, 436 unsigned maxRankOfAllocatedMemRef) { 437 return std::make_unique<PromoteBuffersToStackPass>( 438 maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef); 439 } 440 441 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( 442 std::function<bool(Value)> isSmallAlloc) { 443 return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc)); 444 } 445