1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13
14 #include "PassDetail.h"
15 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
16 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
17 #include "mlir/Dialect/MemRef/IR/MemRef.h"
18 #include "mlir/IR/Operation.h"
19 #include "mlir/Interfaces/LoopLikeInterface.h"
20 #include "mlir/Pass/Pass.h"
21
22 using namespace mlir;
23 using namespace mlir::bufferization;
24
25 /// Returns true if the given operation implements a known high-level region-
26 /// based control-flow interface.
isKnownControlFlowInterface(Operation * op)27 static bool isKnownControlFlowInterface(Operation *op) {
28 return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
29 }
30
31 /// Check if the size of the allocation is less than the given size. The
32 /// transformation is only applied to small buffers since large buffers could
33 /// exceed the stack space.
defaultIsSmallAlloc(Value alloc,unsigned maximumSizeInBytes,unsigned maxRankOfAllocatedMemRef)34 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
35 unsigned maxRankOfAllocatedMemRef) {
36 auto type = alloc.getType().dyn_cast<ShapedType>();
37 if (!type || !alloc.getDefiningOp<memref::AllocOp>())
38 return false;
39 if (!type.hasStaticShape()) {
40 // Check if the dynamic shape dimension of the alloc is produced by
41 // `memref.rank`. If this is the case, it is likely to be small.
42 // Furthermore, the dimension is limited to the maximum rank of the
43 // allocated memref to avoid large values by multiplying several small
44 // values.
45 if (type.getRank() <= maxRankOfAllocatedMemRef) {
46 return llvm::all_of(alloc.getDefiningOp()->getOperands(),
47 [&](Value operand) {
48 return operand.getDefiningOp<memref::RankOp>();
49 });
50 }
51 return false;
52 }
53 unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
54 .getTypeSizeInBits(type.getElementType());
55 return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
56 }
57
58 /// Checks whether the given aliases leave the allocation scope.
59 static bool
leavesAllocationScope(Region * parentRegion,const BufferViewFlowAnalysis::ValueSetT & aliases)60 leavesAllocationScope(Region *parentRegion,
61 const BufferViewFlowAnalysis::ValueSetT &aliases) {
62 for (Value alias : aliases) {
63 for (auto *use : alias.getUsers()) {
64 // If there is at least one alias that leaves the parent region, we know
65 // that this alias escapes the whole region and hence the associated
66 // allocation leaves allocation scope.
67 if (isRegionReturnLike(use) && use->getParentRegion() == parentRegion)
68 return true;
69 }
70 }
71 return false;
72 }
73
74 /// Checks, if an automated allocation scope for a given alloc value exists.
hasAllocationScope(Value alloc,const BufferViewFlowAnalysis & aliasAnalysis)75 static bool hasAllocationScope(Value alloc,
76 const BufferViewFlowAnalysis &aliasAnalysis) {
77 Region *region = alloc.getParentRegion();
78 do {
79 if (Operation *parentOp = region->getParentOp()) {
80 // Check if the operation is an automatic allocation scope and whether an
81 // alias leaves the scope. This means, an allocation yields out of
82 // this scope and can not be transformed in a stack-based allocation.
83 if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
84 !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
85 return true;
86 // Check if the operation is a known control flow interface and break the
87 // loop to avoid transformation in loops. Furthermore skip transformation
88 // if the operation does not implement a RegionBeanchOpInterface.
89 if (BufferPlacementTransformationBase::isLoop(parentOp) ||
90 !isKnownControlFlowInterface(parentOp))
91 break;
92 }
93 } while ((region = region->getParentRegion()));
94 return false;
95 }
96
97 namespace {
98
99 //===----------------------------------------------------------------------===//
100 // BufferAllocationHoisting
101 //===----------------------------------------------------------------------===//
102
103 /// A base implementation compatible with the `BufferAllocationHoisting` class.
104 struct BufferAllocationHoistingStateBase {
105 /// A pointer to the current dominance info.
106 DominanceInfo *dominators;
107
108 /// The current allocation value.
109 Value allocValue;
110
111 /// The current placement block (if any).
112 Block *placementBlock;
113
114 /// Initializes the state base.
BufferAllocationHoistingStateBase__anon68c8f7d70211::BufferAllocationHoistingStateBase115 BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
116 Block *placementBlock)
117 : dominators(dominators), allocValue(allocValue),
118 placementBlock(placementBlock) {}
119 };
120
121 /// Implements the actual hoisting logic for allocation nodes.
122 template <typename StateT>
123 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
124 public:
BufferAllocationHoisting(Operation * op)125 BufferAllocationHoisting(Operation *op)
126 : BufferPlacementTransformationBase(op), dominators(op),
127 postDominators(op), scopeOp(op) {}
128
129 /// Moves allocations upwards.
hoist()130 void hoist() {
131 SmallVector<Value> allocsAndAllocas;
132 for (BufferPlacementAllocs::AllocEntry &entry : allocs)
133 allocsAndAllocas.push_back(std::get<0>(entry));
134 scopeOp->walk([&](memref::AllocaOp op) {
135 allocsAndAllocas.push_back(op.getMemref());
136 });
137
138 for (auto allocValue : allocsAndAllocas) {
139 if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
140 continue;
141 Operation *definingOp = allocValue.getDefiningOp();
142 assert(definingOp && "No defining op");
143 auto operands = definingOp->getOperands();
144 auto resultAliases = aliases.resolve(allocValue);
145 // Determine the common dominator block of all aliases.
146 Block *dominatorBlock =
147 findCommonDominator(allocValue, resultAliases, dominators);
148 // Init the initial hoisting state.
149 StateT state(&dominators, allocValue, allocValue.getParentBlock());
150 // Check for additional allocation dependencies to compute an upper bound
151 // for hoisting.
152 Block *dependencyBlock = nullptr;
153 // If this node has dependencies, check all dependent nodes. This ensures
154 // that all dependency values have been computed before allocating the
155 // buffer.
156 for (Value depValue : operands) {
157 Block *depBlock = depValue.getParentBlock();
158 if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
159 dependencyBlock = depBlock;
160 }
161
162 // Find the actual placement block and determine the start operation using
163 // an upper placement-block boundary. The idea is that placement block
164 // cannot be moved any further upwards than the given upper bound.
165 Block *placementBlock = findPlacementBlock(
166 state, state.computeUpperBound(dominatorBlock, dependencyBlock));
167 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
168 allocValue, placementBlock, liveness);
169
170 // Move the alloc in front of the start operation.
171 Operation *allocOperation = allocValue.getDefiningOp();
172 allocOperation->moveBefore(startOperation);
173 }
174 }
175
176 private:
177 /// Finds a valid placement block by walking upwards in the CFG until we
178 /// either cannot continue our walk due to constraints (given by the StateT
179 /// implementation) or we have reached the upper-most dominator block.
findPlacementBlock(StateT & state,Block * upperBound)180 Block *findPlacementBlock(StateT &state, Block *upperBound) {
181 Block *currentBlock = state.placementBlock;
182 // Walk from the innermost regions/loops to the outermost regions/loops and
183 // find an appropriate placement block that satisfies the constraint of the
184 // current StateT implementation. Walk until we reach the upperBound block
185 // (if any).
186
187 // If we are not able to find a valid parent operation or an associated
188 // parent block, break the walk loop.
189 Operation *parentOp;
190 Block *parentBlock;
191 while ((parentOp = currentBlock->getParentOp()) &&
192 (parentBlock = parentOp->getBlock()) &&
193 (!upperBound ||
194 dominators.properlyDominates(upperBound, currentBlock))) {
195 // Try to find an immediate dominator and check whether the parent block
196 // is above the immediate dominator (if any).
197 DominanceInfoNode *idom = nullptr;
198
199 // DominanceInfo doesn't support getNode queries for single-block regions.
200 if (!currentBlock->isEntryBlock())
201 idom = dominators.getNode(currentBlock)->getIDom();
202
203 if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
204 // If the current immediate dominator is below the placement block, move
205 // to the immediate dominator block.
206 currentBlock = idom->getBlock();
207 state.recordMoveToDominator(currentBlock);
208 } else {
209 // We have to move to our parent block since an immediate dominator does
210 // either not exist or is above our parent block. If we cannot move to
211 // our parent operation due to constraints given by the StateT
212 // implementation, break the walk loop. Furthermore, we should not move
213 // allocations out of unknown region-based control-flow operations.
214 if (!isKnownControlFlowInterface(parentOp) ||
215 !state.isLegalPlacement(parentOp))
216 break;
217 // Move to our parent block by notifying the current StateT
218 // implementation.
219 currentBlock = parentBlock;
220 state.recordMoveToParent(currentBlock);
221 }
222 }
223 // Return the finally determined placement block.
224 return state.placementBlock;
225 }
226
227 /// The dominator info to find the appropriate start operation to move the
228 /// allocs.
229 DominanceInfo dominators;
230
231 /// The post dominator info to move the dependent allocs in the right
232 /// position.
233 PostDominanceInfo postDominators;
234
235 /// The map storing the final placement blocks of a given alloc value.
236 llvm::DenseMap<Value, Block *> placementBlocks;
237
238 /// The operation that this transformation is working on. It is used to also
239 /// gather allocas.
240 Operation *scopeOp;
241 };
242
243 /// A state implementation compatible with the `BufferAllocationHoisting` class
244 /// that hoists allocations into dominator blocks while keeping them inside of
245 /// loops.
246 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
247 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
248
249 /// Computes the upper bound for the placement block search.
computeUpperBound__anon68c8f7d70211::BufferAllocationHoistingState250 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
251 // If we do not have a dependency block, the upper bound is given by the
252 // dominator block.
253 if (!dependencyBlock)
254 return dominatorBlock;
255
256 // Find the "lower" block of the dominator and the dependency block to
257 // ensure that we do not move allocations above this block.
258 return dominators->properlyDominates(dominatorBlock, dependencyBlock)
259 ? dependencyBlock
260 : dominatorBlock;
261 }
262
263 /// Returns true if the given operation does not represent a loop.
isLegalPlacement__anon68c8f7d70211::BufferAllocationHoistingState264 bool isLegalPlacement(Operation *op) {
265 return !BufferPlacementTransformationBase::isLoop(op);
266 }
267
268 /// Returns true if the given operation should be considered for hoisting.
shouldHoistOpType__anon68c8f7d70211::BufferAllocationHoistingState269 static bool shouldHoistOpType(Operation *op) {
270 return llvm::isa<memref::AllocOp>(op);
271 }
272
273 /// Sets the current placement block to the given block.
recordMoveToDominator__anon68c8f7d70211::BufferAllocationHoistingState274 void recordMoveToDominator(Block *block) { placementBlock = block; }
275
276 /// Sets the current placement block to the given block.
recordMoveToParent__anon68c8f7d70211::BufferAllocationHoistingState277 void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
278 };
279
280 /// A state implementation compatible with the `BufferAllocationHoisting` class
281 /// that hoists allocations out of loops.
282 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
283 using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
284
285 /// Remembers the dominator block of all aliases.
286 Block *aliasDominatorBlock = nullptr;
287
288 /// Computes the upper bound for the placement block search.
computeUpperBound__anon68c8f7d70211::BufferAllocationLoopHoistingState289 Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
290 aliasDominatorBlock = dominatorBlock;
291 // If there is a dependency block, we have to use this block as an upper
292 // bound to satisfy all allocation value dependencies.
293 return dependencyBlock ? dependencyBlock : nullptr;
294 }
295
296 /// Returns true if the given operation represents a loop and one of the
297 /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
298 /// given loop operation. If this is the case, it indicates that the
299 /// allocation is passed via a back edge.
isLegalPlacement__anon68c8f7d70211::BufferAllocationLoopHoistingState300 bool isLegalPlacement(Operation *op) {
301 return BufferPlacementTransformationBase::isLoop(op) &&
302 !dominators->dominates(aliasDominatorBlock, op->getBlock());
303 }
304
305 /// Returns true if the given operation should be considered for hoisting.
shouldHoistOpType__anon68c8f7d70211::BufferAllocationLoopHoistingState306 static bool shouldHoistOpType(Operation *op) {
307 return llvm::isa<memref::AllocOp, memref::AllocaOp>(op);
308 }
309
310 /// Does not change the internal placement block, as we want to move
311 /// operations out of loops only.
recordMoveToDominator__anon68c8f7d70211::BufferAllocationLoopHoistingState312 void recordMoveToDominator(Block *block) {}
313
314 /// Sets the current placement block to the given block.
recordMoveToParent__anon68c8f7d70211::BufferAllocationLoopHoistingState315 void recordMoveToParent(Block *block) { placementBlock = block; }
316 };
317
318 //===----------------------------------------------------------------------===//
319 // BufferPlacementPromotion
320 //===----------------------------------------------------------------------===//
321
322 /// Promotes heap-based allocations to stack-based allocations (if possible).
323 class BufferPlacementPromotion : BufferPlacementTransformationBase {
324 public:
BufferPlacementPromotion(Operation * op)325 BufferPlacementPromotion(Operation *op)
326 : BufferPlacementTransformationBase(op) {}
327
328 /// Promote buffers to stack-based allocations.
promote(function_ref<bool (Value)> isSmallAlloc)329 void promote(function_ref<bool(Value)> isSmallAlloc) {
330 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
331 Value alloc = std::get<0>(entry);
332 Operation *dealloc = std::get<1>(entry);
333 // Checking several requirements to transform an AllocOp into an AllocaOp.
334 // The transformation is done if the allocation is limited to a given
335 // size. Furthermore, a deallocation must not be defined for this
336 // allocation entry and a parent allocation scope must exist.
337 if (!isSmallAlloc(alloc) || dealloc ||
338 !hasAllocationScope(alloc, aliases))
339 continue;
340
341 Operation *startOperation = BufferPlacementAllocs::getStartOperation(
342 alloc, alloc.getParentBlock(), liveness);
343 // Build a new alloca that is associated with its parent
344 // `AutomaticAllocationScope` determined during the initialization phase.
345 OpBuilder builder(startOperation);
346 Operation *allocOp = alloc.getDefiningOp();
347 Operation *alloca = builder.create<memref::AllocaOp>(
348 alloc.getLoc(), alloc.getType().cast<MemRefType>(),
349 allocOp->getOperands());
350
351 // Replace the original alloc by a newly created alloca.
352 allocOp->replaceAllUsesWith(alloca);
353 allocOp->erase();
354 }
355 }
356 };
357
358 //===----------------------------------------------------------------------===//
359 // BufferOptimizationPasses
360 //===----------------------------------------------------------------------===//
361
362 /// The buffer hoisting pass that hoists allocation nodes into dominating
363 /// blocks.
364 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> {
365
runOnOperation__anon68c8f7d70211::BufferHoistingPass366 void runOnOperation() override {
367 // Hoist all allocations into dominator blocks.
368 BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
369 getOperation());
370 optimizer.hoist();
371 }
372 };
373
374 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
375 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
376
runOnOperation__anon68c8f7d70211::BufferLoopHoistingPass377 void runOnOperation() override {
378 // Hoist all allocations out of loops.
379 BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(
380 getOperation());
381 optimizer.hoist();
382 }
383 };
384
385 /// The promote buffer to stack pass that tries to convert alloc nodes into
386 /// alloca nodes.
387 class PromoteBuffersToStackPass
388 : public PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
389 public:
PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,unsigned maxRankOfAllocatedMemRef)390 PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
391 unsigned maxRankOfAllocatedMemRef) {
392 this->maxAllocSizeInBytes = maxAllocSizeInBytes;
393 this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
394 }
395
PromoteBuffersToStackPass(std::function<bool (Value)> isSmallAlloc)396 explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
397 : isSmallAlloc(std::move(isSmallAlloc)) {}
398
initialize(MLIRContext * context)399 LogicalResult initialize(MLIRContext *context) override {
400 if (isSmallAlloc == nullptr) {
401 isSmallAlloc = [=](Value alloc) {
402 return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
403 maxRankOfAllocatedMemRef);
404 };
405 }
406 return success();
407 }
408
runOnOperation()409 void runOnOperation() override {
410 // Move all allocation nodes and convert candidates into allocas.
411 BufferPlacementPromotion optimizer(getOperation());
412 optimizer.promote(isSmallAlloc);
413 }
414
415 private:
416 std::function<bool(Value)> isSmallAlloc;
417 };
418
419 } // namespace
420
createBufferHoistingPass()421 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() {
422 return std::make_unique<BufferHoistingPass>();
423 }
424
createBufferLoopHoistingPass()425 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() {
426 return std::make_unique<BufferLoopHoistingPass>();
427 }
428
createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,unsigned maxRankOfAllocatedMemRef)429 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
430 unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) {
431 return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
432 maxRankOfAllocatedMemRef);
433 }
434
createPromoteBuffersToStackPass(std::function<bool (Value)> isSmallAlloc)435 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
436 std::function<bool(Value)> isSmallAlloc) {
437 return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
438 }
439