1 //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for three optimization passes. The first two
10 // passes try to move alloc nodes out of blocks to reduce the number of
11 // allocations and copies during buffer deallocation. The third pass tries to
12 // convert heap-based allocations to stack-based allocations, if possible.
13 
14 #include "PassDetail.h"
15 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
16 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
17 #include "mlir/Dialect/MemRef/IR/MemRef.h"
18 #include "mlir/IR/Operation.h"
19 #include "mlir/Interfaces/LoopLikeInterface.h"
20 #include "mlir/Pass/Pass.h"
21 
22 using namespace mlir;
23 using namespace mlir::bufferization;
24 
25 /// Returns true if the given operation implements a known high-level region-
26 /// based control-flow interface.
27 static bool isKnownControlFlowInterface(Operation *op) {
28   return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
29 }
30 
31 /// Check if the size of the allocation is less than the given size. The
32 /// transformation is only applied to small buffers since large buffers could
33 /// exceed the stack space.
34 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
35                                 unsigned bitwidthOfIndexType,
36                                 unsigned maxRankOfAllocatedMemRef) {
37   auto type = alloc.getType().dyn_cast<ShapedType>();
38   if (!type || !alloc.getDefiningOp<memref::AllocOp>())
39     return false;
40   if (!type.hasStaticShape()) {
41     // Check if the dynamic shape dimension of the alloc is produced by
42     // `memref.rank`. If this is the case, it is likely to be small.
43     // Furthermore, the dimension is limited to the maximum rank of the
44     // allocated memref to avoid large values by multiplying several small
45     // values.
46     if (type.getRank() <= maxRankOfAllocatedMemRef) {
47       return llvm::all_of(alloc.getDefiningOp()->getOperands(),
48                           [&](Value operand) {
49                             return operand.getDefiningOp<memref::RankOp>();
50                           });
51     }
52     return false;
53   }
54   // For index types, use the provided size, as the type does not know.
55   unsigned int bitwidth = type.getElementType().isIndex()
56                               ? bitwidthOfIndexType
57                               : type.getElementTypeBitWidth();
58   return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
59 }
60 
61 /// Checks whether the given aliases leave the allocation scope.
62 static bool
63 leavesAllocationScope(Region *parentRegion,
64                       const BufferViewFlowAnalysis::ValueSetT &aliases) {
65   for (Value alias : aliases) {
66     for (auto *use : alias.getUsers()) {
67       // If there is at least one alias that leaves the parent region, we know
68       // that this alias escapes the whole region and hence the associated
69       // allocation leaves allocation scope.
70       if (isRegionReturnLike(use) && use->getParentRegion() == parentRegion)
71         return true;
72     }
73   }
74   return false;
75 }
76 
77 /// Checks, if an automated allocation scope for a given alloc value exists.
78 static bool hasAllocationScope(Value alloc,
79                                const BufferViewFlowAnalysis &aliasAnalysis) {
80   Region *region = alloc.getParentRegion();
81   do {
82     if (Operation *parentOp = region->getParentOp()) {
83       // Check if the operation is an automatic allocation scope and whether an
84       // alias leaves the scope. This means, an allocation yields out of
85       // this scope and can not be transformed in a stack-based allocation.
86       if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
87           !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
88         return true;
89       // Check if the operation is a known control flow interface and break the
90       // loop to avoid transformation in loops. Furthermore skip transformation
91       // if the operation does not implement a RegionBeanchOpInterface.
92       if (BufferPlacementTransformationBase::isLoop(parentOp) ||
93           !isKnownControlFlowInterface(parentOp))
94         break;
95     }
96   } while ((region = region->getParentRegion()));
97   return false;
98 }
99 
100 namespace {
101 
102 //===----------------------------------------------------------------------===//
103 // BufferAllocationHoisting
104 //===----------------------------------------------------------------------===//
105 
106 /// A base implementation compatible with the `BufferAllocationHoisting` class.
107 struct BufferAllocationHoistingStateBase {
108   /// A pointer to the current dominance info.
109   DominanceInfo *dominators;
110 
111   /// The current allocation value.
112   Value allocValue;
113 
114   /// The current placement block (if any).
115   Block *placementBlock;
116 
117   /// Initializes the state base.
118   BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
119                                     Block *placementBlock)
120       : dominators(dominators), allocValue(allocValue),
121         placementBlock(placementBlock) {}
122 };
123 
124 /// Implements the actual hoisting logic for allocation nodes.
125 template <typename StateT>
126 class BufferAllocationHoisting : public BufferPlacementTransformationBase {
127 public:
128   BufferAllocationHoisting(Operation *op)
129       : BufferPlacementTransformationBase(op), dominators(op),
130         postDominators(op), scopeOp(op) {}
131 
132   /// Moves allocations upwards.
133   void hoist() {
134     SmallVector<Value> allocsAndAllocas;
135     for (BufferPlacementAllocs::AllocEntry &entry : allocs)
136       allocsAndAllocas.push_back(std::get<0>(entry));
137     scopeOp->walk(
138         [&](memref::AllocaOp op) { allocsAndAllocas.push_back(op.memref()); });
139 
140     for (auto allocValue : allocsAndAllocas) {
141       if (!StateT::shouldHoistOpType(allocValue.getDefiningOp()))
142         continue;
143       Operation *definingOp = allocValue.getDefiningOp();
144       assert(definingOp && "No defining op");
145       auto operands = definingOp->getOperands();
146       auto resultAliases = aliases.resolve(allocValue);
147       // Determine the common dominator block of all aliases.
148       Block *dominatorBlock =
149           findCommonDominator(allocValue, resultAliases, dominators);
150       // Init the initial hoisting state.
151       StateT state(&dominators, allocValue, allocValue.getParentBlock());
152       // Check for additional allocation dependencies to compute an upper bound
153       // for hoisting.
154       Block *dependencyBlock = nullptr;
155       // If this node has dependencies, check all dependent nodes. This ensures
156       // that all dependency values have been computed before allocating the
157       // buffer.
158       for (Value depValue : operands) {
159         Block *depBlock = depValue.getParentBlock();
160         if (!dependencyBlock || dominators.dominates(dependencyBlock, depBlock))
161           dependencyBlock = depBlock;
162       }
163 
164       // Find the actual placement block and determine the start operation using
165       // an upper placement-block boundary. The idea is that placement block
166       // cannot be moved any further upwards than the given upper bound.
167       Block *placementBlock = findPlacementBlock(
168           state, state.computeUpperBound(dominatorBlock, dependencyBlock));
169       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
170           allocValue, placementBlock, liveness);
171 
172       // Move the alloc in front of the start operation.
173       Operation *allocOperation = allocValue.getDefiningOp();
174       allocOperation->moveBefore(startOperation);
175     }
176   }
177 
178 private:
179   /// Finds a valid placement block by walking upwards in the CFG until we
180   /// either cannot continue our walk due to constraints (given by the StateT
181   /// implementation) or we have reached the upper-most dominator block.
182   Block *findPlacementBlock(StateT &state, Block *upperBound) {
183     Block *currentBlock = state.placementBlock;
184     // Walk from the innermost regions/loops to the outermost regions/loops and
185     // find an appropriate placement block that satisfies the constraint of the
186     // current StateT implementation. Walk until we reach the upperBound block
187     // (if any).
188 
189     // If we are not able to find a valid parent operation or an associated
190     // parent block, break the walk loop.
191     Operation *parentOp;
192     Block *parentBlock;
193     while ((parentOp = currentBlock->getParentOp()) &&
194            (parentBlock = parentOp->getBlock()) &&
195            (!upperBound ||
196             dominators.properlyDominates(upperBound, currentBlock))) {
197       // Try to find an immediate dominator and check whether the parent block
198       // is above the immediate dominator (if any).
199       DominanceInfoNode *idom = nullptr;
200 
201       // DominanceInfo doesn't support getNode queries for single-block regions.
202       if (!currentBlock->isEntryBlock())
203         idom = dominators.getNode(currentBlock)->getIDom();
204 
205       if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
206         // If the current immediate dominator is below the placement block, move
207         // to the immediate dominator block.
208         currentBlock = idom->getBlock();
209         state.recordMoveToDominator(currentBlock);
210       } else {
211         // We have to move to our parent block since an immediate dominator does
212         // either not exist or is above our parent block. If we cannot move to
213         // our parent operation due to constraints given by the StateT
214         // implementation, break the walk loop. Furthermore, we should not move
215         // allocations out of unknown region-based control-flow operations.
216         if (!isKnownControlFlowInterface(parentOp) ||
217             !state.isLegalPlacement(parentOp))
218           break;
219         // Move to our parent block by notifying the current StateT
220         // implementation.
221         currentBlock = parentBlock;
222         state.recordMoveToParent(currentBlock);
223       }
224     }
225     // Return the finally determined placement block.
226     return state.placementBlock;
227   }
228 
229   /// The dominator info to find the appropriate start operation to move the
230   /// allocs.
231   DominanceInfo dominators;
232 
233   /// The post dominator info to move the dependent allocs in the right
234   /// position.
235   PostDominanceInfo postDominators;
236 
237   /// The map storing the final placement blocks of a given alloc value.
238   llvm::DenseMap<Value, Block *> placementBlocks;
239 
240   /// The operation that this transformation is working on. It is used to also
241   /// gather allocas.
242   Operation *scopeOp;
243 };
244 
245 /// A state implementation compatible with the `BufferAllocationHoisting` class
246 /// that hoists allocations into dominator blocks while keeping them inside of
247 /// loops.
248 struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
249   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
250 
251   /// Computes the upper bound for the placement block search.
252   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
253     // If we do not have a dependency block, the upper bound is given by the
254     // dominator block.
255     if (!dependencyBlock)
256       return dominatorBlock;
257 
258     // Find the "lower" block of the dominator and the dependency block to
259     // ensure that we do not move allocations above this block.
260     return dominators->properlyDominates(dominatorBlock, dependencyBlock)
261                ? dependencyBlock
262                : dominatorBlock;
263   }
264 
265   /// Returns true if the given operation does not represent a loop.
266   bool isLegalPlacement(Operation *op) {
267     return !BufferPlacementTransformationBase::isLoop(op);
268   }
269 
270   /// Returns true if the given operation should be considered for hoisting.
271   static bool shouldHoistOpType(Operation *op) {
272     return llvm::isa<memref::AllocOp>(op);
273   }
274 
275   /// Sets the current placement block to the given block.
276   void recordMoveToDominator(Block *block) { placementBlock = block; }
277 
278   /// Sets the current placement block to the given block.
279   void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
280 };
281 
282 /// A state implementation compatible with the `BufferAllocationHoisting` class
283 /// that hoists allocations out of loops.
284 struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
285   using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
286 
287   /// Remembers the dominator block of all aliases.
288   Block *aliasDominatorBlock = nullptr;
289 
290   /// Computes the upper bound for the placement block search.
291   Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
292     aliasDominatorBlock = dominatorBlock;
293     // If there is a dependency block, we have to use this block as an upper
294     // bound to satisfy all allocation value dependencies.
295     return dependencyBlock ? dependencyBlock : nullptr;
296   }
297 
298   /// Returns true if the given operation represents a loop and one of the
299   /// aliases caused the `aliasDominatorBlock` to be "above" the block of the
300   /// given loop operation. If this is the case, it indicates that the
301   /// allocation is passed via a back edge.
302   bool isLegalPlacement(Operation *op) {
303     return BufferPlacementTransformationBase::isLoop(op) &&
304            !dominators->dominates(aliasDominatorBlock, op->getBlock());
305   }
306 
307   /// Returns true if the given operation should be considered for hoisting.
308   static bool shouldHoistOpType(Operation *op) {
309     return llvm::isa<memref::AllocOp, memref::AllocaOp>(op);
310   }
311 
312   /// Does not change the internal placement block, as we want to move
313   /// operations out of loops only.
314   void recordMoveToDominator(Block *block) {}
315 
316   /// Sets the current placement block to the given block.
317   void recordMoveToParent(Block *block) { placementBlock = block; }
318 };
319 
320 //===----------------------------------------------------------------------===//
321 // BufferPlacementPromotion
322 //===----------------------------------------------------------------------===//
323 
324 /// Promotes heap-based allocations to stack-based allocations (if possible).
325 class BufferPlacementPromotion : BufferPlacementTransformationBase {
326 public:
327   BufferPlacementPromotion(Operation *op)
328       : BufferPlacementTransformationBase(op) {}
329 
330   /// Promote buffers to stack-based allocations.
331   void promote(function_ref<bool(Value)> isSmallAlloc) {
332     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
333       Value alloc = std::get<0>(entry);
334       Operation *dealloc = std::get<1>(entry);
335       // Checking several requirements to transform an AllocOp into an AllocaOp.
336       // The transformation is done if the allocation is limited to a given
337       // size. Furthermore, a deallocation must not be defined for this
338       // allocation entry and a parent allocation scope must exist.
339       if (!isSmallAlloc(alloc) || dealloc ||
340           !hasAllocationScope(alloc, aliases))
341         continue;
342 
343       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
344           alloc, alloc.getParentBlock(), liveness);
345       // Build a new alloca that is associated with its parent
346       // `AutomaticAllocationScope` determined during the initialization phase.
347       OpBuilder builder(startOperation);
348       Operation *allocOp = alloc.getDefiningOp();
349       Operation *alloca = builder.create<memref::AllocaOp>(
350           alloc.getLoc(), alloc.getType().cast<MemRefType>(),
351           allocOp->getOperands());
352 
353       // Replace the original alloc by a newly created alloca.
354       allocOp->replaceAllUsesWith(alloca);
355       allocOp->erase();
356     }
357   }
358 };
359 
360 //===----------------------------------------------------------------------===//
361 // BufferOptimizationPasses
362 //===----------------------------------------------------------------------===//
363 
364 /// The buffer hoisting pass that hoists allocation nodes into dominating
365 /// blocks.
366 struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> {
367 
368   void runOnOperation() override {
369     // Hoist all allocations into dominator blocks.
370     BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
371         getOperation());
372     optimizer.hoist();
373   }
374 };
375 
376 /// The buffer loop hoisting pass that hoists allocation nodes out of loops.
377 struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
378 
379   void runOnOperation() override {
380     // Hoist all allocations out of loops.
381     BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(
382         getOperation());
383     optimizer.hoist();
384   }
385 };
386 
387 /// The promote buffer to stack pass that tries to convert alloc nodes into
388 /// alloca nodes.
389 class PromoteBuffersToStackPass
390     : public PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
391 public:
392   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
393                             unsigned bitwidthOfIndexType,
394                             unsigned maxRankOfAllocatedMemRef) {
395     this->maxAllocSizeInBytes = maxAllocSizeInBytes;
396     this->bitwidthOfIndexType = bitwidthOfIndexType;
397     this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
398   }
399 
400   explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc)
401       : isSmallAlloc(std::move(isSmallAlloc)) {}
402 
403   LogicalResult initialize(MLIRContext *context) override {
404     if (isSmallAlloc == nullptr) {
405       isSmallAlloc = [=](Value alloc) {
406         return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
407                                    bitwidthOfIndexType,
408                                    maxRankOfAllocatedMemRef);
409       };
410     }
411     return success();
412   }
413 
414   void runOnOperation() override {
415     // Move all allocation nodes and convert candidates into allocas.
416     BufferPlacementPromotion optimizer(getOperation());
417     optimizer.promote(isSmallAlloc);
418   }
419 
420 private:
421   std::function<bool(Value)> isSmallAlloc;
422 };
423 
424 } // namespace
425 
426 std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() {
427   return std::make_unique<BufferHoistingPass>();
428 }
429 
430 std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() {
431   return std::make_unique<BufferLoopHoistingPass>();
432 }
433 
434 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
435     unsigned maxAllocSizeInBytes, unsigned bitwidthOfIndexType,
436     unsigned maxRankOfAllocatedMemRef) {
437   return std::make_unique<PromoteBuffersToStackPass>(
438       maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
439 }
440 
441 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
442     std::function<bool(Value)> isSmallAlloc) {
443   return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
444 }
445