1 //===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements logic for computing correct alloc and dealloc positions.
10 // Furthermore, buffer deallocation also adds required new clone operations to
11 // ensure that all buffers are deallocated. The main class is the
12 // BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is
// crucial to place them in the correct blocks. However, the
15 // liveness analysis does not pay attention to aliases, which can occur due to
16 // branches (and their associated block arguments) in general. For this purpose,
17 // BufferDeallocation firstly finds all possible aliases for a single value
18 // (using the BufferViewFlowAnalysis class). Consider the following example:
19 //
20 // ^bb0(%arg0):
21 //   cf.cond_br %cond, ^bb1, ^bb2
22 // ^bb1:
23 //   cf.br ^exit(%arg0)
24 // ^bb2:
25 //   %new_value = ...
26 //   cf.br ^exit(%new_value)
27 // ^exit(%arg1):
28 //   return %arg1;
29 //
30 // We should place the dealloc for %new_value in exit. However, we have to free
31 // the buffer in the same block, because it cannot be freed in the post
32 // dominator. However, this requires a new clone buffer for %arg1 that will
33 // contain the actual contents. Using the class BufferViewFlowAnalysis, we
34 // will find out that %new_value has a potential alias %arg1. In order to find
35 // the dealloc position we have to find all potential aliases, iterate over
36 // their uses and find the common post-dominator block (note that additional
37 // clones and buffers remove potential aliases and will influence the placement
38 // of the deallocs). In all cases, the computed block can be safely used to free
39 // the %new_value buffer (may be exit or bb2) as it will die and we can use
40 // liveness information to determine the exact operation after which we have to
41 // insert the dealloc. However, the algorithm supports introducing clone buffers
42 // and placing deallocs in safe locations to ensure that all buffers will be
43 // freed in the end.
44 //
45 // TODO:
46 // The current implementation does not support explicit-control-flow loops and
47 // the resulting code will be invalid with respect to program semantics.
48 // However, structured control-flow loops are fully supported. Furthermore, it
49 // doesn't accept functions which return buffers already.
50 //
51 //===----------------------------------------------------------------------===//
52 
53 #include "PassDetail.h"
54 
55 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
56 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
57 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
58 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
59 #include "mlir/Dialect/MemRef/IR/MemRef.h"
60 #include "llvm/ADT/SetOperations.h"
61 
62 using namespace mlir;
63 using namespace mlir::bufferization;
64 
65 /// Walks over all immediate return-like terminators in the given region.
66 static LogicalResult
walkReturnOperations(Region * region,llvm::function_ref<LogicalResult (Operation *)> func)67 walkReturnOperations(Region *region,
68                      llvm::function_ref<LogicalResult(Operation *)> func) {
69   for (Block &block : *region) {
70     Operation *terminator = block.getTerminator();
71     // Skip non region-return-like terminators.
72     if (isRegionReturnLike(terminator)) {
73       if (failed(func(terminator)))
74         return failure();
75     }
76   }
77   return success();
78 }
79 
80 /// Checks if all operations that have at least one attached region implement
81 /// the RegionBranchOpInterface. This is not required in edge cases, where we
82 /// have a single attached region and the parent operation has no results.
validateSupportedControlFlow(Operation * op)83 static bool validateSupportedControlFlow(Operation *op) {
84   WalkResult result = op->walk([&](Operation *operation) {
85     // Only check ops that are inside a function.
86     if (!operation->getParentOfType<func::FuncOp>())
87       return WalkResult::advance();
88 
89     auto regions = operation->getRegions();
90     // Walk over all operations in a region and check if the operation has at
91     // least one region and implements the RegionBranchOpInterface. If there
92     // is an operation that does not fulfill this condition, we cannot apply
93     // the deallocation steps. Furthermore, we accept cases, where we have a
94     // region that returns no results, since, in that case, the intra-region
95     // control flow does not affect the transformation.
96     size_t size = regions.size();
97     if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
98         !dyn_cast<RegionBranchOpInterface>(operation)) {
99       operation->emitError("All operations with attached regions need to "
100                            "implement the RegionBranchOpInterface.");
101     }
102 
103     return WalkResult::advance();
104   });
105   return !result.wasSkipped();
106 }
107 
108 namespace {
109 
110 //===----------------------------------------------------------------------===//
111 // Backedges analysis
112 //===----------------------------------------------------------------------===//
113 
114 /// A straight-forward program analysis which detects loop backedges induced by
115 /// explicit control flow.
116 class Backedges {
117 public:
118   using BlockSetT = SmallPtrSet<Block *, 16>;
119   using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
120 
121 public:
122   /// Constructs a new backedges analysis using the op provided.
Backedges(Operation * op)123   Backedges(Operation *op) { recurse(op); }
124 
125   /// Returns the number of backedges formed by explicit control flow.
size() const126   size_t size() const { return edgeSet.size(); }
127 
128   /// Returns the start iterator to loop over all backedges.
begin() const129   BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
130 
131   /// Returns the end iterator to loop over all backedges.
end() const132   BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
133 
134 private:
135   /// Enters the current block and inserts a backedge into the `edgeSet` if we
136   /// have already visited the current block. The inserted edge links the given
137   /// `predecessor` with the `current` block.
enter(Block & current,Block * predecessor)138   bool enter(Block &current, Block *predecessor) {
139     bool inserted = visited.insert(&current).second;
140     if (!inserted)
141       edgeSet.insert(std::make_pair(predecessor, &current));
142     return inserted;
143   }
144 
145   /// Leaves the current block.
exit(Block & current)146   void exit(Block &current) { visited.erase(&current); }
147 
148   /// Recurses into the given operation while taking all attached regions into
149   /// account.
recurse(Operation * op)150   void recurse(Operation *op) {
151     Block *current = op->getBlock();
152     // If the current op implements the `BranchOpInterface`, there can be
153     // cycles in the scope of all successor blocks.
154     if (isa<BranchOpInterface>(op)) {
155       for (Block *succ : current->getSuccessors())
156         recurse(*succ, current);
157     }
158     // Recurse into all distinct regions and check for explicit control-flow
159     // loops.
160     for (Region &region : op->getRegions()) {
161       if (!region.empty())
162         recurse(region.front(), current);
163     }
164   }
165 
166   /// Recurses into explicit control-flow structures that are given by
167   /// the successor relation defined on the block level.
recurse(Block & block,Block * predecessor)168   void recurse(Block &block, Block *predecessor) {
169     // Try to enter the current block. If this is not possible, we are
170     // currently processing this block and can safely return here.
171     if (!enter(block, predecessor))
172       return;
173 
174     // Recurse into all operations and successor blocks.
175     for (Operation &op : block.getOperations())
176       recurse(&op);
177 
178     // Leave the current block.
179     exit(block);
180   }
181 
182   /// Stores all blocks that are currently visited and on the processing stack.
183   BlockSetT visited;
184 
185   /// Stores all backedges in the format (source, target).
186   BackedgeSetT edgeSet;
187 };
188 
189 //===----------------------------------------------------------------------===//
190 // BufferDeallocation
191 //===----------------------------------------------------------------------===//
192 
/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
/// introduce clones that in turn leads to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  /// Maps a buffer value (or one of its aliases) to the AllocationOpInterface
  /// implementation of the allocation it originates from.
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  /// Constructs the transformation for `op` and precomputes the dominance and
  /// post-dominance information used for clone and dealloc placement.
  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation of
      // the AllocationOpInterface, we cannot apply the BufferDeallocation pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      for (Value alias : aliases.resolve(alloc)) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
        aliasToAllocations[alias] = allocationInterface;
      }
    }
    return success();
  }

  /// Performs the actual placement/creation of all temporary clone and dealloc
  /// nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SmallPtrSet<Value, 8> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone in
    // order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        // Skip aliases that are already marked for a dedicated free.
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          // Otherwise, queue the alias for another fix-point iteration step
          // (at most once per (value, block) pair).
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fix point.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      // Block arguments get a clone in each predecessor; region results get a
      // clone in front of each region terminator.
      if (failed(value.isa<BlockArgument>()
                     ? introduceBlockArgCopy(value.cast<BlockArgument>())
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not have
      // to assign a block here since we do not want to move the allocation node
      // to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      SuccessorOperands operands =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex());

      // Query the associated source value.
      Value sourceValue = operands[blockArg.getArgNumber()];
      if (!sourceValue) {
        return failure();
      }
      // Wire new clone and successor operand.
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      operands.slice(blockArg.getArgNumber(), 1).assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors via
    // the RegionBranchOpInterface. This can be the case if the current block
    // argument belongs to the first block in a region and the parent operation
    // implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional clone
    // for buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*index=*/llvm::None, successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    auto operands =
        regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber());
    size_t operandIndex =
        llvm::find(it->getSuccessorInputs(), blockArg).getIndex() +
        operands.getBeginOperandIndex();
    Value operand = parentOp->getOperand(operandIndex);
    assert(operand ==
               operands[operandIndex - operands.getBeginOperandIndex()] &&
           "region interface operands don't match parentOp operands");
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    parentOp->setOperand(operandIndex, *clone);
    return success();
  }

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated buffers.
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to restrict
  /// the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region.getRegionNumber(),
                                          successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(&region, [&](Operation *terminator) {
            // Get the actual mutable operands for this terminator op.
            auto terminatorOperands = *getMutableRegionBranchSuccessorOperands(
                terminator, region.getRegionNumber());
            // Extract the source value from the current terminator.
            // This conversion needs to exist on a separate line due to a bug in
            // GCC conversion analysis.
            OperandRange immutableTerminatorOperands = terminatorOperands;
            Value sourceValue = immutableTerminatorOperands[operandIndex];
            // Create a new clone at the current location of the terminator.
            auto clone = introduceCloneBuffers(sourceValue, terminator);
            if (failed(clone))
              return failure();
            // Wire clone and terminator operand.
            terminatorOperands.slice(operandIndex, 1).assign(*clone);
            return success();
          })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note: that
    // copying copied buffers can introduce memory leaks since the invariant of
    // BufferDeallocation assumes that a buffer will be only cloned once into a
    // temporary buffer. Hence, the construction of clone chains introduces
    // additional allocations that are not tracked automatically by the
    // algorithm.
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation of
        // the current alias to avoid invalid placement of deallocs for aliases
        // without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          // The alias is defined outside the placement block's subtree; its
          // uses cannot affect the placement here.
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation behind which we can safely store
      // the dealloc taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the Dealloc position is at the terminator operation of the
        // block, then the value should escape from a deallocation.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation for
  /// the given value (e.g. in the case of a function parameter), this method
  /// builds a memref::DeallocOp.
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the given
  /// value (e.g. in the case of a function parameter), this method builds a
  /// bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator info to find the appropriate start operation to move the
  /// allocs.
  DominanceInfo dominators;

  /// The post dominator info to move the dependent allocs in the right
  /// position.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};
615 
616 //===----------------------------------------------------------------------===//
617 // BufferDeallocationPass
618 //===----------------------------------------------------------------------===//
619 
620 struct DefaultAllocationInterface
621     : public bufferization::AllocationOpInterface::ExternalModel<
622           DefaultAllocationInterface, memref::AllocOp> {
buildDealloc__anonf063894c0211::DefaultAllocationInterface623   static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) {
624     return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
625         .getOperation();
626   }
buildClone__anonf063894c0211::DefaultAllocationInterface627   static Optional<Value> buildClone(OpBuilder &builder, Value alloc) {
628     return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
629         .getResult();
630   }
631 };
632 
633 /// The actual buffer deallocation pass that inserts and moves dealloc nodes
634 /// into the right positions. Furthermore, it inserts additional clones if
635 /// necessary. It uses the algorithm described at the top of the file.
636 struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
getDependentDialects__anonf063894c0211::BufferDeallocationPass637   void getDependentDialects(DialectRegistry &registry) const override {
638     registry.insert<bufferization::BufferizationDialect>();
639     registry.insert<memref::MemRefDialect>();
640     registerAllocationOpInterfaceExternalModels(registry);
641   }
642 
runOnOperation__anonf063894c0211::BufferDeallocationPass643   void runOnOperation() override {
644     func::FuncOp func = getOperation();
645     if (func.isExternal())
646       return;
647 
648     if (failed(deallocateBuffers(func)))
649       signalPassFailure();
650   }
651 };
652 
653 } // namespace
654 
deallocateBuffers(Operation * op)655 LogicalResult bufferization::deallocateBuffers(Operation *op) {
656   if (isa<ModuleOp>(op)) {
657     WalkResult result = op->walk([&](func::FuncOp funcOp) {
658       if (failed(deallocateBuffers(funcOp)))
659         return WalkResult::interrupt();
660       return WalkResult::advance();
661     });
662     return success(!result.wasInterrupted());
663   }
664 
665   // Ensure that there are supported loops only.
666   Backedges backedges(op);
667   if (backedges.size()) {
668     op->emitError("Only structured control-flow loops are supported.");
669     return failure();
670   }
671 
672   // Check that the control flow structures are supported.
673   if (!validateSupportedControlFlow(op))
674     return failure();
675 
676   // Gather all required allocation nodes and prepare the deallocation phase.
677   BufferDeallocation deallocation(op);
678 
679   // Check for supported AllocationOpInterface implementations and prepare the
680   // internal deallocation pass.
681   if (failed(deallocation.prepare()))
682     return failure();
683 
684   // Place all required temporary clone and dealloc nodes.
685   if (failed(deallocation.deallocate()))
686     return failure();
687 
688   return success();
689 }
690 
registerAllocationOpInterfaceExternalModels(DialectRegistry & registry)691 void bufferization::registerAllocationOpInterfaceExternalModels(
692     DialectRegistry &registry) {
693   registry.addExtension(+[](MLIRContext *ctx, memref::MemRefDialect *dialect) {
694     memref::AllocOp::attachInterface<DefaultAllocationInterface>(*ctx);
695   });
696 }
697 
698 //===----------------------------------------------------------------------===//
699 // BufferDeallocationPass construction
700 //===----------------------------------------------------------------------===//
701 
createBufferDeallocationPass()702 std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
703   return std::make_unique<BufferDeallocationPass>();
704 }
705