//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc positions.
// Furthermore, buffer deallocation also adds required new clone operations to
// ensure that all buffers are deallocated. The main class is the
// BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is
// important to place them in the correct blocks. However, the liveness
// analysis does not pay attention to aliases, which can occur due to branches
// (and their associated block arguments) in general. For this purpose,
// BufferDeallocation first finds all possible aliases of a single value
// (using the BufferViewFlowAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   cf.br ^exit(%arg0)
// ^bb2:
//   %new_value = ...
//   cf.br ^exit(%new_value)
// ^exit(%arg1):
//   return %arg1;
//
// We should place the dealloc for %new_value in exit. However, we cannot free
// the buffer in the post dominator directly, since %arg1 may also refer to
// %arg0 on the other path; hence, we have to free %new_value in its own
// block. This, in turn, requires a new clone buffer for %arg1 that will
// contain the actual contents. Using the BufferViewFlowAnalysis class, we
// will find out that %new_value has a potential alias %arg1. In order to find
// the dealloc position, we have to find all potential aliases, iterate over
// their uses and find the common post-dominator block (note that additional
// clones and buffers remove potential aliases and will influence the
// placement of the deallocs). In all cases, the computed block can be safely
// used to free the %new_value buffer (it may be exit or bb2) as the buffer
// dies there, and we can use liveness information to determine the exact
// operation after which we have to insert the dealloc. In summary, the
// algorithm introduces clone buffers and places deallocs in safe locations
// to ensure that all buffers are freed in the end.
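//
// As an illustration (a sketch of the expected rewrite, not verbatim pass
// output; the exact insertion points are determined by liveness, and %clone0
// and %clone1 are illustrative names), the example above becomes roughly:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   %clone0 = bufferization.clone %arg0
//   cf.br ^exit(%clone0)
// ^bb2:
//   %new_value = ...
//   %clone1 = bufferization.clone %new_value
//   memref.dealloc %new_value
//   cf.br ^exit(%clone1)
// ^exit(%arg1):
//   ... (uses of %arg1)
//   memref.dealloc %arg1
//   return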
//
// TODO:
// The current implementation does not support explicit-control-flow loops;
// for such inputs, the resulting code would be invalid with respect to
// program semantics. However, structured control-flow loops are fully
// supported. Furthermore, it does not accept functions that already return
// buffers.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"

#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "llvm/ADT/SetOperations.h"

using namespace mlir;
using namespace mlir::bufferization;

/// Walks over all immediate return-like terminators in the given region.
static LogicalResult
walkReturnOperations(Region *region,
                     llvm::function_ref<LogicalResult(Operation *)> func) {
  for (Block &block : *region) {
    Operation *terminator = block.getTerminator();
    // Skip non region-return-like terminators.
    if (isRegionReturnLike(terminator)) {
      if (failed(func(terminator)))
        return failure();
    }
  }
  return success();
}

/// Checks if all operations in a given region that have at least one attached
/// region implement the RegionBranchOpInterface. This is not required in edge
/// cases where we have a single attached region and the parent operation has
/// no results.
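///
/// For example (hypothetical IR), the following op would be rejected unless
/// "my.op" implements the RegionBranchOpInterface, since it produces a result
/// and has an attached region:
///
///   %0 = "my.op"() ({ ... }) : () -> memref<2xf32>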
static bool validateSupportedControlFlow(Region &region) {
  bool success = true;
  region.walk([&success](Operation *operation) {
    auto regions = operation->getRegions();
    // Walk over all operations in a region and check if the operation has at
    // least one region and implements the RegionBranchOpInterface. If there
    // is an operation that does not fulfill this condition, we cannot apply
    // the deallocation steps. Furthermore, we accept cases where we have a
    // region that returns no results, since in that case the intra-region
    // control flow does not affect the transformation.
    size_t size = regions.size();
    if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
        !dyn_cast<RegionBranchOpInterface>(operation)) {
      operation->emitError("All operations with attached regions need to "
                           "implement the RegionBranchOpInterface.");
      success = false;
    }
  });
  return success;
}

namespace {

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

/// A straight-forward program analysis which detects loop backedges induced by
/// explicit control flow.
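///
/// For example (a hypothetical snippet), the conditional branch below jumps
/// back to its own block and therefore forms the backedge (bb1, bb1):
///
///   ^bb1:
///     ...
///     cf.cond_br %cond, ^bb1, ^bb2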
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding deallocation. As a side-effect, it might also
/// introduce clones that, in turn, lead to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation
      // of the AllocationOpInterface, we cannot apply the BufferDeallocation
      // pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      llvm::for_each(aliases.resolve(alloc), [&](Value alias) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
        aliasToAllocations[alias] = allocationInterface;
      });
    }
    return success();
  }

  /// Performs the actual placement/creation of all temporary clone and dealloc
  /// nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SmallPtrSet<Value, 8> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone in
    // order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fixpoint.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      if (failed(value.isa<BlockArgument>()
                     ? introduceBlockArgCopy(value.cast<BlockArgument>())
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not
      // have to assign a block here since we do not want to move the
      // allocation node to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
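  /// For example (a sketch; %clone is an illustrative name), a predecessor
  /// terminator cf.br ^bb3(%v) of a block argument ^bb3(%arg) is rewritten to
  ///
  ///   %clone = bufferization.clone %v
  ///   cf.br ^bb3(%clone)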
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      // Query the associated source value.
      Value sourceValue =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex())
              .getValue()[blockArg.getArgNumber()];
      // Wire new clone and successor operand.
      auto mutableOperands =
          branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex());
      if (!mutableOperands) {
        terminator->emitError() << "terminators with immutable successor "
                                   "operands are not supported";
        continue;
      }
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      mutableOperands.getValue()
          .slice(blockArg.getArgNumber(), 1)
          .assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors via
    // the RegionBranchOpInterface. This can be the case if the current block
    // argument belongs to the first block in a region and the parent operation
    // implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional clone
    // for the buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*index=*/llvm::None, successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    auto operands =
        regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber());
    size_t operandIndex =
        llvm::find(it->getSuccessorInputs(), blockArg).getIndex() +
        operands.getBeginOperandIndex();
    Value operand = parentOp->getOperand(operandIndex);
    assert(operand ==
               operands[operandIndex - operands.getBeginOperandIndex()] &&
           "region interface operands don't match parentOp operands");
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    parentOp->setOperand(operandIndex, *clone);
    return success();
  }

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated buffers.
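  /// For example (a sketch of the rewrite), given
  ///
  ///   %r = scf.if %cond -> (memref<2xf32>) {
  ///     scf.yield %buf : memref<2xf32>
  ///   } else { ... }
  ///
  /// a clone of %buf is created directly before the scf.yield, so that %r
  /// refers to a dedicated buffer that can be deallocated independently of
  /// %buf.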
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to restrict
  /// the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region.getRegionNumber(),
                                          successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(&region, [&](Operation *terminator) {
            // Get the actual mutable operands for this terminator op.
            auto terminatorOperands = *getMutableRegionBranchSuccessorOperands(
                terminator, region.getRegionNumber());
            // Extract the source value from the current terminator.
            // This conversion needs to exist on a separate line due to a bug in
            // GCC conversion analysis.
            OperandRange immutableTerminatorOperands = terminatorOperands;
            Value sourceValue = immutableTerminatorOperands[operandIndex];
            // Create a new clone at the current location of the terminator.
            auto clone = introduceCloneBuffers(sourceValue, terminator);
            if (failed(clone))
              return failure();
            // Wire clone and terminator operand.
            terminatorOperands.slice(operandIndex, 1).assign(*clone);
            return success();
          })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note that
    // copying copied buffers can introduce memory leaks since the invariant of
    // BufferDeallocation assumes that a buffer will be only cloned once into a
    // temporary buffer. Hence, the construction of clone chains introduces
    // additional allocations that are not tracked automatically by the
    // algorithm.
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of the original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation
        // of the current alias to avoid invalid placement of deallocs for
        // aliases without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation behind which we can safely insert
      // the dealloc, taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the dealloc position is at the terminator operation of the
        // block, then the value escapes the block and must not be
        // deallocated here.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation for
  /// the given value (e.g. in the case of a function parameter), this method
  /// builds a memref::DeallocOp.
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the given
  /// value (e.g. in the case of a function parameter), this method builds a
  /// bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator info to find the appropriate start operation to move the
  /// allocs.
  DominanceInfo dominators;

  /// The post dominator info to move the dependent allocs in the right
  /// position.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};

//===----------------------------------------------------------------------===//
// BufferDeallocationPass
//===----------------------------------------------------------------------===//

struct DefaultAllocationInterface
    : public bufferization::AllocationOpInterface::ExternalModel<
          DefaultAllocationInterface, memref::AllocOp> {
  static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) {
    return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
        .getOperation();
  }
  static Optional<Value> buildClone(OpBuilder &builder, Value alloc) {
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }
};

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<bufferization::BufferizationDialect>();
    registry.insert<memref::MemRefDialect>();
    registry.addOpInterface<memref::AllocOp, DefaultAllocationInterface>();
  }

  void runOnOperation() override {
    FuncOp func = getOperation();
    if (func.isExternal())
      return;
    // Ensure that only supported loops are present.
    Backedges backedges(func);
    if (backedges.size()) {
      func.emitError("Only structured control-flow loops are supported.");
      return signalPassFailure();
    }

    // Check that the control flow structures are supported.
    if (!validateSupportedControlFlow(func.getRegion()))
      return signalPassFailure();

    // Gather all required allocation nodes and prepare the deallocation phase.
    BufferDeallocation deallocation(func);

    // Check for supported AllocationOpInterface implementations and prepare
    // the internal deallocation pass.
    if (failed(deallocation.prepare()))
      return signalPassFailure();

    // Place all required temporary clone and dealloc nodes.
    if (failed(deallocation.deallocate()))
      return signalPassFailure();
  }
};

} // namespace

//===----------------------------------------------------------------------===//
// BufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
  return std::make_unique<BufferDeallocationPass>();
}