//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc
// positions. Furthermore, buffer deallocation also adds the clone operations
// that are required to ensure that all buffers are deallocated. The main class
// is BufferDeallocationPass, which implements the underlying algorithm. To
// place allocations and deallocations at safe positions, it is essential to
// put them into the correct blocks. However, the liveness analysis does not
// pay attention to aliases, which can occur due to branches (and their
// associated block arguments) in general. For this purpose, BufferDeallocation
// first finds all possible aliases of a single value (using the
// BufferViewFlowAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   cf.br ^exit(%arg0)
// ^bb2:
//   %new_value = ...
//   cf.br ^exit(%new_value)
// ^exit(%arg1):
//   return %arg1;
//
// We should place the dealloc for %new_value in exit. However, the buffer
// cannot be freed in that post dominator directly, since ^bb2 does not
// dominate ^exit (%arg1 may stem from %arg0 instead); it has to be freed in
// its own block, ^bb2. This, in turn, requires a new clone buffer for %arg1
// that will contain the actual contents. Using the BufferViewFlowAnalysis
// class, we find out that %new_value has a potential alias %arg1. In order to
// find the dealloc position, we have to find all potential aliases, iterate
// over their uses, and find the common post-dominator block (note that
// additional clones and buffers remove potential aliases and will influence
// the placement of the deallocs). In all cases, the computed block can be
// safely used to free the %new_value buffer (it may be exit or bb2), as the
// buffer dies there, and we can use liveness information to determine the
// exact operation after which the dealloc has to be inserted. The algorithm
// introduces clone buffers and places deallocs in safe locations to ensure
// that all buffers will be freed in the end.
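//
// As a rough sketch (SSA names chosen for illustration, and assuming %arg1 is
// consumed inside the function rather than actually returned, since returning
// buffers is unsupported; see the TODO below), the transformed IR looks like:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   %c0 = bufferization.clone %arg0
//   cf.br ^exit(%c0)
// ^bb2:
//   %new_value = ...
//   %c1 = bufferization.clone %new_value
//   memref.dealloc %new_value
//   cf.br ^exit(%c1)
// ^exit(%arg1):
//   ... last use of %arg1 ...
//   memref.dealloc %arg1
//   return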
//
// TODO:
// The current implementation does not support explicit-control-flow loops, and
// on such input the resulting code would be invalid with respect to program
// semantics. However, structured control-flow loops are fully supported.
// Furthermore, it does not yet accept functions that return buffers.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"

#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "llvm/ADT/SetOperations.h"

using namespace mlir;
using namespace mlir::bufferization;

/// Walks over all immediate return-like terminators in the given region.
static LogicalResult
walkReturnOperations(Region *region,
                     llvm::function_ref<LogicalResult(Operation *)> func) {
  for (Block &block : *region) {
    Operation *terminator = block.getTerminator();
    // Skip non region-return-like terminators.
    if (isRegionReturnLike(terminator)) {
      if (failed(func(terminator)))
        return failure();
    }
  }
  return success();
}

/// Checks if all operations that have at least one attached region implement
/// the RegionBranchOpInterface. This is not required in edge cases where we
/// have a single attached region and the parent operation has no results.
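///
/// For example (informal sketch): an `scf.if` producing results is accepted
/// because it implements the RegionBranchOpInterface, whereas a hypothetical
/// custom op with an attached region and results that lacks the interface is
/// rejected with an error.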
static bool validateSupportedControlFlow(Operation *op) {
  WalkResult result = op->walk([&](Operation *operation) {
    // Only check ops that are inside a function.
    if (!operation->getParentOfType<func::FuncOp>())
      return WalkResult::advance();

    auto regions = operation->getRegions();
    // Walk over all operations in a region and check if the operation has at
    // least one region and implements the RegionBranchOpInterface. If there
    // is an operation that does not fulfill this condition, we cannot apply
    // the deallocation steps. Furthermore, we accept cases where we have a
    // region that returns no results, since, in that case, the intra-region
    // control flow does not affect the transformation.
    size_t size = regions.size();
    if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
        !dyn_cast<RegionBranchOpInterface>(operation)) {
      operation->emitError("All operations with attached regions need to "
                           "implement the RegionBranchOpInterface.");
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });
  return !result.wasInterrupted();
}

namespace {

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

/// A straight-forward program analysis which detects loop backedges induced by
/// explicit control flow.
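///
/// For instance (a sketch with hypothetical block names), given
///   ^bb1: cf.cond_br %c, ^bb1, ^bb2
/// the DFS below re-enters ^bb1 while it is still on the processing stack and
/// records the edge (^bb1, ^bb1) as a backedge.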
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
/// introduce clones that, in turn, lead to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
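  ///
  /// For example, plain `memref.alloc` ops are covered via the
  /// DefaultAllocationInterface external model registered at the bottom of
  /// this file.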
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation
      // of the AllocationOpInterface, we cannot apply the BufferDeallocation
      // pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      for (Value alias : aliases.resolve(alloc)) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
        aliasToAllocations[alias] = allocationInterface;
      }
    }
    return success();
  }

  /// Performs the actual placement/creation of all temporary clone and dealloc
  /// nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SmallPtrSet<Value, 8> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone in
    // order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && value.isa<BlockArgument>())) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fix point.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      if (failed(value.isa<BlockArgument>()
                     ? introduceBlockArgCopy(value.cast<BlockArgument>())
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not
      // have to assign a block here since we do not want to move the
      // allocation node to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
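  ///
  /// For example (a sketch): for a block argument ^bb3(%arg1) that is fed by
  /// `cf.br ^bb3(%v)`, the branch is rewritten to pass a fresh
  /// `bufferization.clone %v` instead of %v itself.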
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      SuccessorOperands operands =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex());

      // Query the associated source value.
      Value sourceValue = operands[blockArg.getArgNumber()];
      if (!sourceValue) {
        return failure();
      }
      // Wire new clone and successor operand.
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      operands.slice(blockArg.getArgNumber(), 1).assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors via
    // the RegionBranchOpInterface. This can be the case if the current block
    // argument belongs to the first block in a region and the parent operation
    // implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional clone
    // for the buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*index=*/llvm::None,
                                        successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    auto operands = regionInterface.getSuccessorEntryOperands(
        argRegion->getRegionNumber());
    size_t operandIndex =
        llvm::find(it->getSuccessorInputs(), blockArg).getIndex() +
        operands.getBeginOperandIndex();
    Value operand = parentOp->getOperand(operandIndex);
    assert(operand ==
               operands[operandIndex - operands.getBeginOperandIndex()] &&
           "region interface operands don't match parentOp operands");
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    parentOp->setOperand(operandIndex, *clone);
    return success();
  }

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated
  /// buffers.
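  ///
  /// For example (a sketch): a buffer yielded to the parent, as in
  /// `%r = scf.if ... { ... scf.yield %buf }`, is rewritten to yield a fresh
  /// `bufferization.clone %buf`, so the parent's result owns its own memory.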
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to restrict
  /// the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region.getRegionNumber(),
                                          successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(&region, [&](Operation *terminator) {
            // Get the actual mutable operands for this terminator op.
            auto terminatorOperands = *getMutableRegionBranchSuccessorOperands(
                terminator, region.getRegionNumber());
            // Extract the source value from the current terminator.
            // This conversion needs to exist on a separate line due to a bug
            // in GCC conversion analysis.
            OperandRange immutableTerminatorOperands = terminatorOperands;
            Value sourceValue = immutableTerminatorOperands[operandIndex];
            // Create a new clone at the current location of the terminator.
            auto clone = introduceCloneBuffers(sourceValue, terminator);
            if (failed(clone))
              return failure();
            // Wire clone and terminator operand.
            terminatorOperands.slice(operandIndex, 1).assign(*clone);
            return success();
          })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note that
    // cloning already-cloned buffers can introduce memory leaks, since the
    // invariant of BufferDeallocation assumes that a buffer will be cloned
    // only once into a temporary buffer. Hence, the construction of clone
    // chains introduces additional allocations that are not tracked
    // automatically by the algorithm.
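    // For example (a sketch): a terminator such as
    // `cf.cond_br %c, ^body(%v), ^exit(%v)` that both forms a backedge and
    // returns %v would otherwise trigger two clones of %v.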
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of the original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
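  ///
  /// Informally: the dealloc for an alloc is placed in the common
  /// post-dominator block of all of its aliases, right after the last
  /// operation in that block at which any alias is still live (or an existing
  /// dealloc is moved there).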
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation
        // of the current alias to avoid invalid placement of deallocs for
        // aliases without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation behind which we can safely store
      // the dealloc taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the dealloc position would be at the terminator operation of the
        // block, the value escapes this scope (e.g., it is passed on by the
        // terminator), so we must not insert a dealloc.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation for
  /// the given value (e.g. in the case of a function parameter), this method
  /// builds a memref::DeallocOp.
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the given
  /// value (e.g. in the case of a function parameter), this method builds a
  /// bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator info to find the appropriate start operation to move the
  /// allocs.
  DominanceInfo dominators;

  /// The post dominator info to move the dependent allocs in the right
  /// position.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};

//===----------------------------------------------------------------------===//
// BufferDeallocationPass
//===----------------------------------------------------------------------===//

struct DefaultAllocationInterface
    : public bufferization::AllocationOpInterface::ExternalModel<
          DefaultAllocationInterface, memref::AllocOp> {
  static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) {
    return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
        .getOperation();
  }
  static Optional<Value> buildClone(OpBuilder &builder, Value alloc) {
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }
};

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<bufferization::BufferizationDialect>();
    registry.insert<memref::MemRefDialect>();
    registerAllocationOpInterfaceExternalModels(registry);
  }

  void runOnOperation() override {
    func::FuncOp func = getOperation();
    if (func.isExternal())
      return;

    if (failed(deallocateBuffers(func)))
      signalPassFailure();
  }
};

} // namespace

LogicalResult bufferization::deallocateBuffers(Operation *op) {
  if (isa<ModuleOp>(op)) {
    WalkResult result = op->walk([&](func::FuncOp funcOp) {
      if (failed(deallocateBuffers(funcOp)))
        return WalkResult::interrupt();
      return WalkResult::advance();
    });
    return success(!result.wasInterrupted());
  }

  // Ensure that there are supported loops only.
  Backedges backedges(op);
  if (backedges.size()) {
    op->emitError("Only structured control-flow loops are supported.");
    return failure();
  }

  // Check that the control flow structures are supported.
  if (!validateSupportedControlFlow(op))
    return failure();

  // Gather all required allocation nodes and prepare the deallocation phase.
  BufferDeallocation deallocation(op);

  // Check for supported AllocationOpInterface implementations and prepare the
  // internal deallocation pass.
  if (failed(deallocation.prepare()))
    return failure();

  // Place all required temporary clone and dealloc nodes.
  if (failed(deallocation.deallocate()))
    return failure();

  return success();
}

void bufferization::registerAllocationOpInterfaceExternalModels(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, memref::MemRefDialect *dialect) {
    memref::AllocOp::attachInterface<DefaultAllocationInterface>(*ctx);
  });
}

//===----------------------------------------------------------------------===//
// BufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
  return std::make_unique<BufferDeallocationPass>();
}