1 //===- BufferizableOpInterface.cpp - Bufferizable Ops  ---=----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
10 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
11 #include "mlir/Dialect/MemRef/IR/MemRef.h"
12 #include "mlir/IR/AsmState.h"
13 #include "mlir/IR/BlockAndValueMapping.h"
14 #include "mlir/IR/BuiltinOps.h"
15 #include "mlir/IR/Operation.h"
16 #include "mlir/IR/TypeUtilities.h"
17 #include "mlir/IR/Value.h"
18 #include "llvm/Support/Debug.h"
19 
20 namespace mlir {
21 namespace bufferization {
22 
23 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.cpp.inc"
24 
25 } // namespace bufferization
26 } // namespace mlir
27 
28 #define DEBUG_TYPE "bufferizable-op-interface"
29 #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
30 #define LDBG(X) LLVM_DEBUG(DBGS() << (X))
31 
32 using namespace mlir;
33 using namespace bufferization;
34 
35 /// Attribute name used to mark the bufferization layout for region
36 /// arguments during linalg comprehensive bufferization.
37 constexpr const ::llvm::StringLiteral
38     bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
39 
40 /// Attribute name used to mark region arguments that can be bufferized
41 /// in-place during linalg comprehensive bufferization.
42 constexpr const ::llvm::StringLiteral
43     bufferization::BufferizableOpInterface::kInplaceableAttrName;
44 
45 //===----------------------------------------------------------------------===//
46 // BufferizationOptions
47 //===----------------------------------------------------------------------===//
48 
49 // Default constructor for BufferizationOptions.
50 BufferizationOptions::BufferizationOptions() {}
51 
52 BufferizableOpInterface
53 BufferizationOptions::dynCastBufferizableOp(Operation *op) const {
54   if (isOpAllowed(op))
55     return dyn_cast<BufferizableOpInterface>(op);
56   return nullptr;
57 }
58 
59 BufferizableOpInterface
60 BufferizationOptions::dynCastBufferizableOp(Value value) const {
61   if (auto bufferizableOp = value.getDefiningOp<BufferizableOpInterface>())
62     if (isOpAllowed(bufferizableOp.getOperation()))
63       return bufferizableOp;
64   return nullptr;
65 }
66 
67 //===----------------------------------------------------------------------===//
68 // Helper functions for BufferizableOpInterface
69 //===----------------------------------------------------------------------===//
70 
71 static void setInsertionPointAfter(OpBuilder &b, Value value) {
72   if (auto bbArg = value.dyn_cast<BlockArgument>()) {
73     b.setInsertionPointToStart(bbArg.getOwner());
74   } else {
75     b.setInsertionPointAfter(value.getDefiningOp());
76   }
77 }
78 
79 /// Determine which OpOperand* will alias with `result` if the op is bufferized
80 /// in place. Return an empty vector if the op is not bufferizable.
81 SmallVector<OpOperand *>
82 BufferizationState::getAliasingOpOperand(OpResult result) const {
83   if (Operation *op = result.getDefiningOp())
84     if (auto bufferizableOp = dyn_cast<BufferizableOpInterface>(op))
85       return bufferizableOp.getAliasingOpOperand(result, *this);
86   return {};
87 }
88 
89 /// Determine which OpResult will alias with `opOperand` if the op is bufferized
90 /// in place. Return an empty OpResult if the op is not bufferizable.
91 OpResult BufferizationState::getAliasingOpResult(OpOperand &opOperand) const {
92   if (auto bufferizableOp =
93           dyn_cast<BufferizableOpInterface>(opOperand.getOwner()))
94     return bufferizableOp.getAliasingOpResult(opOperand, *this);
95   return OpResult();
96 }
97 
98 /// Return true if `opOperand` bufferizes to a memory read. Return `true` if the
99 /// op is not bufferizable.
100 bool BufferizationState::bufferizesToMemoryRead(OpOperand &opOperand) const {
101   if (auto bufferizableOp =
102           dyn_cast<BufferizableOpInterface>(opOperand.getOwner()))
103     return bufferizableOp.bufferizesToMemoryRead(opOperand, *this);
104 
105   // Unknown op that returns a tensor. The inplace analysis does not support it.
106   // Conservatively return true.
107   return true;
108 }
109 
110 /// Return true if `opOperand` bufferizes to a memory write. Return
111 /// `true` if the op is not bufferizable.
112 bool BufferizationState::bufferizesToMemoryWrite(OpOperand &opOperand) const {
113   if (auto bufferizableOp =
114           dyn_cast<BufferizableOpInterface>(opOperand.getOwner()))
115     return bufferizableOp.bufferizesToMemoryWrite(opOperand, *this);
116 
117   // Unknown op that returns a tensor. The inplace analysis does not support it.
118   // Conservatively return true.
119   return true;
120 }
121 
122 /// Return true if `opOperand` does neither read nor write but bufferizes to an
123 /// alias. Return false if the op is not bufferizable.
124 bool BufferizationState::bufferizesToAliasOnly(OpOperand &opOperand) const {
125   if (auto bufferizableOp =
126           dyn_cast<BufferizableOpInterface>(opOperand.getOwner()))
127     return bufferizableOp.bufferizesToAliasOnly(opOperand, *this);
128 
129   // Unknown op that returns a tensor. The inplace analysis does not support it.
130   // Conservatively return false.
131   return false;
132 }
133 
134 /// Return true if the given value is read by an op that bufferizes to a memory
135 /// read. Also takes into account ops that create an alias but do not read by
136 /// themselves (e.g., ExtractSliceOp).
137 bool BufferizationState::isValueRead(Value value) const {
138   assert(value.getType().isa<TensorType>() && "expected TensorType");
139   SmallVector<OpOperand *> workingSet;
140   for (OpOperand &use : value.getUses())
141     workingSet.push_back(&use);
142 
143   while (!workingSet.empty()) {
144     OpOperand *uMaybeReading = workingSet.pop_back_val();
145     // Skip over all ops that neither read nor write (but create an alias).
146     if (bufferizesToAliasOnly(*uMaybeReading))
147       for (OpOperand &use : getAliasingOpResult(*uMaybeReading).getUses())
148         workingSet.push_back(&use);
149     if (bufferizesToMemoryRead(*uMaybeReading))
150       return true;
151   }
152 
153   return false;
154 }
155 
156 // Starting from `value`, follow the use-def chain in reverse, always selecting
157 // the aliasing OpOperands. Find and return Values for which `condition`
158 // evaluates to true. OpOperands of such matching Values are not traversed any
159 // further.
160 llvm::SetVector<Value> BufferizationState::findValueInReverseUseDefChain(
161     Value value, llvm::function_ref<bool(Value)> condition) const {
162   llvm::SetVector<Value> result, workingSet;
163   workingSet.insert(value);
164 
165   while (!workingSet.empty()) {
166     Value value = workingSet.pop_back_val();
167     if (condition(value) || value.isa<BlockArgument>()) {
168       result.insert(value);
169       continue;
170     }
171 
172     OpResult opResult = value.cast<OpResult>();
173     SmallVector<OpOperand *> opOperands = getAliasingOpOperand(opResult);
174     if (opOperands.empty() || !options.isOpAllowed(value.getDefiningOp())) {
175       result.insert(value);
176       continue;
177     }
178 
179     for (OpOperand *o : opOperands)
180       workingSet.insert(o->get());
181   }
182 
183   return result;
184 }
185 
186 // Find the Values of the last preceding write of a given Value.
187 llvm::SetVector<Value>
188 BufferizationState::findLastPrecedingWrite(Value value) const {
189   return findValueInReverseUseDefChain(value, [&](Value value) {
190     Operation *op = value.getDefiningOp();
191     if (!op)
192       return true;
193     auto bufferizableOp = options.dynCastBufferizableOp(op);
194     if (!bufferizableOp)
195       return true;
196     return bufferizableOp.isMemoryWrite(value.cast<OpResult>(), *this);
197   });
198 }
199 
200 BufferizationState::BufferizationState(const BufferizationOptions &options)
201     : options(options) {}
202 
203 // bufferization.to_memref is not allowed to change the rank.
204 static void ensureToMemrefOpIsValid(Value tensor, Type memrefType) {
205 #ifndef NDEBUG
206   auto rankedTensorType = tensor.getType().dyn_cast<RankedTensorType>();
207   assert((!rankedTensorType || memrefType.cast<MemRefType>().getRank() ==
208                                    rankedTensorType.getRank()) &&
209          "to_memref would be invalid: mismatching ranks");
210 #endif
211 }
212 
213 static Value lookupBuffer(RewriterBase &rewriter, Value tensor,
214                           const BufferizationOptions &options) {
215   auto tensorType = tensor.getType().dyn_cast<TensorType>();
216   assert(tensorType && "unexpected non-tensor type");
217 
218   // Replace "%t = to_tensor %m" with %m.
219   if (auto toTensorOp = tensor.getDefiningOp<bufferization::ToTensorOp>())
220     return toTensorOp.memref();
221 
222   // Insert to_memref op.
223   OpBuilder::InsertionGuard g(rewriter);
224   setInsertionPointAfter(rewriter, tensor);
225   Type memrefType = getMemRefType(tensorType, options);
226   ensureToMemrefOpIsValid(tensor, memrefType);
227   return rewriter.create<bufferization::ToMemrefOp>(tensor.getLoc(), memrefType,
228                                                     tensor);
229 }
230 
231 /// Return the result buffer (memref) for a given OpResult (tensor). Allocate
232 /// a new buffer and copy over data from the existing buffer if out-of-place
233 /// bufferization is necessary.
234 FailureOr<Value> BufferizationState::getBuffer(
235     RewriterBase &rewriter, OpOperand &opOperand, bool forceInPlace,
236     Optional<Operation *> customCopyInsertionPoint) const {
237   OpBuilder::InsertionGuard guard(rewriter);
238   Operation *op = opOperand.getOwner();
239   Location loc = op->getLoc();
240   Value operand = opOperand.get();
241   Value operandBuffer = lookupBuffer(rewriter, operand, options);
242 
243   if (forceInPlace || isInPlace(opOperand))
244     return operandBuffer;
245 
246   // Bufferizing out-of-place: Allocate a new buffer.
247   // Move insertion point right after `operandBuffer`. That is where the
248   // allocation should be inserted (in the absence of allocation hoisting).
249   setInsertionPointAfter(rewriter, operandBuffer);
250   // Allocate the result buffer.
251   FailureOr<Value> resultBuffer = createAlloc(rewriter, loc, operandBuffer,
252                                               options.createDeallocs, options);
253   if (failed(resultBuffer))
254     return failure();
255   // Do not copy if the last preceding writes of `operand` are ops that do
256   // not write (skipping ops that merely create aliases). E.g., InitTensorOp.
257   // Note: If `findLastPrecedingWrite` reaches the end of the reverse SSA
258   // use-def chain, it returns that value, regardless of whether it is a
259   // memory write or not.
260   SetVector<Value> lastWrites = findLastPrecedingWrite(operand);
261   if (llvm::none_of(lastWrites, [&](Value lastWrite) {
262         if (auto bufferizableOp = options.dynCastBufferizableOp(lastWrite))
263           return bufferizableOp.isMemoryWrite(lastWrite.cast<OpResult>(),
264                                               *this);
265         return true;
266       }))
267     return resultBuffer;
268   // Do not copy if the copied data is never read.
269   OpResult aliasingOpResult = getAliasingOpResult(opOperand);
270   if (aliasingOpResult && !bufferizesToMemoryRead(opOperand) &&
271       !isValueRead(aliasingOpResult))
272     return resultBuffer;
273   // Do not copy if this op does not read the data, but writes it.
274   if (bufferizesToMemoryWrite(opOperand) && !bufferizesToMemoryRead(opOperand))
275     return resultBuffer;
276 
277   if (customCopyInsertionPoint) {
278     rewriter.setInsertionPoint(*customCopyInsertionPoint);
279   } else {
280     // The copy happens right before the op that is bufferized.
281     rewriter.setInsertionPoint(op);
282   }
283   if (failed(
284           createMemCpy(rewriter, loc, operandBuffer, *resultBuffer, options)))
285     return failure();
286 
287   return resultBuffer;
288 }
289 
290 void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter,
291                                                   Operation *op,
292                                                   ValueRange values) {
293   OpBuilder::InsertionGuard g(rewriter);
294 
295   // Replace all OpResults with the given values.
296   for (OpResult opResult : op->getOpResults()) {
297     // Skip OpResult if it has no uses.
298     if (opResult.getUses().empty())
299       continue;
300 
301     Value replacement = values[opResult.getResultNumber()];
302     if (opResult.getType().isa<TensorType>()) {
303       // The OpResult is a tensor. Such values are replaced with memrefs during
304       // bufferization.
305       assert((replacement.getType().isa<MemRefType>() ||
306               replacement.getType().isa<UnrankedMemRefType>()) &&
307              "tensor op result should be replaced with a memref value");
308       // The existing uses of the OpResult still expect a tensor. Insert a
309       // ToTensorOp. Throughout bufferization, this ToTensorOp will gradually
310       // loose all of its users and eventually DCE away.
311       rewriter.setInsertionPointAfter(op);
312       replacement = rewriter.create<bufferization::ToTensorOp>(
313           replacement.getLoc(), replacement);
314     }
315     opResult.replaceAllUsesWith(replacement);
316   }
317 
318   rewriter.eraseOp(op);
319 }
320 
321 //===----------------------------------------------------------------------===//
322 // Bufferization-specific scoped alloc/dealloc insertion support.
323 //===----------------------------------------------------------------------===//
324 
325 /// Move the insertion point of the given builder to the beginning of a
326 /// surrounding block as much as possible, while not crossing any allocation
327 /// hoisting barriers.
328 static void moveInsertionPointToAllocationHoistingBarrier(OpBuilder &b) {
329   Operation *op = b.getInsertionBlock()->getParentOp();
330   while (op) {
331     if (auto bufferizableOp = dyn_cast<BufferizableOpInterface>(op))
332       if (bufferizableOp.isAllocationHoistingBarrier())
333         break;
334     op = op->getParentOp();
335   }
336 
337   if (!op) {
338     // No allocation hoisting barrier found. Hoist to FuncOp.
339     op = b.getInsertionBlock()->getParentOp();
340     if (!isa<FuncOp>(op))
341       op = op->getParentOfType<FuncOp>();
342     assert(op && "could not find enclosing FuncOp");
343   }
344 
345   // TODO: Handle cases where allocation hoisting barrier has more than one
346   // region or block.
347   assert(op->getNumRegions() == 1 &&
348          "allocation hoisting barriers with >1 regions not supported");
349   assert(op->getRegion(0).getBlocks().size() == 1 &&
350          "allocation hoisting barriers with >1 blocks not supported");
351   b.setInsertionPointToStart(&(op->getRegion(0).front()));
352 }
353 
354 /// Compute the type of the `memref` to use for allocating the buffer for
355 /// `shapedValue`. Also returns (by reference in `dynShape`), the value for the
356 /// dynamic dimensions in the returned `memref` type. The function may also set
357 /// the insertion point to an earlier location, where the allocation should
358 /// happen ("allocation hoisting").
359 static MemRefType getAllocationTypeAndShape(OpBuilder &b, Location loc,
360                                             Value shapedValue,
361                                             SmallVectorImpl<Value> &dynShape) {
362   MemRefType allocMemRefType =
363       getContiguousMemRefType(shapedValue.getType().cast<ShapedType>());
364 
365   // Compute the dynamic part of the shape.
366   bool reifiedShapes = false;
367   if (auto rankedOp = dyn_cast_or_null<ReifyRankedShapedTypeOpInterface>(
368           shapedValue.getDefiningOp())) {
369     ReifiedRankedShapedTypeDims resultDims;
370     if (succeeded(rankedOp.reifyResultShapes(b, resultDims))) {
371       reifiedShapes = true;
372       OpResult resultValue = shapedValue.dyn_cast<OpResult>();
373       auto &shape = resultDims[resultValue.getResultNumber()];
374       for (const auto &dim : enumerate(allocMemRefType.getShape()))
375         if (ShapedType::isDynamic(dim.value()))
376           dynShape.push_back(shape[dim.index()]);
377     }
378   }
379 
380   if (!reifiedShapes) {
381     for (const auto &dim : enumerate(allocMemRefType.getShape()))
382       if (ShapedType::isDynamic(dim.value())) {
383         assert((shapedValue.getType().isa<UnrankedMemRefType>() ||
384                 shapedValue.getType().isa<MemRefType>()) &&
385                "expected MemRef type");
386         dynShape.push_back(
387             b.create<memref::DimOp>(loc, shapedValue, dim.index()));
388       }
389   }
390 
391   // If the buffer is statically shaped, try to hoist it to the first enclosing
392   // parallel region.
393   // TODO: also hoist in the dynamic case. For now this relies on subsequent
394   // calls to LICM and buffer hoisting which will most likely not succeed.
395   // TODO: when packing, allocate a static bounding box which will enable more
396   // hoisting.
397   if (dynShape.empty())
398     moveInsertionPointToAllocationHoistingBarrier(b);
399 
400   return allocMemRefType;
401 }
402 
403 /// Create an AllocOp/DeallocOp pair, where the AllocOp is after
404 /// `shapedValue.getDefiningOp` (or at the top of the block in case of a
405 /// bbArg) and the DeallocOp is at the end of the block.
406 FailureOr<Value>
407 bufferization::createAlloc(OpBuilder &b, Location loc, Value shapedValue,
408                            bool deallocMemref,
409                            const BufferizationOptions &options) {
410   // Take a guard before anything else.
411   OpBuilder::InsertionGuard g(b);
412 
413   // 1. Create memory allocation.
414   assert(shapedValue.getType().isa<ShapedType>());
415   MemRefType memRefType = shapedValue.getType().dyn_cast<MemRefType>();
416   SmallVector<Value> dynShape;
417   // Note: getAllocationTypeAndShape also sets the insertion point.
418   MemRefType allocMemRefType =
419       getAllocationTypeAndShape(b, loc, shapedValue, dynShape);
420   FailureOr<Value> allocated =
421       createAlloc(b, loc, allocMemRefType, dynShape, options);
422   if (failed(allocated))
423     return failure();
424   Value casted = allocated.getValue();
425   if (memRefType && memRefType != allocMemRefType) {
426     assert(memref::CastOp::areCastCompatible(allocated.getValue().getType(),
427                                              memRefType) &&
428            "createAlloc: cast incompatible");
429     casted = b.create<memref::CastOp>(loc, memRefType, allocated.getValue());
430   }
431 
432   if (deallocMemref) {
433     // 2. Create memory deallocation.
434     b.setInsertionPoint(allocated.getValue().getParentBlock()->getTerminator());
435     if (failed(createDealloc(b, loc, allocated.getValue(), options)))
436       return failure();
437   }
438 
439   return casted;
440 }
441 
442 /// Create a memref allocation with the given type and dynamic extents.
443 FailureOr<Value>
444 bufferization::createAlloc(OpBuilder &b, Location loc, MemRefType type,
445                            ValueRange dynShape,
446                            const BufferizationOptions &options) {
447   if (options.allocationFn)
448     return (*options.allocationFn)(b, loc, type, dynShape);
449 
450   // Default bufferallocation via AllocOp.
451   Value allocated = b.create<memref::AllocOp>(
452       loc, type, dynShape, b.getI64IntegerAttr(kBufferAlignments));
453   return allocated;
454 }
455 
456 /// Create a memref allocation with the given type and dynamic extents. May also
457 /// deallocate the memref again.
458 FailureOr<Value>
459 bufferization::createAlloc(OpBuilder &b, Location loc, MemRefType type,
460                            ValueRange dynShape, bool deallocMemref,
461                            const BufferizationOptions &options) {
462   OpBuilder::InsertionGuard g(b);
463 
464   FailureOr<Value> alloc = createAlloc(b, loc, type, dynShape, options);
465   if (failed(alloc))
466     return failure();
467 
468   if (deallocMemref) {
469     // Dealloc at the end of the block.
470     b.setInsertionPoint(alloc.getValue().getParentBlock()->getTerminator());
471     if (failed(createDealloc(b, loc, *alloc, options)))
472       return failure();
473   }
474 
475   return alloc;
476 }
477 
478 /// Create a memref deallocation.
479 LogicalResult
480 bufferization::createDealloc(OpBuilder &b, Location loc, Value allocatedBuffer,
481                              const BufferizationOptions &options) {
482   if (options.deallocationFn)
483     return (*options.deallocationFn)(b, loc, allocatedBuffer);
484 
485   // Default buffer deallocation via DeallocOp.
486   b.create<memref::DeallocOp>(loc, allocatedBuffer);
487   return success();
488 }
489 
490 /// Create a memory copy between two memref buffers.
491 LogicalResult bufferization::createMemCpy(OpBuilder &b, Location loc,
492                                           Value from, Value to,
493                                           const BufferizationOptions &options) {
494   if (options.memCpyFn)
495     return (*options.memCpyFn)(b, loc, from, to);
496 
497   b.create<memref::CopyOp>(loc, from, to);
498   return success();
499 }
500 
501 //===----------------------------------------------------------------------===//
502 // Bufferization-specific BlockAndValueMapping support with debugging.
503 //===----------------------------------------------------------------------===//
504 
505 bool bufferization::isFunctionArgument(Value value) {
506   auto bbArg = value.dyn_cast<BlockArgument>();
507   if (!bbArg)
508     return false;
509   return isa<FuncOp>(bbArg.getOwner()->getParentOp());
510 }
511 
512 MemRefType bufferization::getContiguousMemRefType(ShapedType shapedType,
513                                                   Attribute memorySpace) {
514   MemRefLayoutAttrInterface layout = {};
515   return MemRefType::get(shapedType.getShape(), shapedType.getElementType(),
516                          layout, memorySpace);
517 }
518 
519 BaseMemRefType bufferization::getMemRefType(TensorType tensorType,
520                                             const BufferizationOptions &options,
521                                             MemRefLayoutAttrInterface layout,
522                                             Attribute memorySpace) {
523   // Case 1: Unranked memref type.
524   if (auto unrankedTensorType = tensorType.dyn_cast<UnrankedTensorType>()) {
525     assert(!layout && "UnrankedTensorType cannot have a layout map");
526     return UnrankedMemRefType::get(unrankedTensorType.getElementType(),
527                                    memorySpace);
528   }
529 
530   // Case 2: Ranked memref type with specified layout. If fully dynamic layout
531   // maps are not requested, generate a type with `layout`, which is empty (no
532   // layout map) by default.
533   auto rankedTensorType = tensorType.cast<RankedTensorType>();
534   if (layout || !options.fullyDynamicLayoutMaps) {
535     return MemRefType::get(rankedTensorType.getShape(),
536                            rankedTensorType.getElementType(), layout,
537                            memorySpace);
538   }
539 
540   // Case 3: Ranked memref type with unspecified layout. Choose the most dynamic
541   // one.
542   // TODO: address space decisions to connect with the actual alloc.
543   int64_t dynamicOffset = ShapedType::kDynamicStrideOrOffset;
544   SmallVector<int64_t> dynamicStrides(rankedTensorType.getRank(),
545                                       ShapedType::kDynamicStrideOrOffset);
546   AffineMap stridedLayout = makeStridedLinearLayoutMap(
547       dynamicStrides, dynamicOffset, rankedTensorType.getContext());
548   return MemRefType::get(rankedTensorType.getShape(),
549                          rankedTensorType.getElementType(), stridedLayout,
550                          memorySpace);
551 }
552