//===- FusePadOpWithLinalgProducer.cpp ---- Fuse pad with linalg producer -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements patterns that fuse a linalg.generic -> tensor.pad op
// chain into a tensor.extract_slice -> linalg.generic -> tensor.insert_slice
// op chain.
//
//===----------------------------------------------------------------------===//
14
15 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
16
17 #include "mlir/Dialect/Linalg/IR/Linalg.h"
18 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
19
20 using namespace mlir;
21
22 namespace {
23
24 /// A sequence of operations
25 ///
26 /// ```mlir
27 /// %0 = linalg. ...
28 /// %1 = tensor.pad %0 ...
29 /// ```
30 ///
31 /// can be replaced with
32 ///
33 /// ```mlir
34 /// %0 = linalg.fill
35 /// %1 = tensor.extract_slice %0 ...
36 /// %2 = linalg. .... outs(..., %1, ....) ....
37 /// %3 = tensor.insert_slice %2 into %1 ...
38 /// ```
39 ///
40 /// if the `linalg.generic` has all parallel iterator types.
41 struct FusePadOp : OpRewritePattern<tensor::PadOp> {
42 using OpRewritePattern<tensor::PadOp>::OpRewritePattern;
43
matchAndRewrite__anonb62012400111::FusePadOp44 LogicalResult matchAndRewrite(tensor::PadOp padOp,
45 PatternRewriter &rewriter) const override {
46 // Only works on padding op that sets the padded value to a constant.
47 Value padValue = padOp.getConstantPaddingValue();
48 if (!padValue)
49 return rewriter.notifyMatchFailure(padOp, "non constant padding");
50
51 // This pattern could work for any Linalg op. For now restrict it to generic
52 // ops.
53 Value source = padOp.getSource();
54 auto linalgOp = source.getDefiningOp<linalg::GenericOp>();
55 if (!linalgOp) {
56 return rewriter.notifyMatchFailure(
57 padOp, "expected source to be linalg.generic op");
58 }
59 // All iterator types need to be parallel.
60 if (linalgOp.getNumLoops() != linalgOp.getNumParallelLoops()) {
61 return rewriter.notifyMatchFailure(
62 padOp, "only supported for ops with all parallel iterator types");
63 }
64 ReifiedRankedShapedTypeDims resultShape;
65 ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
66 dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
67 if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter,
68 resultShape)) ||
69 resultShape.size() != 1) {
70 return rewriter.notifyMatchFailure(
71 padOp, "failed to get shape of pad op result");
72 }
73
74 Location loc = padOp.getLoc();
75
76 // Create the tensor of same size as output of the pad op.
77 RankedTensorType padResultType = padOp.getResultType();
78 auto resultSizes = getAsOpFoldResult(resultShape[0]);
79 auto initTensor = rewriter.create<linalg::InitTensorOp>(
80 loc, resultSizes, padResultType.getElementType());
81
82 // Fill the tensor with the pad value.
83 // TODO: There is an option to fill only the boundaries. For now just
84 // filling the whole tensor.
85 auto fillTensor =
86 rewriter.create<linalg::FillOp>(loc, padValue, initTensor.getResult());
87
88 // Construct a slice of the fill result that is to be replaced with the
89 // result of the generic op. The low pad values are the offsets, the size of
90 // the source is the size of the slice.
91 // TODO: This insert/extract could be potentially made a utility method.
92 unsigned resultNumber = source.cast<OpResult>().getResultNumber();
93 SmallVector<OpFoldResult> offsets = padOp.getMixedLowPad();
94 SmallVector<OpFoldResult> sizes;
95 sizes.reserve(offsets.size());
96 for (const auto &shape : llvm::enumerate(
97 source.getType().cast<RankedTensorType>().getShape())) {
98 if (ShapedType::isDynamic(shape.value())) {
99 sizes.push_back(
100 rewriter.create<tensor::DimOp>(loc, source, shape.index())
101 .getResult());
102 } else {
103 sizes.push_back(rewriter.getIndexAttr(shape.value()));
104 }
105 }
106 SmallVector<OpFoldResult> strides(offsets.size(), rewriter.getIndexAttr(1));
107 auto slice = rewriter.create<tensor::ExtractSliceOp>(
108 loc, fillTensor.getResult(0), offsets, sizes, strides);
109
110 // Clone the generic op.
111 auto clonedOp =
112 cast<linalg::GenericOp>(rewriter.clone(*linalgOp.getOperation()));
113 clonedOp.setOutputOperand(resultNumber, slice.getResult());
114
115 // Insert it back into the result of the fill.
116 rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
117 padOp, clonedOp.getResult(resultNumber), fillTensor.getResult(0),
118 offsets, sizes, strides);
119 return success();
120 }
121 };
122 } // namespace
123
populateFuseTensorPadWithProducerLinalgOpPatterns(RewritePatternSet & patterns)124 void mlir::linalg::populateFuseTensorPadWithProducerLinalgOpPatterns(
125 RewritePatternSet &patterns) {
126 patterns.add<FusePadOp>(patterns.getContext());
127 }
128