//===- FusePadOpWithLinalgProducer.cpp ---- Fuse pad with linalg producer -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements patterns that fuse a linalg.generic -> tensor.pad op
// chain into a tensor.extract_slice -> linalg.generic -> tensor.insert_slice
// op chain.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

namespace {

/// A sequence of operations
///
/// ```mlir
/// %0 = linalg. ...
/// %1 = tensor.pad %0 ...
/// ```
///
/// can be replaced with
///
/// ```mlir
/// %0 = linalg.fill
/// %1 = tensor.extract_slice %0 ...
/// %2 = linalg. .... outs(..., %1, ....) ....
/// %3 = tensor.insert_slice %2 into %0 ...
/// ```
///
/// if the `linalg.generic` has all parallel iterator types.
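///
/// As a concrete sketch (values and shapes are illustrative, and the exact
/// assembly syntax varies across MLIR versions): a 4x4 elementwise generic
/// whose result is padded by 2 on each side becomes
///
/// ```mlir
/// %init = linalg.init_tensor [8, 8] : tensor<8x8xf32>
/// %fill = linalg.fill ins(%pad_value : f32)
///     outs(%init : tensor<8x8xf32>) -> tensor<8x8xf32>
/// %slice = tensor.extract_slice %fill[2, 2] [4, 4] [1, 1]
///     : tensor<8x8xf32> to tensor<4x4xf32>
/// %gen = linalg.generic ... outs(%slice : tensor<4x4xf32>) ...
/// %res = tensor.insert_slice %gen into %fill[2, 2] [4, 4] [1, 1]
///     : tensor<4x4xf32> into tensor<8x8xf32>
/// ```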
struct FusePadOp : OpRewritePattern<tensor::PadOp> {
  using OpRewritePattern<tensor::PadOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                PatternRewriter &rewriter) const override {
    // Only works on pad ops that set the padding value to a constant.
    Value padValue = padOp.getConstantPaddingValue();
    if (!padValue)
      return rewriter.notifyMatchFailure(padOp, "non constant padding");

    // This pattern could work for any Linalg op. For now restrict it to generic
    // ops.
    Value source = padOp.getSource();
    auto linalgOp = source.getDefiningOp<linalg::GenericOp>();
    if (!linalgOp) {
      return rewriter.notifyMatchFailure(
          padOp, "expected source to be linalg.generic op");
    }
    // All iterator types need to be parallel: with a reduction, the output
    // operand would carry an initial value that cannot simply be replaced by
    // a slice of the fill.
    if (linalgOp.getNumLoops() != linalgOp.getNumParallelLoops()) {
      return rewriter.notifyMatchFailure(
          padOp, "only supported for ops with all parallel iterator types");
    }
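    // Query the pad op for the shape of its result; the reified sizes are
    // used below to create the destination tensor with the correct dynamic
    // dimensions.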
    ReifiedRankedShapedTypeDims resultShape;
    ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
        dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
    if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter,
                                                          resultShape)) ||
        resultShape.size() != 1) {
      return rewriter.notifyMatchFailure(
          padOp, "failed to get shape of pad op result");
    }

    Location loc = padOp.getLoc();

    // Create a tensor of the same size as the output of the pad op.
    RankedTensorType padResultType = padOp.getResultType();
    auto resultSizes = getAsOpFoldResult(resultShape[0]);
    auto initTensor = rewriter.create<linalg::InitTensorOp>(
        loc, resultSizes, padResultType.getElementType());

    // Fill the tensor with the pad value.
    // TODO: There is an option to fill only the boundaries. For now just
    // filling the whole tensor.
    auto fillTensor =
        rewriter.create<linalg::FillOp>(loc, padValue, initTensor.getResult());

    // Construct a slice of the fill result that is to be overwritten by the
    // result of the generic op. The low pad values are the offsets, and the
    // size of the source is the size of the slice.
    // TODO: This insert/extract could potentially be made a utility method.
    unsigned resultNumber = source.cast<OpResult>().getResultNumber();
    SmallVector<OpFoldResult> offsets = padOp.getMixedLowPad();
    SmallVector<OpFoldResult> sizes;
    sizes.reserve(offsets.size());
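    // Static extents of the source are recorded as index attributes; dynamic
    // extents are materialized with tensor.dim ops on the source.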
    for (const auto &shape : llvm::enumerate(
             source.getType().cast<RankedTensorType>().getShape())) {
      if (ShapedType::isDynamic(shape.value())) {
        sizes.push_back(
            rewriter.create<tensor::DimOp>(loc, source, shape.index())
                .getResult());
      } else {
        sizes.push_back(rewriter.getIndexAttr(shape.value()));
      }
    }
    SmallVector<OpFoldResult> strides(offsets.size(), rewriter.getIndexAttr(1));
    auto slice = rewriter.create<tensor::ExtractSliceOp>(
        loc, fillTensor.getResult(0), offsets, sizes, strides);

    // Clone the generic op.
    auto clonedOp =
        cast<linalg::GenericOp>(rewriter.clone(*linalgOp.getOperation()));
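    // Only the output operand feeding the pad is redirected to the slice of
    // the fill; all inputs and any other outputs of the cloned op are left
    // unchanged.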
    clonedOp.setOutputOperand(resultNumber, slice.getResult());

    // Insert the cloned op's result back into the result of the fill.
    rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
        padOp, clonedOp.getResult(resultNumber), fillTensor.getResult(0),
        offsets, sizes, strides);
    return success();
  }
};
} // namespace

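/// A minimal driver sketch for this pattern, assuming an enclosing pass with
/// a `funcOp` in scope (illustrative, not part of this file):
///
///   RewritePatternSet patterns(funcOp.getContext());
///   linalg::populateFuseTensorPadWithProducerLinalgOpPatterns(patterns);
///   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));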
void mlir::linalg::populateFuseTensorPadWithProducerLinalgOpPatterns(
    RewritePatternSet &patterns) {
  patterns.add<FusePadOp>(patterns.getContext());
}