1 //===- Loops.cpp - conversion from Linalg named and generic ops to loops --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "PassDetail.h"
10 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
11 #include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h"
12 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
13 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
14 #include "mlir/Dialect/Linalg/Passes.h"
15 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
16 #include "mlir/Dialect/Linalg/Utils/Utils.h"
17 #include "mlir/Dialect/SCF/EDSC/Builders.h"
18 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
19 #include "mlir/IR/AffineExpr.h"
20 #include "mlir/IR/AffineMap.h"
21 #include "mlir/IR/BlockAndValueMapping.h"
22 #include "mlir/Support/LLVM.h"
23 #include "mlir/Transforms/DialectConversion.h"
24 #include "mlir/Transforms/FoldUtils.h"
25 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
26 #include "llvm/ADT/TypeSwitch.h"
27 
28 using namespace mlir;
29 using namespace mlir::edsc;
30 using namespace mlir::edsc::intrinsics;
31 using namespace mlir::linalg;
32 
33 using edsc::op::operator+;
34 
35 static SmallVector<Value, 8> makeCanonicalAffineApplies(OpBuilder &b,
36                                                         Location loc,
37                                                         AffineMap map,
38                                                         ArrayRef<Value> vals) {
39   if (map.isEmpty())
40     return {};
41 
42   assert(map.getNumInputs() == vals.size());
43   SmallVector<Value, 8> res;
44   res.reserve(map.getNumResults());
45   auto dims = map.getNumDims();
46   for (auto e : map.getResults()) {
47     auto exprMap = AffineMap::get(dims, map.getNumSymbols(), e);
48     SmallVector<Value, 4> operands(vals.begin(), vals.end());
49     canonicalizeMapAndOperands(&exprMap, &operands);
50     res.push_back(affine_apply(exprMap, operands));
51   }
52   return res;
53 }
54 
55 static SmallVector<Value, 4> permuteIvs(ArrayRef<Value> ivs,
56                                         Optional<AffineMap> permutation) {
57   return permutation ? applyMapToValues(ScopedContext::getBuilderRef(),
58                                         ScopedContext::getLocation(),
59                                         permutation.getValue(), ivs)
60                      : SmallVector<Value, 4>(ivs.begin(), ivs.end());
61 }
62 
63 template <typename IndexedValueType, typename OpType>
64 static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
65                                      ArrayRef<SmallVector<Value, 8>> indexing,
66                                      ArrayRef<Value> outputBuffers) {
67   assert(op->getNumRegions() == 1 && "Expected single region op");
68   auto &b = ScopedContext::getBuilderRef();
69   auto &block = op->getRegion(0).front();
70   BlockAndValueMapping map;
71   map.map(block.getArguments(), indexedValues);
72   for (auto &op : block.without_terminator()) {
73     auto *newOp = b.clone(op, map);
74     map.map(op.getResults(), newOp->getResults());
75   }
76 
77   Operation &terminator = block.back();
78   assert(isa<linalg::YieldOp>(terminator) &&
79          "expected a yield op in the end of the region");
80   for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) {
81     IndexedValueType O(outputBuffers[i]);
82     O(indexing[i]) = map.lookupOrDefault(terminator.getOperand(i));
83   }
84 }
85 
86 // Returns a pair that contains input indices and output indices of a
87 // SingleInputPoolingOp `op`.
88 struct InputAndOutputIndices {
89   SmallVector<Value, 8> inputs;
90   SmallVector<Value, 8> outputs;
91 };
92 template <typename SingleInputPoolingOp>
93 static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
94                                                       SingleInputPoolingOp op) {
95   auto &b = ScopedContext::getBuilderRef();
96   auto loc = ScopedContext::getLocation();
97   auto mapsRange = op.indexing_maps().template getAsRange<AffineMapAttr>();
98   auto maps = llvm::to_vector<8>(
99       llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
100   return InputAndOutputIndices{
101       makeCanonicalAffineApplies(b, loc, maps[0], allIvs),
102       makeCanonicalAffineApplies(b, loc, maps[2], allIvs)};
103 }
104 
105 /// Emits the MLIR for the scalar part of the generic op by:
106 ///   1. Emitting load ops for each input and output view in order. This is
107 ///      achieved by applying the appropriate input or output map to the
108 ///      enclosing induction variables.
109 ///   2. Emitting a call to `op.fun()` that takes as arguments the scalars
110 ///      from point 1. above.
111 ///   3. Emitting store ops to store the results of 2. to the output
112 ///      views.
113 ///
114 /// An example output may resemble:
115 ///
116 /// ```
117 ///    scf.for %i = %c0 to %0 step %c1 {
118 ///      scf.for %j = %c0 to %1 step %c1 {
119 ///        scf.for %k = %c0 to %4 step %c1 {
120 ///          %11 = load %arg0[%i, %j] :
121 ///            memref<?x?xf32, stride_specification>
122 ///          %12 = load %arg1[%i, %j, %k] :
123 ///            memref<?x?x?xf32, stride_specification>
124 ///          %13 = load %arg2[%i, %k, %j] :
125 ///            memref<?x?x?xf32, stride_specification>
126 ///          %14:2 = call @foo(%11, %12, %13) : (f32, f32, f32) -> (f32, f32)
127 ///          store %14#0, %arg1[%i, %j, %k] :
128 ///            memref<?x?x?Xf32, stride_specification>
129 ///          store %14#1, %arg2[%i, %k, %j] :
130 ///            memref<?x?x?Xf32, stride_specification>
131 ///       }
132 ///      }
133 ///    }
134 /// ```
135 template <typename IndexedValueType>
136 static void emitScalarImplementation(ArrayRef<Value> allIvs,
137                                      LinalgOp linalgOp) {
138   assert(linalgOp.hasBufferSemantics() &&
139          "expected linalg op with buffer semantics");
140   auto &b = ScopedContext::getBuilderRef();
141   auto loc = ScopedContext::getLocation();
142   unsigned nInputs = linalgOp.getNumInputs();
143   unsigned nOutputs = linalgOp.getNumOutputs();
144   SmallVector<Value, 4> indexedValues;
145   indexedValues.reserve(nInputs + nOutputs);
146 
147   auto allIvsPlusDims = SmallVector<Value, 4>(allIvs.begin(), allIvs.end());
148 
149   // TODO: Avoid the loads if the corresponding argument of the
150   // region has no uses.
151   // 1.a. Emit load from input views.
152   for (unsigned i = 0; i < nInputs; ++i) {
153     auto indexing = makeCanonicalAffineApplies(
154         b, loc, linalgOp.getInputIndexingMap(i), allIvsPlusDims);
155     // Passing through IndexedValueType emits the proper load operation.
156     indexedValues.push_back(IndexedValueType(linalgOp.getInput(i))(indexing));
157   }
158   // 1.b. Emit load from output views.
159   for (unsigned i = 0; i < nOutputs; ++i) {
160     auto indexing = makeCanonicalAffineApplies(
161         b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims);
162     // Passing through IndexedValueType emits the proper load operation.
163     indexedValues.push_back(
164         IndexedValueType(linalgOp.getOutputBuffer(i))(indexing));
165   }
166 
167   // TODO: When a region inliner exists, use it.
168   // 2. Inline region, currently only works for a single basic block.
169   // 3. Emit store.
170   SmallVector<SmallVector<Value, 8>, 8> indexing;
171   SmallVector<Value, 8> outputBuffers;
172   for (unsigned i = 0; i < nOutputs; ++i) {
173     indexing.push_back(makeCanonicalAffineApplies(
174         b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims));
175     outputBuffers.push_back(linalgOp.getOutputBuffer(i));
176   }
177   inlineRegionAndEmitStore<IndexedValueType>(linalgOp, indexedValues, indexing,
178                                              outputBuffers);
179 }
180 
181 template <typename IndexedValueType>
182 static void emitScalarImplementation(ArrayRef<Value> allIvs, CopyOp copyOp) {
183   assert(copyOp.hasBufferSemantics() &&
184          "expected linalg op with buffer semantics");
185   auto nPar = copyOp.getNumParallelLoops();
186   assert(nPar == allIvs.size());
187   auto inputIvs =
188       permuteIvs(allIvs.take_front(nPar), copyOp.inputPermutation());
189   auto outputIvs =
190       permuteIvs(allIvs.take_front(nPar), copyOp.outputPermutation());
191   SmallVector<Value, 8> iivs(inputIvs.begin(), inputIvs.end());
192   SmallVector<Value, 8> oivs(outputIvs.begin(), outputIvs.end());
193   IndexedValueType O(copyOp.getOutputBuffer(0)), I(copyOp.getInput(0));
194   // Emit the proper scalar assignment, whether we are dealing with a 0-D or
195   // an n-D loop nest; with or without permutations.
196   // clang-format off
197     nPar > 0 ? O(oivs) = I(iivs) :
198                O() = I();
199   // clang-format on
200 }
201 
202 template <typename IndexedValueType>
203 static void emitScalarImplementation(ArrayRef<Value> allIvs, FillOp fillOp) {
204   assert(fillOp.hasBufferSemantics() &&
205          "expected linalg op with buffer semantics");
206   auto nPar = fillOp.getNumParallelLoops();
207   assert(nPar == allIvs.size());
208   auto ivs = SmallVector<Value, 4>(allIvs.begin(), allIvs.begin() + nPar);
209   IndexedValueType O(fillOp.getOutputBuffer(0));
210   // Emit the proper scalar assignment, whether we are dealing with a 0-D or
211   // an n-D loop nest; with or without permutations.
212   nPar > 0 ? O(ivs) = fillOp.value() : O() = fillOp.value();
213 }
214 
215 // Create a padded view into the given `input` tensor using the 'indices'
216 // to access the tensor. `skipPadding` lists the dimensions for which no padding
217 // is needed e.g. the non-spatial dimensions for convolutions.
218 template <typename IndexedValueType>
219 Value getPaddedInput(Value input, ArrayRef<Value> indices,
220                      ArrayRef<int> skipPadding, Value padValue) {
221   // TODO: add a level of indirection to linalg.generic.
222 
223   IndexedValueType indexedInput(input);
224 
225   auto *context = ScopedContext::getContext();
226   Value zeroIndex = std_constant_index(0);
227   SmallVector<Value, 8> conds;
228   SmallVector<Value, 8> clampedImIdx;
229   for (auto iter : llvm::enumerate(indices)) {
230     int idx = iter.index();
231     auto dim = iter.value();
232     if (is_contained(skipPadding, idx)) {
233       clampedImIdx.push_back(dim);
234       continue;
235     }
236 
237     using edsc::op::sge;
238     using edsc::op::slt;
239     using edsc::op::operator||;
240     Value leftOutOfBound = slt(dim, zeroIndex);
241     if (conds.empty())
242       conds.push_back(leftOutOfBound);
243     else
244       conds.push_back(conds.back() || leftOutOfBound);
245     Value rightBound = std_dim(input, idx);
246     conds.push_back(conds.back() || (sge(dim, rightBound)));
247 
248     // When padding is involved, the indices will only be shifted to negative,
249     // so having a max op is enough.
250     auto maxMap = AffineMap::get(/*dimCount=*/1, 0,
251                                  {getAffineDimExpr(/*position=*/0, context),
252                                   getAffineConstantExpr(0, context)},
253                                  context);
254     clampedImIdx.push_back(affine_max(dim.getType(), maxMap, ValueRange{dim}));
255   }
256 
257   Value readInput = indexedInput(clampedImIdx);
258   return conds.empty() ? readInput
259                        : (Value)std_select(conds.back(), padValue, readInput);
260 }
261 
262 namespace {
263 
264 /// The padding value for a given Op depends on the semantics of the Op.
265 /// The identity value for ConvOp and PoolingSumOp is 0, for PoolingMaxOp is
266 /// -inf or minInt and for PoolingMinOp is inf or maxInt.
267 template <typename OpType>
268 Attribute getPadValueAttr(Type type) {
269   llvm_unreachable("Unexpected op type for getPadValueAttr");
270   return {};
271 }
272 
273 template <>
274 Attribute getPadValueAttr<PoolingMaxOp>(Type type) {
275   auto &b = ScopedContext::getBuilderRef();
276   if (auto floatType = type.dyn_cast<FloatType>()) {
277     return b.getFloatAttr(
278         floatType,
279         APFloat::getInf(floatType.getFloatSemantics(), /*Negative*/ true));
280   }
281   if (auto intType = type.dyn_cast<IntegerType>()) {
282     unsigned width = intType.getWidth();
283     // The select instruction used to lower the PoolingMin uses a signed
284     // comparison, use a signed constant irrespective of the signedness of the
285     // integer type.
286     return b.getIntegerAttr(intType, APInt::getSignedMinValue(width));
287   }
288   llvm_unreachable("Unsupported data type for PoolingMaxOp");
289   return {};
290 }
291 
292 template <>
293 Attribute getPadValueAttr<PoolingMinOp>(Type type) {
294   auto &b = ScopedContext::getBuilderRef();
295   if (auto floatType = type.dyn_cast<FloatType>()) {
296     return b.getFloatAttr(floatType,
297                           APFloat::getInf(floatType.getFloatSemantics()));
298   }
299   if (auto intType = type.dyn_cast<IntegerType>()) {
300     unsigned width = intType.getWidth();
301     // The select instruction used to lower the PoolingMin uses a signed
302     // comparison, use a signed constant irrespective of the signedness of the
303     // integer type.
304     return b.getIntegerAttr(intType, APInt::getSignedMaxValue(width));
305   }
306   llvm_unreachable("Unsupported data type for PoolingMinOp");
307   return {};
308 }
309 
310 template <>
311 Attribute getPadValueAttr<PoolingSumOp>(Type type) {
312   auto &b = ScopedContext::getBuilderRef();
313   return b.getZeroAttr(type);
314 }
315 
316 template <>
317 Attribute getPadValueAttr<ConvOp>(Type type) {
318   auto &b = ScopedContext::getBuilderRef();
319   return b.getZeroAttr(type);
320 }
321 
322 } // namespace
323 
324 /// Returns true is `convOp` has a non-zero padding.
325 static bool hasPadding(ConvOp convOp) {
326   for (unsigned i = 0, e = convOp.getNumSpatialDimensions(); i < e; ++i) {
327     if (convOp.getLowPad(i) > 0 || convOp.getHighPad(i) > 0)
328       return true;
329   }
330   return false;
331 }
332 
333 template <typename IndexedValueType>
334 static void emitScalarImplementation(ArrayRef<Value> allIvs, ConvOp convOp) {
335   assert(convOp.hasBufferSemantics() &&
336          "expected linalg op with buffer semantics");
337   auto &b = ScopedContext::getBuilderRef();
338   auto loc = ScopedContext::getLocation();
339   auto mapsRange = convOp.indexing_maps().getAsRange<AffineMapAttr>();
340   auto maps = llvm::to_vector<8>(
341       llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
342   SmallVector<Value, 8> fIdx(
343       makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
344   SmallVector<Value, 8> imIdx(
345       makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
346   SmallVector<Value, 8> oIdx(
347       makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
348 
349   IndexedValueType F(convOp.filter()), O(convOp.output());
350 
351   // Emit scalar form. Padded conv involves an affine.max in the memory access
352   // which is not allowed by affine.load. Override to use an StdIndexedValue
353   // when there is non-zero padding.
354   if (hasPadding(convOp)) {
355     Type type = convOp.input().getType().cast<MemRefType>().getElementType();
356     Value padValue = std_constant(type, getPadValueAttr<ConvOp>(type));
357     Value paddedInput = getPaddedInput<StdIndexedValue>(
358         convOp.input(), imIdx,
359         /* Only need to pad the window dimensions */
360         {0, static_cast<int>(imIdx.size()) - 1}, padValue);
361     O(oIdx) += F(fIdx) * paddedInput;
362   } else {
363     IndexedValueType I(convOp.input());
364     O(oIdx) += F(fIdx) * I(imIdx);
365   }
366 }
367 
/// Returns true if `poolingOp` has a non-zero low or high padding on any of
/// its window loops.
template <typename PoolingOp>
static bool hasPadding(PoolingOp poolingOp) {
  const unsigned numWindowLoops = poolingOp.getNumWindowLoops();
  for (unsigned dim = 0; dim != numWindowLoops; ++dim) {
    if (poolingOp.getLowPad(dim) > 0)
      return true;
    if (poolingOp.getHighPad(dim) > 0)
      return true;
  }
  return false;
}
376 
377 template <typename IndexedValueType, typename PoolingOp>
378 static Value getPoolingInput(PoolingOp op, ArrayRef<Value> inputIndices) {
379   if (hasPadding(op)) {
380     Type type =
381         op.input().getType().template cast<MemRefType>().getElementType();
382     Value padValue = std_constant(type, getPadValueAttr<PoolingOp>(type));
383     return getPaddedInput<StdIndexedValue>(op.input(), inputIndices,
384                                            /*Pad every dimension*/ {},
385                                            padValue);
386   }
387   IndexedValueType input(op.input());
388   return input(inputIndices);
389 }
390 
391 template <typename IndexedValueType, typename OpType>
392 void emitPoolingMinMaxScalarImplementation(ArrayRef<Value> allIvs, OpType op) {
393   InputAndOutputIndices indices = getInputAndOutputIndices(allIvs, op);
394   // Emit scalar form.
395   IndexedValueType output(op.output());
396   Value lhs = output(indices.outputs);
397   Value rhs = getPoolingInput<IndexedValueType>(op, indices.inputs);
398   using edsc::op::sgt;
399   using edsc::op::slt;
400   Value value = std::is_same<OpType, PoolingMinOp>()
401                     ? std_select(slt(lhs, rhs), lhs, rhs)
402                     : std_select(sgt(lhs, rhs), lhs, rhs);
403   output(indices.outputs) = value;
404 }
405 
406 template <typename IndexedValueType>
407 static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMaxOp op) {
408   emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMaxOp>(allIvs,
409                                                                         op);
410 }
411 
412 template <typename IndexedValueType>
413 static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMinOp op) {
414   emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMinOp>(allIvs,
415                                                                         op);
416 }
417 
418 template <typename IndexedValueType>
419 static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingSumOp op) {
420   auto indices = getInputAndOutputIndices(allIvs, op);
421   IndexedValueType output(op.output());
422 
423   // Emit scalar form.
424   output(indices.outputs) +=
425       getPoolingInput<IndexedValueType>(op, indices.inputs);
426 }
427 
428 /// Emits the MLIR for the scalar part of the indexed generic op by:
429 ///   1. Emitting load ops for each input and output view in order. This is
430 ///      achieved by applying the appropriate input or output map to the
431 ///      enclosing induction variables.
432 ///   2. Emitting a call to `op.fun()` that takes as arguments the induction
433 ///      variables and the scalars from point 1. above.
434 ///   3. Emitting store ops to store the results of 2. to the output views.
435 ///
436 /// An example output may resemble:
437 ///
438 /// ```
439 ///    scf.for %i = %c0 to %0 step %c1 {
440 ///      scf.for %j = %c0 to %1 step %c1 {
441 ///        scf.for %k = %c0 to %4 step %c1 {
442 ///          %11 = load %arg0[%i, %j] :
443 ///            memref<?x?xf32, stride_specification>
444 ///          %12 = load %arg1[%i, %j, %k] :
445 ///            memref<?x?x?xf32, stride_specification>
446 ///          %13 = load %arg2[%i, %k, %j] :
447 ///            memref<?x?x?xf32, stride_specification>
448 ///          %14:2 = call @foo(%i, %j, %k, %11, %12, %13) :
449 ///            (index, index, index, f32, f32, f32) -> (f32, f32)
450 ///          store %14#0, %arg1[%i, %j, %k] :
451 ///            memref<?x?x?Xf32, stride_specification>
452 ///          store %14#1, %arg2[%i, %k, %j] :
453 ///            memref<?x?x?Xf32, stride_specification>
454 ///       }
455 ///      }
456 ///    }
457 /// ```
458 template <typename IndexedValueType>
459 static void emitScalarImplementation(ArrayRef<Value> allIvs,
460                                      IndexedGenericOp indexedGenericOp) {
461   assert(indexedGenericOp.hasBufferSemantics() &&
462          "expected linalg op with buffer semantics");
463   auto &b = ScopedContext::getBuilderRef();
464   auto loc = ScopedContext::getLocation();
465   unsigned nInputs = indexedGenericOp.getNumInputs();
466   unsigned nOutputs = indexedGenericOp.getNumOutputs();
467   unsigned nLoops = allIvs.size();
468   SmallVector<Value, 4> indexedValues;
469   indexedValues.reserve(nLoops + nInputs + nOutputs);
470   for (unsigned i = 0; i < nLoops; ++i)
471     indexedValues.push_back(allIvs[i]);
472 
473   // TODO: Avoid the loads if the corresponding argument of the
474   // region has no uses.
475   // 1.a. Emit load from input views.
476   for (unsigned i = 0; i < nInputs; ++i) {
477     auto indexing = makeCanonicalAffineApplies(
478         b, loc, indexedGenericOp.getInputIndexingMap(i), allIvs);
479     // Pass input i through IndexedValueType emits the proper load operation.
480     indexedValues.push_back(
481         IndexedValueType(indexedGenericOp.getInput(i))(indexing));
482   }
483   // 1.b. Emit load from output views.
484   for (unsigned i = 0; i < nOutputs; ++i) {
485     auto indexing = makeCanonicalAffineApplies(
486         b, loc, indexedGenericOp.getOutputIndexingMap(i), allIvs);
487     // Pass output i through IndexedValueType emits the proper load operation.
488     indexedValues.push_back(
489         IndexedValueType(indexedGenericOp.getOutputBuffer(i))(indexing));
490   }
491 
492   // TODO: When a region inliner exists, use it.
493   // 2. Inline region, currently only works for a single basic block.
494   // 3. Emit store.
495   SmallVector<SmallVector<Value, 8>, 8> indexing;
496   SmallVector<Value, 8> outputBuffers;
497   for (unsigned i = 0; i < nOutputs; ++i) {
498     indexing.push_back(makeCanonicalAffineApplies(
499         b, loc, indexedGenericOp.getOutputIndexingMap(i), allIvs));
500     outputBuffers.push_back(indexedGenericOp.getOutputBuffer(i));
501   }
502   inlineRegionAndEmitStore<IndexedValueType>(indexedGenericOp, indexedValues,
503                                              indexing, outputBuffers);
504 }
505 
506 template <typename LoopTy>
507 static Optional<LinalgLoops>
508 linalgOpToLoopsImpl(Operation *op, OpBuilder &builder,
509                     ArrayRef<unsigned> interchangeVector) {
510   using IndexedValueTy = typename GenerateLoopNest<LoopTy>::IndexedValueTy;
511   ScopedContext scope(builder, op->getLoc());
512 
513   // The flattened loopToOperandRangesMaps is expected to be an invertible
514   // permutation map (which is asserted in the inverse calculation).
515   auto linalgOp = cast<LinalgOp>(op);
516   assert(linalgOp.hasBufferSemantics() &&
517          "expected linalg op with buffer semantics");
518 
519   auto loopRanges = linalgOp.createLoopRanges(builder, op->getLoc());
520   auto iteratorTypes = llvm::to_vector<4>(linalgOp.iterator_types().getValue());
521 
522   if (!interchangeVector.empty()) {
523     assert(interchangeVector.size() == loopRanges.size());
524     assert(interchangeVector.size() == iteratorTypes.size());
525     applyPermutationToVector(loopRanges, interchangeVector);
526     applyPermutationToVector(iteratorTypes, interchangeVector);
527   }
528 
529   SmallVector<Value, 4> allIvs;
530   GenerateLoopNest<LoopTy>::doit(
531       loopRanges, /*iterInitArgs=*/{}, iteratorTypes,
532       [&](ValueRange ivs, ValueRange iterArgs) -> scf::ValueVector {
533         assert(iterArgs.empty() && "unexpected iterArgs");
534         allIvs.append(ivs.begin(), ivs.end());
535         llvm::TypeSwitch<Operation *>(op)
536             .Case<CopyOp, FillOp, ConvOp, PoolingMaxOp, PoolingMinOp,
537                   PoolingSumOp, IndexedGenericOp, LinalgOp>([&](auto op) {
538               emitScalarImplementation<IndexedValueTy>(allIvs, op);
539             })
540             .Default([&](Operation *op) { assert(false && "unexpected op"); });
541         return scf::ValueVector{};
542       });
543   // Number of loop ops might be different from the number of ivs since some
544   // loops like affine.parallel and scf.parallel have multiple ivs.
545   llvm::SetVector<Operation *> loopSet;
546   for (Value iv : allIvs) {
547     if (!iv)
548       return {};
549     // The induction variable is a block argument of the entry block of the
550     // loop operation.
551     BlockArgument ivVal = iv.dyn_cast<BlockArgument>();
552     if (!ivVal)
553       return {};
554     loopSet.insert(ivVal.getOwner()->getParentOp());
555   }
556   LinalgLoops loops(loopSet.begin(), loopSet.end());
557   return loops;
558 }
559 
560 namespace {
561 template <typename LoopType>
562 class LinalgRewritePattern : public RewritePattern {
563 public:
564   LinalgRewritePattern(ArrayRef<unsigned> interchangeVector)
565       : RewritePattern(/*benefit=*/1, MatchAnyOpTypeTag()),
566         interchangeVector(interchangeVector.begin(), interchangeVector.end()) {}
567 
568   LogicalResult matchAndRewrite(Operation *op,
569                                 PatternRewriter &rewriter) const override {
570     if (!isa<LinalgOp>(op))
571       return failure();
572     if (!linalgOpToLoopsImpl<LoopType>(op, rewriter, interchangeVector))
573       return failure();
574     rewriter.eraseOp(op);
575     return success();
576   }
577 
578 private:
579   SmallVector<unsigned, 4> interchangeVector;
580 };
581 
582 struct FoldAffineOp;
583 } // namespace
584 
585 template <typename LoopType>
586 static void lowerLinalgToLoopsImpl(FuncOp funcOp,
587                                    ArrayRef<unsigned> interchangeVector) {
588   MLIRContext *context = funcOp.getContext();
589   OwningRewritePatternList patterns;
590   patterns.insert<LinalgRewritePattern<LoopType>>(interchangeVector);
591   DimOp::getCanonicalizationPatterns(patterns, context);
592   AffineApplyOp::getCanonicalizationPatterns(patterns, context);
593   patterns.insert<FoldAffineOp>(context);
594   // Just apply the patterns greedily.
595   applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
596 }
597 
598 namespace {
599 /// Local folding pattern for AffineApplyOp that we can apply greedily.
600 /// This replaces AffineApplyOp by the proper value in cases where the
601 /// associated map is trivial.
602 /// A trivial map here is defined as a map with a single result and either:
603 ///   1. Zero operand + returns a single AffineConstantExpr
604 ///   2. One operand + returns a single AffineDimExpr
605 ///   3. One operand + returns a single AffineSymbolExpr
606 //
607 /// In the first case, the AffineApplyOp is replaced by a new constant. In the
608 /// other cases, it is replaced by its unique operand.
609 struct FoldAffineOp : public RewritePattern {
610   FoldAffineOp(MLIRContext *context)
611       : RewritePattern(AffineApplyOp::getOperationName(), 0, context) {}
612 
613   LogicalResult matchAndRewrite(Operation *op,
614                                 PatternRewriter &rewriter) const override {
615     AffineApplyOp affineApplyOp = cast<AffineApplyOp>(op);
616     auto map = affineApplyOp.getAffineMap();
617     if (map.getNumResults() != 1 || map.getNumInputs() > 1)
618       return failure();
619 
620     AffineExpr expr = map.getResult(0);
621     if (map.getNumInputs() == 0) {
622       if (auto val = expr.dyn_cast<AffineConstantExpr>()) {
623         rewriter.replaceOpWithNewOp<ConstantIndexOp>(op, val.getValue());
624         return success();
625       }
626       return failure();
627     }
628     if (expr.dyn_cast<AffineDimExpr>() || expr.dyn_cast<AffineSymbolExpr>()) {
629       rewriter.replaceOp(op, op->getOperand(0));
630       return success();
631     }
632     return failure();
633   }
634 };
635 
636 struct LowerToAffineLoops
637     : public LinalgLowerToAffineLoopsBase<LowerToAffineLoops> {
638   void runOnFunction() override {
639     lowerLinalgToLoopsImpl<AffineForOp>(getFunction(), interchangeVector);
640   }
641 };
642 
643 struct LowerToLoops : public LinalgLowerToLoopsBase<LowerToLoops> {
644   void runOnFunction() override {
645     lowerLinalgToLoopsImpl<scf::ForOp>(getFunction(), interchangeVector);
646   }
647 };
648 
649 struct LowerToParallelLoops
650     : public LinalgLowerToParallelLoopsBase<LowerToParallelLoops> {
651   void runOnFunction() override {
652     lowerLinalgToLoopsImpl<scf::ParallelOp>(getFunction(), interchangeVector);
653   }
654 };
655 } // namespace
656 
657 std::unique_ptr<OperationPass<FuncOp>> mlir::createConvertLinalgToLoopsPass() {
658   return std::make_unique<LowerToLoops>();
659 }
660 
661 std::unique_ptr<OperationPass<FuncOp>>
662 mlir::createConvertLinalgToParallelLoopsPass() {
663   return std::make_unique<LowerToParallelLoops>();
664 }
665 
666 std::unique_ptr<OperationPass<FuncOp>>
667 mlir::createConvertLinalgToAffineLoopsPass() {
668   return std::make_unique<LowerToAffineLoops>();
669 }
670 
671 /// Emits a loop nest with the proper body for `op`.
672 template <typename LoopTy>
673 Optional<LinalgLoops>
674 mlir::linalg::linalgLowerOpToLoops(OpBuilder &builder, Operation *op,
675                                    ArrayRef<unsigned> interchangeVector) {
676   return linalgOpToLoopsImpl<LoopTy>(op, builder, interchangeVector);
677 }
678 
679 template Optional<LinalgLoops> mlir::linalg::linalgLowerOpToLoops<AffineForOp>(
680     OpBuilder &builder, Operation *op, ArrayRef<unsigned> interchangeVector);
681 template Optional<LinalgLoops> mlir::linalg::linalgLowerOpToLoops<scf::ForOp>(
682     OpBuilder &builder, Operation *op, ArrayRef<unsigned> interchangeVector);
683 template Optional<LinalgLoops>
684 mlir::linalg::linalgLowerOpToLoops<scf::ParallelOp>(
685     OpBuilder &builder, Operation *op, ArrayRef<unsigned> interchangeVector);
686 
687 /// Emits a loop nest of `affine.for` with the proper body for `op`.
688 LogicalResult
689 mlir::linalg::linalgOpToAffineLoops(OpBuilder &builder, Operation *op,
690                                     ArrayRef<unsigned> interchangeVector) {
691   Optional<LinalgLoops> loops =
692       linalgLowerOpToLoops<AffineForOp>(builder, op, interchangeVector);
693   return loops ? success() : failure();
694 }
695 
696 /// Emits a loop nest of `scf.for` with the proper body for `op`.
697 LogicalResult
698 mlir::linalg::linalgOpToLoops(OpBuilder &builder, Operation *op,
699                               ArrayRef<unsigned> interchangeVector) {
700   Optional<LinalgLoops> loops =
701       linalgLowerOpToLoops<scf::ForOp>(builder, op, interchangeVector);
702   return loops ? success() : failure();
703 }
704 
705 /// Emits a loop nest of `scf.parallel` with the proper body for `op`.
706 LogicalResult
707 mlir::linalg::linalgOpToParallelLoops(OpBuilder &builder, Operation *op,
708                                       ArrayRef<unsigned> interchangeVector) {
709   Optional<LinalgLoops> loops =
710       linalgLowerOpToLoops<scf::ParallelOp>(builder, op, interchangeVector);
711   return loops ? success() : failure();
712 }
713