//===- Loops.cpp - conversion from Linalg named and generic ops to loops --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SCF/Transforms.h"
#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/FoldUtils.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/TypeSwitch.h"

using namespace mlir;
using namespace mlir::linalg;

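/// Emits one affine.apply per result of `map`, using `vals` as operands, and
/// returns the resulting values. For example (illustrative values), applying
/// the map (d0, d1, d2) -> (d0, d2) to induction variables [%i, %j, %k] emits
/// two affine.apply ops that compute %i and %k, respectively.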
static SmallVector<Value> makeCanonicalAffineApplies(OpBuilder &b, Location loc,
                                                     AffineMap map,
                                                     ArrayRef<Value> vals) {
  if (map.isEmpty())
    return {};

  assert(map.getNumInputs() == vals.size());
  SmallVector<Value> res;
  res.reserve(map.getNumResults());
  auto dims = map.getNumDims();
  for (auto e : map.getResults()) {
    auto exprMap = AffineMap::get(dims, map.getNumSymbols(), e);
    SmallVector<Value> operands(vals.begin(), vals.end());
    canonicalizeMapAndOperands(&exprMap, &operands);
    res.push_back(b.create<AffineApplyOp>(loc, exprMap, operands));
  }
  return res;
}

template <typename LoadOpTy, typename StoreOpTy, typename OpType>
static void inlineRegionAndEmitStore(OpBuilder &b, Location loc, OpType op,
                                     ArrayRef<Value> indexedValues,
                                     ArrayRef<SmallVector<Value>> indexing,
                                     ArrayRef<Value> outputBuffers) {
  auto &block = op->getRegion(0).front();
  BlockAndValueMapping map;
  map.map(block.getArguments(), indexedValues);
  for (auto &op : block.without_terminator()) {
    auto *newOp = b.clone(op, map);
    map.map(op.getResults(), newOp->getResults());
  }

  Operation *terminator = block.getTerminator();
  for (OpOperand &operand : terminator->getOpOperands()) {
    Value toStore = map.lookupOrDefault(operand.get());
    b.create<StoreOpTy>(loc, toStore, outputBuffers[operand.getOperandNumber()],
                        indexing[operand.getOperandNumber()]);
  }
}

// Returns a pair that contains input indices and output indices of a
// SingleInputPoolingOp `op`.
struct InputAndOutputIndices {
  SmallVector<Value> inputs;
  SmallVector<Value> outputs;
};
template <typename SingleInputPoolingOp>
static InputAndOutputIndices
getInputAndOutputIndices(OpBuilder &b, Location loc, ArrayRef<Value> allIvs,
                         SingleInputPoolingOp op) {
  auto mapsRange = op.indexing_maps().template getAsRange<AffineMapAttr>();
  auto maps = llvm::to_vector<8>(
      llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
  return InputAndOutputIndices{
      makeCanonicalAffineApplies(b, loc, maps[0], allIvs),
      makeCanonicalAffineApplies(b, loc, maps[2], allIvs)};
}

/// Emits the MLIR for the scalar part of the generic op by:
///   1. Emitting load ops for each input and output view in order. This is
///      achieved by applying the appropriate input or output map to the
///      enclosing induction variables.
///   2. Emitting a call to `op.fun()` that takes as arguments the scalars
///      from point 1. above.
///   3. Emitting store ops to store the results of 2. to the output
///      views.
///
/// An example output may resemble:
///
/// ```
///    scf.for %i = %c0 to %0 step %c1 {
///      scf.for %j = %c0 to %1 step %c1 {
///        scf.for %k = %c0 to %4 step %c1 {
///          %11 = load %arg0[%i, %j] :
///            memref<?x?xf32, stride_specification>
///          %12 = load %arg1[%i, %j, %k] :
///            memref<?x?x?xf32, stride_specification>
///          %13 = load %arg2[%i, %k, %j] :
///            memref<?x?x?xf32, stride_specification>
///          %14:2 = call @foo(%11, %12, %13) : (f32, f32, f32) -> (f32, f32)
///          store %14#0, %arg1[%i, %j, %k] :
///            memref<?x?x?xf32, stride_specification>
///          store %14#1, %arg2[%i, %k, %j] :
///            memref<?x?x?xf32, stride_specification>
///        }
///      }
///    }
/// ```
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs,
                                     LinalgOp linalgOp) {
  assert(linalgOp.hasBufferSemantics() &&
         "expected linalg op with buffer semantics");
  SmallVector<Value> indexedValues;
  indexedValues.reserve(linalgOp.getNumInputsAndOutputs());

  auto allIvsPlusDims = SmallVector<Value>(allIvs.begin(), allIvs.end());

  // TODO: Avoid the loads if the corresponding argument of the
  // region has no uses.
  // 1.a. Emit load from the input operand or, for scalars, use the operand
  //      itself.
  for (OpOperand *inputOperand : linalgOp.getInputOperands()) {
    if (linalgOp.isScalar(inputOperand)) {
      indexedValues.push_back(inputOperand->get());
      continue;
    }
    auto indexing = makeCanonicalAffineApplies(
        b, loc, linalgOp.getTiedIndexingMap(inputOperand), allIvsPlusDims);
    indexedValues.push_back(
        b.create<LoadOpTy>(loc, inputOperand->get(), indexing));
  }
  // 1.b. Emit load from output views.
  for (OpOperand *outputOperand : linalgOp.getOutputOperands()) {
    SmallVector<Value> indexing = makeCanonicalAffineApplies(
        b, loc, linalgOp.getTiedIndexingMap(outputOperand), allIvsPlusDims);
    indexedValues.push_back(
        b.create<LoadOpTy>(loc, outputOperand->get(), indexing));
  }

  // TODO: When a region inliner exists, use it.
  // 2. Inline region, currently only works for a single basic block.
  // 3. Emit store.
  SmallVector<SmallVector<Value>, 8> indexing;
  SmallVector<Value> outputBuffers;
  for (OpOperand *outputOperand : linalgOp.getOutputBufferOperands()) {
    indexing.push_back(makeCanonicalAffineApplies(
        b, loc, linalgOp.getTiedIndexingMap(outputOperand), allIvsPlusDims));
    outputBuffers.push_back(outputOperand->get());
  }
  inlineRegionAndEmitStore<LoadOpTy, StoreOpTy>(b, loc, linalgOp, indexedValues,
                                                indexing, outputBuffers);
}

/// Replace the index operations in the body of the loop nest by the matching
/// induction variables.
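/// For example (illustrative), a `linalg.index 0` in the body is replaced by
/// the induction variable of the outermost generated loop, `linalg.index 1` by
/// that of the next loop, and so on.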
static void replaceIndexOpsByInductionVariables(LinalgOp linalgOp,
                                                PatternRewriter &rewriter,
                                                ArrayRef<Operation *> loopOps) {
  // Extract the induction variables of the loop nest from outer to inner.
  SmallVector<Value> allIvs;
  for (Operation *loopOp : loopOps) {
    llvm::TypeSwitch<Operation *>(loopOp)
        .Case([&](scf::ParallelOp parallelOp) {
          allIvs.append(parallelOp.getInductionVars().begin(),
                        parallelOp.getInductionVars().end());
        })
        .Case([&](scf::ForOp forOp) {
          allIvs.push_back(forOp.getInductionVar());
        })
        .Case([&](AffineForOp affineForOp) {
          allIvs.push_back(affineForOp.getInductionVar());
        })
        .Default([&](Operation *op) { assert(false && "unexpected op"); });
  }
  assert(linalgOp.getNumLoops() == allIvs.size() &&
         "expected the number of loops and induction variables to match");
  // Replace the index operations in the body of the innermost loop op.
  if (!loopOps.empty()) {
    LoopLikeOpInterface loopOp = loopOps.back();
    for (IndexOp indexOp :
         llvm::make_early_inc_range(loopOp.getLoopBody().getOps<IndexOp>()))
      rewriter.replaceOp(indexOp, allIvs[indexOp.dim()]);
  }
}

template <typename LoopTy>
static FailureOr<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
                                                  LinalgOp linalgOp) {
  using LoadOpTy =
      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
                                AffineLoadOp, memref::LoadOp>::type;
  using StoreOpTy =
      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
                                AffineStoreOp, memref::StoreOp>::type;

  // The flattened loopToOperandRangesMaps is expected to be an invertible
  // permutation map (which is asserted in the inverse calculation).
  assert(linalgOp.hasBufferSemantics() &&
         "expected linalg op with buffer semantics");

  auto loopRanges = linalgOp.createLoopRanges(rewriter, linalgOp.getLoc());
  auto iteratorTypes = llvm::to_vector<4>(linalgOp.iterator_types().getValue());

  SmallVector<Value> allIvs;
  GenerateLoopNest<LoopTy>::doit(
      rewriter, linalgOp.getLoc(), loopRanges, linalgOp, iteratorTypes,
      [&](OpBuilder &b, Location loc, ValueRange ivs,
          ValueRange operandValuesToUse) -> scf::ValueVector {
        assert(operandValuesToUse == linalgOp->getOperands() &&
               "expect operands to be captured and not passed by loop "
               "argument");
        allIvs.append(ivs.begin(), ivs.end());
        emitScalarImplementation<LoadOpTy, StoreOpTy>(b, loc, allIvs, linalgOp);
        return scf::ValueVector{};
      });
  // The number of loop ops might differ from the number of ivs since some
  // loops, such as affine.parallel and scf.parallel, have multiple ivs.
  SetVector<Operation *> loopSet;
  for (Value iv : allIvs) {
    if (!iv)
      return failure();
    // The induction variable is a block argument of the entry block of the
    // loop operation.
    BlockArgument ivVal = iv.dyn_cast<BlockArgument>();
    if (!ivVal)
      return failure();
    loopSet.insert(ivVal.getOwner()->getParentOp());
  }
  LinalgLoops loops(loopSet.begin(), loopSet.end());
  // Replace all index operations in the loop body.
  replaceIndexOpsByInductionVariables(linalgOp, rewriter, loops);
  return loops;
}

namespace {
template <typename LoopType>
class LinalgRewritePattern : public RewritePattern {
public:
  LinalgRewritePattern(MLIRContext *context)
      : RewritePattern(MatchAnyOpTypeTag(), /*benefit=*/1, context) {}

  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override {
    auto linalgOp = dyn_cast<LinalgOp>(op);
    if (!linalgOp)
      return failure();
    if (failed(linalgOpToLoopsImpl<LoopType>(rewriter, linalgOp)))
      return failure();
    rewriter.eraseOp(op);
    return success();
  }
};

/// Converts tiled_loop to SCF loop nests. All parallel dimensions are collected
/// into an scf.parallel loop and all sequential dimensions result in a nested
/// scf.for loop nest. The pattern assumes that a tiled loop with
/// iterator_types ["reduction", "parallel", "reduction"] can be reordered;
/// this holds for the tiling currently supported by Linalg.
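///
/// For example (illustrative), a linalg.tiled_loop with iterator_types
/// ["parallel", "reduction"] becomes an scf.parallel loop over the first
/// dimension whose body contains an scf.for loop over the second dimension.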
struct TiledLoopToSCFPattern : public OpRewritePattern<TiledLoopOp> {
  using OpRewritePattern<TiledLoopOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(TiledLoopOp tiledLoop,
                                PatternRewriter &rewriter) const override {
    // Fail conversion if the `tiled_loop` has not been bufferized.
    if (!tiledLoop.hasBufferSemantics())
      return failure();

    // Collect loop control parameters for parallel and sequential dimensions.
    SmallVector<Value, 3> seqLBs, seqUBs, seqSteps, seqIVs;
    SmallVector<Value, 3> parLBs, parUBs, parSteps, parIVs;
    for (auto en : llvm::enumerate(
             llvm::zip(tiledLoop.lowerBound(), tiledLoop.upperBound(),
                       tiledLoop.step(), tiledLoop.getInductionVars()))) {
      Value lb, ub, step, iv;
      std::tie(lb, ub, step, iv) = en.value();
      if (tiledLoop.isParallelDimension(en.index())) {
        parLBs.push_back(lb);
        parUBs.push_back(ub);
        parSteps.push_back(step);
        parIVs.push_back(iv);
      } else {
        seqLBs.push_back(lb);
        seqUBs.push_back(ub);
        seqSteps.push_back(step);
        seqIVs.push_back(iv);
      }
    }

    Location loc = tiledLoop.getLoc();
    auto generateForLoopNestAndCloneBody = [&](OpBuilder &builder, Location loc,
                                               ValueRange ivs) {
      BlockAndValueMapping bvm;
      bvm.map(parIVs, ivs);
      bvm.map(tiledLoop.getRegionInputArgs(), tiledLoop.inputs());
      bvm.map(tiledLoop.getRegionOutputArgs(), tiledLoop.outputs());

      // If not all dimensions of the tiled loop are parallel, an scf.for loop
      // nest is generated.
      if (!seqIVs.empty()) {
        scf::LoopNest nest =
            scf::buildLoopNest(builder, loc, seqLBs, seqUBs, seqSteps,
                               [&](OpBuilder &builder, Location loc,
                                   ValueRange ivs) { bvm.map(seqIVs, ivs); });
        builder.setInsertionPointToStart(nest.loops.back().getBody());
      }
      for (auto &op : tiledLoop.getBody()->without_terminator())
        builder.clone(op, bvm);
    };

    if (parIVs.empty())
      generateForLoopNestAndCloneBody(rewriter, loc, llvm::None);
    else
      rewriter.create<scf::ParallelOp>(loc, parLBs, parUBs, parSteps,
                                       generateForLoopNestAndCloneBody);
    rewriter.eraseOp(tiledLoop);
    return success();
  }
};

/// Local folding pattern for AffineApplyOp that we can apply greedily.
/// This replaces AffineApplyOp by the proper value in cases where the
/// associated map is trivial.
/// A trivial map here is defined as a map with a single result and either:
///   1. Zero operands + returns a single AffineConstantExpr
///   2. One operand + returns a single AffineDimExpr
///   3. One operand + returns a single AffineSymbolExpr
///
/// In the first case, the AffineApplyOp is replaced by a new constant. In the
/// other cases, it is replaced by its unique operand.
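///
/// For example (illustrative):
///   %0 = affine.apply affine_map<() -> (42)>()       // folded to constant 42
///   %1 = affine.apply affine_map<(d0) -> (d0)>(%iv)  // replaced by %iv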
struct FoldAffineOp : public RewritePattern {
  FoldAffineOp(MLIRContext *context)
      : RewritePattern(AffineApplyOp::getOperationName(), 0, context) {}

  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override {
    AffineApplyOp affineApplyOp = cast<AffineApplyOp>(op);
    auto map = affineApplyOp.getAffineMap();
    if (map.getNumResults() != 1 || map.getNumInputs() > 1)
      return failure();

    AffineExpr expr = map.getResult(0);
    if (map.getNumInputs() == 0) {
      if (auto val = expr.dyn_cast<AffineConstantExpr>()) {
        rewriter.replaceOpWithNewOp<arith::ConstantIndexOp>(op, val.getValue());
        return success();
      }
      return failure();
    }
    if (expr.dyn_cast<AffineDimExpr>() || expr.dyn_cast<AffineSymbolExpr>()) {
      rewriter.replaceOp(op, op->getOperand(0));
      return success();
    }
    return failure();
  }
};

template <typename LoopType>
static void lowerLinalgToLoopsImpl(FuncOp funcOp) {
  MLIRContext *context = funcOp.getContext();
  RewritePatternSet patterns(context);
  patterns.add<LinalgRewritePattern<LoopType>>(context);
  memref::DimOp::getCanonicalizationPatterns(patterns, context);
  tensor::DimOp::getCanonicalizationPatterns(patterns, context);
  AffineApplyOp::getCanonicalizationPatterns(patterns, context);
  patterns.add<FoldAffineOp>(context);
  // Just apply the patterns greedily.
  (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}

struct LowerToAffineLoops
    : public LinalgLowerToAffineLoopsBase<LowerToAffineLoops> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<memref::MemRefDialect>();
  }
  void runOnFunction() override {
    lowerLinalgToLoopsImpl<AffineForOp>(getFunction());
  }
};

struct LowerToLoops : public LinalgLowerToLoopsBase<LowerToLoops> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<memref::MemRefDialect, scf::SCFDialect>();
  }
  void runOnFunction() override {
    lowerLinalgToLoopsImpl<scf::ForOp>(getFunction());
  }
};

struct LowerToParallelLoops
    : public LinalgLowerToParallelLoopsBase<LowerToParallelLoops> {
  void runOnFunction() override {
    lowerLinalgToLoopsImpl<scf::ParallelOp>(getFunction());
  }
};

struct LowerTiledLoopsToSCF
    : public LinalgLowerTiledLoopsToSCFBase<LowerTiledLoopsToSCF> {
  void runOnFunction() override {
    MLIRContext *context = &getContext();
    RewritePatternSet patterns(context);
    populateTiledLoopToSCFPattern(patterns);
    (void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
  }
};
} // namespace

/// Rewrite a TiledLoopOp with bounds/step that potentially do not divide evenly
/// into two TiledLoopOps: one where the step divides the iteration space
/// evenly, followed by another one for the last (partial) iteration (if any).
/// This function only rewrites the `idx`-th loop of the loop nest represented
/// by the TiledLoopOp. To peel the entire loop nest, this function must be
/// called multiple times.
///
/// This function rewrites the given TiledLoopOp in-place and creates a new
/// TiledLoopOp for the last iteration. It replaces all uses of the original
/// TiledLoopOp with the results of the newly generated one.
///
/// The newly generated TiledLoopOp is returned via `result`. The boundary
/// at which the loop is split (the new upper bound) is returned via
/// `splitBound`. The return value indicates whether the TiledLoopOp was
/// rewritten or not.
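///
/// As an illustration (values chosen arbitrarily): with %lb = 0, %ub = 10 and
/// %step = 4, the split bound %ub - (%ub - %lb) mod %step evaluates to 8, so
/// the rewritten loop iterates over [0, 8) and the remainder loop over
/// [8, 10).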
static LogicalResult peelTiledLoop(RewriterBase &b, TiledLoopOp loopOp,
                                   int64_t idx, TiledLoopOp &result,
                                   Value &splitBound) {
  Value lb = loopOp.lowerBound()[idx], ub = loopOp.upperBound()[idx],
        step = loopOp.step()[idx];
  auto ubInt = getConstantIntValue(ub);

  auto loc = loopOp.getLoc();
  AffineExpr exprLb, exprUb, exprStep;
  bindSymbols(b.getContext(), exprLb, exprUb, exprStep);
  // New upper bound: %ub - (%ub - %lb) mod %step
  auto modMap = AffineMap::get(0, 3, {exprUb - ((exprUb - exprLb) % exprStep)});
  SmallVector<Value> operands{lb, ub, step};
  mlir::canonicalizeMapAndOperands(&modMap, &operands);
  modMap = mlir::simplifyAffineMap(modMap);
  RewriterBase::InsertionGuard guard(b);
  b.setInsertionPoint(loopOp);
  splitBound = b.createOrFold<AffineApplyOp>(loc, modMap, operands);
  // No specialization necessary if step already divides upper bound evenly.
  if (splitBound == ub || (ubInt && ubInt == getConstantIntValue(splitBound)))
    return failure();

  // Create remainder loop.
  b.setInsertionPointAfter(loopOp);
  auto remainderLoop = cast<TiledLoopOp>(b.clone(*loopOp.getOperation()));
  loopOp.replaceAllUsesWith(remainderLoop->getResults());
  // Outputs: Take tensors from main loop's results. Take memrefs from main
  // loop's outputs.
  SmallVector<Value> remainderOutputs;
  for (unsigned o = 0, t = 0; o < loopOp.getNumOutputs(); ++o) {
    remainderOutputs.push_back(loopOp.outputs()[o].getType().isa<MemRefType>()
                                   ? loopOp.outputs()[o]
                                   : loopOp->getResult(t++));
  }
  remainderLoop.outputsMutable().assign(remainderOutputs);

  // Set new loop bounds.
  b.updateRootInPlace(loopOp, [&]() {
    SmallVector<Value> ubs = loopOp.upperBound();
    ubs[idx] = splitBound;
    loopOp.upperBoundMutable().assign(ubs);
  });
  SmallVector<Value> lbs = remainderLoop.lowerBound();
  lbs[idx] = splitBound;
  remainderLoop.lowerBoundMutable().assign(lbs);

  result = remainderLoop;
  return success();
}

template <typename OpTy, bool IsMin>
static void
rewriteAffineOpAfterPeeling(RewriterBase &rewriter, TiledLoopOp mainLoop,
                            TiledLoopOp remainderLoop, Value mainIv,
                            Value remainderIv, Value ub, Value step) {
  mainLoop.walk([&](OpTy affineOp) {
    AffineMap map = affineOp.getAffineMap();
    (void)scf::rewritePeeledMinMaxOp(rewriter, affineOp, map,
                                     affineOp.operands(), IsMin, mainIv, ub,
                                     step, /*insideLoop=*/true);
  });
  remainderLoop.walk([&](OpTy affineOp) {
    AffineMap map = affineOp.getAffineMap();
    (void)scf::rewritePeeledMinMaxOp(rewriter, affineOp, map,
                                     affineOp.operands(), IsMin, remainderIv,
                                     ub, step, /*insideLoop=*/false);
  });
}

LogicalResult mlir::linalg::peelAndCanonicalizeTiledLoop(RewriterBase &rewriter,
                                                         TiledLoopOp loopOp,
                                                         int64_t idx,
                                                         TiledLoopOp &result) {
  int64_t numLoops = loopOp.iterator_types().size();
  if (idx < 0 || numLoops <= idx)
    return failure();

  Value ub = loopOp.upperBound()[idx];
  TiledLoopOp remainderLoop;
  Value splitBound;
  if (failed(peelTiledLoop(rewriter, loopOp, idx, remainderLoop, splitBound)))
    return failure();

  // Rewrite affine.min and affine.max ops.
  Value mainIv = loopOp.getInductionVars()[idx], step = loopOp.step()[idx],
        remainderIv = remainderLoop.getInductionVars()[idx];

  rewriteAffineOpAfterPeeling<AffineMinOp, /*IsMin=*/true>(
      rewriter, loopOp, remainderLoop, mainIv, remainderIv, ub, step);
  rewriteAffineOpAfterPeeling<AffineMaxOp, /*IsMin=*/false>(
      rewriter, loopOp, remainderLoop, mainIv, remainderIv, ub, step);

  result = remainderLoop;
  return success();
}

void mlir::linalg::populateTiledLoopToSCFPattern(RewritePatternSet &patterns) {
  patterns.add<TiledLoopToSCFPattern>(patterns.getContext());
}

std::unique_ptr<OperationPass<FuncOp>>
mlir::createConvertLinalgTiledLoopsToSCFPass() {
  return std::make_unique<LowerTiledLoopsToSCF>();
}

std::unique_ptr<OperationPass<FuncOp>> mlir::createConvertLinalgToLoopsPass() {
  return std::make_unique<LowerToLoops>();
}

std::unique_ptr<OperationPass<FuncOp>>
mlir::createConvertLinalgToParallelLoopsPass() {
  return std::make_unique<LowerToParallelLoops>();
}

std::unique_ptr<OperationPass<FuncOp>>
mlir::createConvertLinalgToAffineLoopsPass() {
  return std::make_unique<LowerToAffineLoops>();
}

/// Emits a loop nest of `affine.for` with the proper body for `linalgOp`.
FailureOr<LinalgLoops>
mlir::linalg::linalgOpToAffineLoops(PatternRewriter &rewriter,
                                    LinalgOp linalgOp) {
  return linalgOpToLoopsImpl<AffineForOp>(rewriter, linalgOp);
}

/// Emits a loop nest of `scf.for` with the proper body for `linalgOp`.
FailureOr<LinalgLoops> mlir::linalg::linalgOpToLoops(PatternRewriter &rewriter,
                                                     LinalgOp linalgOp) {
  return linalgOpToLoopsImpl<scf::ForOp>(rewriter, linalgOp);
}

/// Emits a loop nest of `scf.parallel` with the proper body for `linalgOp`.
FailureOr<LinalgLoops>
mlir::linalg::linalgOpToParallelLoops(PatternRewriter &rewriter,
                                      LinalgOp linalgOp) {
  return linalgOpToLoopsImpl<scf::ParallelOp>(rewriter, linalgOp);
}