//===- LoopCanonicalization.cpp - Cross-dialect canonicalization patterns -===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains cross-dialect canonicalization patterns that cannot be // actual canonicalization patterns due to undesired additional dependencies. // //===----------------------------------------------------------------------===// #include "PassDetail.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SCF/Transforms/Passes.h" #include "mlir/Dialect/SCF/Transforms/Transforms.h" #include "mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace mlir::scf; /// A simple, conservative analysis to determine if the loop is shape /// conserving. I.e., the type of the arg-th yielded value is the same as the /// type of the corresponding basic block argument of the loop. /// Note: This function handles only simple cases. Expand as needed. static bool isShapePreserving(ForOp forOp, int64_t arg) { auto yieldOp = cast(forOp.getBody()->getTerminator()); assert(arg < static_cast(yieldOp.getResults().size()) && "arg is out of bounds"); Value value = yieldOp.getResults()[arg]; while (value) { if (value == forOp.getRegionIterArgs()[arg]) return true; OpResult opResult = value.dyn_cast(); if (!opResult) return false; using tensor::InsertSliceOp; value = llvm::TypeSwitch(opResult.getOwner()) .template Case( [&](InsertSliceOp op) { return op.getDest(); }) .template Case([&](ForOp forOp) { return isShapePreserving(forOp, opResult.getResultNumber()) ? forOp.getIterOperands()[opResult.getResultNumber()] : Value(); }) .Default([&](auto op) { return Value(); }); } return false; } namespace { /// Fold dim ops of iter_args to dim ops of their respective init args. E.g.: /// /// ``` /// %0 = ... : tensor /// scf.for ... iter_args(%arg0 = %0) -> (tensor) { /// %1 = tensor.dim %arg0, %c0 : tensor /// ... /// } /// ``` /// /// is folded to: /// /// ``` /// %0 = ... : tensor /// scf.for ... iter_args(%arg0 = %0) -> (tensor) { /// %1 = tensor.dim %0, %c0 : tensor /// ... /// } /// ``` /// /// Note: Dim ops are folded only if it can be proven that the runtime type of /// the iter arg does not change with loop iterations. template struct DimOfIterArgFolder : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(OpTy dimOp, PatternRewriter &rewriter) const override { auto blockArg = dimOp.getSource().template dyn_cast(); if (!blockArg) return failure(); auto forOp = dyn_cast(blockArg.getParentBlock()->getParentOp()); if (!forOp) return failure(); if (!isShapePreserving(forOp, blockArg.getArgNumber() - 1)) return failure(); Value initArg = forOp.getOpOperandForRegionIterArg(blockArg).get(); rewriter.updateRootInPlace( dimOp, [&]() { dimOp.getSourceMutable().assign(initArg); }); return success(); }; }; /// Fold dim ops of loop results to dim ops of their respective init args. E.g.: /// /// ``` /// %0 = ... : tensor /// %r = scf.for ... iter_args(%arg0 = %0) -> (tensor) { /// ... /// } /// %1 = tensor.dim %r, %c0 : tensor /// ``` /// /// is folded to: /// /// ``` /// %0 = ... : tensor /// %r = scf.for ... iter_args(%arg0 = %0) -> (tensor) { /// ... /// } /// %1 = tensor.dim %0, %c0 : tensor /// ``` /// /// Note: Dim ops are folded only if it can be proven that the runtime type of /// the iter arg does not change with loop iterations. template struct DimOfLoopResultFolder : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(OpTy dimOp, PatternRewriter &rewriter) const override { auto forOp = dimOp.getSource().template getDefiningOp(); if (!forOp) return failure(); auto opResult = dimOp.getSource().template cast(); unsigned resultNumber = opResult.getResultNumber(); if (!isShapePreserving(forOp, resultNumber)) return failure(); rewriter.updateRootInPlace(dimOp, [&]() { dimOp.getSourceMutable().assign(forOp.getIterOperands()[resultNumber]); }); return success(); } }; /// Canonicalize AffineMinOp/AffineMaxOp operations in the context of scf.for /// and scf.parallel loops with a known range. template struct AffineOpSCFCanonicalizationPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(OpTy op, PatternRewriter &rewriter) const override { auto loopMatcher = [](Value iv, OpFoldResult &lb, OpFoldResult &ub, OpFoldResult &step) { if (scf::ForOp forOp = scf::getForInductionVarOwner(iv)) { lb = forOp.getLowerBound(); ub = forOp.getUpperBound(); step = forOp.getStep(); return success(); } if (scf::ParallelOp parOp = scf::getParallelForInductionVarOwner(iv)) { for (unsigned idx = 0; idx < parOp.getNumLoops(); ++idx) { if (parOp.getInductionVars()[idx] == iv) { lb = parOp.getLowerBound()[idx]; ub = parOp.getUpperBound()[idx]; step = parOp.getStep()[idx]; return success(); } } return failure(); } if (scf::ForeachThreadOp foreachThreadOp = scf::getForeachThreadOpThreadIndexOwner(iv)) { for (int64_t idx = 0; idx < foreachThreadOp.getRank(); ++idx) { if (foreachThreadOp.getThreadIndices()[idx] == iv) { lb = OpBuilder(iv.getContext()).getIndexAttr(0); ub = foreachThreadOp.getNumThreads()[idx]; step = OpBuilder(iv.getContext()).getIndexAttr(1); return success(); } } return failure(); } return failure(); }; return scf::canonicalizeMinMaxOpInLoop(rewriter, op, op.getAffineMap(), op.operands(), IsMin, loopMatcher); } }; struct SCFForLoopCanonicalization : public SCFForLoopCanonicalizationBase { void runOnOperation() override { auto *parentOp = getOperation(); MLIRContext *ctx = parentOp->getContext(); RewritePatternSet patterns(ctx); scf::populateSCFForLoopCanonicalizationPatterns(patterns); if (failed(applyPatternsAndFoldGreedily(parentOp, std::move(patterns)))) signalPassFailure(); } }; } // namespace void mlir::scf::populateSCFForLoopCanonicalizationPatterns( RewritePatternSet &patterns) { MLIRContext *ctx = patterns.getContext(); patterns .add, AffineOpSCFCanonicalizationPattern, DimOfIterArgFolder, DimOfIterArgFolder, DimOfLoopResultFolder, DimOfLoopResultFolder>(ctx); } std::unique_ptr mlir::createSCFForLoopCanonicalizationPass() { return std::make_unique(); }