//===- LinalgTransforms.cpp - Linalg transformations as patterns ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic and helpers to expose Linalg transforms as rewrite
// patterns.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/SCF/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <type_traits>

#define DEBUG_TYPE "linalg-transforms"

using namespace mlir;
using namespace mlir::linalg;

#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ")

//===----------------------------------------------------------------------===//
// Transformations exposed as rewrite patterns.
//===----------------------------------------------------------------------===//
// Marker used as attribute name in generated Linalg rewriting transformations.
const StringLiteral mlir::linalg::LinalgTransforms::kLinalgTransformMarker = "__internal_linalg_transform__"; mlir::linalg::LinalgTransformationFilter::LinalgTransformationFilter( ArrayRef matchDisjunction, Optional replacement) : matchDisjunction(matchDisjunction.begin(), matchDisjunction.end()), replacement(replacement), matchByDefault(false) {} mlir::linalg::LinalgTransformationFilter::LinalgTransformationFilter( FilterFunction f, ArrayRef matchDisjunction, Optional replacement) : filters(), matchDisjunction(matchDisjunction.begin(), matchDisjunction.end()), replacement(replacement), matchByDefault(false) { if (f) filters.push_back(f); } LogicalResult mlir::linalg::LinalgTransformationFilter::checkAndNotify( PatternRewriter &rewriter, Operation *op) const { if (llvm::any_of(filters, [&](const FilterFunction &f) { return failed(f(op)); })) return failure(); auto attr = op->template getAttrOfType( LinalgTransforms::kLinalgTransformMarker); if (!attr) { // 1. Has no filter case and matchDisjunction is empty. if (matchDisjunction.empty() || matchByDefault) return success(); // 2. Has no filter but was expecting a filter. return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { diag << " does not have any filter from list: "; interleaveComma(matchDisjunction, diag); }); } // 4. Match explicit filter. for (auto filter : matchDisjunction) if (attr.getValue() == filter) return success(); // 5. Fail to match. 
return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { diag << " does not have any filter from list: "; interleaveComma(matchDisjunction, diag); }); } void mlir::linalg::LinalgTransformationFilter:: replaceLinalgTransformationFilter(PatternRewriter &rewriter, Operation *op) const { if (replacement.hasValue()) op->setAttr(LinalgTransforms::kLinalgTransformMarker, replacement.getValue()); else op->removeAttr( rewriter.getStringAttr(LinalgTransforms::kLinalgTransformMarker)); } bool mlir::linalg::LinalgTransformationFilter::hasReplacementFilter( Operation *op) const { if (!replacement) return false; auto attr = op->getAttr(LinalgTransforms::kLinalgTransformMarker) .dyn_cast(); return attr && attr == replacement.getValue(); } LinalgTilingOptions & mlir::linalg::LinalgTilingOptions::setTileSizes(ArrayRef ts) { assert(!tileSizeComputationFunction && "tile sizes already set"); SmallVector tileSizes(ts.begin(), ts.end()); tileSizeComputationFunction = [tileSizes](OpBuilder &b, Operation *op) { OpBuilder::InsertionGuard guard(b); b.setInsertionPointToStart( &op->getParentOfType().getBody().front()); return llvm::to_vector<4>(map_range(tileSizes, [&](int64_t s) { Value v = b.create(op->getLoc(), s); return v; })); }; return *this; } LinalgTilingOptions &mlir::linalg::LinalgTilingOptions::scalarizeDynamicDims() { assert(!tileSizeComputationFunction && "tile sizes already set"); tileSizeComputationFunction = [](OpBuilder &b, Operation *op) { SmallVector tileSizes; auto linalgOp = dyn_cast(op); if (!linalgOp) return tileSizes; Location loc = linalgOp.getLoc(); auto allShapeSizes = linalgOp.createFlatListOfOperandDims(b, loc); AffineMap map = linalgOp.getShapesToLoopsMap(); if (!map) return tileSizes; auto shapeSizes = applyMapToValues(b, loc, map, allShapeSizes); // If the shape size is dynamic, tile by 1. Otherwise, do not tile (tile // size 0). for (Value shapeSize : shapeSizes) tileSizes.push_back(getConstantIntValue(shapeSize).hasValue() ? 
b.create(loc, 0) : b.create(loc, 1)); return tileSizes; }; return *this; } /// Helper function that tries to pad `opOperand`. Exit early and return success /// for scalar operands or if `paddingFunc` returns failure. Otherwise, try to /// pad the operand even if it already has a static shape. Set `result` to the /// result of the created PadTensorOp or return failure if the operand cannot be /// padded to a static shape. static LogicalResult padOperandToSmallestStaticBoundingBox( OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand, const PaddingValueComputationFunction &paddingFunc, const PaddingNoFoldComputationFunction &nofoldFunc, Value &result) { // Can't pad scalars. if (opToPad.getShape(opOperand).empty()) return success(); // Can't pad if no padding value is known. FailureOr paddingValue = paddingFunc(b, *opOperand); if (failed(paddingValue)) return success(); auto sliceOp = opOperand->get().getDefiningOp(); // Not a slice op, cannot construct a static bounding box. if (!sliceOp) return failure(); SmallVector staticSizes; staticSizes.reserve(opToPad.getRank(opOperand)); auto shapedOp = cast(sliceOp.getOperation()); for (auto size : shapedOp.getMixedSizes()) { // If the size is an attribute add it directly to `staticSizes`. if (size.is()) { staticSizes.push_back( size.get().dyn_cast().getInt()); continue; } // Otherwise, try to compute a constant upper bound for the size value. FailureOr upperBound = getConstantUpperBoundForIndex(size.get()); if (failed(upperBound)) { LLVM_DEBUG(DBGS() << "No constant bounding box can be found for padding"); return failure(); } staticSizes.push_back(upperBound.getValue()); } auto staticTensorType = RankedTensorType::get( staticSizes, getElementTypeOrSelf(opOperand->get())); bool nofold = nofoldFunc ? 
nofoldFunc(*opOperand) : false; result = linalg::PadTensorOp::createPadHighOp( staticTensorType, opOperand->get(), paddingValue.getValue(), /*nofold=*/nofold, opToPad->getLoc(), b); return success(); } FailureOr> linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad, const PaddingValueComputationFunction &paddingFunc, const PaddingNoFoldComputationFunction &nofoldFunc, LinalgOp &paddedOp) { Location loc = opToPad->getLoc(); // TODO: there are cases where we may still want to pad to larger sizes. assert(opToPad.hasTensorSemantics() && "expected operation to have tensor semantics"); OpBuilder::InsertionGuard g(b); // Set IP after op because we also take the dims of the original output. b.setInsertionPointAfter(opToPad); // Make a copy of the shaped operands and update it. SmallVector newOperands; newOperands.reserve(opToPad.getNumInputsAndOutputs()); for (OpOperand *opOperand : opToPad.getInputAndOutputOperands()) { Value paddedOperand; // If padding was requested but the shape cannot be bounded statically then // the pattern fails to apply. if (failed(padOperandToSmallestStaticBoundingBox( b, opToPad, opOperand, paddingFunc, nofoldFunc, paddedOperand))) return failure(); newOperands.push_back(paddedOperand ? paddedOperand : opOperand->get()); } SmallVector> reifiedResultShapes; if (failed(cast(opToPad.getOperation()) .reifyResultShapes(b, reifiedResultShapes))) return failure(); assert(reifiedResultShapes.size() == opToPad->getNumResults() && "expected same number of results"); // Clone `opToPad` to operate on the statically padded shapes. auto resultTensorTypes = ValueRange(newOperands).take_back(opToPad.getNumOutputs()).getTypes(); paddedOp = opToPad.clone(b, loc, resultTensorTypes, newOperands); // Recover the slice out of the new static results. This keeps the original // linalg op around because it uses the dims of the original results. 
SmallVector paddedSubviewResults; paddedSubviewResults.reserve(opToPad->getNumResults()); for (auto en : llvm::enumerate(paddedOp->getResults())) { Value paddedResult = en.value(); int64_t resultNumber = en.index(); int64_t rank = paddedResult.getType().cast().getRank(); SmallVector offsets(rank, b.getIndexAttr(0)); SmallVector sizes; for (Value v : reifiedResultShapes[resultNumber]) sizes.push_back(v); SmallVector strides(rank, b.getIndexAttr(1)); paddedSubviewResults.push_back(b.create( loc, paddedResult, offsets, sizes, strides)); } return paddedSubviewResults; } /// Linalg base tiling pattern. mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( StringRef opName, MLIRContext *context, LinalgTilingOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) : RewritePattern(opName, benefit, context), filter(filter), options(options) {} mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( MLIRContext *context, LinalgTilingOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter), options(options) {} /// Try to peel a loop `op` and return the new result. // TODO: Add support for scf.parallel and affine.for loops. static SmallVector peelLoop(RewriterBase &rewriter, Operation *op) { return llvm::TypeSwitch>(op) .Case([&](scf::ForOp forOp) { scf::ForOp partialIteration; if (succeeded(scf::peelAndCanonicalizeForLoop(rewriter, forOp, partialIteration))) return partialIteration->getResults(); assert(!partialIteration && "expected that loop was not peeled"); return forOp->getResults(); }) .Default([&](Operation *op) { return op->getResults(); }); } /// Try to peel a TiledLoopOp and return the new result. 
static SmallVector peelLoop(RewriterBase &rewriter, TiledLoopOp tiledLoop, int64_t idx) { assert(idx < static_cast(tiledLoop.iterator_types().size()) && "requested peeling of non-existing loop"); TiledLoopOp result; if (succeeded(peelAndCanonicalizeTiledLoop(rewriter, tiledLoop, idx, result))) return result->getResults(); assert(!result && "expected that loop was not peeled"); return tiledLoop->getResults(); } /// Peel loops after tiling. static void peelLoops(RewriterBase &rewriter, TiledLinalgOp &res, const LinalgTilingOptions &options) { for (int64_t loop : options.peeledLoops) { assert(loop < static_cast(res.loops.size()) && "requested peeling of non-existing loop"); SmallVector loopResults; Operation *loopOp = res.loops[loop]; if (options.loopType == LinalgTilingLoopType::TiledLoops) { assert(llvm::all_of( res.loops, [&](Operation *op) { return op == res.loops.front(); }) && "expected that all loop ops are the same TiledLoopOp"); auto tiledLoopOp = dyn_cast(loopOp); assert(tiledLoopOp && "expected TiledLoopOp"); loopResults = peelLoop(rewriter, tiledLoopOp, loop); } else { loopResults = peelLoop(rewriter, loopOp); } // The result of the loop nest may change with peeling. if (res.tensorResults.size() == loopOp->getNumResults() && std::equal(res.tensorResults.begin(), res.tensorResults.end(), loopOp->getResults().begin())) res.tensorResults = loopResults; } } LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewriteBase( Operation *op, PatternRewriter &rewriter, TiledLinalgOp &result) const { LinalgOp linalgOp = dyn_cast(op); if (!linalgOp) return failure(); if (failed(filter.checkAndNotify(rewriter, linalgOp))) return failure(); Optional res = tileLinalgOp(rewriter, linalgOp, options); if (!res) return failure(); // Clear filter to stop recursive pattern application. filter.replaceLinalgTransformationFilter(rewriter, res->op); // Peel loops. 
peelLoops(rewriter, *res, options); result = *res; return success(); } static ValueRange getTiledOpResult(TiledLinalgOp tiledOp) { if (tiledOp.loops.empty()) return tiledOp.op.getOperation()->getResults(); return tiledOp.loops.front()->getResults(); } static ValueRange getTiledAndFusedOpResult(TiledAndFusedLinalgOps tiledAndFusedOp) { if (tiledAndFusedOp.fusedLoops.empty()) return tiledAndFusedOp.op.getOperation()->getResults(); return tiledAndFusedOp.fusedLoops.front()->getResults(); } mlir::linalg::LinalgBaseTileAndFusePattern::LinalgBaseTileAndFusePattern( StringRef opName, MLIRContext *context, const LinalgDependenceGraph &dependenceGraph, LinalgTilingOptions tilingOptions, LinalgFusionOptions fusionOptions, LinalgTransformationFilter filter, LinalgTransformationFilter fusedOpMarker, LinalgTransformationFilter originalOpMarker, PatternBenefit benefit) : RewritePattern(opName, benefit, context, {}), dependenceGraph(dependenceGraph), tilingOptions(tilingOptions), fusionOptions(fusionOptions), filter(filter), fusedOpMarker(fusedOpMarker), originalOpMarker(originalOpMarker) {} LogicalResult mlir::linalg::LinalgBaseTileAndFusePattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { LinalgOp linalgOp = dyn_cast(op); // TODO: remove hasIndexSemantics check once index ops are supported. if (!linalgOp || linalgOp.hasIndexSemantics()) return failure(); if (failed(filter.checkAndNotify(rewriter, linalgOp))) return failure(); DenseSet producers; producers.insert(linalgOp); for (auto dependence : dependenceGraph.getDependentOperationsInto(linalgOp)) { Optional operandNumber = dependence.getIndexingOpViewOperandNum(); // When looking at dependences into, indexingOp is always OpOperand. We // could assert, but continue if this is not the case. 
if (!operandNumber) continue; if (!fusionOptions.indicesToFuse.count(operandNumber.getValue())) continue; if (isa(dependence.getDependentOp())) producers.insert(dependence.getDependentOp()); } SmallVector fusionOps; for (auto it = op->getBlock()->begin(), ie = Block::iterator(op); it != ie; ++it) { auto producerLinalgOp = dyn_cast(&(*it)); if (producerLinalgOp && producers.count(producerLinalgOp)) fusionOps.push_back(producerLinalgOp); } fusionOps.push_back(linalgOp); SmallVector tileSizes = tilingOptions.tileSizeComputationFunction(rewriter, op); LinalgTilingOptions instanceTilingOptions = tilingOptions; instanceTilingOptions.setTileSizes(tileSizes); Optional tiledAndFusedOps = tileAndFuseLinalgOps( rewriter, fusionOps, dependenceGraph, instanceTilingOptions); if (!tiledAndFusedOps) return failure(); // Tile the unfused loops; SmallVector unfusedLoopTileSizes; Value zero = rewriter.create(op->getLoc(), 0); for (auto tileSize : enumerate(tileSizes)) { if (tiledAndFusedOps->fusedLoopDims.count(tileSize.index())) unfusedLoopTileSizes.push_back(zero); else unfusedLoopTileSizes.push_back(tileSize.value()); } // Tile the loop only if there is a non-zero tile size. 
if (unfusedLoopTileSizes.size() > linalgOp.getNumLoops()) unfusedLoopTileSizes.resize(linalgOp.getNumLoops()); if (llvm::any_of(unfusedLoopTileSizes, [](Value val) { if (auto cst = val.getDefiningOp()) return cst.value() != 0; return true; })) { LinalgTilingOptions unfusedTilingOptions = tilingOptions; unfusedTilingOptions.setTileSizes(unfusedLoopTileSizes); Optional unfusedTiledOp = tileLinalgOp(rewriter, tiledAndFusedOps->op, unfusedTilingOptions); if (!unfusedTiledOp) return failure(); rewriter.replaceOp(tiledAndFusedOps->op, getTiledOpResult(unfusedTiledOp.getValue())); tiledAndFusedOps->op = unfusedTiledOp->op; } op->replaceAllUsesWith(getTiledAndFusedOpResult(tiledAndFusedOps.getValue())); filter.replaceLinalgTransformationFilter(rewriter, tiledAndFusedOps->op.getOperation()); for (auto fusedOp : tiledAndFusedOps->fusedProducers) { fusedOpMarker.replaceLinalgTransformationFilter(rewriter, fusedOp.getOperation()); } for (auto origProducerOp : ArrayRef(fusionOps).drop_back()) { originalOpMarker.replaceLinalgTransformationFilter( rewriter, origProducerOp.getOperation()); } rewriter.updateRootInPlace(op, [&]() { originalOpMarker.replaceLinalgTransformationFilter(rewriter, op); }); return success(); } /// Linalg padding pattern. 
mlir::linalg::LinalgPaddingPattern::LinalgPaddingPattern(
    MLIRContext *context, LinalgPaddingOptions options,
    LinalgTransformationFilter filter, PatternBenefit benefit)
    : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter),
      options(options) {}

mlir::linalg::LinalgPaddingPattern::LinalgPaddingPattern(
    StringRef opName, MLIRContext *context, LinalgPaddingOptions options,
    LinalgTransformationFilter filter, PatternBenefit benefit)
    : RewritePattern(opName, benefit, context, {}), filter(filter),
      options(options) {}

/// Pads the operands of a tensor-semantics LinalgOp, optionally hoists the
/// created pad ops by the depths requested in the options, and replaces `op`
/// with slices of the padded op's results.
LogicalResult mlir::linalg::LinalgPaddingPattern::matchAndRewrite(
    Operation *op, PatternRewriter &rewriter) const {
  LinalgOp linalgOp = dyn_cast<LinalgOp>(op);
  if (!linalgOp)
    return failure();
  if (!linalgOp.hasTensorSemantics())
    return failure();
  if (failed(filter.checkAndNotify(rewriter, op)))
    return failure();

  // Pad the operation.
  LinalgOp paddedOp;
  FailureOr<SmallVector<Value>> newResults = rewriteAsPaddedOp(
      rewriter, linalgOp, options.paddingValueComputationFunction,
      options.paddingNoFoldComputationFunction, paddedOp);
  if (failed(newResults))
    return failure();

  // Compute the desired hoisting depths.
  SmallVector<int64_t> depths;
  if (options.paddingHoistComputationFunction) {
    for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands())
      depths.push_back(options.paddingHoistComputationFunction(*opOperand));
  }

  // Hoist the padding.
  for (auto en : enumerate(depths)) {
    OpOperand &opOperand = paddedOp->getOpOperand(en.index());
    auto padTensorOp = opOperand.get().getDefiningOp<PadTensorOp>();
    // Skip operands that were not padded or for which no hoisting is requested.
    if (!padTensorOp || en.value() == 0)
      continue;
    PadTensorOp hoistedOp;
    FailureOr<Value> newResult =
        hoistPaddingOnTensors(padTensorOp, en.value(), hoistedOp);
    // Hoisting is best-effort: a failure leaves the pad op where it is.
    if (failed(newResult))
      continue;
    rewriter.replaceOp(padTensorOp, newResult.getValue());
  }

  // Replace the original operation to pad.
  rewriter.replaceOp(op, newResults.getValue());
  filter.replaceLinalgTransformationFilter(rewriter, paddedOp);
  return success();
}

/// Linalg generic interchange pattern.
mlir::linalg::GenericOpInterchangePattern::GenericOpInterchangePattern( MLIRContext *context, ArrayRef interchangeVector, LinalgTransformationFilter filter, PatternBenefit benefit) : OpRewritePattern(context, benefit), filter(filter), interchangeVector(interchangeVector.begin(), interchangeVector.end()) {} LogicalResult mlir::linalg::GenericOpInterchangePattern::matchAndRewrite( GenericOp genericOp, PatternRewriter &rewriter) const { if (failed(filter.checkAndNotify(rewriter, genericOp))) return failure(); if (failed(interchangeGenericOpPrecondition(genericOp, interchangeVector))) return failure(); // TODO: figure out how this interplays with named ops. In particular this // should break the named op property. rewriter.updateRootInPlace(genericOp, [&]() { interchangeGenericOp(rewriter, genericOp, interchangeVector); // New filter if specified. filter.replaceLinalgTransformationFilter(rewriter, genericOp); }); return success(); } /// Linalg generalization pattern. mlir::linalg::LinalgGeneralizationPattern::LinalgGeneralizationPattern( MLIRContext *context, LinalgTransformationFilter filter, PatternBenefit benefit) : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter) {} mlir::linalg::LinalgGeneralizationPattern::LinalgGeneralizationPattern( StringRef opName, MLIRContext *context, LinalgTransformationFilter filter, PatternBenefit benefit) : RewritePattern(opName, benefit, context, {}), filter(filter) {} LogicalResult mlir::linalg::LinalgGeneralizationPattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { if (failed(filter.checkAndNotify(rewriter, op))) return failure(); if (failed(generalizeNamedOpPrecondition(op))) return failure(); GenericOp genericOp = generalizeNamedOp(rewriter, op); rewriter.replaceOp(op, genericOp.getResults()); filter.replaceLinalgTransformationFilter(rewriter, genericOp); return success(); } mlir::linalg::LinalgBasePromotionPattern::LinalgBasePromotionPattern( MLIRContext *context, 
LinalgTransformationFilter filter, LinalgPromotionOptions options, PatternBenefit benefit) : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter), options(options) {} mlir::linalg::LinalgBasePromotionPattern::LinalgBasePromotionPattern( StringRef opName, MLIRContext *context, LinalgPromotionOptions options, LinalgTransformationFilter filter, PatternBenefit benefit) : RewritePattern(opName, benefit, context, {}), filter(filter), options(options) {} LogicalResult mlir::linalg::LinalgBasePromotionPattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { if (failed(filter.checkAndNotify(rewriter, op))) return failure(); if (failed(promoteSubviewsPrecondition(op, options))) return failure(); // TODO: We cannot use root update here. This pattern is creating other ops, // so if the promotion fails, those need to be cleaned up, which doesnt seem // to be happening here. So to fail properly, we should be cloning the op and // deleting the previous op. This needs more investigation. 
rewriter.startRootUpdate(op); Optional promotedOp = promoteSubViews(rewriter, op, options); if (!promotedOp) { rewriter.cancelRootUpdate(op); return op->emitError("subview promotion failed"); } rewriter.finalizeRootUpdate(op); filter.replaceLinalgTransformationFilter(rewriter, op); return success(); } mlir::linalg::LinalgBaseVectorizationPattern::LinalgBaseVectorizationPattern( MLIRContext *context, LinalgTransformationFilter filter, PatternBenefit benefit) : RewritePattern(MatchAnyOpTypeTag(), benefit, context), filter(filter) {} mlir::linalg::LinalgBaseVectorizationPattern::LinalgBaseVectorizationPattern( StringRef opName, MLIRContext *context, LinalgTransformationFilter filter, PatternBenefit benefit) : RewritePattern(opName, benefit, context, {}), filter(filter) {} LogicalResult mlir::linalg::LinalgBaseVectorizationPattern::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { LinalgOp linalgOp = dyn_cast(op); if (!linalgOp) return failure(); if (failed(filter.checkAndNotify(rewriter, linalgOp))) return failure(); SmallVector newResults; if (failed(vectorizeLinalgOp(rewriter, op, newResults))) return failure(); if (!newResults.empty()) rewriter.replaceOp(op, newResults); else rewriter.eraseOp(op); return success(); } LogicalResult mlir::linalg::applyStagedPatterns( Operation *op, ArrayRef stage1Patterns, const FrozenRewritePatternSet &stage2Patterns, function_ref stage3Lambda) { unsigned iteration = 0; (void)iteration; for (const auto &patterns : stage1Patterns) { LLVM_DEBUG(DBGS() << "Before 1st stage, iter: " << ++iteration << "\n" << *op); if (failed(applyPatternsAndFoldGreedily(op, patterns))) { LLVM_DEBUG(DBGS() << "Underlying first stage rewrite did not converge"); return failure(); } LLVM_DEBUG(DBGS() << "After 1st stage, iter: " << ++iteration << "\n" << *op); if (failed(applyPatternsAndFoldGreedily(op, stage2Patterns))) { LLVM_DEBUG(DBGS() << "Underlying 2nd stage rewrite did not converge"); return failure(); } LLVM_DEBUG(DBGS() << "After 
2nd stage, iter : " << iteration << "\n" << *op); if (stage3Lambda) { if (failed(stage3Lambda(op))) return failure(); LLVM_DEBUG(DBGS() << "After 3rd stage, iter : " << iteration << "\n" << *op); } } return success(); } static SmallVector getNParallelLoopsAttrs(unsigned nParallelLoops) { return SmallVector(nParallelLoops, getParallelIteratorTypeName()); } /// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp (to initialize /// with pad_val) and GenericOp (to copy contents). LogicalResult PadTensorOpTransformationPattern::matchAndRewrite( linalg::PadTensorOp padOp, PatternRewriter &rewriter) const { auto inputShapedType = padOp.source().getType().cast(); auto resultShapedType = padOp.result().getType().cast(); // Bail on non-static shapes. if (!inputShapedType.hasStaticShape()) return failure(); if (!resultShapedType.hasStaticShape()) return failure(); // Only support padding with a constant for now, i.e. either: // 1. A BBarg from a different block. // 2. A value defined outside of the current block. 
Block &block = padOp.region().front(); auto yieldOp = cast(block.getTerminator()); assert(yieldOp.getNumOperands() == 1 && "expected single operand yield"); Value padValue = yieldOp.values().front(); Operation *definingOp = padValue.getDefiningOp(); if (definingOp && definingOp->getBlock() == &block) return failure(); if (!definingOp && padValue.cast().getOwner() == &block) return failure(); // Create tensor with the padded shape Location loc = padOp.getLoc(); SmallVector indices(resultShapedType.getRank(), rewriter.create(loc, 0)); Value initTensor = rewriter.create( loc, resultShapedType.getShape(), resultShapedType.getElementType()); // Initialize tensor with the pad value Value tmpTensor = rewriter.create(loc, padValue, initTensor).result(); // Copy original contents into new tensor // Uses linalg.generic, but could be done with tensor.insert_slice SmallVector outputExprs; for (unsigned i = 0; i < resultShapedType.getRank(); ++i) { outputExprs.push_back(getAffineDimExpr(i, rewriter.getContext()) + padOp.static_low()[i].cast().getInt()); } SmallVector transferMaps = { rewriter.getMultiDimIdentityMap(inputShapedType.getRank()), AffineMap::get(resultShapedType.getRank(), /*symbolCount=*/0, outputExprs, rewriter.getContext())}; rewriter.replaceOpWithNewOp( padOp, resultShapedType, padOp.source(), tmpTensor, transferMaps, getNParallelLoopsAttrs(resultShapedType.getRank()), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { nestedBuilder.create(nestedLoc, args[0]); }); return success(); } /// Filling `dest` using FillOp constant padding value if possible. /// Otherwise, generate a tensor::GenerateOp. Value GeneralizePadTensorOpPattern::createFillOrGenerateOp( PatternRewriter &rewriter, PadTensorOp padOp, Value dest, const SmallVector &dynSizes) const { auto padValue = padOp.getConstantPaddingValue(); if (padValue) return rewriter.create(padOp.getLoc(), padValue, dest).result(); // Fill could not be optimized: Lower to tensor::GenerateOp with region. 
auto generateOp = rewriter.create( padOp.getLoc(), padOp.getResultType(), dynSizes); // Copy region to new op. BlockAndValueMapping bvm; padOp.region().cloneInto(&generateOp.getRegion(), bvm); // Rewrite linalg::YieldOp to tensor::YieldOp. OpBuilder::InsertionGuard guard(rewriter); auto yieldOp = dyn_cast(generateOp.getRegion().front().getTerminator()); assert(yieldOp && "malformed PadTensorOp: expected YieldOp terminator"); assert(yieldOp.values().size() == 1); rewriter.setInsertionPoint(yieldOp); rewriter.replaceOpWithNewOp(yieldOp, yieldOp.values()[0]); return generateOp; } LogicalResult GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp, PatternRewriter &rewriter) const { // Given an OpFoldResult, return an index-typed value. auto getIdxValue = [&](OpFoldResult ofr) { if (auto val = ofr.dyn_cast()) return val; return rewriter .create( padOp.getLoc(), ofr.get().cast().getInt()) .getResult(); }; auto resultType = padOp.getResultType(); // Compute size of InitTensorOp. Any combination of static/dynamic is // supported. SmallVector dynSizes; SmallVector staticSizes; for (unsigned dim = 0; dim < resultType.getRank(); ++dim) { if (resultType.isDynamicDim(dim)) { auto srcSize = rewriter.createOrFold(padOp.getLoc(), padOp.source(), dim); // Add low and high padding value. auto plusLow = rewriter.createOrFold( padOp.getLoc(), srcSize, getIdxValue(padOp.getMixedLowPad()[dim])); auto plusHigh = rewriter.createOrFold( padOp.getLoc(), plusLow, getIdxValue(padOp.getMixedHighPad()[dim])); dynSizes.push_back(plusHigh); } staticSizes.push_back(resultType.getDimSize(dim)); } // Init tensor and fill it with padding. Value init = rewriter.create( padOp.getLoc(), dynSizes, staticSizes, resultType.getElementType()); Value fill = createFillOrGenerateOp(rewriter, padOp, init, dynSizes); // Try optimize the copy of source. if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded()) return success(); // PadTensorOps cannot be optimized. 
Generate a InsertSliceOp instead // for copying the PadOp source. auto sourceType = padOp.getSourceType(); // Compute size of source of PadTensorOp. SmallVector srcSizes; for (unsigned dim = 0; dim < sourceType.getRank(); ++dim) { if (sourceType.isDynamicDim(dim)) { srcSizes.push_back(rewriter.createOrFold( padOp.getLoc(), padOp.source(), dim)); } else { srcSizes.push_back(rewriter.getIndexAttr(sourceType.getDimSize(dim))); } } // Strides of InsertSliceOp are all 1. SmallVector strides(sourceType.getRank(), rewriter.getIndexAttr(1)); rewriter.replaceOpWithNewOp( padOp, padOp.source(), fill, padOp.getMixedLowPad(), srcSizes, strides); return success(); } LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite( tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const { auto padOp = sliceOp.source().getDefiningOp(); if (!padOp) return failure(); // Only unit stride supported. if (!sliceOp.hasUnitStride()) return failure(); Operation *tiledPadOp = padOp.getTiledImplementation( rewriter, /*dest=*/ValueRange{}, sliceOp.getMixedOffsets(), sliceOp.getMixedSizes()); // All shapes are static and the data source is actually used. Rewrite into // pad_tensor(subtensor(x)). rewriter.replaceOp(sliceOp, tiledPadOp->getResults()); return success(); } namespace { // The following are patterns for downscaling convolution ops with size-1 // window dimensions. // // Note that we'd eventually want to write such transformations in a generic // way, e.g., converting to linalg.generic, removing the size-1 dimensions, // and then turning back to named ops. But for now it's fine to have a few // patterns matching special ops to get started. /// Rewrites 2-D convolution ops with size-1 window dimensions into 1-D /// convolution ops. 
struct DownscaleSizeOneWindowed2DConvolution final : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(linalg::Conv2DNhwcHwcfOp convOp, PatternRewriter &rewriter) const override { auto linalgOp = cast(*convOp); if (linalgOp.hasBufferSemantics()) return failure(); // To be implemented Value input = convOp.inputs().front(); Value filter = convOp.inputs().back(); Value output = convOp.outputs().front(); auto inputType = input.getType().dyn_cast(); auto filterType = filter.getType().dyn_cast(); auto outputType = output.getType().dyn_cast(); auto filterShape = filterType.getShape(); auto outputShape = outputType.getShape(); // Only handle the case where at least one of the window dimensions is // of size 1. Other cases can rely on tiling to reduce to such cases. int64_t fhSize = filterShape[0], fwSize = filterShape[1]; int64_t ohSize = outputShape[1], owSize = outputShape[2]; bool removeH = (fhSize == 1 && ohSize == 1); bool removeW = (fwSize == 1 && owSize == 1); if (!removeH && !removeW) return failure(); // Get new shapes and types for all operands by removing the size-1 // dimension. using RTTBuilder = RankedTensorType::Builder; auto newInputType = RTTBuilder(inputType).dropDim((removeH ? 1 : 2)); auto newFilterType = RTTBuilder(filterType).dropDim((removeH ? 0 : 1)); auto newOutputType = RTTBuilder(outputType).dropDim(removeH ? 1 : 2); // Rank-reduce operands. Location loc = convOp.getLoc(); Value newInput = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, input, newInputType); Value newFilter = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, filter, newFilterType); Value newOutput = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, output, newOutputType); // Rank-reduce strides and dilations too. // TODO: dropDim 1-liner helper. auto strides = llvm::to_vector<4>(convOp.strides().getValues()); strides.erase(strides.begin() + (removeH ? 
0 : 1)); auto stridesAttr = rewriter.getI64VectorAttr(strides); auto dilations = llvm::to_vector<4>(convOp.dilations().getValues()); dilations.erase(dilations.begin() + (removeH ? 0 : 1)); auto dilationsAttr = rewriter.getI64VectorAttr(dilations); auto conv1DOp = rewriter.create( loc, newOutputType, ValueRange{newInput, newFilter}, ValueRange{newOutput}, stridesAttr, dilationsAttr); // Insert back. Value inserted = tensor::createCanonicalRankReducingInsertSliceOp( rewriter, loc, conv1DOp.getResult(0), output); rewriter.replaceOp(convOp, inserted); return success(); }; }; /// Rewrites 2-D depthwise convolution ops with size-1 (w, kw) or (h, kh) /// dimensions into 1-D depthwise convolution ops. struct DownscaleDepthwiseConv2DNhwcHwcOp final : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(DepthwiseConv2DNhwcHwcOp convOp, PatternRewriter &rewriter) const override { auto linalgOp = cast(*convOp); if (linalgOp.hasBufferSemantics()) return failure(); // To be implemented Value input = convOp.inputs().front(); Value kernel = convOp.inputs().back(); Value output = convOp.outputs().front(); auto inputType = input.getType().dyn_cast(); auto kernelType = kernel.getType().dyn_cast(); auto outputType = output.getType().dyn_cast(); auto kernelShape = kernelType.getShape(); auto outputShape = outputType.getShape(); // Only handle the case where at least one of the window dimensions is // of size 1. Other cases can rely on tiling to reduce to such cases. int64_t khSize = kernelShape[0], kwSize = kernelShape[1]; int64_t ohSize = outputShape[1], owSize = outputShape[2]; bool removeH = (khSize == 1 && ohSize == 1); bool removeW = (kwSize == 1 && owSize == 1); if (!removeH && !removeW) return failure(); // Get new shapes and types for all operands by removing the size-1 // dimension. using RTTBuilder = RankedTensorType::Builder; auto newInputType = RTTBuilder(inputType).dropDim((removeH ? 
1 : 2)); auto newKernelType = RTTBuilder(kernelType).dropDim((removeH ? 0 : 1)); auto newOutputType = RTTBuilder(outputType).dropDim(removeH ? 1 : 2); // Rank-reduce operands. Location loc = convOp.getLoc(); Value newInput = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, input, newInputType); Value newKernel = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, kernel, newKernelType); Value newOutput = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, output, newOutputType); // Rank-reduce strides and dilations too. // TODO: dropDim 1-liner helper. auto strides = llvm::to_vector<4>(convOp.strides().getValues()); strides.erase(strides.begin() + (removeH ? 0 : 1)); auto stridesAttr = rewriter.getI64VectorAttr(strides); auto dilations = llvm::to_vector<4>(convOp.dilations().getValues()); dilations.erase(dilations.begin() + (removeH ? 0 : 1)); auto dilationsAttr = rewriter.getI64VectorAttr(dilations); auto conv1DOp = rewriter.create( loc, newOutputType, ValueRange{newInput, newKernel}, ValueRange{newOutput}, stridesAttr, dilationsAttr); // Insert back. Value inserted = tensor::createCanonicalRankReducingInsertSliceOp( rewriter, loc, conv1DOp.getResult(0), output); rewriter.replaceOp(convOp, inserted); return success(); }; }; } // namespace void linalg::populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit) { patterns.add(patterns.getContext(), benefit); }