1 //===- LinalgTransforms.cpp - Linalg transformations as patterns ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements logic and helpers to expose Linalg transforms as rewrite 10 // patterns. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "mlir/Dialect/Linalg/Transforms/Transforms.h" 15 #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" 16 #include "mlir/Dialect/Linalg/IR/LinalgOps.h" 17 #include "mlir/Dialect/Linalg/Utils/Utils.h" 18 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" 19 #include "mlir/Dialect/Utils/StructuredOpsUtils.h" 20 #include "mlir/Dialect/Vector/EDSC/Intrinsics.h" 21 #include "mlir/Dialect/Vector/VectorOps.h" 22 #include "mlir/IR/AffineExpr.h" 23 #include "mlir/IR/Matchers.h" 24 #include "mlir/Pass/Pass.h" 25 #include "mlir/Support/LLVM.h" 26 #include "mlir/Transforms/GreedyPatternRewriteDriver.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <type_traits> 30 31 #define DEBUG_TYPE "linalg-transforms" 32 33 using namespace mlir; 34 using namespace mlir::edsc; 35 using namespace mlir::edsc::intrinsics; 36 using namespace mlir::linalg; 37 38 #define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ") 39 40 //===----------------------------------------------------------------------===// 41 // Transformations exposed as rewrite patterns. 42 //===----------------------------------------------------------------------===// 43 // Marker used as attribute name in generated Linalg rewriting transformations. 44 const StringLiteral mlir::linalg::LinalgTransforms::kLinalgTransformMarker = 45 "__internal_linalg_transform__"; 46 47 mlir::linalg::LinalgMarker::LinalgMarker(ArrayRef<Identifier> matchDisjunction, 48 Optional<Identifier> replacement) 49 : matchDisjunction(matchDisjunction.begin(), matchDisjunction.end()), 50 replacement(replacement) {} 51 52 LogicalResult 53 mlir::linalg::LinalgMarker::checkAndNotify(PatternRewriter &rewriter, 54 Operation *op) const { 55 auto attr = op->template getAttrOfType<StringAttr>( 56 LinalgTransforms::kLinalgTransformMarker); 57 58 if (!attr) { 59 // 1. Has no marker case and matchDisjunction is empty. 60 if (matchDisjunction.empty()) 61 return success(); 62 63 // 2. Has no marker but was expecting a marker. 64 return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { 65 diag << " does not have any marker from list: "; 66 interleaveComma(matchDisjunction, diag); 67 }); 68 } 69 70 // 4. Match explicit marker. 71 for (auto marker : matchDisjunction) 72 if (attr.getValue() == marker) 73 return success(); 74 75 // 5. Fail to match. 76 return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { 77 diag << " does not have any marker from list: "; 78 interleaveComma(matchDisjunction, diag); 79 }); 80 } 81 82 void mlir::linalg::LinalgMarker::replaceLinalgMarker(PatternRewriter &rewriter, 83 Operation *op) const { 84 if (replacement.hasValue()) 85 op->setAttr(LinalgTransforms::kLinalgTransformMarker, 86 rewriter.getStringAttr(replacement.getValue())); 87 else 88 op->removeAttr(Identifier::get(LinalgTransforms::kLinalgTransformMarker, 89 rewriter.getContext())); 90 } 91 92 LinalgTilingOptions & 93 mlir::linalg::LinalgTilingOptions::setTileSizes(ArrayRef<int64_t> ts) { 94 SmallVector<int64_t, 4> tileSizes(ts.begin(), ts.end()); 95 tileSizeComputationFunction = [tileSizes](OpBuilder &b, Operation *op) { 96 OpBuilder::InsertionGuard guard(b); 97 b.setInsertionPointToStart( 98 &op->getParentOfType<FuncOp>().getBody().front()); 99 return llvm::to_vector<4>(map_range(tileSizes, [&](int64_t s) { 100 Value v = b.create<ConstantIndexOp>(op->getLoc(), s); 101 return v; 102 })); 103 }; 104 return *this; 105 } 106 107 /// Linalg base tiling pattern. 108 mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( 109 StringRef opName, MLIRContext *context, LinalgTilingOptions options, 110 LinalgMarker marker, PatternBenefit benefit) 111 : RewritePattern(opName, {}, benefit, context), marker(marker), 112 options(options) {} 113 114 mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( 115 LinalgTilingOptions options, LinalgMarker marker, PatternBenefit benefit) 116 : RewritePattern(benefit, MatchAnyOpTypeTag()), marker(marker), 117 options(options) {} 118 119 LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewriteBase( 120 Operation *op, PatternRewriter &rewriter, TiledLinalgOp &result) const { 121 LinalgOp linalgOp = dyn_cast<LinalgOp>(op); 122 if (!linalgOp) 123 return failure(); 124 if (failed(marker.checkAndNotify(rewriter, linalgOp))) 125 return failure(); 126 127 Optional<TiledLinalgOp> res = tileLinalgOp(rewriter, linalgOp, options); 128 129 if (!res) 130 return failure(); 131 132 // Return relevant information to derived pattern. 133 result = *res; 134 135 // New marker if specified. 136 marker.replaceLinalgMarker(rewriter, res->op.getOperation()); 137 return success(); 138 } 139 140 mlir::linalg::LinalgBaseTileAndFusePattern::LinalgBaseTileAndFusePattern( 141 StringRef opName, MLIRContext *context, 142 const LinalgDependenceGraph &dependenceGraph, 143 LinalgTilingOptions tilingOptions, LinalgFusionOptions fusionOptions, 144 LinalgMarker marker, LinalgMarker fusedOpMarker, 145 LinalgMarker originalOpMarker, PatternBenefit benefit) 146 : RewritePattern(opName, {}, benefit, context), 147 dependenceGraph(dependenceGraph), tilingOptions(tilingOptions), 148 fusionOptions(fusionOptions), marker(marker), 149 fusedOpMarker(fusedOpMarker), originalOpMarker(originalOpMarker) {} 150 151 LogicalResult mlir::linalg::LinalgBaseTileAndFusePattern::matchAndRewrite( 152 Operation *op, PatternRewriter &rewriter) const { 153 LinalgOp linalgOp = dyn_cast<LinalgOp>(op); 154 if (!linalgOp) 155 return failure(); 156 if (failed(marker.checkAndNotify(rewriter, linalgOp))) 157 return failure(); 158 if (!linalgOp.hasBufferSemantics()) 159 return failure(); 160 161 DenseSet<Operation *> producers; 162 producers.insert(linalgOp); 163 for (auto dependence : dependenceGraph.getDependentOperations(linalgOp)) { 164 if (!fusionOptions.indicesToFuse.count( 165 dependence.indexingOpView->getOperandNumber())) 166 continue; 167 if (isa<LinalgOp>(dependence.dependentOpView->getOwner())) 168 producers.insert(dependence.dependentOpView->getOwner()); 169 } 170 171 SmallVector<LinalgOp, 1> fusionOps; 172 for (auto it = op->getBlock()->begin(), ie = Block::iterator(op); it != ie; 173 ++it) { 174 auto producerLinalgOp = dyn_cast<LinalgOp>(&(*it)); 175 if (producerLinalgOp && producers.count(producerLinalgOp)) 176 fusionOps.push_back(producerLinalgOp); 177 } 178 fusionOps.push_back(linalgOp); 179 180 SmallVector<Value, 4> tileSizes = 181 tilingOptions.tileSizeComputationFunction(rewriter, op); 182 LinalgTilingOptions instanceTilingOptions = tilingOptions; 183 instanceTilingOptions.setTileSizes(tileSizes); 184 Optional<TiledAndFusedLinalgOps> tiledAndFusedOps = tileAndFuseLinalgOps( 185 rewriter, fusionOps, dependenceGraph, instanceTilingOptions); 186 if (!tiledAndFusedOps) 187 return failure(); 188 189 // Tile the unfused loops; 190 SmallVector<Value, 4> unfusedLoopTileSizes; 191 Value zero = rewriter.create<ConstantIndexOp>(op->getLoc(), 0); 192 for (auto tileSize : enumerate(tileSizes)) { 193 if (tiledAndFusedOps->fusedLoopDims.count(tileSize.index())) 194 unfusedLoopTileSizes.push_back(zero); 195 else 196 unfusedLoopTileSizes.push_back(tileSize.value()); 197 } 198 // Tile the loop only if there is a non-zero tile size. 199 if (unfusedLoopTileSizes.size() > linalgOp.getNumLoops()) 200 unfusedLoopTileSizes.resize(linalgOp.getNumLoops()); 201 if (llvm::any_of(unfusedLoopTileSizes, [](Value val) { 202 if (auto cst = val.getDefiningOp<ConstantIndexOp>()) 203 return cst.getValue() != 0; 204 return true; 205 })) { 206 LinalgTilingOptions unfusedTilingOptions = tilingOptions; 207 unfusedTilingOptions.setTileSizes(unfusedLoopTileSizes); 208 Optional<TiledLinalgOp> unfusedTiledOp = 209 tileLinalgOp(rewriter, tiledAndFusedOps->op, unfusedTilingOptions); 210 if (!unfusedTiledOp) 211 return failure(); 212 rewriter.eraseOp(tiledAndFusedOps->op); 213 tiledAndFusedOps->op = unfusedTiledOp->op; 214 } 215 216 marker.replaceLinalgMarker(rewriter, tiledAndFusedOps->op.getOperation()); 217 for (auto fusedOp : tiledAndFusedOps->fusedProducers) { 218 fusedOpMarker.replaceLinalgMarker(rewriter, fusedOp.getOperation()); 219 } 220 for (auto origProducerOp : ArrayRef<LinalgOp>(fusionOps).drop_back()) { 221 originalOpMarker.replaceLinalgMarker(rewriter, 222 origProducerOp.getOperation()); 223 } 224 rewriter.updateRootInPlace( 225 op, [&]() { originalOpMarker.replaceLinalgMarker(rewriter, op); }); 226 return success(); 227 } 228 229 /// Linalg base interchange pattern. 230 mlir::linalg::LinalgBaseInterchangePattern::LinalgBaseInterchangePattern( 231 StringRef opName, MLIRContext *context, 232 ArrayRef<unsigned> interchangeVector, LinalgMarker marker, 233 PatternBenefit benefit) 234 : RewritePattern(opName, {}, benefit, context), marker(marker), 235 interchangeVector(interchangeVector.begin(), interchangeVector.end()) {} 236 237 LogicalResult mlir::linalg::LinalgBaseInterchangePattern::matchAndRewrite( 238 Operation *op, PatternRewriter &rewriter) const { 239 LinalgOp linalgOp = dyn_cast<LinalgOp>(op); 240 if (!linalgOp) 241 return failure(); 242 if (failed(marker.checkAndNotify(rewriter, linalgOp))) 243 return failure(); 244 if (failed(interchangeGenericLinalgOpPrecondition(op, interchangeVector))) 245 return failure(); 246 247 // TODO: figure out how this interplays with named ops. In particular this 248 // should break the named op property. 249 rewriter.updateRootInPlace(op, [&]() { 250 interchange(linalgOp, interchangeVector); 251 // New marker if specified. 252 marker.replaceLinalgMarker(rewriter, op); 253 }); 254 return success(); 255 } 256 257 mlir::linalg::LinalgBasePromotionPattern::LinalgBasePromotionPattern( 258 StringRef opName, MLIRContext *context, LinalgPromotionOptions options, 259 LinalgMarker marker, PatternBenefit benefit) 260 : RewritePattern(opName, {}, benefit, context), marker(marker), 261 options(options) {} 262 263 LogicalResult mlir::linalg::LinalgBasePromotionPattern::matchAndRewrite( 264 Operation *op, PatternRewriter &rewriter) const { 265 if (failed(marker.checkAndNotify(rewriter, op))) 266 return failure(); 267 if (failed(promoteSubviewsPrecondition(op, options))) 268 return failure(); 269 270 // TODO: We cannot use root update here. This pattern is creating other ops, 271 // so if the promotion fails, those need to be cleaned up, which doesnt seem 272 // to be happening here. So to fail properly, we should be cloning the op and 273 // deleting the previous op. This needs more investigation. 274 rewriter.startRootUpdate(op); 275 Optional<LinalgOp> promotedOp = promoteSubViews(rewriter, op, options); 276 if (!promotedOp) { 277 rewriter.cancelRootUpdate(op); 278 return op->emitError("subview promotion failed"); 279 } 280 rewriter.finalizeRootUpdate(op); 281 marker.replaceLinalgMarker(rewriter, op); 282 return success(); 283 } 284 285 mlir::linalg::LinalgBaseVectorizationPattern::LinalgBaseVectorizationPattern( 286 StringRef opName, MLIRContext *context, LinalgMarker marker, 287 PatternBenefit benefit) 288 : RewritePattern(opName, {}, benefit, context), marker(marker) {} 289 290 LogicalResult mlir::linalg::LinalgBaseVectorizationPattern::matchAndRewrite( 291 Operation *op, PatternRewriter &rewriter) const { 292 LinalgOp linalgOp = dyn_cast<LinalgOp>(op); 293 if (!linalgOp) 294 return failure(); 295 if (failed(marker.checkAndNotify(rewriter, linalgOp))) 296 return failure(); 297 if (failed(vectorizeLinalgOpPrecondition(op))) 298 return failure(); 299 vectorizeLinalgOp(rewriter, op); 300 rewriter.eraseOp(op); 301 return success(); 302 } 303 304 LogicalResult mlir::linalg::applyStagedPatterns( 305 Operation *op, ArrayRef<FrozenRewritePatternList> stage1Patterns, 306 const FrozenRewritePatternList &stage2Patterns, 307 function_ref<LogicalResult(Operation *)> stage3Lambda) { 308 unsigned iteration = 0; 309 (void)iteration; 310 for (const auto &patterns : stage1Patterns) { 311 LLVM_DEBUG(DBGS() << "Before 1st stage, iter: " << ++iteration << "\n" 312 << *op); 313 if (failed(applyPatternsAndFoldGreedily(op, patterns))) { 314 LLVM_DEBUG(DBGS() << "Underlying first stage rewrite did not converge"); 315 return failure(); 316 } 317 LLVM_DEBUG(DBGS() << "After 1st stage, iter: " << ++iteration << "\n" 318 << *op); 319 if (failed(applyPatternsAndFoldGreedily(op, stage2Patterns))) { 320 LLVM_DEBUG(DBGS() << "Underlying 2nd stage rewrite did not converge"); 321 return failure(); 322 } 323 LLVM_DEBUG(DBGS() << "After 2nd stage, iter : " << iteration << "\n" 324 << *op); 325 if (stage3Lambda) { 326 if (failed(stage3Lambda(op))) 327 return failure(); 328 LLVM_DEBUG(DBGS() << "After 3rd stage, iter : " << iteration << "\n" 329 << *op); 330 } 331 } 332 return success(); 333 } 334 335 /// Traverse `e` and return an AffineExpr where all occurrences of `dim` have 336 /// been replaced by either: 337 /// - `min` if `positivePath` is true when we reach an occurrence of `dim` 338 /// - `max` if `positivePath` is true when we reach an occurrence of `dim` 339 /// `positivePath` is negated each time we hit a multiplicative or divisive 340 /// binary op with a constant negative coefficient. 341 static AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min, 342 AffineExpr max, bool positivePath = true) { 343 if (e == dim) 344 return positivePath ? min : max; 345 if (auto bin = e.dyn_cast<AffineBinaryOpExpr>()) { 346 AffineExpr lhs = bin.getLHS(); 347 AffineExpr rhs = bin.getRHS(); 348 if (bin.getKind() == mlir::AffineExprKind::Add) 349 return substWithMin(lhs, dim, min, max, positivePath) + 350 substWithMin(rhs, dim, min, max, positivePath); 351 352 auto c1 = bin.getLHS().dyn_cast<AffineConstantExpr>(); 353 auto c2 = bin.getRHS().dyn_cast<AffineConstantExpr>(); 354 if (c1 && c1.getValue() < 0) 355 return getAffineBinaryOpExpr( 356 bin.getKind(), c1, substWithMin(rhs, dim, min, max, !positivePath)); 357 if (c2 && c2.getValue() < 0) 358 return getAffineBinaryOpExpr( 359 bin.getKind(), substWithMin(lhs, dim, min, max, !positivePath), c2); 360 return getAffineBinaryOpExpr( 361 bin.getKind(), substWithMin(lhs, dim, min, max, positivePath), 362 substWithMin(rhs, dim, min, max, positivePath)); 363 } 364 return e; 365 } 366 367 /// Given the `lbVal`, `ubVal` and `stepVal` of a loop, append `lbVal` and 368 /// `ubVal` to `dims` and `stepVal` to `symbols`. 369 /// Create new AffineDimExpr (`%lb` and `%ub`) and AffineSymbolExpr (`%step`) 370 /// with positions matching the newly appended values. Substitute occurrences of 371 /// `dimExpr` by either the min expression (i.e. `%lb`) or the max expression 372 /// (i.e. `%lb + %step * floordiv(%ub -1 - %lb, %step)`), depending on whether 373 /// the induction variable is used with a positive or negative coefficient. 374 static AffineExpr substituteLoopInExpr(AffineExpr expr, AffineExpr dimExpr, 375 Value lbVal, Value ubVal, Value stepVal, 376 SmallVectorImpl<Value> &dims, 377 SmallVectorImpl<Value> &symbols) { 378 MLIRContext *ctx = lbVal.getContext(); 379 AffineExpr lb = getAffineDimExpr(dims.size(), ctx); 380 dims.push_back(lbVal); 381 AffineExpr ub = getAffineDimExpr(dims.size(), ctx); 382 dims.push_back(ubVal); 383 AffineExpr step = getAffineSymbolExpr(symbols.size(), ctx); 384 symbols.push_back(stepVal); 385 LLVM_DEBUG(DBGS() << "Before: " << expr << "\n"); 386 AffineExpr ee = substWithMin(expr, dimExpr, lb, 387 lb + step * ((ub - 1) - lb).floorDiv(step)); 388 LLVM_DEBUG(DBGS() << "After: " << expr << "\n"); 389 return ee; 390 } 391 392 /// Traverse the `dims` and substitute known min or max expressions in place of 393 /// induction variables in `exprs`. 394 static AffineMap substitute(AffineMap map, SmallVectorImpl<Value> &dims, 395 SmallVectorImpl<Value> &symbols) { 396 auto exprs = llvm::to_vector<4>(map.getResults()); 397 for (AffineExpr &expr : exprs) { 398 bool substituted = true; 399 while (substituted) { 400 substituted = false; 401 for (unsigned dimIdx = 0; dimIdx < dims.size(); ++dimIdx) { 402 Value dim = dims[dimIdx]; 403 AffineExpr dimExpr = getAffineDimExpr(dimIdx, expr.getContext()); 404 LLVM_DEBUG(DBGS() << "Subst: " << dim << " @ " << dimExpr << "\n"); 405 AffineExpr substitutedExpr; 406 if (auto forOp = scf::getForInductionVarOwner(dim)) 407 substitutedExpr = substituteLoopInExpr( 408 expr, dimExpr, forOp.lowerBound(), forOp.upperBound(), 409 forOp.step(), dims, symbols); 410 411 if (auto parallelForOp = scf::getParallelForInductionVarOwner(dim)) 412 for (unsigned idx = 0, e = parallelForOp.getNumLoops(); idx < e; 413 ++idx) 414 substitutedExpr = substituteLoopInExpr( 415 expr, dimExpr, parallelForOp.lowerBound()[idx], 416 parallelForOp.upperBound()[idx], parallelForOp.step()[idx], 417 dims, symbols); 418 419 if (!substitutedExpr) 420 continue; 421 422 substituted = (substitutedExpr != expr); 423 expr = substitutedExpr; 424 } 425 } 426 427 // Cleanup and simplify the results. 428 // This needs to happen outside of the loop iterating on dims.size() since 429 // it modifies dims. 430 SmallVector<Value, 4> operands(dims.begin(), dims.end()); 431 operands.append(symbols.begin(), symbols.end()); 432 auto map = AffineMap::get(dims.size(), symbols.size(), exprs, 433 exprs.front().getContext()); 434 435 LLVM_DEBUG(DBGS() << "Map to simplify: " << map << "\n"); 436 437 // Pull in affine.apply operations and compose them fully into the 438 // result. 439 fullyComposeAffineMapAndOperands(&map, &operands); 440 canonicalizeMapAndOperands(&map, &operands); 441 map = simplifyAffineMap(map); 442 // Assign the results. 443 exprs.assign(map.getResults().begin(), map.getResults().end()); 444 dims.assign(operands.begin(), operands.begin() + map.getNumDims()); 445 symbols.assign(operands.begin() + map.getNumDims(), operands.end()); 446 447 LLVM_DEBUG(DBGS() << "Map simplified: " << map << "\n"); 448 } 449 450 assert(!exprs.empty() && "Unexpected empty exprs"); 451 return AffineMap::get(dims.size(), symbols.size(), exprs, map.getContext()); 452 } 453 454 LogicalResult AffineMinSCFCanonicalizationPattern::matchAndRewrite( 455 AffineMinOp minOp, PatternRewriter &rewriter) const { 456 LLVM_DEBUG(DBGS() << "Canonicalize AffineMinSCF: " << *minOp.getOperation() 457 << "\n"); 458 459 SmallVector<Value, 4> dims(minOp.getDimOperands()), 460 symbols(minOp.getSymbolOperands()); 461 AffineMap map = substitute(minOp.getAffineMap(), dims, symbols); 462 463 LLVM_DEBUG(DBGS() << "Resulting map: " << map << "\n"); 464 465 // Check whether any of the expressions, when subtracted from all other 466 // expressions, produces only >= 0 constants. If so, it is the min. 467 for (auto e : minOp.getAffineMap().getResults()) { 468 LLVM_DEBUG(DBGS() << "Candidate min: " << e << "\n"); 469 if (!e.isSymbolicOrConstant()) 470 continue; 471 472 auto isNonPositive = [](AffineExpr e) { 473 if (auto cst = e.dyn_cast<AffineConstantExpr>()) 474 return cst.getValue() < 0; 475 return true; 476 }; 477 478 // Build the subMap and check everything is statically known to be 479 // positive. 480 SmallVector<AffineExpr, 4> subExprs; 481 subExprs.reserve(map.getNumResults()); 482 for (auto ee : map.getResults()) 483 subExprs.push_back(ee - e); 484 MLIRContext *ctx = minOp.getContext(); 485 AffineMap subMap = simplifyAffineMap( 486 AffineMap::get(map.getNumDims(), map.getNumSymbols(), subExprs, ctx)); 487 LLVM_DEBUG(DBGS() << "simplified subMap: " << subMap << "\n"); 488 if (llvm::any_of(subMap.getResults(), isNonPositive)) 489 continue; 490 491 // Static min found. 492 if (auto cst = e.dyn_cast<AffineConstantExpr>()) { 493 rewriter.replaceOpWithNewOp<ConstantIndexOp>(minOp, cst.getValue()); 494 } else { 495 auto resultMap = AffineMap::get(0, map.getNumSymbols(), {e}, ctx); 496 SmallVector<Value, 4> resultOperands = dims; 497 resultOperands.append(symbols.begin(), symbols.end()); 498 canonicalizeMapAndOperands(&resultMap, &resultOperands); 499 resultMap = simplifyAffineMap(resultMap); 500 rewriter.replaceOpWithNewOp<AffineApplyOp>(minOp, resultMap, 501 resultOperands); 502 } 503 return success(); 504 } 505 506 return failure(); 507 } 508