1 //===- LoopUnrollAndJam.cpp - Code to perform loop unroll and jam ---------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements loop unroll and jam. Unroll and jam is a transformation 10 // that improves locality, in particular, register reuse, while also improving 11 // operation level parallelism. The example below shows what it does in nearly 12 // the general case. Loop unroll and jam currently works if the bounds of the 13 // loops inner to the loop being unroll-jammed do not depend on the latter. 14 // 15 // Before After unroll and jam of i by factor 2: 16 // 17 // for i, step = 2 18 // for i S1(i); 19 // S1; S2(i); 20 // S2; S1(i+1); 21 // for j S2(i+1); 22 // S3; for j 23 // S4; S3(i, j); 24 // S5; S4(i, j); 25 // S6; S3(i+1, j) 26 // S4(i+1, j) 27 // S5(i); 28 // S6(i); 29 // S5(i+1); 30 // S6(i+1); 31 // 32 // Note: 'if/else' blocks are not jammed. So, if there are loops inside if 33 // op's, bodies of those loops will not be jammed. 34 //===----------------------------------------------------------------------===// 35 #include "mlir/Analysis/LoopAnalysis.h" 36 #include "mlir/Dialect/Affine/IR/AffineOps.h" 37 #include "mlir/Dialect/Affine/Passes.h" 38 #include "mlir/IR/AffineExpr.h" 39 #include "mlir/IR/AffineMap.h" 40 #include "mlir/IR/BlockAndValueMapping.h" 41 #include "mlir/IR/Builders.h" 42 #include "mlir/Pass/Pass.h" 43 #include "mlir/Transforms/LoopUtils.h" 44 #include "llvm/ADT/DenseMap.h" 45 #include "llvm/Support/CommandLine.h" 46 47 using namespace mlir; 48 49 #define DEBUG_TYPE "affine-loop-unroll-jam" 50 51 static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); 52 53 // Loop unroll and jam factor. 54 static llvm::cl::opt<unsigned> 55 clUnrollJamFactor("unroll-jam-factor", llvm::cl::Hidden, 56 llvm::cl::desc("Use this unroll jam factor for all loops" 57 " (default 4)"), 58 llvm::cl::cat(clOptionsCategory)); 59 60 namespace { 61 /// Loop unroll jam pass. Currently, this just unroll jams the first 62 /// outer loop in a Function. 63 struct LoopUnrollAndJam : public FunctionPass<LoopUnrollAndJam> { 64 /// Include the generated pass utilities. 65 #define GEN_PASS_AffineLoopUnrollAndJam 66 #include "mlir/Dialect/Affine/Passes.h.inc" 67 68 Optional<unsigned> unrollJamFactor; 69 static const unsigned kDefaultUnrollJamFactor = 4; 70 71 explicit LoopUnrollAndJam(Optional<unsigned> unrollJamFactor = None) 72 : unrollJamFactor(unrollJamFactor) {} 73 74 void runOnFunction() override; 75 LogicalResult runOnAffineForOp(AffineForOp forOp); 76 }; 77 } // end anonymous namespace 78 79 std::unique_ptr<OpPassBase<FuncOp>> 80 mlir::createLoopUnrollAndJamPass(int unrollJamFactor) { 81 return std::make_unique<LoopUnrollAndJam>( 82 unrollJamFactor == -1 ? None : Optional<unsigned>(unrollJamFactor)); 83 } 84 85 void LoopUnrollAndJam::runOnFunction() { 86 // Currently, just the outermost loop from the first loop nest is 87 // unroll-and-jammed by this pass. However, runOnAffineForOp can be called on 88 // any for operation. 89 auto &entryBlock = getFunction().front(); 90 if (auto forOp = dyn_cast<AffineForOp>(entryBlock.front())) 91 runOnAffineForOp(forOp); 92 } 93 94 /// Unroll and jam a 'affine.for' op. Default unroll jam factor is 95 /// kDefaultUnrollJamFactor. Return failure if nothing was done. 96 LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) { 97 // Unroll and jam by the factor that was passed if any. 98 if (unrollJamFactor.hasValue()) 99 return loopUnrollJamByFactor(forOp, unrollJamFactor.getValue()); 100 // Otherwise, unroll jam by the command-line factor if one was specified. 101 if (clUnrollJamFactor.getNumOccurrences() > 0) 102 return loopUnrollJamByFactor(forOp, clUnrollJamFactor); 103 104 // Unroll and jam by four otherwise. 105 return loopUnrollJamByFactor(forOp, kDefaultUnrollJamFactor); 106 } 107