1 //===- AffineLoopNormalize.cpp - AffineLoopNormalize Pass -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a normalizer for affine loop-like ops. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PassDetail.h" 14 #include "mlir/Dialect/Affine/IR/AffineOps.h" 15 #include "mlir/Dialect/Affine/IR/AffineValueMap.h" 16 #include "mlir/Dialect/Affine/Passes.h" 17 #include "mlir/Dialect/Affine/Utils.h" 18 #include "mlir/IR/PatternMatch.h" 19 #include "mlir/Transforms/LoopUtils.h" 20 21 using namespace mlir; 22 23 void mlir::normalizeAffineParallel(AffineParallelOp op) { 24 AffineMap lbMap = op.lowerBoundsMap(); 25 SmallVector<int64_t, 8> steps = op.getSteps(); 26 // No need to do any work if the parallel op is already normalized. 27 bool isAlreadyNormalized = 28 llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) { 29 int64_t step = std::get<0>(tuple); 30 auto lbExpr = 31 std::get<1>(tuple).template dyn_cast<AffineConstantExpr>(); 32 return lbExpr && lbExpr.getValue() == 0 && step == 1; 33 }); 34 if (isAlreadyNormalized) 35 return; 36 37 AffineValueMap ranges = op.getRangesValueMap(); 38 auto builder = OpBuilder::atBlockBegin(op.getBody()); 39 auto zeroExpr = builder.getAffineConstantExpr(0); 40 SmallVector<AffineExpr, 8> lbExprs; 41 SmallVector<AffineExpr, 8> ubExprs; 42 for (unsigned i = 0, e = steps.size(); i < e; ++i) { 43 int64_t step = steps[i]; 44 45 // Adjust the lower bound to be 0. 46 lbExprs.push_back(zeroExpr); 47 48 // Adjust the upper bound expression: 'range / step'. 49 AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step); 50 ubExprs.push_back(ubExpr); 51 52 // Adjust the corresponding IV: 'lb + i * step'. 53 BlockArgument iv = op.getBody()->getArgument(i); 54 AffineExpr lbExpr = lbMap.getResult(i); 55 unsigned nDims = lbMap.getNumDims(); 56 auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step; 57 auto map = AffineMap::get(/*dimCount=*/nDims + 1, 58 /*symbolCount=*/lbMap.getNumSymbols(), expr); 59 60 // Use an 'affine.apply' op that will be simplified later in subsequent 61 // canonicalizations. 62 OperandRange lbOperands = op.getLowerBoundsOperands(); 63 OperandRange dimOperands = lbOperands.take_front(nDims); 64 OperandRange symbolOperands = lbOperands.drop_front(nDims); 65 SmallVector<Value, 8> applyOperands{dimOperands}; 66 applyOperands.push_back(iv); 67 applyOperands.append(symbolOperands.begin(), symbolOperands.end()); 68 auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands); 69 iv.replaceAllUsesExcept(apply, SmallPtrSet<Operation *, 1>{apply}); 70 } 71 72 SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1); 73 op.setSteps(newSteps); 74 auto newLowerMap = AffineMap::get( 75 /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext()); 76 op.setLowerBounds({}, newLowerMap); 77 auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(), 78 ubExprs, op.getContext()); 79 op.setUpperBounds(ranges.getOperands(), newUpperMap); 80 } 81 82 /// Normalizes affine.for ops. If the affine.for op has only a single iteration 83 /// only then it is simply promoted, else it is normalized in the traditional 84 /// way, by converting the lower bound to zero and loop step to one. The upper 85 /// bound is set to the trip count of the loop. For now, original loops must 86 /// have lower bound with a single result only. There is no such restriction on 87 /// upper bounds. 88 static void normalizeAffineFor(AffineForOp op) { 89 if (succeeded(promoteIfSingleIteration(op))) 90 return; 91 92 // Check if the forop is already normalized. 93 if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) && 94 (op.getStep() == 1)) 95 return; 96 97 // Check if the lower bound has a single result only. Loops with a max lower 98 // bound can't be normalized without additional support like 99 // affine.execute_region's. If the lower bound does not have a single result 100 // then skip this op. 101 if (op.getLowerBoundMap().getNumResults() != 1) 102 return; 103 104 Location loc = op.getLoc(); 105 OpBuilder opBuilder(op); 106 int64_t origLoopStep = op.getStep(); 107 108 // Calculate upperBound for normalized loop. 109 SmallVector<Value, 4> ubOperands; 110 AffineBound lb = op.getLowerBound(); 111 AffineBound ub = op.getUpperBound(); 112 ubOperands.reserve(ub.getNumOperands() + lb.getNumOperands()); 113 AffineMap origLbMap = lb.getMap(); 114 AffineMap origUbMap = ub.getMap(); 115 116 // Add dimension operands from upper/lower bound. 117 for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j) 118 ubOperands.push_back(ub.getOperand(j)); 119 for (unsigned j = 0, e = origLbMap.getNumDims(); j < e; ++j) 120 ubOperands.push_back(lb.getOperand(j)); 121 122 // Add symbol operands from upper/lower bound. 123 for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j) 124 ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j)); 125 for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j) 126 ubOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j)); 127 128 // Add original result expressions from lower/upper bound map. 129 SmallVector<AffineExpr, 1> origLbExprs(origLbMap.getResults().begin(), 130 origLbMap.getResults().end()); 131 SmallVector<AffineExpr, 2> origUbExprs(origUbMap.getResults().begin(), 132 origUbMap.getResults().end()); 133 SmallVector<AffineExpr, 4> newUbExprs; 134 135 // The original upperBound can have more than one result. For the new 136 // upperBound of this loop, take difference of all possible combinations of 137 // the ub results and lb result and ceildiv with the loop step. For e.g., 138 // 139 // affine.for %i1 = 0 to min affine_map<(d0)[] -> (d0 + 32, 1024)>(%i0) 140 // will have an upperBound map as, 141 // affine_map<(d0)[] -> (((d0 + 32) - 0) ceildiv 1, (1024 - 0) ceildiv 142 // 1)>(%i0) 143 // 144 // Insert all combinations of upper/lower bound results. 145 for (unsigned i = 0, e = origUbExprs.size(); i < e; ++i) { 146 newUbExprs.push_back( 147 (origUbExprs[i] - origLbExprs[0]).ceilDiv(origLoopStep)); 148 } 149 150 // Construct newUbMap. 151 AffineMap newUbMap = 152 AffineMap::get(origLbMap.getNumDims() + origUbMap.getNumDims(), 153 origLbMap.getNumSymbols() + origUbMap.getNumSymbols(), 154 newUbExprs, opBuilder.getContext()); 155 156 // Normalize the loop. 157 op.setUpperBound(ubOperands, newUbMap); 158 op.setLowerBound({}, opBuilder.getConstantAffineMap(0)); 159 op.setStep(1); 160 161 // Calculate the Value of new loopIV. Create affine.apply for the value of 162 // the loopIV in normalized loop. 163 opBuilder.setInsertionPointToStart(op.getBody()); 164 SmallVector<Value, 4> lbOperands(lb.getOperands().begin(), 165 lb.getOperands().begin() + 166 lb.getMap().getNumDims()); 167 // Add an extra dim operand for loopIV. 168 lbOperands.push_back(op.getInductionVar()); 169 // Add symbol operands from lower bound. 170 for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j) 171 lbOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j)); 172 173 AffineExpr origIVExpr = opBuilder.getAffineDimExpr(lb.getMap().getNumDims()); 174 AffineExpr newIVExpr = origIVExpr * origLoopStep + origLbMap.getResult(0); 175 AffineMap ivMap = AffineMap::get(origLbMap.getNumDims() + 1, 176 origLbMap.getNumSymbols(), newIVExpr); 177 Operation *newIV = opBuilder.create<AffineApplyOp>(loc, ivMap, lbOperands); 178 op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), 179 SmallPtrSet<Operation *, 1>{newIV}); 180 } 181 182 namespace { 183 184 /// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1. 185 /// As currently implemented, this pass cannot fail, but it might skip over ops 186 /// that are already in a normalized form. 187 struct AffineLoopNormalizePass 188 : public AffineLoopNormalizeBase<AffineLoopNormalizePass> { 189 190 void runOnFunction() override { 191 getFunction().walk([](Operation *op) { 192 if (auto affineParallel = dyn_cast<AffineParallelOp>(op)) 193 normalizeAffineParallel(affineParallel); 194 else if (auto affineFor = dyn_cast<AffineForOp>(op)) 195 normalizeAffineFor(affineFor); 196 }); 197 } 198 }; 199 200 } // namespace 201 202 std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineLoopNormalizePass() { 203 return std::make_unique<AffineLoopNormalizePass>(); 204 } 205