1 //===- AffineLoopNormalize.cpp - AffineLoopNormalize Pass -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a normalizer for affine loop-like ops. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PassDetail.h" 14 #include "mlir/Dialect/Affine/IR/AffineOps.h" 15 #include "mlir/Dialect/Affine/IR/AffineValueMap.h" 16 #include "mlir/Dialect/Affine/Passes.h" 17 #include "mlir/Dialect/Affine/Utils.h" 18 #include "mlir/IR/PatternMatch.h" 19 #include "mlir/Transforms/LoopUtils.h" 20 21 using namespace mlir; 22 23 void mlir::normalizeAffineParallel(AffineParallelOp op) { 24 // Loops with min/max in bounds are not normalized at the moment. 25 if (op.hasMinMaxBounds()) 26 return; 27 28 AffineMap lbMap = op.lowerBoundsMap(); 29 SmallVector<int64_t, 8> steps = op.getSteps(); 30 // No need to do any work if the parallel op is already normalized. 31 bool isAlreadyNormalized = 32 llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) { 33 int64_t step = std::get<0>(tuple); 34 auto lbExpr = 35 std::get<1>(tuple).template dyn_cast<AffineConstantExpr>(); 36 return lbExpr && lbExpr.getValue() == 0 && step == 1; 37 }); 38 if (isAlreadyNormalized) 39 return; 40 41 AffineValueMap ranges; 42 AffineValueMap::difference(op.getUpperBoundsValueMap(), 43 op.getLowerBoundsValueMap(), &ranges); 44 auto builder = OpBuilder::atBlockBegin(op.getBody()); 45 auto zeroExpr = builder.getAffineConstantExpr(0); 46 SmallVector<AffineExpr, 8> lbExprs; 47 SmallVector<AffineExpr, 8> ubExprs; 48 for (unsigned i = 0, e = steps.size(); i < e; ++i) { 49 int64_t step = steps[i]; 50 51 // Adjust the lower bound to be 0. 52 lbExprs.push_back(zeroExpr); 53 54 // Adjust the upper bound expression: 'range / step'. 55 AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step); 56 ubExprs.push_back(ubExpr); 57 58 // Adjust the corresponding IV: 'lb + i * step'. 59 BlockArgument iv = op.getBody()->getArgument(i); 60 AffineExpr lbExpr = lbMap.getResult(i); 61 unsigned nDims = lbMap.getNumDims(); 62 auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step; 63 auto map = AffineMap::get(/*dimCount=*/nDims + 1, 64 /*symbolCount=*/lbMap.getNumSymbols(), expr); 65 66 // Use an 'affine.apply' op that will be simplified later in subsequent 67 // canonicalizations. 68 OperandRange lbOperands = op.getLowerBoundsOperands(); 69 OperandRange dimOperands = lbOperands.take_front(nDims); 70 OperandRange symbolOperands = lbOperands.drop_front(nDims); 71 SmallVector<Value, 8> applyOperands{dimOperands}; 72 applyOperands.push_back(iv); 73 applyOperands.append(symbolOperands.begin(), symbolOperands.end()); 74 auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands); 75 iv.replaceAllUsesExcept(apply, apply); 76 } 77 78 SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1); 79 op.setSteps(newSteps); 80 auto newLowerMap = AffineMap::get( 81 /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext()); 82 op.setLowerBounds({}, newLowerMap); 83 auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(), 84 ubExprs, op.getContext()); 85 op.setUpperBounds(ranges.getOperands(), newUpperMap); 86 } 87 88 /// Normalizes affine.for ops. If the affine.for op has only a single iteration 89 /// only then it is simply promoted, else it is normalized in the traditional 90 /// way, by converting the lower bound to zero and loop step to one. The upper 91 /// bound is set to the trip count of the loop. For now, original loops must 92 /// have lower bound with a single result only. There is no such restriction on 93 /// upper bounds. 94 static void normalizeAffineFor(AffineForOp op) { 95 if (succeeded(promoteIfSingleIteration(op))) 96 return; 97 98 // Check if the forop is already normalized. 99 if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) && 100 (op.getStep() == 1)) 101 return; 102 103 // Check if the lower bound has a single result only. Loops with a max lower 104 // bound can't be normalized without additional support like 105 // affine.execute_region's. If the lower bound does not have a single result 106 // then skip this op. 107 if (op.getLowerBoundMap().getNumResults() != 1) 108 return; 109 110 Location loc = op.getLoc(); 111 OpBuilder opBuilder(op); 112 int64_t origLoopStep = op.getStep(); 113 114 // Calculate upperBound for normalized loop. 115 SmallVector<Value, 4> ubOperands; 116 AffineBound lb = op.getLowerBound(); 117 AffineBound ub = op.getUpperBound(); 118 ubOperands.reserve(ub.getNumOperands() + lb.getNumOperands()); 119 AffineMap origLbMap = lb.getMap(); 120 AffineMap origUbMap = ub.getMap(); 121 122 // Add dimension operands from upper/lower bound. 123 for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j) 124 ubOperands.push_back(ub.getOperand(j)); 125 for (unsigned j = 0, e = origLbMap.getNumDims(); j < e; ++j) 126 ubOperands.push_back(lb.getOperand(j)); 127 128 // Add symbol operands from upper/lower bound. 129 for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j) 130 ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j)); 131 for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j) 132 ubOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j)); 133 134 // Add original result expressions from lower/upper bound map. 135 SmallVector<AffineExpr, 1> origLbExprs(origLbMap.getResults().begin(), 136 origLbMap.getResults().end()); 137 SmallVector<AffineExpr, 2> origUbExprs(origUbMap.getResults().begin(), 138 origUbMap.getResults().end()); 139 SmallVector<AffineExpr, 4> newUbExprs; 140 141 // The original upperBound can have more than one result. For the new 142 // upperBound of this loop, take difference of all possible combinations of 143 // the ub results and lb result and ceildiv with the loop step. For e.g., 144 // 145 // affine.for %i1 = 0 to min affine_map<(d0)[] -> (d0 + 32, 1024)>(%i0) 146 // will have an upperBound map as, 147 // affine_map<(d0)[] -> (((d0 + 32) - 0) ceildiv 1, (1024 - 0) ceildiv 148 // 1)>(%i0) 149 // 150 // Insert all combinations of upper/lower bound results. 151 for (unsigned i = 0, e = origUbExprs.size(); i < e; ++i) { 152 newUbExprs.push_back( 153 (origUbExprs[i] - origLbExprs[0]).ceilDiv(origLoopStep)); 154 } 155 156 // Construct newUbMap. 157 AffineMap newUbMap = 158 AffineMap::get(origLbMap.getNumDims() + origUbMap.getNumDims(), 159 origLbMap.getNumSymbols() + origUbMap.getNumSymbols(), 160 newUbExprs, opBuilder.getContext()); 161 162 // Normalize the loop. 163 op.setUpperBound(ubOperands, newUbMap); 164 op.setLowerBound({}, opBuilder.getConstantAffineMap(0)); 165 op.setStep(1); 166 167 // Calculate the Value of new loopIV. Create affine.apply for the value of 168 // the loopIV in normalized loop. 169 opBuilder.setInsertionPointToStart(op.getBody()); 170 SmallVector<Value, 4> lbOperands(lb.getOperands().begin(), 171 lb.getOperands().begin() + 172 lb.getMap().getNumDims()); 173 // Add an extra dim operand for loopIV. 174 lbOperands.push_back(op.getInductionVar()); 175 // Add symbol operands from lower bound. 176 for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j) 177 lbOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j)); 178 179 AffineExpr origIVExpr = opBuilder.getAffineDimExpr(lb.getMap().getNumDims()); 180 AffineExpr newIVExpr = origIVExpr * origLoopStep + origLbMap.getResult(0); 181 AffineMap ivMap = AffineMap::get(origLbMap.getNumDims() + 1, 182 origLbMap.getNumSymbols(), newIVExpr); 183 Operation *newIV = opBuilder.create<AffineApplyOp>(loc, ivMap, lbOperands); 184 op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV); 185 } 186 187 namespace { 188 189 /// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1. 190 /// As currently implemented, this pass cannot fail, but it might skip over ops 191 /// that are already in a normalized form. 192 struct AffineLoopNormalizePass 193 : public AffineLoopNormalizeBase<AffineLoopNormalizePass> { 194 195 void runOnFunction() override { 196 getFunction().walk([](Operation *op) { 197 if (auto affineParallel = dyn_cast<AffineParallelOp>(op)) 198 normalizeAffineParallel(affineParallel); 199 else if (auto affineFor = dyn_cast<AffineForOp>(op)) 200 normalizeAffineFor(affineFor); 201 }); 202 } 203 }; 204 205 } // namespace 206 207 std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineLoopNormalizePass() { 208 return std::make_unique<AffineLoopNormalizePass>(); 209 } 210