//===- AffineLoopNormalize.cpp - AffineLoopNormalize Pass -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a normalizer for affine loop-like ops.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/LoopUtils.h"

using namespace mlir;

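/// Normalizes affine.parallel ops so that lower bounds are 0 and steps are 1,
/// remapping each induction variable through an affine.apply op that recovers
/// its original value. As a rough sketch of the effect (illustrative IR only;
/// the SSA names, bounds, and step are made-up values, and the exact printed
/// form may differ):
///
///   affine.parallel (%i) = (2) to (%N) step (4) { ... use %i ... }
///
/// becomes, before canonicalization,
///
///   affine.parallel (%ii) = (0) to ((%N - 2) ceildiv 4) step (1) {
///     %i = affine.apply affine_map<(d0) -> (d0 * 4 + 2)>(%ii)
///     ... use %i ...
///   }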
void mlir::normalizeAffineParallel(AffineParallelOp op) {
  // Loops with min/max in bounds are not normalized at the moment.
  if (op.hasMinMaxBounds())
    return;

  AffineMap lbMap = op.lowerBoundsMap();
  SmallVector<int64_t, 8> steps = op.getSteps();
  // No need to do any work if the parallel op is already normalized.
  bool isAlreadyNormalized =
      llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
        int64_t step = std::get<0>(tuple);
        auto lbExpr =
            std::get<1>(tuple).template dyn_cast<AffineConstantExpr>();
        return lbExpr && lbExpr.getValue() == 0 && step == 1;
      });
  if (isAlreadyNormalized)
    return;

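  // Compute the range of each dimension of the original loop: ub_i - lb_i.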
  AffineValueMap ranges;
  AffineValueMap::difference(op.getUpperBoundsValueMap(),
                             op.getLowerBoundsValueMap(), &ranges);
  auto builder = OpBuilder::atBlockBegin(op.getBody());
  auto zeroExpr = builder.getAffineConstantExpr(0);
  SmallVector<AffineExpr, 8> lbExprs;
  SmallVector<AffineExpr, 8> ubExprs;
  for (unsigned i = 0, e = steps.size(); i < e; ++i) {
    int64_t step = steps[i];

    // Adjust the lower bound to be 0.
    lbExprs.push_back(zeroExpr);

    // Adjust the upper bound expression: 'range ceildiv step'.
    AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
    ubExprs.push_back(ubExpr);

    // Adjust the corresponding IV: 'lb + i * step'.
    BlockArgument iv = op.getBody()->getArgument(i);
    AffineExpr lbExpr = lbMap.getResult(i);
    unsigned nDims = lbMap.getNumDims();
    auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
    auto map = AffineMap::get(/*dimCount=*/nDims + 1,
                              /*symbolCount=*/lbMap.getNumSymbols(), expr);
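    // For example (hypothetical values): if this dimension's lower bound
    // expression is the symbol s0 and the step is 4, 'map' is
    // (d0)[s0] -> (s0 + d0 * 4), with the loop IV bound to the trailing dim.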

    // Use an 'affine.apply' op that will be simplified later in subsequent
    // canonicalizations.
    OperandRange lbOperands = op.getLowerBoundsOperands();
    OperandRange dimOperands = lbOperands.take_front(nDims);
    OperandRange symbolOperands = lbOperands.drop_front(nDims);
    SmallVector<Value, 8> applyOperands{dimOperands};
    applyOperands.push_back(iv);
    applyOperands.append(symbolOperands.begin(), symbolOperands.end());
    auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
    iv.replaceAllUsesExcept(apply, apply);
  }

  SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
  op.setSteps(newSteps);
  auto newLowerMap = AffineMap::get(
      /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
  op.setLowerBounds({}, newLowerMap);
  auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
                                    ubExprs, op.getContext());
  op.setUpperBounds(ranges.getOperands(), newUpperMap);
}

/// Normalizes affine.for ops. If the affine.for op has only a single
/// iteration, it is simply promoted. Otherwise, it is normalized in the
/// traditional way: the lower bound is rewritten to zero and the loop step to
/// one, and the upper bound is set to the trip count of the loop. For now, the
/// lower bound of the original loop must have a single result; there is no
/// such restriction on the upper bound.
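///
/// As a rough sketch of the effect (illustrative IR only; the SSA names,
/// bounds, and step are made-up values, and the exact printed form may
/// differ):
///
///   affine.for %i = 2 to %N step 4 { ... use %i ... }
///
/// becomes, before canonicalization,
///
///   affine.for %ii = 0 to affine_map<()[s0] -> ((s0 - 2) ceildiv 4)>()[%N] {
///     %i = affine.apply affine_map<(d0) -> (d0 * 4 + 2)>(%ii)
///     ... use %i ...
///   }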
static void normalizeAffineFor(AffineForOp op) {
  if (succeeded(promoteIfSingleIteration(op)))
    return;

  // Check if the affine.for op is already normalized.
  if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
      (op.getStep() == 1))
    return;

  // Check if the lower bound has a single result only. Loops with a max lower
  // bound can't be normalized without additional support such as
  // affine.execute_region ops. If the lower bound does not have a single
  // result then skip this op.
  if (op.getLowerBoundMap().getNumResults() != 1)
    return;

  Location loc = op.getLoc();
  OpBuilder opBuilder(op);
  int64_t origLoopStep = op.getStep();

  // Calculate upperBound for normalized loop.
  SmallVector<Value, 4> ubOperands;
  AffineBound lb = op.getLowerBound();
  AffineBound ub = op.getUpperBound();
  ubOperands.reserve(ub.getNumOperands() + lb.getNumOperands());
  AffineMap origLbMap = lb.getMap();
  AffineMap origUbMap = ub.getMap();

  // Add dimension operands from upper/lower bound.
  for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j)
    ubOperands.push_back(ub.getOperand(j));
  for (unsigned j = 0, e = origLbMap.getNumDims(); j < e; ++j)
    ubOperands.push_back(lb.getOperand(j));

  // Add symbol operands from upper/lower bound.
  for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j)
    ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j));
  for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
    ubOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));
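  // The operands gathered above are ordered as (ub dims..., lb dims...,
  // ub symbols..., lb symbols...), matching the dim/symbol counts used to
  // construct 'newUbMap' below.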

  // Add original result expressions from lower/upper bound map.
  SmallVector<AffineExpr, 1> origLbExprs(origLbMap.getResults().begin(),
                                         origLbMap.getResults().end());
  SmallVector<AffineExpr, 2> origUbExprs(origUbMap.getResults().begin(),
                                         origUbMap.getResults().end());
  SmallVector<AffineExpr, 4> newUbExprs;

  // The original upper bound can have more than one result. For the new upper
  // bound of this loop, take the difference of each upper bound result and the
  // (single) lower bound result, and ceildiv it by the loop step. For example,
  //
  //   affine.for %i1 = 0 to min affine_map<(d0)[] -> (d0 + 32, 1024)>(%i0)
  //
  // will have an upper bound map of
  //
  //   affine_map<(d0)[] -> (((d0 + 32) - 0) ceildiv 1,
  //                         (1024 - 0) ceildiv 1)>(%i0)
  //
  // Insert all combinations of upper/lower bound results.
  for (unsigned i = 0, e = origUbExprs.size(); i < e; ++i) {
    newUbExprs.push_back(
        (origUbExprs[i] - origLbExprs[0]).ceilDiv(origLoopStep));
  }

  // Construct newUbMap.
  AffineMap newUbMap =
      AffineMap::get(origLbMap.getNumDims() + origUbMap.getNumDims(),
                     origLbMap.getNumSymbols() + origUbMap.getNumSymbols(),
                     newUbExprs, opBuilder.getContext());

  // Normalize the loop.
  op.setUpperBound(ubOperands, newUbMap);
  op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
  op.setStep(1);

  // Compute the value of the original loop IV in terms of the normalized one:
  // create an affine.apply op evaluating 'lb + iv * step' and replace all
  // other uses of the IV with its result.
  opBuilder.setInsertionPointToStart(op.getBody());
  SmallVector<Value, 4> lbOperands(lb.getOperands().begin(),
                                   lb.getOperands().begin() +
                                       lb.getMap().getNumDims());
  // Add an extra dim operand for loopIV.
  lbOperands.push_back(op.getInductionVar());
  // Add symbol operands from lower bound.
  for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
    lbOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));

  AffineExpr origIVExpr = opBuilder.getAffineDimExpr(lb.getMap().getNumDims());
  AffineExpr newIVExpr = origIVExpr * origLoopStep + origLbMap.getResult(0);
  AffineMap ivMap = AffineMap::get(origLbMap.getNumDims() + 1,
                                   origLbMap.getNumSymbols(), newIVExpr);
  Operation *newIV = opBuilder.create<AffineApplyOp>(loc, ivMap, lbOperands);
  op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV);
}

namespace {

/// Normalize affine.for and affine.parallel ops so that lower bounds are 0
/// and steps are 1. As currently implemented, this pass cannot fail, but it
/// might skip over ops that are already in a normalized form.
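/// The pass is registered (via the Affine dialect's Passes.td) under the
/// argument 'affine-loop-normalize', i.e. it is typically invoked as
/// 'mlir-opt -affine-loop-normalize'.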
struct AffineLoopNormalizePass
    : public AffineLoopNormalizeBase<AffineLoopNormalizePass> {

  void runOnFunction() override {
    getFunction().walk([](Operation *op) {
      if (auto affineParallel = dyn_cast<AffineParallelOp>(op))
        normalizeAffineParallel(affineParallel);
      else if (auto affineFor = dyn_cast<AffineForOp>(op))
        normalizeAffineFor(affineFor);
    });
  }
};

} // namespace

std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineLoopNormalizePass() {
  return std::make_unique<AffineLoopNormalizePass>();
}