//===- LoopAnalysis.cpp - Misc loop analysis routines --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include <type_traits>

using namespace mlir;
/// Returns, via `tripCountMap` and `tripCountOperands`, the trip count of the
/// loop as an affine map if it is expressible as an affine expression, and a
/// null map otherwise. The trip count expression is simplified before
/// returning. This method only utilizes map composition to construct lower and
/// upper bounds before computing the trip count expressions.
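///
/// Illustrative sketch (not from the upstream docs): for a loop such as
///   affine.for %i = 5 to %N step 2
/// the computed trip count map is along the lines of
///   ()[s0] -> ((s0 - 5) ceildiv 2)
/// with %N as its single operand, while constant bounds fold to a constant
/// map, e.g. `affine.for %i = 0 to 128` yields () -> (128).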
void mlir::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStep();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(ceilDiv(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}

/// Returns the trip count of the loop if it's a constant, None otherwise. This
/// method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine constant trip count in
/// non-trivial cases.
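///
/// Illustrative example (one case, not exhaustive): with constant bounds such
/// as `affine.for %i = 0 to 128 step 8`, 16 is returned. For a loop whose
/// upper bound is min(128, %N), the trip count map has two results; since the
/// %N-dependent result is not a constant, None is returned.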
Optional<uint64_t> mlir::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return None;

  // Take the min if all trip counts are constant.
  Optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      if (tripCount.has_value())
        tripCount = std::min(tripCount.value(),
                             static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return None;
  }
  return tripCount;
}

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCountMapAndOperands),
/// and this method is thus able to determine non-trivial divisors.
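///
/// Illustrative example: if the trip count map simplifies to
/// ()[s0] -> (s0 * 4), the largest known divisor is 4 even though the trip
/// count itself is unknown; a known-constant trip count of 0 is treated as
/// divisible by any value.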
uint64_t mlir::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  Optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      uint64_t tripCount = constExpr.getValue();
      // 0-iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.has_value())
      gcd = llvm::GreatestCommonDivisor64(gcd.value(), thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return gcd.value();
}

/// Given an induction variable `iv` of type AffineForOp and an access `index`
/// of type index, returns `true` if `index` is independent of `iv` and
/// false otherwise. The determination supports composition with at most one
/// AffineApplyOp. The 'at most one AffineApplyOp' restriction exists because
/// the composition of AffineApplyOps needs to be canonicalized by construction
/// to avoid writing code that composes arbitrary numbers of AffineApplyOps
/// everywhere. To achieve this, at the very least, the compose-affine-apply
/// pass must have been run.
///
/// Prerequisites:
///   1. `iv` and `index` are of the proper type;
///   2. at most one AffineApplyOp is reachable from `index`.
///
/// Returns false in cases with more than one AffineApplyOp; this is
/// conservative.
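///
/// Illustrative example: with nested loops over %i and %j, an index computed
/// as %idx = affine.apply affine_map<(d0) -> (d0 + 1)>(%j) is invariant with
/// respect to %i (true is returned) but not with respect to %j.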
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isForInductionVar(iv) && "iv must be an AffineForOp IV");
  assert(index.getType().isa<IndexType>() && "index must be of IndexType");
  SmallVector<Operation *, 4> affineApplyOps;
  getReachableAffineApplyOps({index}, affineApplyOps);

  if (affineApplyOps.empty()) {
    // Pointer equality test because of Value pointer semantics.
    return index != iv;
  }

  if (affineApplyOps.size() > 1) {
    affineApplyOps[0]->emitRemark(
        "CompositionAffineMapsPass must have been run: there should be at most "
        "one AffineApplyOp, returning false conservatively.");
    return false;
  }

  auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
  // We need yet another level of indirection because the `dim` index of the
  // access may not correspond to the `dim` index of composeOp.
  return !composeOp.getAffineValueMap().isFunctionOf(0, iv);
}

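// Illustrative example (not from the upstream docs): for an access
// `affine.load %A[%i, %j]` nested under the loop with IV %i,
// getInvariantAccesses(%i, {%i, %j}) returns the set {%j}.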
DenseSet<Value> mlir::getInvariantAccesses(Value iv, ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}

/// Given:
///   1. an induction variable `iv` of type AffineForOp;
///   2. a `memoryOp` of type const LoadOp& or const StoreOp&;
/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
/// is defined as either invariant or varying only along a unique MemRef dim.
/// Upon success, the unique MemRef dim is written to `memRefDim` (or -1 to
/// convey that the memRef access is invariant along `iv`).
///
/// Prerequisites:
///   1. `memRefDim` != nullptr;
///   2. `iv` of the proper type;
///   3. the MemRef accessed by `memoryOp` has no layout map or at most an
///      identity layout map.
///
/// Currently only supports a missing or identity layout map on the MemRef.
/// Returns false if the MemRef has a non-identity layout map; this is
/// conservative.
///
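/// Illustrative example: for a 2-D memref accessed as %A[%j, %i] inside the
/// loop over %i, only the last (fastest-varying) memref dimension varies with
/// %i, so the access is contiguous and `*memRefDim` is set to 0 (the distance
/// of that dimension from the innermost one); an access %A[%i, %j] sets
/// `*memRefDim` to 1, and %A[%i, %i] is rejected as varying in two dims.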
// TODO: check strides.
template <typename LoadOrStoreOp>
static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                               int *memRefDim) {
  static_assert(
      llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
      "Must be called on either AffineLoadOp or AffineStoreOp");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' into 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = expr.dyn_cast<AffineDimExpr>())
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>())
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (auto exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return memRefType.getElementType().template isa<VectorType>();
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must
    // operate on scalar elements for a loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}

bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim,
                                  NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load,
                                                  &thisOpMemRefDim)
                             : isContiguousAccess(loop.getInductionVar(), store,
                                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::isVectorizableLoopBody(AffineForOp loop,
                                  NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we have
// the support.
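// Illustrative example: for a two-operation body where the first operation
// defines a value consumed by the second, shifts = {0, 0} or {1, 1} are valid
// since the def and its use keep the same shift, while {1, 0} (or any unequal
// pair) is rejected.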
bool mlir::isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If no ancestor operation of the user lies in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}