//===- LoopAnalysis.cpp - Misc loop analysis routines ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include <type_traits>

using namespace mlir;

/// Computes, via its output arguments, the trip count of the loop as an affine
/// map together with its operands if the trip count is expressible as an
/// affine expression, and a null AffineMap otherwise. The trip count
/// expression is simplified before returning. This method only utilizes map
/// composition to construct lower and upper bounds before computing the trip
/// count expressions.
void mlir::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStep();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(ceilDiv(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // The difference of each upper bound expression and the single lower bound
  // expression (ceil-divided by the step) provides the expressions for the
  // trip count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}

/// Returns the trip count of the loop if it's a constant, None otherwise. This
/// method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine constant trip counts
/// in non-trivial cases.
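///
/// For example (illustrative), the loop `affine.for %i = 0 to 10 step 2` has
/// a constant trip count of 5 (ceildiv(10 - 0, 2)), while a loop whose trip
/// count expression is not a known constant yields None.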
Optional<uint64_t> mlir::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return None;

  // Take the min if all trip counts are constant.
  Optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      if (tripCount)
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return None;
  }
  return tripCount;
}

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through
/// getTripCountMapAndOperands), and this method is thus able to determine
/// non-trivial divisors.
uint64_t mlir::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  Optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      uint64_t tripCount = constExpr.getValue();
      // 0-iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd)
      gcd = llvm::GreatestCommonDivisor64(*gcd, thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return gcd.value();
}

/// Given an induction variable `iv` of an AffineForOp and an access `index` of
/// type index, returns `true` if `index` is independent of `iv` and false
/// otherwise. The determination supports composition with at most one
/// AffineApplyOp. The 'at most one AffineApplyOp' restriction comes from the
/// fact that AffineApplyOp compositions are expected to be canonicalized by
/// construction, so that code composing arbitrary numbers of AffineApplyOps
/// does not have to be written everywhere. To achieve this, at the very least,
/// the compose-affine-apply pass must have been run.
///
/// Prerequisites:
///   1. `iv` and `index` are of the proper type;
///   2. at most one AffineApplyOp is reachable from `index`.
///
/// Returns false conservatively if more than one AffineApplyOp is reachable.
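///
/// For example (illustrative), with `%i` the induction variable of an
/// affine.for op: an index `%j` defined above the loop is invariant along
/// `%i`, `%i` itself is not, and a value defined as
/// `affine.apply affine_map<(d0) -> (d0 + 1)>(%i)` is not invariant either,
/// since its value map is a function of `%i`.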
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isForInductionVar(iv) && "iv must be an AffineForOp induction var");
  assert(index.getType().isa<IndexType>() && "index must be of IndexType");
  SmallVector<Operation *, 4> affineApplyOps;
  getReachableAffineApplyOps({index}, affineApplyOps);

  if (affineApplyOps.empty()) {
    // Pointer equality test because of Value pointer semantics.
    return index != iv;
  }

  if (affineApplyOps.size() > 1) {
    affineApplyOps[0]->emitRemark(
        "CompositionAffineMapsPass must have been run: there should be at most "
        "one AffineApplyOp, returning false conservatively.");
    return false;
  }

  auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
  // We need yet another level of indirection because the `dim` index of the
  // access may not correspond to the `dim` index of composeOp.
  return !composeOp.getAffineValueMap().isFunctionOf(0, iv);
}

DenseSet<Value> mlir::getInvariantAccesses(Value iv, ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}

/// Given:
///   1. an induction variable `iv` of an AffineForOp;
///   2. a `memoryOp` of type AffineLoadOp or AffineStoreOp;
/// determines whether `memoryOp` has a contiguous access along `iv`.
/// Contiguous is defined as either invariant or varying only along a unique
/// MemRef dim. Upon success, the unique MemRef dim is written to `memRefDim`
/// (or -1 to convey that the memRef access is invariant along `iv`).
///
/// Prerequisites:
///   1. `memRefDim` != nullptr;
///   2. `iv` of the proper type;
///   3. the MemRef accessed by `memoryOp` has no layout map or an identity
///      layout map.
///
/// Returns false, conservatively, if the MemRef has a non-identity layout map.
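///
/// For example (illustrative), for a 2-d memref `%A`, the access `%A[%i, %j]`
/// is contiguous along `%j` (with `*memRefDim` set to 0, the fastest-varying
/// memref dimension) and along `%i` (with `*memRefDim` set to 1), whereas
/// `%A[%i, %i]` varies in two memref dimensions along `%i` and is rejected.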
///
// TODO: check strides.
template <typename LoadOrStoreOp>
static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                               int *memRefDim) {
  static_assert(
      llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
      "Must be called on either AffineLoadOp or AffineStoreOp");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layoutMap"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather the map operands used by result expr 'i' in 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = expr.dyn_cast<AffineDimExpr>())
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>())
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (auto exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return memRefType.getElementType().template isa<VectorType>();
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only loads/stores of scalar element types are considered vectorizable;
    // every load/store in the body must qualify for the loop to be
    // vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}
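/// Checks whether the loop body can be vectorized along `loop`'s induction
/// variable and, via `memRefDim`, that all contiguous load/store accesses vary
/// along the same memref dimension. For example (illustrative), the loop
///   affine.for %i = 0 to 128 {
///     %0 = affine.load %A[%i] : memref<128xf32>
///     affine.store %0, %B[%i] : memref<128xf32>
///   }
/// is reported vectorizable with `*memRefDim == 0`, since both accesses are
/// contiguous along `%i` and the body contains no conditionals or ops with
/// unknown regions.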
bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim,
                                  NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load,
                                                  &thisOpMemRefDim)
                             : isContiguousAccess(loop.getInductionVar(), store,
                                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::isVectorizableLoopBody(AffineForOp loop,
                                  NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
bool mlir::isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}