//===- AffineAnalysis.cpp - Affine structures analysis routines ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for affine structures
// (expressions, maps, sets), and other utilities relying on such analysis.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Support/MathExtras.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "affine-analysis"

using namespace mlir;

/// Get the value that is being reduced by the `pos`-th reduction in the loop
/// if such a reduction can be performed by affine parallel loops. This assumes
/// floating-point operations are commutative. On success, `kind` will be the
/// reduction kind suitable for use in the affine parallel loop builder. If the
/// reduction is not supported, returns null.
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
                                   arith::AtomicRMWKind &kind) {
  SmallVector<Operation *> combinerOps;
  Value reducedVal =
      matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
  if (!reducedVal)
    return nullptr;

  // Expected only one combiner operation.
  if (combinerOps.size() > 1)
    return nullptr;

  Operation *combinerOp = combinerOps.back();
  Optional<arith::AtomicRMWKind> maybeKind =
      TypeSwitch<Operation *, Optional<arith::AtomicRMWKind>>(combinerOp)
          .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
          .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
          .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
          .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
          .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
          .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
          .Case([](arith::MinFOp) { return arith::AtomicRMWKind::minf; })
          .Case([](arith::MaxFOp) { return arith::AtomicRMWKind::maxf; })
          .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
          .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
          .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
          .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
          .Default([](Operation *) -> Optional<arith::AtomicRMWKind> {
            // TODO: AtomicRMW supports other kinds of reductions that are not
            // detected here; add them when the need arises.
            return llvm::None;
          });
  if (!maybeKind)
    return nullptr;

  kind = *maybeKind;
  return reducedVal;
}

/// Populate `supportedReductions` with descriptors of the supported
/// reductions.
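/// For illustration, a loop of the following form (hypothetical IR, shown only
/// as an example of what the matcher accepts) is reported as an `addf`
/// reduction on iter_arg 0:
///
///   affine.for %i = 0 to 128 iter_args(%acc = %cst) -> (f32) {
///     %v = affine.load %A[%i] : memref<128xf32>
///     %sum = arith.addf %acc, %v : f32
///     affine.yield %sum : f32
///   }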
void mlir::getSupportedReductions(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();
  if (numIterArgs == 0)
    return;
  supportedReductions.reserve(numIterArgs);
  for (unsigned i = 0; i < numIterArgs; ++i) {
    arith::AtomicRMWKind kind;
    if (Value value = getSupportedReduction(forOp, i, kind))
      supportedReductions.emplace_back(LoopReduction{kind, i, value});
  }
}

/// Returns true if `forOp' is a parallel loop. If `parallelReductions` is
/// provided, populates it with descriptors of the parallelizable reductions
/// and treats them as not preventing parallelization.
bool mlir::isLoopParallel(AffineForOp forOp,
                          SmallVectorImpl<LoopReduction> *parallelReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();

  // Loop is not parallel if it has SSA loop-carried dependences and reduction
  // detection is not requested.
  if (numIterArgs > 0 && !parallelReductions)
    return false;

  // Find supported reductions if requested.
  if (parallelReductions) {
    getSupportedReductions(forOp, *parallelReductions);
    // Return later to allow for identifying all parallel reductions even if
    // the loop is not parallel.
    if (parallelReductions->size() != numIterArgs)
      return false;
  }

  // Check memory dependences.
  return isLoopMemoryParallel(forOp);
}

/// Returns true if `forOp' doesn't have memory dependences preventing
/// parallelization. This function doesn't check iter_args and should be used
/// only as a building block for full parallel-checking functions.
bool mlir::isLoopMemoryParallel(AffineForOp forOp) {
  // Collect all load and store ops in the loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
    else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
             !MemoryEffectOpInterface::hasNoEffect(op))
      return WalkResult::interrupt();

    return WalkResult::advance();
  });

  // Stop early if the loop has unknown ops with side effects.
  if (walkResult.wasInterrupted())
    return false;

  // Dep check depth would be number of enclosing loops + 1.
  unsigned depth = getNestingDepth(forOp) + 1;

  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
  for (auto *srcOp : loadAndStoreOps) {
    MemRefAccess srcAccess(srcOp);
    for (auto *dstOp : loadAndStoreOps) {
      MemRefAccess dstAccess(dstOp);
      FlatAffineValueConstraints dependenceConstraints;
      DependenceResult result = checkMemrefAccessDependence(
          srcAccess, dstAccess, depth, &dependenceConstraints,
          /*dependenceComponents=*/nullptr);
      if (result.value != DependenceResult::NoDependence)
        return false;
    }
  }
  return true;
}

/// Returns in 'affineApplyOps' the sequence of AffineApplyOp operations that
/// are reachable via a search starting from 'operands' and ending at operands
/// that are not defined by an AffineApplyOp.
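/// For example (hypothetical IR, for illustration only), a search starting
/// from {%idx} returns both affine.apply ops below, since %idx is defined by
/// an affine.apply whose operand %off is itself the result of an
/// affine.apply:
///
///   %off = affine.apply affine_map<(d0) -> (d0 * 2)>(%i)
///   %idx = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%j, %off)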
// TODO: Add a method to AffineApplyOp which forward substitutes the
// AffineApplyOp into any user AffineApplyOps.
void mlir::getReachableAffineApplyOps(
    ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
  struct State {
    // The SSA value for this node in the DFS traversal.
    Value value;
    // The operand index of 'value' to explore next during DFS traversal.
    unsigned operandIndex;
  };
  SmallVector<State, 4> worklist;
  for (auto operand : operands) {
    worklist.push_back({operand, 0});
  }

  while (!worklist.empty()) {
    State &state = worklist.back();
    auto *opInst = state.value.getDefiningOp();
    // Note: getDefiningOp will return nullptr if the operand is not an
    // Operation (i.e. a block argument), which is a terminator for the search.
    if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
      worklist.pop_back();
      continue;
    }

    if (state.operandIndex == 0) {
      // Pre-visit: Add 'opInst' to the reachable sequence.
      affineApplyOps.push_back(opInst);
    }
    if (state.operandIndex < opInst->getNumOperands()) {
      // Visit: Add the next AffineApplyOp operand to the worklist.
      // Get the next operand to visit at 'operandIndex'.
      auto nextOperand = opInst->getOperand(state.operandIndex);
      // Increment 'operandIndex' in 'state'.
      ++state.operandIndex;
      // Add 'nextOperand' to the worklist.
      worklist.push_back({nextOperand, 0});
    } else {
      // Post-visit: done visiting the operands of the AffineApplyOp, pop off
      // the stack.
      worklist.pop_back();
    }
  }
}

// Builds a system of constraints with dimensional identifiers corresponding to
// the loop IVs of the forOps appearing in that order. Any symbols found in
// the bound operands are added as symbols in the system. Returns failure for
// the yet unimplemented cases.
// TODO: Handle non-unit steps through local variables or stride information in
// FlatAffineValueConstraints. (For eg., by using iv - lb % step = 0 and/or by
// introducing a method in FlatAffineValueConstraints
// setExprStride(ArrayRef<int64_t> expr, int64_t stride).)
LogicalResult mlir::getIndexSet(MutableArrayRef<Operation *> ops,
                                FlatAffineValueConstraints *domain) {
  SmallVector<Value, 4> indices;
  SmallVector<AffineForOp, 8> forOps;

  for (Operation *op : ops) {
    assert((isa<AffineForOp, AffineIfOp>(op)) &&
           "ops should be of type AffineForOp or AffineIfOp");
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op))
      forOps.push_back(forOp);
  }
  extractForInductionVars(forOps, &indices);
  // Reset while associating Values in 'indices' to the domain.
  domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
  for (Operation *op : ops) {
    // Add constraints from forOp's bounds.
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      if (failed(domain->addAffineForOpDomain(forOp)))
        return failure();
    } else if (AffineIfOp ifOp = dyn_cast<AffineIfOp>(op)) {
      domain->addAffineIfOpDomain(ifOp);
    }
  }
  return success();
}

/// Computes the iteration domain for 'op' and populates 'indexSet', which
/// encapsulates the constraints involving loops surrounding 'op' and
/// potentially involving any Function symbols. The dimensional identifiers in
/// 'indexSet' correspond to the loops surrounding 'op' from outermost to
/// innermost.
static LogicalResult getOpIndexSet(Operation *op,
                                   FlatAffineValueConstraints *indexSet) {
  SmallVector<Operation *, 4> ops;
  getEnclosingAffineForAndIfOps(*op, &ops);
  return getIndexSet(ops, indexSet);
}

// Returns the number of outer loops common to 'src/dstDomain'.
// Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
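// For example (a hypothetical nest, for illustration only), for a store in the
// %j loop and a load in the %k loop below, only the outer loop %i is common to
// both domains, so this returns 1:
//
//   affine.for %i = 0 to 10 {
//     affine.for %j = 0 to 10 {
//       affine.store %v, %A[%i, %j] : memref<10x10xf32>
//     }
//     affine.for %k = 0 to 10 {
//       %u = affine.load %A[%i, %k] : memref<10x10xf32>
//     }
//   }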
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
                  const FlatAffineValueConstraints &dstDomain,
                  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
  // Find the number of common loops shared by src and dst accesses.
  unsigned minNumLoops =
      std::min(srcDomain.getNumDimIds(), dstDomain.getNumDimIds());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if (!isForInductionVar(srcDomain.getValue(i)) ||
        !isForInductionVar(dstDomain.getValue(i)) ||
        srcDomain.getValue(i) != dstDomain.getValue(i))
      break;
    if (commonLoops != nullptr)
      commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
    ++numCommonLoops;
  }
  if (commonLoops != nullptr)
    assert(commonLoops->size() == numCommonLoops);
  return numCommonLoops;
}

/// Returns the Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
static Block *getCommonBlock(const MemRefAccess &srcAccess,
                             const MemRefAccess &dstAccess,
                             const FlatAffineValueConstraints &srcDomain,
                             unsigned numCommonLoops) {
  // Get the chain of ancestor blocks for the given `MemRefAccess` instance.
  // The search terminates when either an op with the `AffineScope` trait or
  // `endBlock` is reached.
  auto getChainOfAncestorBlocks = [&](const MemRefAccess &access,
                                      SmallVector<Block *, 4> &ancestorBlocks,
                                      Block *endBlock = nullptr) {
    Block *currBlock = access.opInst->getBlock();
    // The loop terminates when currBlock is nullptr, equals endBlock, or its
    // parent operation holds an affine scope.
    while (currBlock && currBlock != endBlock &&
           !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
      ancestorBlocks.push_back(currBlock);
      currBlock = currBlock->getParentOp()->getBlock();
    }
  };

  if (numCommonLoops == 0) {
    Block *block = srcAccess.opInst->getBlock();
    while (!llvm::isa<FuncOp>(block->getParentOp())) {
      block = block->getParentOp()->getBlock();
    }
    return block;
  }
  Value commonForIV = srcDomain.getValue(numCommonLoops - 1);
  AffineForOp forOp = getForInductionVarOwner(commonForIV);
  assert(forOp && "commonForValue was not an induction variable");

  // Find the closest common block, including those in AffineIf.
  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
  getChainOfAncestorBlocks(srcAccess, srcAncestorBlocks, forOp.getBody());
  getChainOfAncestorBlocks(dstAccess, dstAncestorBlocks, forOp.getBody());

  Block *commonBlock = forOp.getBody();
  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
       i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
       i--, j--)
    commonBlock = srcAncestorBlocks[i];

  return commonBlock;
}

// Returns true if the ancestor operation of 'srcAccess' appears before the
// ancestor operation of 'dstAccess' in the common ancestral block. Returns
// false otherwise.
// Note that because 'srcAccess' or 'dstAccess' may be nested in conditionals,
// the function is named 'srcAppearsBeforeDstInAncestralBlock'. Note that
// 'numCommonLoops' is the number of contiguous surrounding outer loops.
static bool srcAppearsBeforeDstInAncestralBlock(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    const FlatAffineValueConstraints &srcDomain, unsigned numCommonLoops) {
  // Get the Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
  auto *commonBlock =
      getCommonBlock(srcAccess, dstAccess, srcDomain, numCommonLoops);
  // Check the dominance relationship between the respective ancestors of the
  // src and dst in the Block of the innermost among the common loops.
  auto *srcInst = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
  assert(srcInst != nullptr);
  auto *dstInst = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
  assert(dstInst != nullptr);

  // Determine whether dstInst comes after srcInst.
  return srcInst->isBeforeInBlock(dstInst);
}

// Adds ordering constraints to 'dependenceDomain' based on the number of loops
// common to 'src/dstDomain' and the requested 'loopDepth'.
// Note that 'loopDepth' cannot exceed the number of common loops plus one.
// EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
// *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
// *) If 'loopDepth == 2' then two constraints are added: i == i' and j' >= j + 1
// *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
static void
addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
                       const FlatAffineValueConstraints &dstDomain,
                       unsigned loopDepth,
                       FlatAffineValueConstraints *dependenceDomain) {
  unsigned numCols = dependenceDomain->getNumCols();
  SmallVector<int64_t, 4> eq(numCols);
  unsigned numSrcDims = srcDomain.getNumDimIds();
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[i] = -1;
    eq[i + numSrcDims] = 1;
    if (i == loopDepth - 1) {
      eq[numCols - 1] = -1;
      dependenceDomain->addInequality(eq);
    } else {
      dependenceDomain->addEquality(eq);
    }
  }
}

// Computes distance and direction vectors in 'dependences', by adding
// variables to 'dependenceDomain' which represent the difference of the IVs,
// eliminating all other variables, and reading off distance vectors from
// equality constraints (if possible), and direction vectors from inequalities.
static void computeDirectionVector(
    const FlatAffineValueConstraints &srcDomain,
    const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
    FlatAffineValueConstraints *dependenceDomain,
    SmallVector<DependenceComponent, 2> *dependenceComponents) {
  // Find the number of common loops shared by src and dst accesses.
  SmallVector<AffineForOp, 4> commonLoops;
  unsigned numCommonLoops =
      getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
  if (numCommonLoops == 0)
    return;
  // Compute direction vectors for the requested loop depth.
  unsigned numIdsToEliminate = dependenceDomain->getNumIds();
  // Add new variables to 'dependenceDomain' to represent the direction
  // constraints for each shared loop.
  dependenceDomain->insertDimId(/*pos=*/0, /*num=*/numCommonLoops);

  // Add equality constraints for each common loop, setting the newly
  // introduced variable at column 'j' to the 'dst' IV minus the 'src' IV.
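  // (That is, each added equality is d_j + src_iv_j - dst_iv_j = 0, so the new
  // variable d_j equals dst_iv_j - src_iv_j; e.g. a dependence from iteration
  // i to iteration i + 2 of a common loop yields d_j = 2.)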
  SmallVector<int64_t, 4> eq;
  eq.resize(dependenceDomain->getNumCols());
  unsigned numSrcDims = srcDomain.getNumDimIds();
  // Constraint variables format:
  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[j] = 1;
    eq[j + numCommonLoops] = 1;
    eq[j + numCommonLoops + numSrcDims] = -1;
    dependenceDomain->addEquality(eq);
  }

  // Eliminate all variables other than the direction variables just added.
  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);

  // Scan each common loop variable column and set direction vectors based
  // on the eliminated constraint system.
  dependenceComponents->resize(numCommonLoops);
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    (*dependenceComponents)[j].op = commonLoops[j].getOperation();
    auto lbConst =
        dependenceDomain->getConstantBound(FlatAffineConstraints::LB, j);
    (*dependenceComponents)[j].lb =
        lbConst.getValueOr(std::numeric_limits<int64_t>::min());
    auto ubConst =
        dependenceDomain->getConstantBound(FlatAffineConstraints::UB, j);
    (*dependenceComponents)[j].ub =
        ubConst.getValueOr(std::numeric_limits<int64_t>::max());
  }
}

LogicalResult MemRefAccess::getAccessRelation(FlatAffineRelation &rel) const {
  // Create a set corresponding to the domain of the access.
  FlatAffineValueConstraints domain;
  if (failed(getOpIndexSet(opInst, &domain)))
    return failure();

  // Get the access relation from the access map.
  AffineValueMap accessValueMap;
  getAccessMap(&accessValueMap);
  if (failed(getRelationFromMap(accessValueMap, rel)))
    return failure();

  FlatAffineRelation domainRel(rel.getNumDomainDims(), /*numRangeDims=*/0,
                               domain);

  // Merge and align the domain ids of `rel` with the ids of `domain`. Since
  // the domain of the access map is a subset of the domain of the access, the
  // domain ids of `rel` are guaranteed to be a subset of the ids of `domain`.
  for (unsigned i = 0, e = domain.getNumDimIds(); i < e; ++i) {
    unsigned loc;
    if (rel.findId(domain.getValue(i), &loc)) {
      rel.swapId(i, loc);
    } else {
      rel.insertDomainId(i);
      rel.setValue(i, domain.getValue(i));
    }
  }

  // Append the domain constraints to `rel`.
  domainRel.appendRangeId(rel.getNumRangeDims());
  domainRel.mergeSymbolIds(rel);
  domainRel.mergeLocalIds(rel);
  rel.append(domainRel);

  return success();
}

// Populates 'accessMap' with the composition of AffineApplyOps reachable from
// the indices of the MemRefAccess.
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
  // Get the affine map from the AffineLoad/Store.
  AffineMap map;
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
    map = loadOp.getAffineMap();
  else
    map = cast<AffineWriteOpInterface>(opInst).getAffineMap();

  SmallVector<Value, 8> operands(indices.begin(), indices.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  accessMap->reset(map, operands);
}

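// As an illustration of the composition performed by getAccessMap above
// (hypothetical IR, for exposition only): for
//
//   %a = affine.apply affine_map<(d0) -> (d0 + 1)>(%i)
//   %v = affine.load %m[%a, %j] : memref<100x100xf32>
//
// the composed and canonicalized access map is (d0, d1) -> (d0 + 1, d1) with
// operands (%i, %j).
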
// Builds a flat affine constraint system to check if there exists a dependence
// between memref accesses 'srcAccess' and 'dstAccess'.
// Returns 'NoDependence' if the accesses can be definitively shown not to
// access the same element.
// Returns 'HasDependence' if the accesses do access the same element.
// Returns 'Failure' if an error or unsupported case was encountered.
// If a dependence exists, returns in 'dependenceComponents' a direction
// vector for the dependence, with a component for each loop IV in loops
// common to both accesses (see Dependence in AffineAnalysis.h for details).
//
// The memref access dependence check is comprised of the following steps:
// *) Build an access relation for each access. An access relation maps
//    elements of an iteration domain to the element(s) of an array domain
//    accessed by that iteration of the associated statement through some
//    array reference.
// *) Compute the dependence relation by composing the access relation of
//    `srcAccess` with the inverse of the access relation of `dstAccess`.
//    Doing this builds a relation between the iteration domain of `srcAccess`
//    and the iteration domain of `dstAccess` which access the same memory
//    location.
// *) Add ordering constraints for `srcAccess` to be accessed before
//    `dstAccess`.
//
// This method builds a constraint system with the following column format:
//
//   [src-dim-identifiers, dst-dim-identifiers, symbols, constant]
//
// For example, given the following MLIR code with "source" and "destination"
// accesses to the same memref label, and symbols %M, %N, %K:
//
//   affine.for %i0 = 0 to 100 {
//     affine.for %i1 = 0 to 50 {
//       %a0 = affine.apply
//         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
//       // Source memref access.
//       store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
//     }
//   }
//
//   affine.for %i2 = 0 to 100 {
//     affine.for %i3 = 0 to 50 {
//       %a1 = affine.apply
//         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
//       // Destination memref access.
//       %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
//     }
//   }
//
// The access relation for `srcAccess` would be the following:
//
//   [src_dim0, src_dim1, mem_dim0, mem_dim1,  %N,  %M,  const]
//       2        -4        -1         0        1    0     0     = 0
//       0         3         0        -1        0   -1     0     = 0
//       1         0         0         0        0    0     0    >= 0
//      -1         0         0         0        0    0   100    >= 0
//       0         1         0         0        0    0     0    >= 0
//       0        -1         0         0        0    0    50    >= 0
//
// The access relation for `dstAccess` would be the following:
//
//   [dst_dim0, dst_dim1, mem_dim0, mem_dim1,  %M,  %K,  const]
//       7         9        -1         0       -1    0     0     = 0
//       0        11         0        -1        0    1     0     = 0
//       1         0         0         0        0    0     0    >= 0
//      -1         0         0         0        0    0   100    >= 0
//       0         1         0         0        0    0     0    >= 0
//       0        -1         0         0        0    0    50    >= 0
//
// The equalities in the above relations correspond to the access maps while
// the inequalities correspond to the iteration domain constraints.
//
// The dependence relation formed:
//
//   [src_dim0, src_dim1, dst_dim0, dst_dim1,  %M,  %N,  %K,  const]
//       2        -4        -7        -9        1    1    0     0     = 0
//       0         3         0       -11       -1    0   -1     0     = 0
//       1         0         0         0        0    0    0     0    >= 0
//      -1         0         0         0        0    0    0   100    >= 0
//       0         1         0         0        0    0    0     0    >= 0
//       0        -1         0         0        0    0    0    50    >= 0
//       0         0         1         0        0    0    0     0    >= 0
//       0         0        -1         0        0    0    0   100    >= 0
//       0         0         0         1        0    0    0     0    >= 0
//       0         0         0        -1        0    0    0    50    >= 0
//
// TODO: Support AffineExprs mod/floordiv/ceildiv.
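//
// A typical query looks like the following sketch (names are illustrative; see
// getDependenceComponents below for an in-tree use):
//
//   MemRefAccess srcAccess(srcOp), dstAccess(dstOp);
//   FlatAffineValueConstraints dependenceConstraints;
//   SmallVector<DependenceComponent, 2> depComps;
//   DependenceResult result = checkMemrefAccessDependence(
//       srcAccess, dstAccess, /*loopDepth=*/1, &dependenceConstraints,
//       &depComps);
//   if (hasDependence(result)) {
//     // A dependence exists; depComps holds per-loop direction info.
//   }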
DependenceResult mlir::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
    SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
                          << Twine(loopDepth) << " between:\n";);
  LLVM_DEBUG(srcAccess.opInst->dump(););
  LLVM_DEBUG(dstAccess.opInst->dump(););

  // Return 'NoDependence' if these accesses do not access the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return DependenceResult::NoDependence;

  // Return 'NoDependence' if neither of these accesses is an
  // AffineWriteOpInterface.
  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
      !isa<AffineWriteOpInterface>(dstAccess.opInst))
    return DependenceResult::NoDependence;

  // Create the access relation from each MemRefAccess.
  FlatAffineRelation srcRel, dstRel;
  if (failed(srcAccess.getAccessRelation(srcRel)))
    return DependenceResult::Failure;
  if (failed(dstAccess.getAccessRelation(dstRel)))
    return DependenceResult::Failure;

  FlatAffineValueConstraints srcDomain = srcRel.getDomainSet();
  FlatAffineValueConstraints dstDomain = dstRel.getDomainSet();

  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
  // operation of 'srcAccess' does not properly dominate the ancestor
  // operation of 'dstAccess' in the same common operation block.
  // Note: this check is skipped if 'allowRAR' is true, because RAR deps can
  // exist irrespective of lexicographic ordering b/w src and dst.
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  assert(loopDepth <= numCommonLoops + 1);
  if (!allowRAR && loopDepth > numCommonLoops &&
      !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess, srcDomain,
                                           numCommonLoops)) {
    return DependenceResult::NoDependence;
  }

  // Compute the dependence relation by composing `srcRel` with the inverse of
  // `dstRel`. Doing this builds a relation between the iteration domain of
  // `srcAccess` and the iteration domain of `dstAccess` which access the same
  // memory locations.
  dstRel.inverse();
  dstRel.compose(srcRel);
  *dependenceConstraints = dstRel;

  // Add 'src' happens before 'dst' ordering constraints.
  addOrderingConstraints(srcDomain, dstDomain, loopDepth,
                         dependenceConstraints);

  // Return 'NoDependence' if the solution space is empty: no dependence.
  if (dependenceConstraints->isEmpty())
    return DependenceResult::NoDependence;

  // Compute the dependence direction vector, if requested.
  if (dependenceComponents != nullptr)
    computeDirectionVector(srcDomain, dstDomain, loopDepth,
                           dependenceConstraints, dependenceComponents);

  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
  LLVM_DEBUG(dependenceConstraints->dump());
  return DependenceResult::HasDependence;
}

/// Gathers dependence components for dependences between all ops in the loop
/// nest rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
void mlir::getDependenceComponents(
    AffineForOp forOp, unsigned maxLoopDepth,
    std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
  // Collect all load and store ops in the loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  forOp->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      auto *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        auto *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        FlatAffineValueConstraints dependenceConstraints;
        SmallVector<DependenceComponent, 2> depComps;
        // TODO: Explore whether it would be profitable to pre-compute and
        // store deps instead of repeatedly checking.
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, &dependenceConstraints, &depComps);
        if (hasDependence(result))
          depCompsVec->push_back(depComps);
      }
    }
  }
}