//===- AffineAnalysis.cpp - Affine structures analysis routines -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for affine structures
// (expressions, maps, sets), and other utilities relying on such analysis.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Support/MathExtras.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "affine-analysis"

using namespace mlir;
/// Get the value that is being reduced by the `pos`-th reduction in the loop
/// if such a reduction can be performed by affine parallel loops. This assumes
/// floating-point operations are commutative. On success, `kind` will be the
/// reduction kind suitable for use in the affine parallel loop builder. If the
/// reduction is not supported, returns null.
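///
/// For example, in the (hypothetical) loop below, the single combiner is the
/// `arith.addf` between the iter arg `%acc` and the loaded value, so `kind` is
/// set to `arith::AtomicRMWKind::addf` and `%v` is the value returned:
///
///   %red = affine.for %i = 0 to 128 iter_args(%acc = %init) -> (f32) {
///     %v = affine.load %buf[%i] : memref<128xf32>
///     %sum = arith.addf %acc, %v : f32
///     affine.yield %sum : f32
///   }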
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
                                   arith::AtomicRMWKind &kind) {
  SmallVector<Operation *> combinerOps;
  Value reducedVal =
      matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
  if (!reducedVal)
    return nullptr;

  // Expected only one combiner operation.
  if (combinerOps.size() > 1)
    return nullptr;

  Operation *combinerOp = combinerOps.back();
  Optional<arith::AtomicRMWKind> maybeKind =
      TypeSwitch<Operation *, Optional<arith::AtomicRMWKind>>(combinerOp)
          .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
          .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
          .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
          .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
          .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
          .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
          .Case([](arith::MinFOp) { return arith::AtomicRMWKind::minf; })
          .Case([](arith::MaxFOp) { return arith::AtomicRMWKind::maxf; })
          .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
          .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
          .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
          .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
          .Default([](Operation *) -> Optional<arith::AtomicRMWKind> {
            // TODO: AtomicRMW supports other kinds of reductions that this is
            // currently not detecting; add those when the need arises.
            return llvm::None;
          });
  if (!maybeKind)
    return nullptr;

  kind = *maybeKind;
  return reducedVal;
}

/// Populate `supportedReductions` with descriptors of the supported reductions.
void mlir::getSupportedReductions(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();
  if (numIterArgs == 0)
    return;
  supportedReductions.reserve(numIterArgs);
  for (unsigned i = 0; i < numIterArgs; ++i) {
    arith::AtomicRMWKind kind;
    if (Value value = getSupportedReduction(forOp, i, kind))
      supportedReductions.emplace_back(LoopReduction{kind, i, value});
  }
}

/// Returns true if `forOp' is a parallel loop. If `parallelReductions` is
/// provided, populates it with descriptors of the parallelizable reductions and
/// treats them as not preventing parallelization.
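///
/// A minimal usage sketch (names hypothetical):
///
///   SmallVector<LoopReduction> reductions;
///   if (isLoopParallel(forOp, &reductions)) {
///     // `forOp` is parallel modulo the reductions now listed in
///     // `reductions`, one per iter_args value.
///   }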
bool mlir::isLoopParallel(AffineForOp forOp,
                          SmallVectorImpl<LoopReduction> *parallelReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();

  // Loop is not parallel if it has SSA loop-carried dependences and reduction
  // detection is not requested.
  if (numIterArgs > 0 && !parallelReductions)
    return false;

  // Find supported reductions if requested.
  if (parallelReductions) {
    getSupportedReductions(forOp, *parallelReductions);
    // Return later to allow for identifying all parallel reductions even if the
    // loop is not parallel.
    if (parallelReductions->size() != numIterArgs)
      return false;
  }

  // Check memory dependences.
  return isLoopMemoryParallel(forOp);
}

/// Returns true if `forOp' doesn't have memory dependences preventing
/// parallelization. This function doesn't check iter_args and should be used
/// only as a building block for full parallel-checking functions.
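///
/// A minimal usage sketch (assuming `forOp` has already been checked to have
/// no loop-carried iter_args):
///
///   if (isLoopMemoryParallel(forOp)) {
///     // No pair of memory accesses in the nest forms a dependence carried
///     // by `forOp`.
///   }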
bool mlir::isLoopMemoryParallel(AffineForOp forOp) {
  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
    else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
             !MemoryEffectOpInterface::hasNoEffect(op))
      return WalkResult::interrupt();

    return WalkResult::advance();
  });

  // Stop early if the loop has unknown ops with side effects.
  if (walkResult.wasInterrupted())
    return false;

  // Dep check depth would be number of enclosing loops + 1.
  unsigned depth = getNestingDepth(forOp) + 1;

  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
  for (auto *srcOp : loadAndStoreOps) {
    MemRefAccess srcAccess(srcOp);
    for (auto *dstOp : loadAndStoreOps) {
      MemRefAccess dstAccess(dstOp);
      FlatAffineValueConstraints dependenceConstraints;
      DependenceResult result = checkMemrefAccessDependence(
          srcAccess, dstAccess, depth, &dependenceConstraints,
          /*dependenceComponents=*/nullptr);
      if (result.value != DependenceResult::NoDependence)
        return false;
    }
  }
  return true;
}

/// Returns in 'affineApplyOps' the sequence of AffineApplyOp operations
/// reachable via a backward search starting from 'operands' and ending at
/// operands which are not defined by AffineApplyOps.
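///
/// For example, in the (hypothetical) IR below, a search starting from `%idx`
/// collects both affine.apply ops and terminates at the loop IV `%i`:
///
///   %a = affine.apply affine_map<(d0) -> (d0 + 1)>(%i)
///   %idx = affine.apply affine_map<(d0) -> (d0 * 2)>(%a)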
// TODO: Add a method to AffineApplyOp which forward substitutes the
// AffineApplyOp into any user AffineApplyOps.
void mlir::getReachableAffineApplyOps(
    ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
  struct State {
    // The ssa value for this node in the DFS traversal.
    Value value;
    // The operand index of 'value' to explore next during DFS traversal.
    unsigned operandIndex;
  };
  SmallVector<State, 4> worklist;
  for (auto operand : operands) {
    worklist.push_back({operand, 0});
  }

  while (!worklist.empty()) {
    State &state = worklist.back();
    auto *opInst = state.value.getDefiningOp();
    // Note: getDefiningOp will return nullptr if the operand is not an
    // Operation (i.e. block argument), which is a terminator for the search.
    if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
      worklist.pop_back();
      continue;
    }

    if (state.operandIndex == 0) {
      // Pre-Visit: Add 'opInst' to reachable sequence.
      affineApplyOps.push_back(opInst);
    }
    if (state.operandIndex < opInst->getNumOperands()) {
      // Visit: Add next 'affineApplyOp' operand to worklist.
      // Get next operand to visit at 'operandIndex'.
      auto nextOperand = opInst->getOperand(state.operandIndex);
      // Increment 'operandIndex' in 'state'.
      ++state.operandIndex;
      // Add 'nextOperand' to worklist.
      worklist.push_back({nextOperand, 0});
    } else {
      // Post-visit: done visiting this AffineApplyOp's operands; pop it off
      // the stack.
      worklist.pop_back();
    }
  }
}

// Builds a system of constraints with dimensional identifiers corresponding to
// the loop IVs of the forOps appearing in that order. Any symbols found in
// the bound operands are added as symbols in the system. Returns failure for
// the yet unimplemented cases.
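//
// For example, for the (hypothetical) nest
//
//   affine.for %i = 0 to %N {
//     affine.for %j = 0 to 100 {
//       ...
//     }
//   }
//
// the resulting system has dimensions (%i, %j) and symbol %N, with the
// constraints 0 <= %i <= %N - 1 and 0 <= %j <= 99.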
// TODO: Handle non-unit steps through local variables or stride information in
// FlatAffineValueConstraints (e.g., by using iv - lb % step = 0 and/or by
// introducing a method in FlatAffineValueConstraints such as
// setExprStride(ArrayRef<int64_t> expr, int64_t stride)).
LogicalResult mlir::getIndexSet(MutableArrayRef<Operation *> ops,
                                FlatAffineValueConstraints *domain) {
  SmallVector<Value, 4> indices;
  SmallVector<AffineForOp, 8> forOps;

  for (Operation *op : ops) {
    assert((isa<AffineForOp, AffineIfOp>(op)) &&
           "ops should be AffineForOp or AffineIfOp");
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op))
      forOps.push_back(forOp);
  }
  extractForInductionVars(forOps, &indices);
  // Reset `domain`, associating the Values in 'indices' with its dimensions.
  domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
  for (Operation *op : ops) {
    // Add constraints from forOp's bounds.
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      if (failed(domain->addAffineForOpDomain(forOp)))
        return failure();
    } else if (AffineIfOp ifOp = dyn_cast<AffineIfOp>(op)) {
      domain->addAffineIfOpDomain(ifOp);
    }
  }
  return success();
}

/// Computes the iteration domain for 'op' and populates 'indexSet', which
/// encapsulates the constraints involving loops surrounding 'op' and
/// potentially involving any Function symbols. The dimensional identifiers in
/// 'indexSet' correspond to the loops surrounding 'op' from outermost to
/// innermost.
static LogicalResult getOpIndexSet(Operation *op,
                                   FlatAffineValueConstraints *indexSet) {
  SmallVector<Operation *, 4> ops;
  getEnclosingAffineForAndIfOps(*op, &ops);
  return getIndexSet(ops, indexSet);
}

// Returns the number of outer loops common to 'src/dstDomain'.
// Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
                  const FlatAffineValueConstraints &dstDomain,
                  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
  // Find the number of common loops shared by src and dst accesses.
  unsigned minNumLoops =
      std::min(srcDomain.getNumDimIds(), dstDomain.getNumDimIds());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if (!isForInductionVar(srcDomain.getValue(i)) ||
        !isForInductionVar(dstDomain.getValue(i)) ||
        srcDomain.getValue(i) != dstDomain.getValue(i))
      break;
    if (commonLoops != nullptr)
      commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
    ++numCommonLoops;
  }
  if (commonLoops != nullptr)
    assert(commonLoops->size() == numCommonLoops);
  return numCommonLoops;
}

/// Returns Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
static Block *getCommonBlock(const MemRefAccess &srcAccess,
                             const MemRefAccess &dstAccess,
                             const FlatAffineValueConstraints &srcDomain,
                             unsigned numCommonLoops) {
  // Get the chain of ancestor blocks to the given `MemRefAccess` instance. The
  // search terminates when either an op with the `AffineScope` trait or
  // `endBlock` is reached.
  auto getChainOfAncestorBlocks = [&](const MemRefAccess &access,
                                      SmallVector<Block *, 4> &ancestorBlocks,
                                      Block *endBlock = nullptr) {
    Block *currBlock = access.opInst->getBlock();
    // The loop terminates when currBlock is nullptr, equals endBlock, or its
    // parent operation holds an affine scope.
    while (currBlock && currBlock != endBlock &&
           !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
      ancestorBlocks.push_back(currBlock);
      currBlock = currBlock->getParentOp()->getBlock();
    }
  };

  if (numCommonLoops == 0) {
    Block *block = srcAccess.opInst->getBlock();
    while (!llvm::isa<FuncOp>(block->getParentOp())) {
      block = block->getParentOp()->getBlock();
    }
    return block;
  }
  Value commonForIV = srcDomain.getValue(numCommonLoops - 1);
  AffineForOp forOp = getForInductionVarOwner(commonForIV);
  assert(forOp && "commonForValue was not an induction variable");

  // Find the closest common block including those in AffineIf.
  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
  getChainOfAncestorBlocks(srcAccess, srcAncestorBlocks, forOp.getBody());
  getChainOfAncestorBlocks(dstAccess, dstAncestorBlocks, forOp.getBody());

  Block *commonBlock = forOp.getBody();
  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
       i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
       i--, j--)
    commonBlock = srcAncestorBlocks[i];

  return commonBlock;
}

// Returns true if the ancestor operation of 'srcAccess' appears before the
// ancestor operation of 'dstAccess' in the common ancestral block. Returns
// false otherwise.
// Note that because 'srcAccess' or 'dstAccess' may be nested in conditionals,
// the function is named 'srcAppearsBeforeDstInAncestralBlock'. Note that
// 'numCommonLoops' is the number of contiguous surrounding outer loops.
static bool srcAppearsBeforeDstInAncestralBlock(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    const FlatAffineValueConstraints &srcDomain, unsigned numCommonLoops) {
  // Get Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
  auto *commonBlock =
      getCommonBlock(srcAccess, dstAccess, srcDomain, numCommonLoops);
  // Check the dominance relationship between the respective ancestors of the
  // src and dst in the Block of the innermost among the common loops.
  auto *srcInst = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
  assert(srcInst != nullptr);
  auto *dstInst = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
  assert(dstInst != nullptr);

  // Determine whether dstInst comes after srcInst.
  return srcInst->isBeforeInBlock(dstInst);
}

// Adds ordering constraints to 'dependenceDomain' based on number of loops
// common to 'src/dstDomain' and requested 'loopDepth'.
// Note that 'loopDepth' cannot exceed the number of common loops plus one.
// EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
// *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
// *) If 'loopDepth == 2' then two constraints are added: i == i' and
//    j' >= j + 1
// *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
static void
addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
                       const FlatAffineValueConstraints &dstDomain,
                       unsigned loopDepth,
                       FlatAffineValueConstraints *dependenceDomain) {
  unsigned numCols = dependenceDomain->getNumCols();
  SmallVector<int64_t, 4> eq(numCols);
  unsigned numSrcDims = srcDomain.getNumDimIds();
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[i] = -1;
    eq[i + numSrcDims] = 1;
    if (i == loopDepth - 1) {
      eq[numCols - 1] = -1;
      dependenceDomain->addInequality(eq);
    } else {
      dependenceDomain->addEquality(eq);
    }
  }
}

// Computes distance and direction vectors in 'dependenceComponents', by
// adding variables to 'dependenceDomain' which represent the difference of
// the IVs, eliminating all other variables, and reading off distance vectors
// from equality constraints (if possible), and direction vectors from
// inequalities.
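//
// For example (a hypothetical single common loop), for a dependence from
// iteration %i to iteration %i + 2, the introduced variable equals the IV
// difference; after projection its constant lower and upper bounds are both
// 2, i.e. a constant dependence distance of 2.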
static void computeDirectionVector(
    const FlatAffineValueConstraints &srcDomain,
    const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
    FlatAffineValueConstraints *dependenceDomain,
    SmallVector<DependenceComponent, 2> *dependenceComponents) {
  // Find the number of common loops shared by src and dst accesses.
  SmallVector<AffineForOp, 4> commonLoops;
  unsigned numCommonLoops =
      getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
  if (numCommonLoops == 0)
    return;
  // Compute direction vectors for requested loop depth.
  unsigned numIdsToEliminate = dependenceDomain->getNumIds();
  // Add new variables to 'dependenceDomain' to represent the direction
  // constraints for each shared loop.
  dependenceDomain->insertDimId(/*pos=*/0, /*num=*/numCommonLoops);

  // Add equality constraints for each common loop, setting newly introduced
  // variable at column 'j' to the 'dst' IV minus the 'src' IV.
  SmallVector<int64_t, 4> eq;
  eq.resize(dependenceDomain->getNumCols());
  unsigned numSrcDims = srcDomain.getNumDimIds();
  // Constraint variables format:
  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[j] = 1;
    eq[j + numCommonLoops] = 1;
    eq[j + numCommonLoops + numSrcDims] = -1;
    dependenceDomain->addEquality(eq);
  }

  // Eliminate all variables other than the direction variables just added.
  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);

  // Scan each common loop variable column and set direction vectors based
  // on eliminated constraint system.
  dependenceComponents->resize(numCommonLoops);
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    (*dependenceComponents)[j].op = commonLoops[j].getOperation();
    auto lbConst =
        dependenceDomain->getConstantBound(FlatAffineConstraints::LB, j);
    (*dependenceComponents)[j].lb =
        lbConst.getValueOr(std::numeric_limits<int64_t>::min());
    auto ubConst =
        dependenceDomain->getConstantBound(FlatAffineConstraints::UB, j);
    (*dependenceComponents)[j].ub =
        ubConst.getValueOr(std::numeric_limits<int64_t>::max());
  }
}

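// Builds 'rel' as the access relation for this access: a relation mapping
// points of the access's iteration domain (the surrounding loop IVs,
// constrained by the enclosing loop/if domain) to the memref elements
// accessed at those points.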
LogicalResult MemRefAccess::getAccessRelation(FlatAffineRelation &rel) const {
  // Create set corresponding to domain of access.
  FlatAffineValueConstraints domain;
  if (failed(getOpIndexSet(opInst, &domain)))
    return failure();

  // Get access relation from access map.
  AffineValueMap accessValueMap;
  getAccessMap(&accessValueMap);
  if (failed(getRelationFromMap(accessValueMap, rel)))
    return failure();

  FlatAffineRelation domainRel(rel.getNumDomainDims(), /*numRangeDims=*/0,
                               domain);

  // Merge and align the domain ids of `rel` with the ids of `domain`. Since
  // the domain of the access map is a subset of the domain of the access, the
  // domain ids of `rel` are guaranteed to be a subset of the ids of `domain`.
  for (unsigned i = 0, e = domain.getNumDimIds(); i < e; ++i) {
    unsigned loc;
    if (rel.findId(domain.getValue(i), &loc)) {
      rel.swapId(i, loc);
    } else {
      rel.insertDomainId(i);
      rel.setValue(i, domain.getValue(i));
    }
  }

  // Append the domain constraints to `rel`.
  domainRel.appendRangeId(rel.getNumRangeDims());
  domainRel.mergeSymbolIds(rel);
  domainRel.mergeLocalIds(rel);
  rel.append(domainRel);

  return success();
}

// Populates 'accessMap' with composition of AffineApplyOps reachable from
// indices of MemRefAccess.
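//
// For example, in the (hypothetical) IR below, the composed access map for
// the load is (d0) -> (d0 * 2 + 1) applied directly to the operand %i:
//
//   %a = affine.apply affine_map<(d0) -> (d0 * 2 + 1)>(%i)
//   %v = affine.load %buf[%a] : memref<128xf32>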
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
  // Get affine map from AffineLoad/Store.
  AffineMap map;
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
    map = loadOp.getAffineMap();
  else
    map = cast<AffineWriteOpInterface>(opInst).getAffineMap();

  SmallVector<Value, 8> operands(indices.begin(), indices.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  accessMap->reset(map, operands);
}

// Builds a flat affine constraint system to check if there exists a dependence
// between memref accesses 'srcAccess' and 'dstAccess'.
// Returns 'NoDependence' if the accesses can be definitively shown not to
// access the same element.
// Returns 'HasDependence' if the accesses do access the same element.
// Returns 'Failure' if an error or unsupported case was encountered.
// If a dependence exists, returns in 'dependenceComponents' a direction
// vector for the dependence, with a component for each loop IV in loops
// common to both accesses (see Dependence in AffineAnalysis.h for details).
//
// The memref access dependence check is comprised of the following steps:
// *) Build access relation for each access. An access relation maps elements
//    of an iteration domain to the element(s) of an array domain accessed by
//    that iteration of the associated statement through some array reference.
// *) Compute the dependence relation by composing access relation of
//    `srcAccess` with the inverse of access relation of `dstAccess`.
//    Doing this builds a relation between iteration domain of `srcAccess`
//    to the iteration domain of `dstAccess` which access the same memory
//    location.
// *) Add ordering constraints for `srcAccess` to be accessed before
//    `dstAccess`.
//
// This method builds a constraint system with the following column format:
//
//  [src-dim-identifiers, dst-dim-identifiers, symbols, constant]
//
// For example, given the following MLIR code with "source" and "destination"
// accesses to the same memref '%m', and symbols %M, %N, %K:
//
//   affine.for %i0 = 0 to 100 {
//     affine.for %i1 = 0 to 50 {
//       %a0 = affine.apply
//         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
//       // Source memref access.
//       store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
//     }
//   }
//
//   affine.for %i2 = 0 to 100 {
//     affine.for %i3 = 0 to 50 {
//       %a1 = affine.apply
//         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
//       // Destination memref access.
//       %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
//     }
//   }
//
// The access relation for `srcAccess` would be the following:
//
//   [src_dim0, src_dim1, mem_dim0, mem_dim1,  %N,   %M,  const]
//       2        -4       -1         0         1     0     0     = 0
//       0         3        0        -1         0    -1     0     = 0
//       1         0        0         0         0     0     0    >= 0
//      -1         0        0         0         0     0     100  >= 0
//       0         1        0         0         0     0     0    >= 0
//       0        -1        0         0         0     0     50   >= 0
//
// The access relation for `dstAccess` would be the following:
//
//   [dst_dim0, dst_dim1, mem_dim0, mem_dim1,  %M,   %K,  const]
//       7         9       -1         0        -1     0     0     = 0
//       0         11       0        -1         0    -1     0     = 0
//       1         0        0         0         0     0     0    >= 0
//      -1         0        0         0         0     0     100  >= 0
//       0         1        0         0         0     0     0    >= 0
//       0        -1        0         0         0     0     50   >= 0
//
// The equalities in the above relations correspond to the access maps while
// the inequalities correspond to the iteration domain constraints.
//
// The dependence relation formed:
//
//   [src_dim0, src_dim1, dst_dim0, dst_dim1,  %M,   %N,   %K,  const]
//       2        -4        -7        -9        1     1     0     0     = 0
//       0         3         0       -11       -1     0     1     0     = 0
//       1         0         0         0        0     0     0     0    >= 0
//      -1         0         0         0        0     0     0     100  >= 0
//       0         1         0         0        0     0     0     0    >= 0
//       0        -1         0         0        0     0     0     50   >= 0
//       0         0         1         0        0     0     0     0    >= 0
//       0         0        -1         0        0     0     0     100  >= 0
//       0         0         0         1        0     0     0     0    >= 0
//       0         0         0        -1        0     0     0     50   >= 0
//
//
// TODO: Support AffineExprs mod/floordiv/ceildiv.
DependenceResult mlir::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
    SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
                          << Twine(loopDepth) << " between:\n";);
  LLVM_DEBUG(srcAccess.opInst->dump(););
  LLVM_DEBUG(dstAccess.opInst->dump(););

  // Return 'NoDependence' if these accesses do not access the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return DependenceResult::NoDependence;

  // Return 'NoDependence' if neither of these accesses is an
  // AffineWriteOpInterface.
  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
      !isa<AffineWriteOpInterface>(dstAccess.opInst))
    return DependenceResult::NoDependence;

  // Create access relation from each MemRefAccess.
  FlatAffineRelation srcRel, dstRel;
  if (failed(srcAccess.getAccessRelation(srcRel)))
    return DependenceResult::Failure;
  if (failed(dstAccess.getAccessRelation(dstRel)))
    return DependenceResult::Failure;

  FlatAffineValueConstraints srcDomain = srcRel.getDomainSet();
  FlatAffineValueConstraints dstDomain = dstRel.getDomainSet();

  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
  // operation of 'srcAccess' does not properly dominate the ancestor
  // operation of 'dstAccess' in the same common operation block.
  // Note: this check is skipped if 'allowRAR' is true, because RAR deps can
  // exist irrespective of lexicographic ordering between src and dst.
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  assert(loopDepth <= numCommonLoops + 1);
  if (!allowRAR && loopDepth > numCommonLoops &&
      !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess, srcDomain,
                                           numCommonLoops)) {
    return DependenceResult::NoDependence;
  }

  // Compute the dependence relation by composing `srcRel` with the inverse of
  // `dstRel`. Doing this builds a relation between iteration domain of
  // `srcAccess` to the iteration domain of `dstAccess` which access the same
  // memory locations.
  dstRel.inverse();
  dstRel.compose(srcRel);
  *dependenceConstraints = dstRel;

  // Add 'src' happens before 'dst' ordering constraints.
  addOrderingConstraints(srcDomain, dstDomain, loopDepth,
                         dependenceConstraints);

  // Return 'NoDependence' if the solution space is empty: no dependence.
  if (dependenceConstraints->isEmpty())
    return DependenceResult::NoDependence;

  // Compute the dependence direction vector if requested.
  if (dependenceComponents != nullptr)
    computeDirectionVector(srcDomain, dstDomain, loopDepth,
                           dependenceConstraints, dependenceComponents);

  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
  LLVM_DEBUG(dependenceConstraints->dump());
  return DependenceResult::HasDependence;
}

/// Gathers dependence components for dependences between all ops in loop nest
/// rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
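///
/// A minimal usage sketch (assuming `forOp` roots a nest of depth 2):
///
///   std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
///   getDependenceComponents(forOp, /*maxLoopDepth=*/2, &depCompsVec);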
void mlir::getDependenceComponents(
    AffineForOp forOp, unsigned maxLoopDepth,
    std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  forOp->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      auto *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        auto *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        FlatAffineValueConstraints dependenceConstraints;
        SmallVector<DependenceComponent, 2> depComps;
        // TODO: Explore whether it would be profitable to pre-compute and store
        // deps instead of repeatedly checking.
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, &dependenceConstraints, &depComps);
        if (hasDependence(result))
          depCompsVec->push_back(depComps);
      }
    }
  }
}