1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory
10 // accesses. Currently, it is an (incomplete) implementation of the approach
11 // described in
12 //
13 //            Practical Dependence Testing
14 //            Goff, Kennedy, Tseng
15 //            PLDI 1991
16 //
17 // There's a single entry point that analyzes the dependence between a pair
18 // of memory references in a function, returning either NULL, for no dependence,
19 // or a more-or-less detailed description of the dependence between them.
20 //
21 // Currently, the implementation cannot propagate constraints between
22 // coupled RDIV subscripts and lacks a multi-subscript MIV test.
23 // Both of these are conservative weaknesses;
24 // that is, not a source of correctness problems.
25 //
26 // Since Clang linearizes some array subscripts, the dependence
27 // analysis is using SCEV->delinearize to recover the representation of multiple
28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The
29 // delinearization is controlled by the flag -da-delinearize.
30 //
31 // We should pay some careful attention to the possibility of integer overflow
32 // in the implementation of the various tests. This could happen with Add,
33 // Subtract, or Multiply, with both APInt's and SCEV's.
34 //
35 // Some non-linear subscript pairs can be handled by the GCD test
36 // (and perhaps other tests).
37 // Should explore how often these things occur.
38 //
39 // Finally, it seems like certain test cases expose weaknesses in the SCEV
40 // simplification, especially in the handling of sign and zero extensions.
41 // It could be useful to spend time exploring these.
42 //
43 // Please note that this is work in progress and the interface is subject to
44 // change.
45 //
46 //===----------------------------------------------------------------------===//
47 //                                                                            //
48 //                   In memory of Ken Kennedy, 1945 - 2007                    //
49 //                                                                            //
50 //===----------------------------------------------------------------------===//
51 
52 #include "llvm/Analysis/DependenceAnalysis.h"
53 #include "llvm/ADT/Statistic.h"
54 #include "llvm/Analysis/AliasAnalysis.h"
55 #include "llvm/Analysis/Delinearization.h"
56 #include "llvm/Analysis/LoopInfo.h"
57 #include "llvm/Analysis/ScalarEvolution.h"
58 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
59 #include "llvm/Analysis/ValueTracking.h"
60 #include "llvm/IR/InstIterator.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/InitializePasses.h"
63 #include "llvm/Support/CommandLine.h"
64 #include "llvm/Support/Debug.h"
65 #include "llvm/Support/ErrorHandling.h"
66 #include "llvm/Support/raw_ostream.h"
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "da"
71 
72 //===----------------------------------------------------------------------===//
73 // statistics
74 
75 STATISTIC(TotalArrayPairs, "Array pairs tested");
76 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
77 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
78 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
79 STATISTIC(ZIVapplications, "ZIV applications");
80 STATISTIC(ZIVindependence, "ZIV independence");
81 STATISTIC(StrongSIVapplications, "Strong SIV applications");
82 STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
83 STATISTIC(StrongSIVindependence, "Strong SIV independence");
84 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
85 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
86 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
87 STATISTIC(ExactSIVapplications, "Exact SIV applications");
88 STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
89 STATISTIC(ExactSIVindependence, "Exact SIV independence");
90 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
91 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
92 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
93 STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
94 STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
95 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
96 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
97 STATISTIC(DeltaApplications, "Delta applications");
98 STATISTIC(DeltaSuccesses, "Delta successes");
99 STATISTIC(DeltaIndependence, "Delta independence");
100 STATISTIC(DeltaPropagations, "Delta propagations");
101 STATISTIC(GCDapplications, "GCD applications");
102 STATISTIC(GCDsuccesses, "GCD successes");
103 STATISTIC(GCDindependence, "GCD independence");
104 STATISTIC(BanerjeeApplications, "Banerjee applications");
105 STATISTIC(BanerjeeIndependence, "Banerjee independence");
106 STATISTIC(BanerjeeSuccesses, "Banerjee successes");
107 
108 static cl::opt<bool>
109     Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
110                 cl::desc("Try to delinearize array references."));
111 static cl::opt<bool> DisableDelinearizationChecks(
112     "da-disable-delinearization-checks", cl::init(false), cl::Hidden,
113     cl::ZeroOrMore,
114     cl::desc(
115         "Disable checks that try to statically verify validity of "
116         "delinearized subscripts. Enabling this option may result in incorrect "
117         "dependence vectors for languages that allow the subscript of one "
118         "dimension to underflow or overflow into another dimension."));
119 
120 static cl::opt<unsigned> MIVMaxLevelThreshold(
121     "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore,
122     cl::desc("Maximum depth allowed for the recursive algorithm used to "
123              "explore MIV direction vectors."));
124 
125 //===----------------------------------------------------------------------===//
126 // basics
127 
128 DependenceAnalysis::Result
129 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
130   auto &AA = FAM.getResult<AAManager>(F);
131   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
132   auto &LI = FAM.getResult<LoopAnalysis>(F);
133   return DependenceInfo(&F, &AA, &SE, &LI);
134 }
135 
136 AnalysisKey DependenceAnalysis::Key;
137 
138 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
139                       "Dependence Analysis", true, true)
140 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
141 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
142 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
143 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
144                     true, true)
145 
146 char DependenceAnalysisWrapperPass::ID = 0;
147 
148 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
149     : FunctionPass(ID) {
150   initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
151 }
152 
153 FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
154   return new DependenceAnalysisWrapperPass();
155 }
156 
157 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
158   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
159   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
160   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
161   info.reset(new DependenceInfo(&F, &AA, &SE, &LI));
162   return false;
163 }
164 
165 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
166 
167 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
168 
169 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
170   AU.setPreservesAll();
171   AU.addRequiredTransitive<AAResultsWrapperPass>();
172   AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
173   AU.addRequiredTransitive<LoopInfoWrapperPass>();
174 }
175 
176 // Used to test the dependence analyzer.
177 // Looks through the function, noting instructions that may access memory.
178 // Calls depends() on every possible pair and prints out the result.
179 // Ignores all other instructions.
180 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) {
181   auto *F = DA->getFunction();
182   for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
183        ++SrcI) {
184     if (SrcI->mayReadOrWriteMemory()) {
185       for (inst_iterator DstI = SrcI, DstE = inst_end(F);
186            DstI != DstE; ++DstI) {
187         if (DstI->mayReadOrWriteMemory()) {
188           OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
189           OS << "  da analyze - ";
190           if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
191             D->dump(OS);
192             for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
193               if (D->isSplitable(Level)) {
194                 OS << "  da analyze - split level = " << Level;
195                 OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
196                 OS << "!\n";
197               }
198             }
199           }
200           else
201             OS << "none!\n";
202         }
203       }
204     }
205   }
206 }
207 
208 void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
209                                           const Module *) const {
210   dumpExampleDependence(OS, info.get());
211 }
212 
213 PreservedAnalyses
214 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
215   OS << "'Dependence Analysis' for function '" << F.getName() << "':\n";
216   dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F));
217   return PreservedAnalyses::all();
218 }
219 
220 //===----------------------------------------------------------------------===//
221 // Dependence methods
222 
223 // Returns true if this is an input dependence.
224 bool Dependence::isInput() const {
225   return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
226 }
227 
228 
229 // Returns true if this is an output dependence.
230 bool Dependence::isOutput() const {
231   return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
232 }
233 
234 
235 // Returns true if this is an flow (aka true)  dependence.
236 bool Dependence::isFlow() const {
237   return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
238 }
239 
240 
241 // Returns true if this is an anti dependence.
242 bool Dependence::isAnti() const {
243   return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
244 }
245 
246 
247 // Returns true if a particular level is scalar; that is,
248 // if no subscript in the source or destination mention the induction
249 // variable associated with the loop at this level.
250 // Leave this out of line, so it will serve as a virtual method anchor
251 bool Dependence::isScalar(unsigned level) const {
252   return false;
253 }
254 
255 
256 //===----------------------------------------------------------------------===//
257 // FullDependence methods
258 
259 FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
260                                bool PossiblyLoopIndependent,
261                                unsigned CommonLevels)
262     : Dependence(Source, Destination), Levels(CommonLevels),
263       LoopIndependent(PossiblyLoopIndependent) {
264   Consistent = true;
265   if (CommonLevels)
266     DV = std::make_unique<DVEntry[]>(CommonLevels);
267 }
268 
269 // The rest are simple getters that hide the implementation.
270 
271 // getDirection - Returns the direction associated with a particular level.
272 unsigned FullDependence::getDirection(unsigned Level) const {
273   assert(0 < Level && Level <= Levels && "Level out of range");
274   return DV[Level - 1].Direction;
275 }
276 
277 
278 // Returns the distance (or NULL) associated with a particular level.
279 const SCEV *FullDependence::getDistance(unsigned Level) const {
280   assert(0 < Level && Level <= Levels && "Level out of range");
281   return DV[Level - 1].Distance;
282 }
283 
284 
285 // Returns true if a particular level is scalar; that is,
286 // if no subscript in the source or destination mention the induction
287 // variable associated with the loop at this level.
288 bool FullDependence::isScalar(unsigned Level) const {
289   assert(0 < Level && Level <= Levels && "Level out of range");
290   return DV[Level - 1].Scalar;
291 }
292 
293 
294 // Returns true if peeling the first iteration from this loop
295 // will break this dependence.
296 bool FullDependence::isPeelFirst(unsigned Level) const {
297   assert(0 < Level && Level <= Levels && "Level out of range");
298   return DV[Level - 1].PeelFirst;
299 }
300 
301 
302 // Returns true if peeling the last iteration from this loop
303 // will break this dependence.
304 bool FullDependence::isPeelLast(unsigned Level) const {
305   assert(0 < Level && Level <= Levels && "Level out of range");
306   return DV[Level - 1].PeelLast;
307 }
308 
309 
310 // Returns true if splitting this loop will break the dependence.
311 bool FullDependence::isSplitable(unsigned Level) const {
312   assert(0 < Level && Level <= Levels && "Level out of range");
313   return DV[Level - 1].Splitable;
314 }
315 
316 
317 //===----------------------------------------------------------------------===//
318 // DependenceInfo::Constraint methods
319 
320 // If constraint is a point <X, Y>, returns X.
321 // Otherwise assert.
322 const SCEV *DependenceInfo::Constraint::getX() const {
323   assert(Kind == Point && "Kind should be Point");
324   return A;
325 }
326 
327 
328 // If constraint is a point <X, Y>, returns Y.
329 // Otherwise assert.
330 const SCEV *DependenceInfo::Constraint::getY() const {
331   assert(Kind == Point && "Kind should be Point");
332   return B;
333 }
334 
335 
336 // If constraint is a line AX + BY = C, returns A.
337 // Otherwise assert.
338 const SCEV *DependenceInfo::Constraint::getA() const {
339   assert((Kind == Line || Kind == Distance) &&
340          "Kind should be Line (or Distance)");
341   return A;
342 }
343 
344 
345 // If constraint is a line AX + BY = C, returns B.
346 // Otherwise assert.
347 const SCEV *DependenceInfo::Constraint::getB() const {
348   assert((Kind == Line || Kind == Distance) &&
349          "Kind should be Line (or Distance)");
350   return B;
351 }
352 
353 
354 // If constraint is a line AX + BY = C, returns C.
355 // Otherwise assert.
356 const SCEV *DependenceInfo::Constraint::getC() const {
357   assert((Kind == Line || Kind == Distance) &&
358          "Kind should be Line (or Distance)");
359   return C;
360 }
361 
362 
363 // If constraint is a distance, returns D.
364 // Otherwise assert.
365 const SCEV *DependenceInfo::Constraint::getD() const {
366   assert(Kind == Distance && "Kind should be Distance");
367   return SE->getNegativeSCEV(C);
368 }
369 
370 
371 // Returns the loop associated with this constraint.
372 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
373   assert((Kind == Distance || Kind == Line || Kind == Point) &&
374          "Kind should be Distance, Line, or Point");
375   return AssociatedLoop;
376 }
377 
378 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
379                                           const Loop *CurLoop) {
380   Kind = Point;
381   A = X;
382   B = Y;
383   AssociatedLoop = CurLoop;
384 }
385 
386 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
387                                          const SCEV *CC, const Loop *CurLoop) {
388   Kind = Line;
389   A = AA;
390   B = BB;
391   C = CC;
392   AssociatedLoop = CurLoop;
393 }
394 
395 void DependenceInfo::Constraint::setDistance(const SCEV *D,
396                                              const Loop *CurLoop) {
397   Kind = Distance;
398   A = SE->getOne(D->getType());
399   B = SE->getNegativeSCEV(A);
400   C = SE->getNegativeSCEV(D);
401   AssociatedLoop = CurLoop;
402 }
403 
404 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
405 
406 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
407   SE = NewSE;
408   Kind = Any;
409 }
410 
411 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
412 // For debugging purposes. Dumps the constraint out to OS.
413 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
414   if (isEmpty())
415     OS << " Empty\n";
416   else if (isAny())
417     OS << " Any\n";
418   else if (isPoint())
419     OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
420   else if (isDistance())
421     OS << " Distance is " << *getD() <<
422       " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
423   else if (isLine())
424     OS << " Line is " << *getA() << "*X + " <<
425       *getB() << "*Y = " << *getC() << "\n";
426   else
427     llvm_unreachable("unknown constraint type in Constraint::dump");
428 }
429 #endif
430 
431 
432 // Updates X with the intersection
433 // of the Constraints X and Y. Returns true if X has changed.
434 // Corresponds to Figure 4 from the paper
435 //
436 //            Practical Dependence Testing
437 //            Goff, Kennedy, Tseng
438 //            PLDI 1991
439 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
440   ++DeltaApplications;
441   LLVM_DEBUG(dbgs() << "\tintersect constraints\n");
442   LLVM_DEBUG(dbgs() << "\t    X ="; X->dump(dbgs()));
443   LLVM_DEBUG(dbgs() << "\t    Y ="; Y->dump(dbgs()));
444   assert(!Y->isPoint() && "Y must not be a Point");
445   if (X->isAny()) {
446     if (Y->isAny())
447       return false;
448     *X = *Y;
449     return true;
450   }
451   if (X->isEmpty())
452     return false;
453   if (Y->isEmpty()) {
454     X->setEmpty();
455     return true;
456   }
457 
458   if (X->isDistance() && Y->isDistance()) {
459     LLVM_DEBUG(dbgs() << "\t    intersect 2 distances\n");
460     if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
461       return false;
462     if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
463       X->setEmpty();
464       ++DeltaSuccesses;
465       return true;
466     }
467     // Hmmm, interesting situation.
468     // I guess if either is constant, keep it and ignore the other.
469     if (isa<SCEVConstant>(Y->getD())) {
470       *X = *Y;
471       return true;
472     }
473     return false;
474   }
475 
476   // At this point, the pseudo-code in Figure 4 of the paper
477   // checks if (X->isPoint() && Y->isPoint()).
478   // This case can't occur in our implementation,
479   // since a Point can only arise as the result of intersecting
480   // two Line constraints, and the right-hand value, Y, is never
481   // the result of an intersection.
482   assert(!(X->isPoint() && Y->isPoint()) &&
483          "We shouldn't ever see X->isPoint() && Y->isPoint()");
484 
485   if (X->isLine() && Y->isLine()) {
486     LLVM_DEBUG(dbgs() << "\t    intersect 2 lines\n");
487     const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
488     const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
489     if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
490       // slopes are equal, so lines are parallel
491       LLVM_DEBUG(dbgs() << "\t\tsame slope\n");
492       Prod1 = SE->getMulExpr(X->getC(), Y->getB());
493       Prod2 = SE->getMulExpr(X->getB(), Y->getC());
494       if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
495         return false;
496       if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
497         X->setEmpty();
498         ++DeltaSuccesses;
499         return true;
500       }
501       return false;
502     }
503     if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
504       // slopes differ, so lines intersect
505       LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");
506       const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
507       const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
508       const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
509       const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
510       const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
511       const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
512       const SCEVConstant *C1A2_C2A1 =
513         dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
514       const SCEVConstant *C1B2_C2B1 =
515         dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
516       const SCEVConstant *A1B2_A2B1 =
517         dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
518       const SCEVConstant *A2B1_A1B2 =
519         dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
520       if (!C1B2_C2B1 || !C1A2_C2A1 ||
521           !A1B2_A2B1 || !A2B1_A1B2)
522         return false;
523       APInt Xtop = C1B2_C2B1->getAPInt();
524       APInt Xbot = A1B2_A2B1->getAPInt();
525       APInt Ytop = C1A2_C2A1->getAPInt();
526       APInt Ybot = A2B1_A1B2->getAPInt();
527       LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
528       LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
529       LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
530       LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
531       APInt Xq = Xtop; // these need to be initialized, even
532       APInt Xr = Xtop; // though they're just going to be overwritten
533       APInt::sdivrem(Xtop, Xbot, Xq, Xr);
534       APInt Yq = Ytop;
535       APInt Yr = Ytop;
536       APInt::sdivrem(Ytop, Ybot, Yq, Yr);
537       if (Xr != 0 || Yr != 0) {
538         X->setEmpty();
539         ++DeltaSuccesses;
540         return true;
541       }
542       LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
543       if (Xq.slt(0) || Yq.slt(0)) {
544         X->setEmpty();
545         ++DeltaSuccesses;
546         return true;
547       }
548       if (const SCEVConstant *CUB =
549           collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
550         const APInt &UpperBound = CUB->getAPInt();
551         LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
552         if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
553           X->setEmpty();
554           ++DeltaSuccesses;
555           return true;
556         }
557       }
558       X->setPoint(SE->getConstant(Xq),
559                   SE->getConstant(Yq),
560                   X->getAssociatedLoop());
561       ++DeltaSuccesses;
562       return true;
563     }
564     return false;
565   }
566 
567   // if (X->isLine() && Y->isPoint()) This case can't occur.
568   assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
569 
570   if (X->isPoint() && Y->isLine()) {
571     LLVM_DEBUG(dbgs() << "\t    intersect Point and Line\n");
572     const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
573     const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
574     const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
575     if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
576       return false;
577     if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
578       X->setEmpty();
579       ++DeltaSuccesses;
580       return true;
581     }
582     return false;
583   }
584 
585   llvm_unreachable("shouldn't reach the end of Constraint intersection");
586   return false;
587 }
588 
589 
590 //===----------------------------------------------------------------------===//
591 // DependenceInfo methods
592 
593 // For debugging purposes. Dumps a dependence to OS.
594 void Dependence::dump(raw_ostream &OS) const {
595   bool Splitable = false;
596   if (isConfused())
597     OS << "confused";
598   else {
599     if (isConsistent())
600       OS << "consistent ";
601     if (isFlow())
602       OS << "flow";
603     else if (isOutput())
604       OS << "output";
605     else if (isAnti())
606       OS << "anti";
607     else if (isInput())
608       OS << "input";
609     unsigned Levels = getLevels();
610     OS << " [";
611     for (unsigned II = 1; II <= Levels; ++II) {
612       if (isSplitable(II))
613         Splitable = true;
614       if (isPeelFirst(II))
615         OS << 'p';
616       const SCEV *Distance = getDistance(II);
617       if (Distance)
618         OS << *Distance;
619       else if (isScalar(II))
620         OS << "S";
621       else {
622         unsigned Direction = getDirection(II);
623         if (Direction == DVEntry::ALL)
624           OS << "*";
625         else {
626           if (Direction & DVEntry::LT)
627             OS << "<";
628           if (Direction & DVEntry::EQ)
629             OS << "=";
630           if (Direction & DVEntry::GT)
631             OS << ">";
632         }
633       }
634       if (isPeelLast(II))
635         OS << 'p';
636       if (II < Levels)
637         OS << " ";
638     }
639     if (isLoopIndependent())
640       OS << "|<";
641     OS << "]";
642     if (Splitable)
643       OS << " splitable";
644   }
645   OS << "!\n";
646 }
647 
648 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
649 // underlaying objects. If LocA and LocB are known to not alias (for any reason:
650 // tbaa, non-overlapping regions etc), then it is known there is no dependecy.
651 // Otherwise the underlying objects are checked to see if they point to
652 // different identifiable objects.
653 static AliasResult underlyingObjectsAlias(AAResults *AA,
654                                           const DataLayout &DL,
655                                           const MemoryLocation &LocA,
656                                           const MemoryLocation &LocB) {
657   // Check the original locations (minus size) for noalias, which can happen for
658   // tbaa, incompatible underlying object locations, etc.
659   MemoryLocation LocAS =
660       MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags);
661   MemoryLocation LocBS =
662       MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags);
663   if (AA->isNoAlias(LocAS, LocBS))
664     return AliasResult::NoAlias;
665 
666   // Check the underlying objects are the same
667   const Value *AObj = getUnderlyingObject(LocA.Ptr);
668   const Value *BObj = getUnderlyingObject(LocB.Ptr);
669 
670   // If the underlying objects are the same, they must alias
671   if (AObj == BObj)
672     return AliasResult::MustAlias;
673 
674   // We may have hit the recursion limit for underlying objects, or have
675   // underlying objects where we don't know they will alias.
676   if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj))
677     return AliasResult::MayAlias;
678 
679   // Otherwise we know the objects are different and both identified objects so
680   // must not alias.
681   return AliasResult::NoAlias;
682 }
683 
684 
685 // Returns true if the load or store can be analyzed. Atomic and volatile
686 // operations have properties which this analysis does not understand.
687 static
688 bool isLoadOrStore(const Instruction *I) {
689   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
690     return LI->isUnordered();
691   else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
692     return SI->isUnordered();
693   return false;
694 }
695 
696 
697 // Examines the loop nesting of the Src and Dst
698 // instructions and establishes their shared loops. Sets the variables
699 // CommonLevels, SrcLevels, and MaxLevels.
700 // The source and destination instructions needn't be contained in the same
701 // loop. The routine establishNestingLevels finds the level of most deeply
702 // nested loop that contains them both, CommonLevels. An instruction that's
703 // not contained in a loop is at level = 0. MaxLevels is equal to the level
704 // of the source plus the level of the destination, minus CommonLevels.
705 // This lets us allocate vectors MaxLevels in length, with room for every
706 // distinct loop referenced in both the source and destination subscripts.
707 // The variable SrcLevels is the nesting depth of the source instruction.
708 // It's used to help calculate distinct loops referenced by the destination.
709 // Here's the map from loops to levels:
710 //            0 - unused
711 //            1 - outermost common loop
712 //          ... - other common loops
713 // CommonLevels - innermost common loop
714 //          ... - loops containing Src but not Dst
715 //    SrcLevels - innermost loop containing Src but not Dst
716 //          ... - loops containing Dst but not Src
717 //    MaxLevels - innermost loops containing Dst but not Src
718 // Consider the follow code fragment:
719 //   for (a = ...) {
720 //     for (b = ...) {
721 //       for (c = ...) {
722 //         for (d = ...) {
723 //           A[] = ...;
724 //         }
725 //       }
726 //       for (e = ...) {
727 //         for (f = ...) {
728 //           for (g = ...) {
729 //             ... = A[];
730 //           }
731 //         }
732 //       }
733 //     }
734 //   }
735 // If we're looking at the possibility of a dependence between the store
736 // to A (the Src) and the load from A (the Dst), we'll note that they
737 // have 2 loops in common, so CommonLevels will equal 2 and the direction
738 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
739 // A map from loop names to loop numbers would look like
740 //     a - 1
741 //     b - 2 = CommonLevels
742 //     c - 3
743 //     d - 4 = SrcLevels
744 //     e - 5
745 //     f - 6
746 //     g - 7 = MaxLevels
747 void DependenceInfo::establishNestingLevels(const Instruction *Src,
748                                             const Instruction *Dst) {
749   const BasicBlock *SrcBlock = Src->getParent();
750   const BasicBlock *DstBlock = Dst->getParent();
751   unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
752   unsigned DstLevel = LI->getLoopDepth(DstBlock);
753   const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
754   const Loop *DstLoop = LI->getLoopFor(DstBlock);
755   SrcLevels = SrcLevel;
756   MaxLevels = SrcLevel + DstLevel;
757   while (SrcLevel > DstLevel) {
758     SrcLoop = SrcLoop->getParentLoop();
759     SrcLevel--;
760   }
761   while (DstLevel > SrcLevel) {
762     DstLoop = DstLoop->getParentLoop();
763     DstLevel--;
764   }
765   while (SrcLoop != DstLoop) {
766     SrcLoop = SrcLoop->getParentLoop();
767     DstLoop = DstLoop->getParentLoop();
768     SrcLevel--;
769   }
770   CommonLevels = SrcLevel;
771   MaxLevels -= CommonLevels;
772 }
773 
774 
775 // Given one of the loops containing the source, return
776 // its level index in our numbering scheme.
777 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
778   return SrcLoop->getLoopDepth();
779 }
780 
781 
782 // Given one of the loops containing the destination,
783 // return its level index in our numbering scheme.
784 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
785   unsigned D = DstLoop->getLoopDepth();
786   if (D > CommonLevels)
787     return D - CommonLevels + SrcLevels;
788   else
789     return D;
790 }
791 
792 
793 // Returns true if Expression is loop invariant in LoopNest.
794 bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
795                                      const Loop *LoopNest) const {
796   if (!LoopNest)
797     return true;
798   return SE->isLoopInvariant(Expression, LoopNest) &&
799     isLoopInvariant(Expression, LoopNest->getParentLoop());
800 }
801 
802 
803 
804 // Finds the set of loops from the LoopNest that
805 // have a level <= CommonLevels and are referred to by the SCEV Expression.
806 void DependenceInfo::collectCommonLoops(const SCEV *Expression,
807                                         const Loop *LoopNest,
808                                         SmallBitVector &Loops) const {
809   while (LoopNest) {
810     unsigned Level = LoopNest->getLoopDepth();
811     if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
812       Loops.set(Level);
813     LoopNest = LoopNest->getParentLoop();
814   }
815 }
816 
817 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
818 
819   unsigned widestWidthSeen = 0;
820   Type *widestType;
821 
822   // Go through each pair and find the widest bit to which we need
823   // to extend all of them.
824   for (Subscript *Pair : Pairs) {
825     const SCEV *Src = Pair->Src;
826     const SCEV *Dst = Pair->Dst;
827     IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
828     IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
829     if (SrcTy == nullptr || DstTy == nullptr) {
830       assert(SrcTy == DstTy && "This function only unify integer types and "
831              "expect Src and Dst share the same type "
832              "otherwise.");
833       continue;
834     }
835     if (SrcTy->getBitWidth() > widestWidthSeen) {
836       widestWidthSeen = SrcTy->getBitWidth();
837       widestType = SrcTy;
838     }
839     if (DstTy->getBitWidth() > widestWidthSeen) {
840       widestWidthSeen = DstTy->getBitWidth();
841       widestType = DstTy;
842     }
843   }
844 
845 
846   assert(widestWidthSeen > 0);
847 
848   // Now extend each pair to the widest seen.
849   for (Subscript *Pair : Pairs) {
850     const SCEV *Src = Pair->Src;
851     const SCEV *Dst = Pair->Dst;
852     IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
853     IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
854     if (SrcTy == nullptr || DstTy == nullptr) {
855       assert(SrcTy == DstTy && "This function only unify integer types and "
856              "expect Src and Dst share the same type "
857              "otherwise.");
858       continue;
859     }
860     if (SrcTy->getBitWidth() < widestWidthSeen)
861       // Sign-extend Src to widestType
862       Pair->Src = SE->getSignExtendExpr(Src, widestType);
863     if (DstTy->getBitWidth() < widestWidthSeen) {
864       // Sign-extend Dst to widestType
865       Pair->Dst = SE->getSignExtendExpr(Dst, widestType);
866     }
867   }
868 }
869 
870 // removeMatchingExtensions - Examines a subscript pair.
871 // If the source and destination are identically sign (or zero)
872 // extended, it strips off the extension in an effect to simplify
873 // the actual analysis.
874 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
875   const SCEV *Src = Pair->Src;
876   const SCEV *Dst = Pair->Dst;
877   if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
878       (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
879     const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src);
880     const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst);
881     const SCEV *SrcCastOp = SrcCast->getOperand();
882     const SCEV *DstCastOp = DstCast->getOperand();
883     if (SrcCastOp->getType() == DstCastOp->getType()) {
884       Pair->Src = SrcCastOp;
885       Pair->Dst = DstCastOp;
886     }
887   }
888 }
889 
890 // Examine the scev and return true iff it's linear.
891 // Collect any loops mentioned in the set of "Loops".
892 bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
893                                     SmallBitVector &Loops, bool IsSrc) {
894   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
895   if (!AddRec)
896     return isLoopInvariant(Expr, LoopNest);
897   const SCEV *Start = AddRec->getStart();
898   const SCEV *Step = AddRec->getStepRecurrence(*SE);
899   const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
900   if (!isa<SCEVCouldNotCompute>(UB)) {
901     if (SE->getTypeSizeInBits(Start->getType()) <
902         SE->getTypeSizeInBits(UB->getType())) {
903       if (!AddRec->getNoWrapFlags())
904         return false;
905     }
906   }
907   if (!isLoopInvariant(Step, LoopNest))
908     return false;
909   if (IsSrc)
910     Loops.set(mapSrcLoop(AddRec->getLoop()));
911   else
912     Loops.set(mapDstLoop(AddRec->getLoop()));
913   return checkSubscript(Start, LoopNest, Loops, IsSrc);
914 }
915 
916 // Examine the scev and return true iff it's linear.
917 // Collect any loops mentioned in the set of "Loops".
918 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
919                                        SmallBitVector &Loops) {
920   return checkSubscript(Src, LoopNest, Loops, true);
921 }
922 
923 // Examine the scev and return true iff it's linear.
924 // Collect any loops mentioned in the set of "Loops".
925 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
926                                        SmallBitVector &Loops) {
927   return checkSubscript(Dst, LoopNest, Loops, false);
928 }
929 
930 
931 // Examines the subscript pair (the Src and Dst SCEVs)
932 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
933 // Collects the associated loops in a set.
934 DependenceInfo::Subscript::ClassificationKind
935 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
936                              const SCEV *Dst, const Loop *DstLoopNest,
937                              SmallBitVector &Loops) {
938   SmallBitVector SrcLoops(MaxLevels + 1);
939   SmallBitVector DstLoops(MaxLevels + 1);
940   if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
941     return Subscript::NonLinear;
942   if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
943     return Subscript::NonLinear;
944   Loops = SrcLoops;
945   Loops |= DstLoops;
946   unsigned N = Loops.count();
947   if (N == 0)
948     return Subscript::ZIV;
949   if (N == 1)
950     return Subscript::SIV;
951   if (N == 2 && (SrcLoops.count() == 0 ||
952                  DstLoops.count() == 0 ||
953                  (SrcLoops.count() == 1 && DstLoops.count() == 1)))
954     return Subscript::RDIV;
955   return Subscript::MIV;
956 }
957 
958 
959 // A wrapper around SCEV::isKnownPredicate.
960 // Looks for cases where we're interested in comparing for equality.
961 // If both X and Y have been identically sign or zero extended,
962 // it strips off the (confusing) extensions before invoking
963 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
964 // will be similarly updated.
965 //
966 // If SCEV::isKnownPredicate can't prove the predicate,
967 // we try simple subtraction, which seems to help in some cases
968 // involving symbolics.
969 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
970                                       const SCEV *Y) const {
971   if (Pred == CmpInst::ICMP_EQ ||
972       Pred == CmpInst::ICMP_NE) {
973     if ((isa<SCEVSignExtendExpr>(X) &&
974          isa<SCEVSignExtendExpr>(Y)) ||
975         (isa<SCEVZeroExtendExpr>(X) &&
976          isa<SCEVZeroExtendExpr>(Y))) {
977       const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X);
978       const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y);
979       const SCEV *Xop = CX->getOperand();
980       const SCEV *Yop = CY->getOperand();
981       if (Xop->getType() == Yop->getType()) {
982         X = Xop;
983         Y = Yop;
984       }
985     }
986   }
987   if (SE->isKnownPredicate(Pred, X, Y))
988     return true;
989   // If SE->isKnownPredicate can't prove the condition,
990   // we try the brute-force approach of subtracting
991   // and testing the difference.
992   // By testing with SE->isKnownPredicate first, we avoid
993   // the possibility of overflow when the arguments are constants.
994   const SCEV *Delta = SE->getMinusSCEV(X, Y);
995   switch (Pred) {
996   case CmpInst::ICMP_EQ:
997     return Delta->isZero();
998   case CmpInst::ICMP_NE:
999     return SE->isKnownNonZero(Delta);
1000   case CmpInst::ICMP_SGE:
1001     return SE->isKnownNonNegative(Delta);
1002   case CmpInst::ICMP_SLE:
1003     return SE->isKnownNonPositive(Delta);
1004   case CmpInst::ICMP_SGT:
1005     return SE->isKnownPositive(Delta);
1006   case CmpInst::ICMP_SLT:
1007     return SE->isKnownNegative(Delta);
1008   default:
1009     llvm_unreachable("unexpected predicate in isKnownPredicate");
1010   }
1011 }
1012 
1013 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1))
1014 /// with some extra checking if S is an AddRec and we can prove less-than using
1015 /// the loop bounds.
1016 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
1017   // First unify to the same type
1018   auto *SType = dyn_cast<IntegerType>(S->getType());
1019   auto *SizeType = dyn_cast<IntegerType>(Size->getType());
1020   if (!SType || !SizeType)
1021     return false;
1022   Type *MaxType =
1023       (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
1024   S = SE->getTruncateOrZeroExtend(S, MaxType);
1025   Size = SE->getTruncateOrZeroExtend(Size, MaxType);
1026 
1027   // Special check for addrecs using BE taken count
1028   const SCEV *Bound = SE->getMinusSCEV(S, Size);
1029   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {
1030     if (AddRec->isAffine()) {
1031       const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
1032       if (!isa<SCEVCouldNotCompute>(BECount)) {
1033         const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);
1034         if (SE->isKnownNegative(Limit))
1035           return true;
1036       }
1037     }
1038   }
1039 
1040   // Check using normal isKnownNegative
1041   const SCEV *LimitedBound =
1042       SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));
1043   return SE->isKnownNegative(LimitedBound);
1044 }
1045 
1046 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const {
1047   bool Inbounds = false;
1048   if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr))
1049     Inbounds = SrcGEP->isInBounds();
1050   if (Inbounds) {
1051     if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
1052       if (AddRec->isAffine()) {
1053         // We know S is for Ptr, the operand on a load/store, so doesn't wrap.
1054         // If both parts are NonNegative, the end result will be NonNegative
1055         if (SE->isKnownNonNegative(AddRec->getStart()) &&
1056             SE->isKnownNonNegative(AddRec->getOperand(1)))
1057           return true;
1058       }
1059     }
1060   }
1061 
1062   return SE->isKnownNonNegative(S);
1063 }
1064 
1065 // All subscripts are all the same type.
1066 // Loop bound may be smaller (e.g., a char).
1067 // Should zero extend loop bound, since it's always >= 0.
1068 // This routine collects upper bound and extends or truncates if needed.
1069 // Truncating is safe when subscripts are known not to wrap. Cases without
1070 // nowrap flags should have been rejected earlier.
1071 // Return null if no bound available.
1072 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
1073   if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
1074     const SCEV *UB = SE->getBackedgeTakenCount(L);
1075     return SE->getTruncateOrZeroExtend(UB, T);
1076   }
1077   return nullptr;
1078 }
1079 
1080 
1081 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1082 // If the cast fails, returns NULL.
1083 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
1084                                                               Type *T) const {
1085   if (const SCEV *UB = collectUpperBound(L, T))
1086     return dyn_cast<SCEVConstant>(UB);
1087   return nullptr;
1088 }
1089 
1090 
1091 // testZIV -
1092 // When we have a pair of subscripts of the form [c1] and [c2],
1093 // where c1 and c2 are both loop invariant, we attack it using
1094 // the ZIV test. Basically, we test by comparing the two values,
1095 // but there are actually three possible results:
1096 // 1) the values are equal, so there's a dependence
1097 // 2) the values are different, so there's no dependence
1098 // 3) the values might be equal, so we have to assume a dependence.
1099 //
1100 // Return true if dependence disproved.
1101 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
1102                              FullDependence &Result) const {
1103   LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
1104   LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
1105   ++ZIVapplications;
1106   if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
1107     LLVM_DEBUG(dbgs() << "    provably dependent\n");
1108     return false; // provably dependent
1109   }
1110   if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
1111     LLVM_DEBUG(dbgs() << "    provably independent\n");
1112     ++ZIVindependence;
1113     return true; // provably independent
1114   }
1115   LLVM_DEBUG(dbgs() << "    possibly dependent\n");
1116   Result.Consistent = false;
1117   return false; // possibly dependent
1118 }
1119 
1120 
1121 // strongSIVtest -
1122 // From the paper, Practical Dependence Testing, Section 4.2.1
1123 //
1124 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1125 // where i is an induction variable, c1 and c2 are loop invariant,
1126 //  and a is a constant, we can solve it exactly using the Strong SIV test.
1127 //
1128 // Can prove independence. Failing that, can compute distance (and direction).
1129 // In the presence of symbolic terms, we can sometimes make progress.
1130 //
1131 // If there's a dependence,
1132 //
1133 //    c1 + a*i = c2 + a*i'
1134 //
1135 // The dependence distance is
1136 //
1137 //    d = i' - i = (c1 - c2)/a
1138 //
1139 // A dependence only exists if d is an integer and abs(d) <= U, where U is the
1140 // loop's upper bound. If a dependence exists, the dependence direction is
1141 // defined as
1142 //
1143 //                { < if d > 0
1144 //    direction = { = if d = 0
1145 //                { > if d < 0
1146 //
1147 // Return true if dependence disproved.
1148 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
1149                                    const SCEV *DstConst, const Loop *CurLoop,
1150                                    unsigned Level, FullDependence &Result,
1151                                    Constraint &NewConstraint) const {
1152   LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1153   LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff);
1154   LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1155   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst);
1156   LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1157   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst);
1158   LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1159   ++StrongSIVapplications;
1160   assert(0 < Level && Level <= CommonLevels && "level out of range");
1161   Level--;
1162 
1163   const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1164   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta);
1165   LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1166 
1167   // check that |Delta| < iteration count
1168   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1169     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound);
1170     LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
1171     const SCEV *AbsDelta =
1172       SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
1173     const SCEV *AbsCoeff =
1174       SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
1175     const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
1176     if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
1177       // Distance greater than trip count - no dependence
1178       ++StrongSIVindependence;
1179       ++StrongSIVsuccesses;
1180       return true;
1181     }
1182   }
1183 
1184   // Can we compute distance?
1185   if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
1186     APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
1187     APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
1188     APInt Distance  = ConstDelta; // these need to be initialized
1189     APInt Remainder = ConstDelta;
1190     APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
1191     LLVM_DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
1192     LLVM_DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1193     // Make sure Coeff divides Delta exactly
1194     if (Remainder != 0) {
1195       // Coeff doesn't divide Distance, no dependence
1196       ++StrongSIVindependence;
1197       ++StrongSIVsuccesses;
1198       return true;
1199     }
1200     Result.DV[Level].Distance = SE->getConstant(Distance);
1201     NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
1202     if (Distance.sgt(0))
1203       Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1204     else if (Distance.slt(0))
1205       Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1206     else
1207       Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1208     ++StrongSIVsuccesses;
1209   }
1210   else if (Delta->isZero()) {
1211     // since 0/X == 0
1212     Result.DV[Level].Distance = Delta;
1213     NewConstraint.setDistance(Delta, CurLoop);
1214     Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1215     ++StrongSIVsuccesses;
1216   }
1217   else {
1218     if (Coeff->isOne()) {
1219       LLVM_DEBUG(dbgs() << "\t    Distance = " << *Delta << "\n");
1220       Result.DV[Level].Distance = Delta; // since X/1 == X
1221       NewConstraint.setDistance(Delta, CurLoop);
1222     }
1223     else {
1224       Result.Consistent = false;
1225       NewConstraint.setLine(Coeff,
1226                             SE->getNegativeSCEV(Coeff),
1227                             SE->getNegativeSCEV(Delta), CurLoop);
1228     }
1229 
1230     // maybe we can get a useful direction
1231     bool DeltaMaybeZero     = !SE->isKnownNonZero(Delta);
1232     bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
1233     bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
1234     bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
1235     bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
1236     // The double negatives above are confusing.
1237     // It helps to read !SE->isKnownNonZero(Delta)
1238     // as "Delta might be Zero"
1239     unsigned NewDirection = Dependence::DVEntry::NONE;
1240     if ((DeltaMaybePositive && CoeffMaybePositive) ||
1241         (DeltaMaybeNegative && CoeffMaybeNegative))
1242       NewDirection = Dependence::DVEntry::LT;
1243     if (DeltaMaybeZero)
1244       NewDirection |= Dependence::DVEntry::EQ;
1245     if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1246         (DeltaMaybePositive && CoeffMaybeNegative))
1247       NewDirection |= Dependence::DVEntry::GT;
1248     if (NewDirection < Result.DV[Level].Direction)
1249       ++StrongSIVsuccesses;
1250     Result.DV[Level].Direction &= NewDirection;
1251   }
1252   return false;
1253 }
1254 
1255 
1256 // weakCrossingSIVtest -
1257 // From the paper, Practical Dependence Testing, Section 4.2.2
1258 //
1259 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1260 // where i is an induction variable, c1 and c2 are loop invariant,
1261 // and a is a constant, we can solve it exactly using the
1262 // Weak-Crossing SIV test.
1263 //
1264 // Given c1 + a*i = c2 - a*i', we can look for the intersection of
1265 // the two lines, where i = i', yielding
1266 //
1267 //    c1 + a*i = c2 - a*i
1268 //    2a*i = c2 - c1
1269 //    i = (c2 - c1)/2a
1270 //
1271 // If i < 0, there is no dependence.
1272 // If i > upperbound, there is no dependence.
1273 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1274 // If i = upperbound, there's a dependence with distance = 0.
1275 // If i is integral, there's a dependence (all directions).
1276 // If the non-integer part = 1/2, there's a dependence (<> directions).
1277 // Otherwise, there's no dependence.
1278 //
1279 // Can prove independence. Failing that,
1280 // can sometimes refine the directions.
1281 // Can determine iteration for splitting.
1282 //
1283 // Return true if dependence disproved.
1284 bool DependenceInfo::weakCrossingSIVtest(
1285     const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
1286     const Loop *CurLoop, unsigned Level, FullDependence &Result,
1287     Constraint &NewConstraint, const SCEV *&SplitIter) const {
1288   LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1289   LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff << "\n");
1290   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1291   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1292   ++WeakCrossingSIVapplications;
1293   assert(0 < Level && Level <= CommonLevels && "Level out of range");
1294   Level--;
1295   Result.Consistent = false;
1296   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1297   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1298   NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
1299   if (Delta->isZero()) {
1300     Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
1301     Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
1302     ++WeakCrossingSIVsuccesses;
1303     if (!Result.DV[Level].Direction) {
1304       ++WeakCrossingSIVindependence;
1305       return true;
1306     }
1307     Result.DV[Level].Distance = Delta; // = 0
1308     return false;
1309   }
1310   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
1311   if (!ConstCoeff)
1312     return false;
1313 
1314   Result.DV[Level].Splitable = true;
1315   if (SE->isKnownNegative(ConstCoeff)) {
1316     ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
1317     assert(ConstCoeff &&
1318            "dynamic cast of negative of ConstCoeff should yield constant");
1319     Delta = SE->getNegativeSCEV(Delta);
1320   }
1321   assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1322 
1323   // compute SplitIter for use by DependenceInfo::getSplitIteration()
1324   SplitIter = SE->getUDivExpr(
1325       SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
1326       SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
1327   LLVM_DEBUG(dbgs() << "\t    Split iter = " << *SplitIter << "\n");
1328 
1329   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1330   if (!ConstDelta)
1331     return false;
1332 
1333   // We're certain that ConstCoeff > 0; therefore,
1334   // if Delta < 0, then no dependence.
1335   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1336   LLVM_DEBUG(dbgs() << "\t    ConstCoeff = " << *ConstCoeff << "\n");
1337   if (SE->isKnownNegative(Delta)) {
1338     // No dependence, Delta < 0
1339     ++WeakCrossingSIVindependence;
1340     ++WeakCrossingSIVsuccesses;
1341     return true;
1342   }
1343 
1344   // We're certain that Delta > 0 and ConstCoeff > 0.
1345   // Check Delta/(2*ConstCoeff) against upper loop bound
1346   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1347     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1348     const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
1349     const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
1350                                     ConstantTwo);
1351     LLVM_DEBUG(dbgs() << "\t    ML = " << *ML << "\n");
1352     if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
1353       // Delta too big, no dependence
1354       ++WeakCrossingSIVindependence;
1355       ++WeakCrossingSIVsuccesses;
1356       return true;
1357     }
1358     if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
1359       // i = i' = UB
1360       Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
1361       Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
1362       ++WeakCrossingSIVsuccesses;
1363       if (!Result.DV[Level].Direction) {
1364         ++WeakCrossingSIVindependence;
1365         return true;
1366       }
1367       Result.DV[Level].Splitable = false;
1368       Result.DV[Level].Distance = SE->getZero(Delta->getType());
1369       return false;
1370     }
1371   }
1372 
1373   // check that Coeff divides Delta
1374   APInt APDelta = ConstDelta->getAPInt();
1375   APInt APCoeff = ConstCoeff->getAPInt();
1376   APInt Distance = APDelta; // these need to be initialzed
1377   APInt Remainder = APDelta;
1378   APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
1379   LLVM_DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1380   if (Remainder != 0) {
1381     // Coeff doesn't divide Delta, no dependence
1382     ++WeakCrossingSIVindependence;
1383     ++WeakCrossingSIVsuccesses;
1384     return true;
1385   }
1386   LLVM_DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
1387 
1388   // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1389   APInt Two = APInt(Distance.getBitWidth(), 2, true);
1390   Remainder = Distance.srem(Two);
1391   LLVM_DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1392   if (Remainder != 0) {
1393     // Equal direction isn't possible
1394     Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
1395     ++WeakCrossingSIVsuccesses;
1396   }
1397   return false;
1398 }
1399 
1400 
1401 // Kirch's algorithm, from
1402 //
1403 //        Optimizing Supercompilers for Supercomputers
1404 //        Michael Wolfe
1405 //        MIT Press, 1989
1406 //
1407 // Program 2.1, page 29.
1408 // Computes the GCD of AM and BM.
1409 // Also finds a solution to the equation ax - by = gcd(a, b).
1410 // Returns true if dependence disproved; i.e., gcd does not divide Delta.
1411 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1412                     const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1413   APInt A0(Bits, 1, true), A1(Bits, 0, true);
1414   APInt B0(Bits, 0, true), B1(Bits, 1, true);
1415   APInt G0 = AM.abs();
1416   APInt G1 = BM.abs();
1417   APInt Q = G0; // these need to be initialized
1418   APInt R = G0;
1419   APInt::sdivrem(G0, G1, Q, R);
1420   while (R != 0) {
1421     APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1422     APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1423     G0 = G1; G1 = R;
1424     APInt::sdivrem(G0, G1, Q, R);
1425   }
1426   G = G1;
1427   LLVM_DEBUG(dbgs() << "\t    GCD = " << G << "\n");
1428   X = AM.slt(0) ? -A1 : A1;
1429   Y = BM.slt(0) ? B1 : -B1;
1430 
1431   // make sure gcd divides Delta
1432   R = Delta.srem(G);
1433   if (R != 0)
1434     return true; // gcd doesn't divide Delta, no dependence
1435   Q = Delta.sdiv(G);
1436   return false;
1437 }
1438 
1439 static APInt floorOfQuotient(const APInt &A, const APInt &B) {
1440   APInt Q = A; // these need to be initialized
1441   APInt R = A;
1442   APInt::sdivrem(A, B, Q, R);
1443   if (R == 0)
1444     return Q;
1445   if ((A.sgt(0) && B.sgt(0)) ||
1446       (A.slt(0) && B.slt(0)))
1447     return Q;
1448   else
1449     return Q - 1;
1450 }
1451 
1452 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
1453   APInt Q = A; // these need to be initialized
1454   APInt R = A;
1455   APInt::sdivrem(A, B, Q, R);
1456   if (R == 0)
1457     return Q;
1458   if ((A.sgt(0) && B.sgt(0)) ||
1459       (A.slt(0) && B.slt(0)))
1460     return Q + 1;
1461   else
1462     return Q;
1463 }
1464 
1465 // exactSIVtest -
1466 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1467 // where i is an induction variable, c1 and c2 are loop invariant, and a1
1468 // and a2 are constant, we can solve it exactly using an algorithm developed
1469 // by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1470 //
1471 //        Dependence Analysis for Supercomputing
1472 //        Utpal Banerjee
1473 //        Kluwer Academic Publishers, 1988
1474 //
1475 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1476 // so use them if possible. They're also a bit better with symbolics and,
1477 // in the case of the strong SIV test, can compute Distances.
1478 //
1479 // Return true if dependence disproved.
1480 //
1481 // This is a modified version of the original Banerjee algorithm. The original
1482 // only tested whether Dst depends on Src. This algorithm extends that and
1483 // returns all the dependencies that exist between Dst and Src.
1484 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1485                                   const SCEV *SrcConst, const SCEV *DstConst,
1486                                   const Loop *CurLoop, unsigned Level,
1487                                   FullDependence &Result,
1488                                   Constraint &NewConstraint) const {
1489   LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1490   LLVM_DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
1491   LLVM_DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
1492   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1493   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1494   ++ExactSIVapplications;
1495   assert(0 < Level && Level <= CommonLevels && "Level out of range");
1496   Level--;
1497   Result.Consistent = false;
1498   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1499   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1500   NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta,
1501                         CurLoop);
1502   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1503   const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1504   const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1505   if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1506     return false;
1507 
1508   // find gcd
1509   APInt G, X, Y;
1510   APInt AM = ConstSrcCoeff->getAPInt();
1511   APInt BM = ConstDstCoeff->getAPInt();
1512   APInt CM = ConstDelta->getAPInt();
1513   unsigned Bits = AM.getBitWidth();
1514   if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
1515     // gcd doesn't divide Delta, no dependence
1516     ++ExactSIVindependence;
1517     ++ExactSIVsuccesses;
1518     return true;
1519   }
1520 
1521   LLVM_DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
1522 
1523   // since SCEV construction normalizes, LM = 0
1524   APInt UM(Bits, 1, true);
1525   bool UMValid = false;
1526   // UM is perhaps unavailable, let's check
1527   if (const SCEVConstant *CUB =
1528           collectConstantUpperBound(CurLoop, Delta->getType())) {
1529     UM = CUB->getAPInt();
1530     LLVM_DEBUG(dbgs() << "\t    UM = " << UM << "\n");
1531     UMValid = true;
1532   }
1533 
1534   APInt TU(APInt::getSignedMaxValue(Bits));
1535   APInt TL(APInt::getSignedMinValue(Bits));
1536   APInt TC = CM.sdiv(G);
1537   APInt TX = X * TC;
1538   APInt TY = Y * TC;
1539   LLVM_DEBUG(dbgs() << "\t    TC = " << TC << "\n");
1540   LLVM_DEBUG(dbgs() << "\t    TX = " << TX << "\n");
1541   LLVM_DEBUG(dbgs() << "\t    TY = " << TY << "\n");
1542 
1543   SmallVector<APInt, 2> TLVec, TUVec;
1544   APInt TB = BM.sdiv(G);
1545   if (TB.sgt(0)) {
1546     TLVec.push_back(ceilingOfQuotient(-TX, TB));
1547     LLVM_DEBUG(dbgs() << "\t    Possible TL = " << TLVec.back() << "\n");
1548     // New bound check - modification to Banerjee's e3 check
1549     if (UMValid) {
1550       TUVec.push_back(floorOfQuotient(UM - TX, TB));
1551       LLVM_DEBUG(dbgs() << "\t    Possible TU = " << TUVec.back() << "\n");
1552     }
1553   } else {
1554     TUVec.push_back(floorOfQuotient(-TX, TB));
1555     LLVM_DEBUG(dbgs() << "\t    Possible TU = " << TUVec.back() << "\n");
1556     // New bound check - modification to Banerjee's e3 check
1557     if (UMValid) {
1558       TLVec.push_back(ceilingOfQuotient(UM - TX, TB));
1559       LLVM_DEBUG(dbgs() << "\t    Possible TL = " << TLVec.back() << "\n");
1560     }
1561   }
1562 
1563   APInt TA = AM.sdiv(G);
1564   if (TA.sgt(0)) {
1565     if (UMValid) {
1566       TUVec.push_back(floorOfQuotient(UM - TY, TA));
1567       LLVM_DEBUG(dbgs() << "\t    Possible TU = " << TUVec.back() << "\n");
1568     }
1569     // New bound check - modification to Banerjee's e3 check
1570     TLVec.push_back(ceilingOfQuotient(-TY, TA));
1571     LLVM_DEBUG(dbgs() << "\t    Possible TL = " << TLVec.back() << "\n");
1572   } else {
1573     if (UMValid) {
1574       TLVec.push_back(ceilingOfQuotient(UM - TY, TA));
1575       LLVM_DEBUG(dbgs() << "\t    Possible TL = " << TLVec.back() << "\n");
1576     }
1577     // New bound check - modification to Banerjee's e3 check
1578     TUVec.push_back(floorOfQuotient(-TY, TA));
1579     LLVM_DEBUG(dbgs() << "\t    Possible TU = " << TUVec.back() << "\n");
1580   }
1581 
1582   LLVM_DEBUG(dbgs() << "\t    TA = " << TA << "\n");
1583   LLVM_DEBUG(dbgs() << "\t    TB = " << TB << "\n");
1584 
1585   if (TLVec.empty() || TUVec.empty())
1586     return false;
1587   TL = APIntOps::smax(TLVec.front(), TLVec.back());
1588   TU = APIntOps::smin(TUVec.front(), TUVec.back());
1589   LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1590   LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1591 
1592   if (TL.sgt(TU)) {
1593     ++ExactSIVindependence;
1594     ++ExactSIVsuccesses;
1595     return true;
1596   }
1597 
1598   // explore directions
1599   unsigned NewDirection = Dependence::DVEntry::NONE;
1600   APInt LowerDistance, UpperDistance;
1601   if (TA.sgt(TB)) {
1602     LowerDistance = (TY - TX) + (TA - TB) * TL;
1603     UpperDistance = (TY - TX) + (TA - TB) * TU;
1604   } else {
1605     LowerDistance = (TY - TX) + (TA - TB) * TU;
1606     UpperDistance = (TY - TX) + (TA - TB) * TL;
1607   }
1608 
1609   LLVM_DEBUG(dbgs() << "\t    LowerDistance = " << LowerDistance << "\n");
1610   LLVM_DEBUG(dbgs() << "\t    UpperDistance = " << UpperDistance << "\n");
1611 
1612   APInt Zero(Bits, 0, true);
1613   if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) {
1614     NewDirection |= Dependence::DVEntry::EQ;
1615     ++ExactSIVsuccesses;
1616   }
1617   if (LowerDistance.slt(0)) {
1618     NewDirection |= Dependence::DVEntry::GT;
1619     ++ExactSIVsuccesses;
1620   }
1621   if (UpperDistance.sgt(0)) {
1622     NewDirection |= Dependence::DVEntry::LT;
1623     ++ExactSIVsuccesses;
1624   }
1625 
1626   // finished
1627   Result.DV[Level].Direction &= NewDirection;
1628   if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
1629     ++ExactSIVindependence;
1630   LLVM_DEBUG(dbgs() << "\t    Result = ");
1631   LLVM_DEBUG(Result.dump(dbgs()));
1632   return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
1633 }
1634 
1635 
1636 // Return true if the divisor evenly divides the dividend.
1637 static
1638 bool isRemainderZero(const SCEVConstant *Dividend,
1639                      const SCEVConstant *Divisor) {
1640   const APInt &ConstDividend = Dividend->getAPInt();
1641   const APInt &ConstDivisor = Divisor->getAPInt();
1642   return ConstDividend.srem(ConstDivisor) == 0;
1643 }
1644 
1645 
1646 // weakZeroSrcSIVtest -
1647 // From the paper, Practical Dependence Testing, Section 4.2.2
1648 //
1649 // When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1650 // where i is an induction variable, c1 and c2 are loop invariant,
1651 // and a is a constant, we can solve it exactly using the
1652 // Weak-Zero SIV test.
1653 //
1654 // Given
1655 //
1656 //    c1 = c2 + a*i
1657 //
1658 // we get
1659 //
1660 //    (c1 - c2)/a = i
1661 //
1662 // If i is not an integer, there's no dependence.
1663 // If i < 0 or > UB, there's no dependence.
1664 // If i = 0, the direction is >= and peeling the
1665 // 1st iteration will break the dependence.
1666 // If i = UB, the direction is <= and peeling the
1667 // last iteration will break the dependence.
1668 // Otherwise, the direction is *.
1669 //
1670 // Can prove independence. Failing that, we can sometimes refine
1671 // the directions. Can sometimes show that first or last
1672 // iteration carries all the dependences (so worth peeling).
1673 //
1674 // (see also weakZeroDstSIVtest)
1675 //
1676 // Return true if dependence disproved.
1677 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
1678                                         const SCEV *SrcConst,
1679                                         const SCEV *DstConst,
1680                                         const Loop *CurLoop, unsigned Level,
1681                                         FullDependence &Result,
1682                                         Constraint &NewConstraint) const {
1683   // For the WeakSIV test, it's possible the loop isn't common to
1684   // the Src and Dst loops. If it isn't, then there's no need to
1685   // record a direction.
1686   LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1687   LLVM_DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << "\n");
1688   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1689   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1690   ++WeakZeroSIVapplications;
1691   assert(0 < Level && Level <= MaxLevels && "Level out of range");
1692   Level--;
1693   Result.Consistent = false;
1694   const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1695   NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
1696                         CurLoop);
1697   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1698   if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
1699     if (Level < CommonLevels) {
1700       Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1701       Result.DV[Level].PeelFirst = true;
1702       ++WeakZeroSIVsuccesses;
1703     }
1704     return false; // dependences caused by first iteration
1705   }
1706   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1707   if (!ConstCoeff)
1708     return false;
1709   const SCEV *AbsCoeff =
1710     SE->isKnownNegative(ConstCoeff) ?
1711     SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
1712   const SCEV *NewDelta =
1713     SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
1714 
1715   // check that Delta/SrcCoeff < iteration count
1716   // really check NewDelta < count*AbsCoeff
1717   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1718     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1719     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1720     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1721       ++WeakZeroSIVindependence;
1722       ++WeakZeroSIVsuccesses;
1723       return true;
1724     }
1725     if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1726       // dependences caused by last iteration
1727       if (Level < CommonLevels) {
1728         Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1729         Result.DV[Level].PeelLast = true;
1730         ++WeakZeroSIVsuccesses;
1731       }
1732       return false;
1733     }
1734   }
1735 
1736   // check that Delta/SrcCoeff >= 0
1737   // really check that NewDelta >= 0
1738   if (SE->isKnownNegative(NewDelta)) {
1739     // No dependence, newDelta < 0
1740     ++WeakZeroSIVindependence;
1741     ++WeakZeroSIVsuccesses;
1742     return true;
1743   }
1744 
1745   // if SrcCoeff doesn't divide Delta, then no dependence
1746   if (isa<SCEVConstant>(Delta) &&
1747       !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1748     ++WeakZeroSIVindependence;
1749     ++WeakZeroSIVsuccesses;
1750     return true;
1751   }
1752   return false;
1753 }
1754 
1755 
1756 // weakZeroDstSIVtest -
1757 // From the paper, Practical Dependence Testing, Section 4.2.2
1758 //
1759 // When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1760 // where i is an induction variable, c1 and c2 are loop invariant,
1761 // and a is a constant, we can solve it exactly using the
1762 // Weak-Zero SIV test.
1763 //
1764 // Given
1765 //
1766 //    c1 + a*i = c2
1767 //
1768 // we get
1769 //
1770 //    i = (c2 - c1)/a
1771 //
1772 // If i is not an integer, there's no dependence.
1773 // If i < 0 or > UB, there's no dependence.
1774 // If i = 0, the direction is <= and peeling the
1775 // 1st iteration will break the dependence.
1776 // If i = UB, the direction is >= and peeling the
1777 // last iteration will break the dependence.
1778 // Otherwise, the direction is *.
1779 //
1780 // Can prove independence. Failing that, we can sometimes refine
1781 // the directions. Can sometimes show that first or last
1782 // iteration carries all the dependences (so worth peeling).
1783 //
1784 // (see also weakZeroSrcSIVtest)
1785 //
1786 // Return true if dependence disproved.
1787 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
1788                                         const SCEV *SrcConst,
1789                                         const SCEV *DstConst,
1790                                         const Loop *CurLoop, unsigned Level,
1791                                         FullDependence &Result,
1792                                         Constraint &NewConstraint) const {
1793   // For the WeakSIV test, it's possible the loop isn't common to the
1794   // Src and Dst loops. If it isn't, then there's no need to record a direction.
1795   LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1796   LLVM_DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << "\n");
1797   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1798   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1799   ++WeakZeroSIVapplications;
1800   assert(0 < Level && Level <= SrcLevels && "Level out of range");
1801   Level--;
1802   Result.Consistent = false;
1803   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1804   NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
1805                         CurLoop);
1806   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1807   if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
1808     if (Level < CommonLevels) {
1809       Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1810       Result.DV[Level].PeelFirst = true;
1811       ++WeakZeroSIVsuccesses;
1812     }
1813     return false; // dependences caused by first iteration
1814   }
1815   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1816   if (!ConstCoeff)
1817     return false;
1818   const SCEV *AbsCoeff =
1819     SE->isKnownNegative(ConstCoeff) ?
1820     SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
1821   const SCEV *NewDelta =
1822     SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
1823 
1824   // check that Delta/SrcCoeff < iteration count
1825   // really check NewDelta < count*AbsCoeff
1826   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1827     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1828     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1829     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1830       ++WeakZeroSIVindependence;
1831       ++WeakZeroSIVsuccesses;
1832       return true;
1833     }
1834     if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1835       // dependences caused by last iteration
1836       if (Level < CommonLevels) {
1837         Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1838         Result.DV[Level].PeelLast = true;
1839         ++WeakZeroSIVsuccesses;
1840       }
1841       return false;
1842     }
1843   }
1844 
1845   // check that Delta/SrcCoeff >= 0
1846   // really check that NewDelta >= 0
1847   if (SE->isKnownNegative(NewDelta)) {
1848     // No dependence, newDelta < 0
1849     ++WeakZeroSIVindependence;
1850     ++WeakZeroSIVsuccesses;
1851     return true;
1852   }
1853 
1854   // if SrcCoeff doesn't divide Delta, then no dependence
1855   if (isa<SCEVConstant>(Delta) &&
1856       !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1857     ++WeakZeroSIVindependence;
1858     ++WeakZeroSIVsuccesses;
1859     return true;
1860   }
1861   return false;
1862 }
1863 
1864 
1865 // exactRDIVtest - Tests the RDIV subscript pair for dependence.
1866 // Things of the form [c1 + a*i] and [c2 + b*j],
1867 // where i and j are induction variable, c1 and c2 are loop invariant,
1868 // and a and b are constants.
1869 // Returns true if any possible dependence is disproved.
1870 // Marks the result as inconsistent.
1871 // Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1872 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1873                                    const SCEV *SrcConst, const SCEV *DstConst,
1874                                    const Loop *SrcLoop, const Loop *DstLoop,
1875                                    FullDependence &Result) const {
1876   LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
1877   LLVM_DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
1878   LLVM_DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
1879   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1880   LLVM_DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1881   ++ExactRDIVapplications;
1882   Result.Consistent = false;
1883   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1884   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1885   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1886   const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1887   const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1888   if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1889     return false;
1890 
1891   // find gcd
1892   APInt G, X, Y;
1893   APInt AM = ConstSrcCoeff->getAPInt();
1894   APInt BM = ConstDstCoeff->getAPInt();
1895   APInt CM = ConstDelta->getAPInt();
1896   unsigned Bits = AM.getBitWidth();
1897   if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
1898     // gcd doesn't divide Delta, no dependence
1899     ++ExactRDIVindependence;
1900     return true;
1901   }
1902 
1903   LLVM_DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
1904 
1905   // since SCEV construction seems to normalize, LM = 0
1906   APInt SrcUM(Bits, 1, true);
1907   bool SrcUMvalid = false;
1908   // SrcUM is perhaps unavailable, let's check
1909   if (const SCEVConstant *UpperBound =
1910           collectConstantUpperBound(SrcLoop, Delta->getType())) {
1911     SrcUM = UpperBound->getAPInt();
1912     LLVM_DEBUG(dbgs() << "\t    SrcUM = " << SrcUM << "\n");
1913     SrcUMvalid = true;
1914   }
1915 
1916   APInt DstUM(Bits, 1, true);
1917   bool DstUMvalid = false;
1918   // UM is perhaps unavailable, let's check
1919   if (const SCEVConstant *UpperBound =
1920           collectConstantUpperBound(DstLoop, Delta->getType())) {
1921     DstUM = UpperBound->getAPInt();
1922     LLVM_DEBUG(dbgs() << "\t    DstUM = " << DstUM << "\n");
1923     DstUMvalid = true;
1924   }
1925 
1926   APInt TU(APInt::getSignedMaxValue(Bits));
1927   APInt TL(APInt::getSignedMinValue(Bits));
1928   APInt TC = CM.sdiv(G);
1929   APInt TX = X * TC;
1930   APInt TY = Y * TC;
1931   LLVM_DEBUG(dbgs() << "\t    TC = " << TC << "\n");
1932   LLVM_DEBUG(dbgs() << "\t    TX = " << TX << "\n");
1933   LLVM_DEBUG(dbgs() << "\t    TY = " << TY << "\n");
1934 
1935   SmallVector<APInt, 2> TLVec, TUVec;
1936   APInt TB = BM.sdiv(G);
1937   if (TB.sgt(0)) {
1938     TLVec.push_back(ceilingOfQuotient(-TX, TB));
1939     LLVM_DEBUG(dbgs() << "\t    Possible TL = " << TLVec.back() << "\n");
1940     if (SrcUMvalid) {
1941       TUVec.push_back(floorOfQuotient(SrcUM - TX, TB));
1942       LLVM_DEBUG(dbgs() << "\t    Possible TU = " << TUVec.back() << "\n");
1943     }
1944   } else {
1945     TUVec.push_back(floorOfQuotient(-TX, TB));
1946     LLVM_DEBUG(dbgs() << "\t    Possible TU = " << TUVec.back() << "\n");
1947     if (SrcUMvalid) {
1948       TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB));
1949       LLVM_DEBUG(dbgs() << "\t    Possible TL = " << TLVec.back() << "\n");
1950     }
1951   }
1952 
1953   APInt TA = AM.sdiv(G);
1954   if (TA.sgt(0)) {
1955     TLVec.push_back(ceilingOfQuotient(-TY, TA));
1956     LLVM_DEBUG(dbgs() << "\t    Possible TL = " << TLVec.back() << "\n");
1957     if (DstUMvalid) {
1958       TUVec.push_back(floorOfQuotient(DstUM - TY, TA));
1959       LLVM_DEBUG(dbgs() << "\t    Possible TU = " << TUVec.back() << "\n");
1960     }
1961   } else {
1962     TUVec.push_back(floorOfQuotient(-TY, TA));
1963     LLVM_DEBUG(dbgs() << "\t    Possible TU = " << TUVec.back() << "\n");
1964     if (DstUMvalid) {
1965       TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA));
1966       LLVM_DEBUG(dbgs() << "\t    Possible TL = " << TLVec.back() << "\n");
1967     }
1968   }
1969 
1970   if (TLVec.empty() || TUVec.empty())
1971     return false;
1972 
1973   LLVM_DEBUG(dbgs() << "\t    TA = " << TA << "\n");
1974   LLVM_DEBUG(dbgs() << "\t    TB = " << TB << "\n");
1975 
1976   TL = APIntOps::smax(TLVec.front(), TLVec.back());
1977   TU = APIntOps::smin(TUVec.front(), TUVec.back());
1978   LLVM_DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1979   LLVM_DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1980 
1981   if (TL.sgt(TU))
1982     ++ExactRDIVindependence;
1983   return TL.sgt(TU);
1984 }
1985 
1986 
1987 // symbolicRDIVtest -
1988 // In Section 4.5 of the Practical Dependence Testing paper,the authors
1989 // introduce a special case of Banerjee's Inequalities (also called the
1990 // Extreme-Value Test) that can handle some of the SIV and RDIV cases,
1991 // particularly cases with symbolics. Since it's only able to disprove
1992 // dependence (not compute distances or directions), we'll use it as a
1993 // fall back for the other tests.
1994 //
1995 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
1996 // where i and j are induction variables and c1 and c2 are loop invariants,
1997 // we can use the symbolic tests to disprove some dependences, serving as a
1998 // backup for the RDIV test. Note that i and j can be the same variable,
1999 // letting this test serve as a backup for the various SIV tests.
2000 //
2001 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
2002 //  0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
2003 // loop bounds for the i and j loops, respectively. So, ...
2004 //
2005 // c1 + a1*i = c2 + a2*j
2006 // a1*i - a2*j = c2 - c1
2007 //
2008 // To test for a dependence, we compute c2 - c1 and make sure it's in the
2009 // range of the maximum and minimum possible values of a1*i - a2*j.
2010 // Considering the signs of a1 and a2, we have 4 possible cases:
2011 //
2012 // 1) If a1 >= 0 and a2 >= 0, then
2013 //        a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
2014 //              -a2*N2 <= c2 - c1 <= a1*N1
2015 //
2016 // 2) If a1 >= 0 and a2 <= 0, then
2017 //        a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
2018 //                  0 <= c2 - c1 <= a1*N1 - a2*N2
2019 //
2020 // 3) If a1 <= 0 and a2 >= 0, then
2021 //        a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
2022 //        a1*N1 - a2*N2 <= c2 - c1 <= 0
2023 //
2024 // 4) If a1 <= 0 and a2 <= 0, then
2025 //        a1*N1 - a2*0  <= c2 - c1 <= a1*0 - a2*N2
2026 //        a1*N1         <= c2 - c1 <=       -a2*N2
2027 //
2028 // return true if dependence disproved
2029 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
2030                                       const SCEV *C1, const SCEV *C2,
2031                                       const Loop *Loop1,
2032                                       const Loop *Loop2) const {
2033   ++SymbolicRDIVapplications;
2034   LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
2035   LLVM_DEBUG(dbgs() << "\t    A1 = " << *A1);
2036   LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
2037   LLVM_DEBUG(dbgs() << "\t    A2 = " << *A2 << "\n");
2038   LLVM_DEBUG(dbgs() << "\t    C1 = " << *C1 << "\n");
2039   LLVM_DEBUG(dbgs() << "\t    C2 = " << *C2 << "\n");
2040   const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
2041   const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
2042   LLVM_DEBUG(if (N1) dbgs() << "\t    N1 = " << *N1 << "\n");
2043   LLVM_DEBUG(if (N2) dbgs() << "\t    N2 = " << *N2 << "\n");
2044   const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
2045   const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
2046   LLVM_DEBUG(dbgs() << "\t    C2 - C1 = " << *C2_C1 << "\n");
2047   LLVM_DEBUG(dbgs() << "\t    C1 - C2 = " << *C1_C2 << "\n");
2048   if (SE->isKnownNonNegative(A1)) {
2049     if (SE->isKnownNonNegative(A2)) {
2050       // A1 >= 0 && A2 >= 0
2051       if (N1) {
2052         // make sure that c2 - c1 <= a1*N1
2053         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2054         LLVM_DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
2055         if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
2056           ++SymbolicRDIVindependence;
2057           return true;
2058         }
2059       }
2060       if (N2) {
2061         // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
2062         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2063         LLVM_DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
2064         if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
2065           ++SymbolicRDIVindependence;
2066           return true;
2067         }
2068       }
2069     }
2070     else if (SE->isKnownNonPositive(A2)) {
2071       // a1 >= 0 && a2 <= 0
2072       if (N1 && N2) {
2073         // make sure that c2 - c1 <= a1*N1 - a2*N2
2074         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2075         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2076         const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2077         LLVM_DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2078         if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
2079           ++SymbolicRDIVindependence;
2080           return true;
2081         }
2082       }
2083       // make sure that 0 <= c2 - c1
2084       if (SE->isKnownNegative(C2_C1)) {
2085         ++SymbolicRDIVindependence;
2086         return true;
2087       }
2088     }
2089   }
2090   else if (SE->isKnownNonPositive(A1)) {
2091     if (SE->isKnownNonNegative(A2)) {
2092       // a1 <= 0 && a2 >= 0
2093       if (N1 && N2) {
2094         // make sure that a1*N1 - a2*N2 <= c2 - c1
2095         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2096         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2097         const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2098         LLVM_DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2099         if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
2100           ++SymbolicRDIVindependence;
2101           return true;
2102         }
2103       }
2104       // make sure that c2 - c1 <= 0
2105       if (SE->isKnownPositive(C2_C1)) {
2106         ++SymbolicRDIVindependence;
2107         return true;
2108       }
2109     }
2110     else if (SE->isKnownNonPositive(A2)) {
2111       // a1 <= 0 && a2 <= 0
2112       if (N1) {
2113         // make sure that a1*N1 <= c2 - c1
2114         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2115         LLVM_DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
2116         if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
2117           ++SymbolicRDIVindependence;
2118           return true;
2119         }
2120       }
2121       if (N2) {
2122         // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2123         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2124         LLVM_DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
2125         if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
2126           ++SymbolicRDIVindependence;
2127           return true;
2128         }
2129       }
2130     }
2131   }
2132   return false;
2133 }
2134 
2135 
2136 // testSIV -
2137 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2138 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2139 // a2 are constant, we attack it with an SIV test. While they can all be
2140 // solved with the Exact SIV test, it's worthwhile to use simpler tests when
2141 // they apply; they're cheaper and sometimes more precise.
2142 //
2143 // Return true if dependence disproved.
2144 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
2145                              FullDependence &Result, Constraint &NewConstraint,
2146                              const SCEV *&SplitIter) const {
2147   LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
2148   LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2149   const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2150   const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2151   if (SrcAddRec && DstAddRec) {
2152     const SCEV *SrcConst = SrcAddRec->getStart();
2153     const SCEV *DstConst = DstAddRec->getStart();
2154     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2155     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2156     const Loop *CurLoop = SrcAddRec->getLoop();
2157     assert(CurLoop == DstAddRec->getLoop() &&
2158            "both loops in SIV should be same");
2159     Level = mapSrcLoop(CurLoop);
2160     bool disproven;
2161     if (SrcCoeff == DstCoeff)
2162       disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2163                                 Level, Result, NewConstraint);
2164     else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
2165       disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2166                                       Level, Result, NewConstraint, SplitIter);
2167     else
2168       disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
2169                                Level, Result, NewConstraint);
2170     return disproven ||
2171       gcdMIVtest(Src, Dst, Result) ||
2172       symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
2173   }
2174   if (SrcAddRec) {
2175     const SCEV *SrcConst = SrcAddRec->getStart();
2176     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2177     const SCEV *DstConst = Dst;
2178     const Loop *CurLoop = SrcAddRec->getLoop();
2179     Level = mapSrcLoop(CurLoop);
2180     return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2181                               Level, Result, NewConstraint) ||
2182       gcdMIVtest(Src, Dst, Result);
2183   }
2184   if (DstAddRec) {
2185     const SCEV *DstConst = DstAddRec->getStart();
2186     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2187     const SCEV *SrcConst = Src;
2188     const Loop *CurLoop = DstAddRec->getLoop();
2189     Level = mapDstLoop(CurLoop);
2190     return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
2191                               CurLoop, Level, Result, NewConstraint) ||
2192       gcdMIVtest(Src, Dst, Result);
2193   }
2194   llvm_unreachable("SIV test expected at least one AddRec");
2195   return false;
2196 }
2197 
2198 
2199 // testRDIV -
2200 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2201 // where i and j are induction variables, c1 and c2 are loop invariant,
2202 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2203 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2204 // It doesn't make sense to talk about distance or direction in this case,
2205 // so there's no point in making special versions of the Strong SIV test or
2206 // the Weak-crossing SIV test.
2207 //
2208 // With minor algebra, this test can also be used for things like
2209 // [c1 + a1*i + a2*j][c2].
2210 //
2211 // Return true if dependence disproved.
2212 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
2213                               FullDependence &Result) const {
2214   // we have 3 possible situations here:
2215   //   1) [a*i + b] and [c*j + d]
2216   //   2) [a*i + c*j + b] and [d]
2217   //   3) [b] and [a*i + c*j + d]
2218   // We need to find what we've got and get organized
2219 
2220   const SCEV *SrcConst, *DstConst;
2221   const SCEV *SrcCoeff, *DstCoeff;
2222   const Loop *SrcLoop, *DstLoop;
2223 
2224   LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
2225   LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2226   const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2227   const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2228   if (SrcAddRec && DstAddRec) {
2229     SrcConst = SrcAddRec->getStart();
2230     SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2231     SrcLoop = SrcAddRec->getLoop();
2232     DstConst = DstAddRec->getStart();
2233     DstCoeff = DstAddRec->getStepRecurrence(*SE);
2234     DstLoop = DstAddRec->getLoop();
2235   }
2236   else if (SrcAddRec) {
2237     if (const SCEVAddRecExpr *tmpAddRec =
2238         dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
2239       SrcConst = tmpAddRec->getStart();
2240       SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
2241       SrcLoop = tmpAddRec->getLoop();
2242       DstConst = Dst;
2243       DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
2244       DstLoop = SrcAddRec->getLoop();
2245     }
2246     else
2247       llvm_unreachable("RDIV reached by surprising SCEVs");
2248   }
2249   else if (DstAddRec) {
2250     if (const SCEVAddRecExpr *tmpAddRec =
2251         dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
2252       DstConst = tmpAddRec->getStart();
2253       DstCoeff = tmpAddRec->getStepRecurrence(*SE);
2254       DstLoop = tmpAddRec->getLoop();
2255       SrcConst = Src;
2256       SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
2257       SrcLoop = DstAddRec->getLoop();
2258     }
2259     else
2260       llvm_unreachable("RDIV reached by surprising SCEVs");
2261   }
2262   else
2263     llvm_unreachable("RDIV expected at least one AddRec");
2264   return exactRDIVtest(SrcCoeff, DstCoeff,
2265                        SrcConst, DstConst,
2266                        SrcLoop, DstLoop,
2267                        Result) ||
2268     gcdMIVtest(Src, Dst, Result) ||
2269     symbolicRDIVtest(SrcCoeff, DstCoeff,
2270                      SrcConst, DstConst,
2271                      SrcLoop, DstLoop);
2272 }
2273 
2274 
2275 // Tests the single-subscript MIV pair (Src and Dst) for dependence.
2276 // Return true if dependence disproved.
2277 // Can sometimes refine direction vectors.
2278 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
2279                              const SmallBitVector &Loops,
2280                              FullDependence &Result) const {
2281   LLVM_DEBUG(dbgs() << "    src = " << *Src << "\n");
2282   LLVM_DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2283   Result.Consistent = false;
2284   return gcdMIVtest(Src, Dst, Result) ||
2285     banerjeeMIVtest(Src, Dst, Loops, Result);
2286 }
2287 
2288 
2289 // Given a product, e.g., 10*X*Y, returns the first constant operand,
2290 // in this case 10. If there is no constant part, returns NULL.
2291 static
2292 const SCEVConstant *getConstantPart(const SCEV *Expr) {
2293   if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))
2294     return Constant;
2295   else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr))
2296     if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0)))
2297       return Constant;
2298   return nullptr;
2299 }
2300 
2301 
2302 //===----------------------------------------------------------------------===//
2303 // gcdMIVtest -
2304 // Tests an MIV subscript pair for dependence.
2305 // Returns true if any possible dependence is disproved.
2306 // Marks the result as inconsistent.
2307 // Can sometimes disprove the equal direction for 1 or more loops,
2308 // as discussed in Michael Wolfe's book,
2309 // High Performance Compilers for Parallel Computing, page 235.
2310 //
2311 // We spend some effort (code!) to handle cases like
2312 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2313 // but M and N are just loop-invariant variables.
2314 // This should help us handle linearized subscripts;
2315 // also makes this test a useful backup to the various SIV tests.
2316 //
2317 // It occurs to me that the presence of loop-invariant variables
2318 // changes the nature of the test from "greatest common divisor"
2319 // to "a common divisor".
2320 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
2321                                 FullDependence &Result) const {
2322   LLVM_DEBUG(dbgs() << "starting gcd\n");
2323   ++GCDapplications;
2324   unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
2325   APInt RunningGCD = APInt::getZero(BitWidth);
2326 
2327   // Examine Src coefficients.
2328   // Compute running GCD and record source constant.
2329   // Because we're looking for the constant at the end of the chain,
2330   // we can't quit the loop just because the GCD == 1.
2331   const SCEV *Coefficients = Src;
2332   while (const SCEVAddRecExpr *AddRec =
2333          dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2334     const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2335     // If the coefficient is the product of a constant and other stuff,
2336     // we can use the constant in the GCD computation.
2337     const auto *Constant = getConstantPart(Coeff);
2338     if (!Constant)
2339       return false;
2340     APInt ConstCoeff = Constant->getAPInt();
2341     RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2342     Coefficients = AddRec->getStart();
2343   }
2344   const SCEV *SrcConst = Coefficients;
2345 
2346   // Examine Dst coefficients.
2347   // Compute running GCD and record destination constant.
2348   // Because we're looking for the constant at the end of the chain,
2349   // we can't quit the loop just because the GCD == 1.
2350   Coefficients = Dst;
2351   while (const SCEVAddRecExpr *AddRec =
2352          dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2353     const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2354     // If the coefficient is the product of a constant and other stuff,
2355     // we can use the constant in the GCD computation.
2356     const auto *Constant = getConstantPart(Coeff);
2357     if (!Constant)
2358       return false;
2359     APInt ConstCoeff = Constant->getAPInt();
2360     RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2361     Coefficients = AddRec->getStart();
2362   }
2363   const SCEV *DstConst = Coefficients;
2364 
2365   APInt ExtraGCD = APInt::getZero(BitWidth);
2366   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
2367   LLVM_DEBUG(dbgs() << "    Delta = " << *Delta << "\n");
2368   const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
2369   if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
2370     // If Delta is a sum of products, we may be able to make further progress.
2371     for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
2372       const SCEV *Operand = Sum->getOperand(Op);
2373       if (isa<SCEVConstant>(Operand)) {
2374         assert(!Constant && "Surprised to find multiple constants");
2375         Constant = cast<SCEVConstant>(Operand);
2376       }
2377       else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
2378         // Search for constant operand to participate in GCD;
2379         // If none found; return false.
2380         const SCEVConstant *ConstOp = getConstantPart(Product);
2381         if (!ConstOp)
2382           return false;
2383         APInt ConstOpValue = ConstOp->getAPInt();
2384         ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
2385                                                    ConstOpValue.abs());
2386       }
2387       else
2388         return false;
2389     }
2390   }
2391   if (!Constant)
2392     return false;
2393   APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
2394   LLVM_DEBUG(dbgs() << "    ConstDelta = " << ConstDelta << "\n");
2395   if (ConstDelta == 0)
2396     return false;
2397   RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
2398   LLVM_DEBUG(dbgs() << "    RunningGCD = " << RunningGCD << "\n");
2399   APInt Remainder = ConstDelta.srem(RunningGCD);
2400   if (Remainder != 0) {
2401     ++GCDindependence;
2402     return true;
2403   }
2404 
2405   // Try to disprove equal directions.
2406   // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2407   // the code above can't disprove the dependence because the GCD = 1.
2408   // So we consider what happen if i = i' and what happens if j = j'.
2409   // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2410   // which is infeasible, so we can disallow the = direction for the i level.
2411   // Setting j = j' doesn't help matters, so we end up with a direction vector
2412   // of [<>, *]
2413   //
2414   // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2415   // we need to remember that the constant part is 5 and the RunningGCD should
2416   // be initialized to ExtraGCD = 30.
2417   LLVM_DEBUG(dbgs() << "    ExtraGCD = " << ExtraGCD << '\n');
2418 
2419   bool Improved = false;
2420   Coefficients = Src;
2421   while (const SCEVAddRecExpr *AddRec =
2422          dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2423     Coefficients = AddRec->getStart();
2424     const Loop *CurLoop = AddRec->getLoop();
2425     RunningGCD = ExtraGCD;
2426     const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
2427     const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
2428     const SCEV *Inner = Src;
2429     while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
2430       AddRec = cast<SCEVAddRecExpr>(Inner);
2431       const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2432       if (CurLoop == AddRec->getLoop())
2433         ; // SrcCoeff == Coeff
2434       else {
2435         // If the coefficient is the product of a constant and other stuff,
2436         // we can use the constant in the GCD computation.
2437         Constant = getConstantPart(Coeff);
2438         if (!Constant)
2439           return false;
2440         APInt ConstCoeff = Constant->getAPInt();
2441         RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2442       }
2443       Inner = AddRec->getStart();
2444     }
2445     Inner = Dst;
2446     while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
2447       AddRec = cast<SCEVAddRecExpr>(Inner);
2448       const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2449       if (CurLoop == AddRec->getLoop())
2450         DstCoeff = Coeff;
2451       else {
2452         // If the coefficient is the product of a constant and other stuff,
2453         // we can use the constant in the GCD computation.
2454         Constant = getConstantPart(Coeff);
2455         if (!Constant)
2456           return false;
2457         APInt ConstCoeff = Constant->getAPInt();
2458         RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2459       }
2460       Inner = AddRec->getStart();
2461     }
2462     Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
2463     // If the coefficient is the product of a constant and other stuff,
2464     // we can use the constant in the GCD computation.
2465     Constant = getConstantPart(Delta);
2466     if (!Constant)
2467       // The difference of the two coefficients might not be a product
2468       // or constant, in which case we give up on this direction.
2469       continue;
2470     APInt ConstCoeff = Constant->getAPInt();
2471     RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2472     LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2473     if (RunningGCD != 0) {
2474       Remainder = ConstDelta.srem(RunningGCD);
2475       LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2476       if (Remainder != 0) {
2477         unsigned Level = mapSrcLoop(CurLoop);
2478         Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
2479         Improved = true;
2480       }
2481     }
2482   }
2483   if (Improved)
2484     ++GCDsuccesses;
2485   LLVM_DEBUG(dbgs() << "all done\n");
2486   return false;
2487 }
2488 
2489 
2490 //===----------------------------------------------------------------------===//
2491 // banerjeeMIVtest -
2492 // Use Banerjee's Inequalities to test an MIV subscript pair.
2493 // (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2494 // Generally follows the discussion in Section 2.5.2 of
2495 //
2496 //    Optimizing Supercompilers for Supercomputers
2497 //    Michael Wolfe
2498 //
2499 // The inequalities given on page 25 are simplified in that loops are
2500 // normalized so that the lower bound is always 0 and the stride is always 1.
2501 // For example, Wolfe gives
2502 //
2503 //     LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2504 //
2505 // where A_k is the coefficient of the kth index in the source subscript,
2506 // B_k is the coefficient of the kth index in the destination subscript,
2507 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2508 // index, and N_k is the stride of the kth index. Since all loops are normalized
2509 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2510 // equation to
2511 //
2512 //     LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2513 //            = (A^-_k - B_k)^- (U_k - 1)  - B_k
2514 //
2515 // Similar simplifications are possible for the other equations.
2516 //
2517 // When we can't determine the number of iterations for a loop,
2518 // we use NULL as an indicator for the worst case, infinity.
2519 // When computing the upper bound, NULL denotes +inf;
2520 // for the lower bound, NULL denotes -inf.
2521 //
2522 // Return true if dependence disproved.
2523 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
2524                                      const SmallBitVector &Loops,
2525                                      FullDependence &Result) const {
2526   LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2527   ++BanerjeeApplications;
2528   LLVM_DEBUG(dbgs() << "    Src = " << *Src << '\n');
2529   const SCEV *A0;
2530   CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
2531   LLVM_DEBUG(dbgs() << "    Dst = " << *Dst << '\n');
2532   const SCEV *B0;
2533   CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
2534   BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2535   const SCEV *Delta = SE->getMinusSCEV(B0, A0);
2536   LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2537 
2538   // Compute bounds for all the * directions.
2539   LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2540   for (unsigned K = 1; K <= MaxLevels; ++K) {
2541     Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
2542     Bound[K].Direction = Dependence::DVEntry::ALL;
2543     Bound[K].DirSet = Dependence::DVEntry::NONE;
2544     findBoundsALL(A, B, Bound, K);
2545 #ifndef NDEBUG
2546     LLVM_DEBUG(dbgs() << "\t    " << K << '\t');
2547     if (Bound[K].Lower[Dependence::DVEntry::ALL])
2548       LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2549     else
2550       LLVM_DEBUG(dbgs() << "-inf\t");
2551     if (Bound[K].Upper[Dependence::DVEntry::ALL])
2552       LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2553     else
2554       LLVM_DEBUG(dbgs() << "+inf\n");
2555 #endif
2556   }
2557 
2558   // Test the *, *, *, ... case.
2559   bool Disproved = false;
2560   if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
2561     // Explore the direction vector hierarchy.
2562     unsigned DepthExpanded = 0;
2563     unsigned NewDeps = exploreDirections(1, A, B, Bound,
2564                                          Loops, DepthExpanded, Delta);
2565     if (NewDeps > 0) {
2566       bool Improved = false;
2567       for (unsigned K = 1; K <= CommonLevels; ++K) {
2568         if (Loops[K]) {
2569           unsigned Old = Result.DV[K - 1].Direction;
2570           Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2571           Improved |= Old != Result.DV[K - 1].Direction;
2572           if (!Result.DV[K - 1].Direction) {
2573             Improved = false;
2574             Disproved = true;
2575             break;
2576           }
2577         }
2578       }
2579       if (Improved)
2580         ++BanerjeeSuccesses;
2581     }
2582     else {
2583       ++BanerjeeIndependence;
2584       Disproved = true;
2585     }
2586   }
2587   else {
2588     ++BanerjeeIndependence;
2589     Disproved = true;
2590   }
2591   delete [] Bound;
2592   delete [] A;
2593   delete [] B;
2594   return Disproved;
2595 }
2596 
2597 
2598 // Hierarchically expands the direction vector
2599 // search space, combining the directions of discovered dependences
2600 // in the DirSet field of Bound. Returns the number of distinct
2601 // dependences discovered. If the dependence is disproved,
2602 // it will return 0.
2603 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2604                                            CoefficientInfo *B, BoundInfo *Bound,
2605                                            const SmallBitVector &Loops,
2606                                            unsigned &DepthExpanded,
2607                                            const SCEV *Delta) const {
2608   // This algorithm has worst case complexity of O(3^n), where 'n' is the number
2609   // of common loop levels. To avoid excessive compile-time, pessimize all the
2610   // results and immediately return when the number of common levels is beyond
2611   // the given threshold.
2612   if (CommonLevels > MIVMaxLevelThreshold) {
2613     LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2614                          "direction exploration is terminated.\n");
2615     for (unsigned K = 1; K <= CommonLevels; ++K)
2616       if (Loops[K])
2617         Bound[K].DirSet = Dependence::DVEntry::ALL;
2618     return 1;
2619   }
2620 
2621   if (Level > CommonLevels) {
2622     // record result
2623     LLVM_DEBUG(dbgs() << "\t[");
2624     for (unsigned K = 1; K <= CommonLevels; ++K) {
2625       if (Loops[K]) {
2626         Bound[K].DirSet |= Bound[K].Direction;
2627 #ifndef NDEBUG
2628         switch (Bound[K].Direction) {
2629         case Dependence::DVEntry::LT:
2630           LLVM_DEBUG(dbgs() << " <");
2631           break;
2632         case Dependence::DVEntry::EQ:
2633           LLVM_DEBUG(dbgs() << " =");
2634           break;
2635         case Dependence::DVEntry::GT:
2636           LLVM_DEBUG(dbgs() << " >");
2637           break;
2638         case Dependence::DVEntry::ALL:
2639           LLVM_DEBUG(dbgs() << " *");
2640           break;
2641         default:
2642           llvm_unreachable("unexpected Bound[K].Direction");
2643         }
2644 #endif
2645       }
2646     }
2647     LLVM_DEBUG(dbgs() << " ]\n");
2648     return 1;
2649   }
2650   if (Loops[Level]) {
2651     if (Level > DepthExpanded) {
2652       DepthExpanded = Level;
2653       // compute bounds for <, =, > at current level
2654       findBoundsLT(A, B, Bound, Level);
2655       findBoundsGT(A, B, Bound, Level);
2656       findBoundsEQ(A, B, Bound, Level);
2657 #ifndef NDEBUG
2658       LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2659       LLVM_DEBUG(dbgs() << "\t    <\t");
2660       if (Bound[Level].Lower[Dependence::DVEntry::LT])
2661         LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2662                           << '\t');
2663       else
2664         LLVM_DEBUG(dbgs() << "-inf\t");
2665       if (Bound[Level].Upper[Dependence::DVEntry::LT])
2666         LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2667                           << '\n');
2668       else
2669         LLVM_DEBUG(dbgs() << "+inf\n");
2670       LLVM_DEBUG(dbgs() << "\t    =\t");
2671       if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2672         LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2673                           << '\t');
2674       else
2675         LLVM_DEBUG(dbgs() << "-inf\t");
2676       if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2677         LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2678                           << '\n');
2679       else
2680         LLVM_DEBUG(dbgs() << "+inf\n");
2681       LLVM_DEBUG(dbgs() << "\t    >\t");
2682       if (Bound[Level].Lower[Dependence::DVEntry::GT])
2683         LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2684                           << '\t');
2685       else
2686         LLVM_DEBUG(dbgs() << "-inf\t");
2687       if (Bound[Level].Upper[Dependence::DVEntry::GT])
2688         LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2689                           << '\n');
2690       else
2691         LLVM_DEBUG(dbgs() << "+inf\n");
2692 #endif
2693     }
2694 
2695     unsigned NewDeps = 0;
2696 
2697     // test bounds for <, *, *, ...
2698     if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
2699       NewDeps += exploreDirections(Level + 1, A, B, Bound,
2700                                    Loops, DepthExpanded, Delta);
2701 
2702     // Test bounds for =, *, *, ...
2703     if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
2704       NewDeps += exploreDirections(Level + 1, A, B, Bound,
2705                                    Loops, DepthExpanded, Delta);
2706 
2707     // test bounds for >, *, *, ...
2708     if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
2709       NewDeps += exploreDirections(Level + 1, A, B, Bound,
2710                                    Loops, DepthExpanded, Delta);
2711 
2712     Bound[Level].Direction = Dependence::DVEntry::ALL;
2713     return NewDeps;
2714   }
2715   else
2716     return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
2717 }
2718 
2719 
2720 // Returns true iff the current bounds are plausible.
2721 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2722                                 BoundInfo *Bound, const SCEV *Delta) const {
2723   Bound[Level].Direction = DirKind;
2724   if (const SCEV *LowerBound = getLowerBound(Bound))
2725     if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
2726       return false;
2727   if (const SCEV *UpperBound = getUpperBound(Bound))
2728     if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
2729       return false;
2730   return true;
2731 }
2732 
2733 
2734 // Computes the upper and lower bounds for level K
2735 // using the * direction. Records them in Bound.
2736 // Wolfe gives the equations
2737 //
2738 //    LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2739 //    UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2740 //
2741 // Since we normalize loops, we can simplify these equations to
2742 //
2743 //    LB^*_k = (A^-_k - B^+_k)U_k
2744 //    UB^*_k = (A^+_k - B^-_k)U_k
2745 //
2746 // We must be careful to handle the case where the upper bound is unknown.
2747 // Note that the lower bound is always <= 0
2748 // and the upper bound is always >= 0.
2749 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
2750                                    BoundInfo *Bound, unsigned K) const {
2751   Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
2752   Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
2753   if (Bound[K].Iterations) {
2754     Bound[K].Lower[Dependence::DVEntry::ALL] =
2755       SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
2756                      Bound[K].Iterations);
2757     Bound[K].Upper[Dependence::DVEntry::ALL] =
2758       SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
2759                      Bound[K].Iterations);
2760   }
2761   else {
2762     // If the difference is 0, we won't need to know the number of iterations.
2763     if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
2764       Bound[K].Lower[Dependence::DVEntry::ALL] =
2765           SE->getZero(A[K].Coeff->getType());
2766     if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
2767       Bound[K].Upper[Dependence::DVEntry::ALL] =
2768           SE->getZero(A[K].Coeff->getType());
2769   }
2770 }
2771 
2772 
2773 // Computes the upper and lower bounds for level K
2774 // using the = direction. Records them in Bound.
2775 // Wolfe gives the equations
2776 //
2777 //    LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2778 //    UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2779 //
2780 // Since we normalize loops, we can simplify these equations to
2781 //
2782 //    LB^=_k = (A_k - B_k)^- U_k
2783 //    UB^=_k = (A_k - B_k)^+ U_k
2784 //
2785 // We must be careful to handle the case where the upper bound is unknown.
2786 // Note that the lower bound is always <= 0
2787 // and the upper bound is always >= 0.
2788 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
2789                                   BoundInfo *Bound, unsigned K) const {
2790   Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
2791   Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
2792   if (Bound[K].Iterations) {
2793     const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2794     const SCEV *NegativePart = getNegativePart(Delta);
2795     Bound[K].Lower[Dependence::DVEntry::EQ] =
2796       SE->getMulExpr(NegativePart, Bound[K].Iterations);
2797     const SCEV *PositivePart = getPositivePart(Delta);
2798     Bound[K].Upper[Dependence::DVEntry::EQ] =
2799       SE->getMulExpr(PositivePart, Bound[K].Iterations);
2800   }
2801   else {
2802     // If the positive/negative part of the difference is 0,
2803     // we won't need to know the number of iterations.
2804     const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2805     const SCEV *NegativePart = getNegativePart(Delta);
2806     if (NegativePart->isZero())
2807       Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2808     const SCEV *PositivePart = getPositivePart(Delta);
2809     if (PositivePart->isZero())
2810       Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2811   }
2812 }
2813 
2814 
2815 // Computes the upper and lower bounds for level K
2816 // using the < direction. Records them in Bound.
2817 // Wolfe gives the equations
2818 //
2819 //    LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2820 //    UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2821 //
2822 // Since we normalize loops, we can simplify these equations to
2823 //
2824 //    LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2825 //    UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2826 //
2827 // We must be careful to handle the case where the upper bound is unknown.
2828 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
2829                                   BoundInfo *Bound, unsigned K) const {
2830   Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
2831   Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
2832   if (Bound[K].Iterations) {
2833     const SCEV *Iter_1 = SE->getMinusSCEV(
2834         Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2835     const SCEV *NegPart =
2836       getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2837     Bound[K].Lower[Dependence::DVEntry::LT] =
2838       SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
2839     const SCEV *PosPart =
2840       getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2841     Bound[K].Upper[Dependence::DVEntry::LT] =
2842       SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
2843   }
2844   else {
2845     // If the positive/negative part of the difference is 0,
2846     // we won't need to know the number of iterations.
2847     const SCEV *NegPart =
2848       getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2849     if (NegPart->isZero())
2850       Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2851     const SCEV *PosPart =
2852       getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2853     if (PosPart->isZero())
2854       Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2855   }
2856 }
2857 
2858 
2859 // Computes the upper and lower bounds for level K
2860 // using the > direction. Records them in Bound.
2861 // Wolfe gives the equations
2862 //
2863 //    LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2864 //    UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2865 //
2866 // Since we normalize loops, we can simplify these equations to
2867 //
2868 //    LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2869 //    UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2870 //
2871 // We must be careful to handle the case where the upper bound is unknown.
2872 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
2873                                   BoundInfo *Bound, unsigned K) const {
2874   Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
2875   Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
2876   if (Bound[K].Iterations) {
2877     const SCEV *Iter_1 = SE->getMinusSCEV(
2878         Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2879     const SCEV *NegPart =
2880       getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2881     Bound[K].Lower[Dependence::DVEntry::GT] =
2882       SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
2883     const SCEV *PosPart =
2884       getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2885     Bound[K].Upper[Dependence::DVEntry::GT] =
2886       SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
2887   }
2888   else {
2889     // If the positive/negative part of the difference is 0,
2890     // we won't need to know the number of iterations.
2891     const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2892     if (NegPart->isZero())
2893       Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2894     const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2895     if (PosPart->isZero())
2896       Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2897   }
2898 }
2899 
2900 
2901 // X^+ = max(X, 0)
2902 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
2903   return SE->getSMaxExpr(X, SE->getZero(X->getType()));
2904 }
2905 
2906 
2907 // X^- = min(X, 0)
2908 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
2909   return SE->getSMinExpr(X, SE->getZero(X->getType()));
2910 }
2911 
2912 
2913 // Walks through the subscript,
2914 // collecting each coefficient, the associated loop bounds,
2915 // and recording its positive and negative parts for later use.
2916 DependenceInfo::CoefficientInfo *
2917 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
2918                                  const SCEV *&Constant) const {
2919   const SCEV *Zero = SE->getZero(Subscript->getType());
2920   CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
2921   for (unsigned K = 1; K <= MaxLevels; ++K) {
2922     CI[K].Coeff = Zero;
2923     CI[K].PosPart = Zero;
2924     CI[K].NegPart = Zero;
2925     CI[K].Iterations = nullptr;
2926   }
2927   while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
2928     const Loop *L = AddRec->getLoop();
2929     unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
2930     CI[K].Coeff = AddRec->getStepRecurrence(*SE);
2931     CI[K].PosPart = getPositivePart(CI[K].Coeff);
2932     CI[K].NegPart = getNegativePart(CI[K].Coeff);
2933     CI[K].Iterations = collectUpperBound(L, Subscript->getType());
2934     Subscript = AddRec->getStart();
2935   }
2936   Constant = Subscript;
2937 #ifndef NDEBUG
2938   LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
2939   for (unsigned K = 1; K <= MaxLevels; ++K) {
2940     LLVM_DEBUG(dbgs() << "\t    " << K << "\t" << *CI[K].Coeff);
2941     LLVM_DEBUG(dbgs() << "\tPos Part = ");
2942     LLVM_DEBUG(dbgs() << *CI[K].PosPart);
2943     LLVM_DEBUG(dbgs() << "\tNeg Part = ");
2944     LLVM_DEBUG(dbgs() << *CI[K].NegPart);
2945     LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
2946     if (CI[K].Iterations)
2947       LLVM_DEBUG(dbgs() << *CI[K].Iterations);
2948     else
2949       LLVM_DEBUG(dbgs() << "+inf");
2950     LLVM_DEBUG(dbgs() << '\n');
2951   }
2952   LLVM_DEBUG(dbgs() << "\t    Constant = " << *Subscript << '\n');
2953 #endif
2954   return CI;
2955 }
2956 
2957 
2958 // Looks through all the bounds info and
2959 // computes the lower bound given the current direction settings
2960 // at each level. If the lower bound for any level is -inf,
2961 // the result is -inf.
2962 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
2963   const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
2964   for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
2965     if (Bound[K].Lower[Bound[K].Direction])
2966       Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
2967     else
2968       Sum = nullptr;
2969   }
2970   return Sum;
2971 }
2972 
2973 
2974 // Looks through all the bounds info and
2975 // computes the upper bound given the current direction settings
2976 // at each level. If the upper bound at any level is +inf,
2977 // the result is +inf.
2978 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
2979   const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
2980   for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
2981     if (Bound[K].Upper[Bound[K].Direction])
2982       Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
2983     else
2984       Sum = nullptr;
2985   }
2986   return Sum;
2987 }
2988 
2989 
2990 //===----------------------------------------------------------------------===//
2991 // Constraint manipulation for Delta test.
2992 
2993 // Given a linear SCEV,
2994 // return the coefficient (the step)
2995 // corresponding to the specified loop.
2996 // If there isn't one, return 0.
2997 // For example, given a*i + b*j + c*k, finding the coefficient
2998 // corresponding to the j loop would yield b.
2999 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr,
3000                                             const Loop *TargetLoop) const {
3001   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3002   if (!AddRec)
3003     return SE->getZero(Expr->getType());
3004   if (AddRec->getLoop() == TargetLoop)
3005     return AddRec->getStepRecurrence(*SE);
3006   return findCoefficient(AddRec->getStart(), TargetLoop);
3007 }
3008 
3009 
3010 // Given a linear SCEV,
3011 // return the SCEV given by zeroing out the coefficient
3012 // corresponding to the specified loop.
3013 // For example, given a*i + b*j + c*k, zeroing the coefficient
3014 // corresponding to the j loop would yield a*i + c*k.
3015 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr,
3016                                             const Loop *TargetLoop) const {
3017   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3018   if (!AddRec)
3019     return Expr; // ignore
3020   if (AddRec->getLoop() == TargetLoop)
3021     return AddRec->getStart();
3022   return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
3023                            AddRec->getStepRecurrence(*SE),
3024                            AddRec->getLoop(),
3025                            AddRec->getNoWrapFlags());
3026 }
3027 
3028 
3029 // Given a linear SCEV Expr,
3030 // return the SCEV given by adding some Value to the
3031 // coefficient corresponding to the specified TargetLoop.
3032 // For example, given a*i + b*j + c*k, adding 1 to the coefficient
3033 // corresponding to the j loop would yield a*i + (b+1)*j + c*k.
3034 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr,
3035                                              const Loop *TargetLoop,
3036                                              const SCEV *Value) const {
3037   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3038   if (!AddRec) // create a new addRec
3039     return SE->getAddRecExpr(Expr,
3040                              Value,
3041                              TargetLoop,
3042                              SCEV::FlagAnyWrap); // Worst case, with no info.
3043   if (AddRec->getLoop() == TargetLoop) {
3044     const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
3045     if (Sum->isZero())
3046       return AddRec->getStart();
3047     return SE->getAddRecExpr(AddRec->getStart(),
3048                              Sum,
3049                              AddRec->getLoop(),
3050                              AddRec->getNoWrapFlags());
3051   }
3052   if (SE->isLoopInvariant(AddRec, TargetLoop))
3053     return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
3054   return SE->getAddRecExpr(
3055       addToCoefficient(AddRec->getStart(), TargetLoop, Value),
3056       AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
3057       AddRec->getNoWrapFlags());
3058 }
3059 
3060 
3061 // Review the constraints, looking for opportunities
3062 // to simplify a subscript pair (Src and Dst).
3063 // Return true if some simplification occurs.
3064 // If the simplification isn't exact (that is, if it is conservative
3065 // in terms of dependence), set consistent to false.
3066 // Corresponds to Figure 5 from the paper
3067 //
3068 //            Practical Dependence Testing
3069 //            Goff, Kennedy, Tseng
3070 //            PLDI 1991
3071 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
3072                                SmallBitVector &Loops,
3073                                SmallVectorImpl<Constraint> &Constraints,
3074                                bool &Consistent) {
3075   bool Result = false;
3076   for (unsigned LI : Loops.set_bits()) {
3077     LLVM_DEBUG(dbgs() << "\t    Constraint[" << LI << "] is");
3078     LLVM_DEBUG(Constraints[LI].dump(dbgs()));
3079     if (Constraints[LI].isDistance())
3080       Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
3081     else if (Constraints[LI].isLine())
3082       Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
3083     else if (Constraints[LI].isPoint())
3084       Result |= propagatePoint(Src, Dst, Constraints[LI]);
3085   }
3086   return Result;
3087 }
3088 
3089 
3090 // Attempt to propagate a distance
3091 // constraint into a subscript pair (Src and Dst).
3092 // Return true if some simplification occurs.
3093 // If the simplification isn't exact (that is, if it is conservative
3094 // in terms of dependence), set consistent to false.
3095 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
3096                                        Constraint &CurConstraint,
3097                                        bool &Consistent) {
3098   const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3099   LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3100   const SCEV *A_K = findCoefficient(Src, CurLoop);
3101   if (A_K->isZero())
3102     return false;
3103   const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
3104   Src = SE->getMinusSCEV(Src, DA_K);
3105   Src = zeroCoefficient(Src, CurLoop);
3106   LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3107   LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3108   Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
3109   LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3110   if (!findCoefficient(Dst, CurLoop)->isZero())
3111     Consistent = false;
3112   return true;
3113 }
3114 
3115 
3116 // Attempt to propagate a line
3117 // constraint into a subscript pair (Src and Dst).
3118 // Return true if some simplification occurs.
3119 // If the simplification isn't exact (that is, if it is conservative
3120 // in terms of dependence), set consistent to false.
3121 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
3122                                    Constraint &CurConstraint,
3123                                    bool &Consistent) {
3124   const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3125   const SCEV *A = CurConstraint.getA();
3126   const SCEV *B = CurConstraint.getB();
3127   const SCEV *C = CurConstraint.getC();
3128   LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C
3129                     << "\n");
3130   LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
3131   LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
3132   if (A->isZero()) {
3133     const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
3134     const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3135     if (!Bconst || !Cconst) return false;
3136     APInt Beta = Bconst->getAPInt();
3137     APInt Charlie = Cconst->getAPInt();
3138     APInt CdivB = Charlie.sdiv(Beta);
3139     assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
3140     const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3141     //    Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3142     Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3143     Dst = zeroCoefficient(Dst, CurLoop);
3144     if (!findCoefficient(Src, CurLoop)->isZero())
3145       Consistent = false;
3146   }
3147   else if (B->isZero()) {
3148     const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3149     const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3150     if (!Aconst || !Cconst) return false;
3151     APInt Alpha = Aconst->getAPInt();
3152     APInt Charlie = Cconst->getAPInt();
3153     APInt CdivA = Charlie.sdiv(Alpha);
3154     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3155     const SCEV *A_K = findCoefficient(Src, CurLoop);
3156     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3157     Src = zeroCoefficient(Src, CurLoop);
3158     if (!findCoefficient(Dst, CurLoop)->isZero())
3159       Consistent = false;
3160   }
3161   else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
3162     const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3163     const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3164     if (!Aconst || !Cconst) return false;
3165     APInt Alpha = Aconst->getAPInt();
3166     APInt Charlie = Cconst->getAPInt();
3167     APInt CdivA = Charlie.sdiv(Alpha);
3168     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3169     const SCEV *A_K = findCoefficient(Src, CurLoop);
3170     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3171     Src = zeroCoefficient(Src, CurLoop);
3172     Dst = addToCoefficient(Dst, CurLoop, A_K);
3173     if (!findCoefficient(Dst, CurLoop)->isZero())
3174       Consistent = false;
3175   }
3176   else {
3177     // paper is incorrect here, or perhaps just misleading
3178     const SCEV *A_K = findCoefficient(Src, CurLoop);
3179     Src = SE->getMulExpr(Src, A);
3180     Dst = SE->getMulExpr(Dst, A);
3181     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
3182     Src = zeroCoefficient(Src, CurLoop);
3183     Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
3184     if (!findCoefficient(Dst, CurLoop)->isZero())
3185       Consistent = false;
3186   }
3187   LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
3188   LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
3189   return true;
3190 }
3191 
3192 
3193 // Attempt to propagate a point
3194 // constraint into a subscript pair (Src and Dst).
3195 // Return true if some simplification occurs.
3196 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
3197                                     Constraint &CurConstraint) {
3198   const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3199   const SCEV *A_K = findCoefficient(Src, CurLoop);
3200   const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3201   const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
3202   const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
3203   LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3204   Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
3205   Src = zeroCoefficient(Src, CurLoop);
3206   LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3207   LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3208   Dst = zeroCoefficient(Dst, CurLoop);
3209   LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3210   return true;
3211 }
3212 
3213 
3214 // Update direction vector entry based on the current constraint.
3215 void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
3216                                      const Constraint &CurConstraint) const {
3217   LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");
3218   LLVM_DEBUG(CurConstraint.dump(dbgs()));
3219   if (CurConstraint.isAny())
3220     ; // use defaults
3221   else if (CurConstraint.isDistance()) {
3222     // this one is consistent, the others aren't
3223     Level.Scalar = false;
3224     Level.Distance = CurConstraint.getD();
3225     unsigned NewDirection = Dependence::DVEntry::NONE;
3226     if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
3227       NewDirection = Dependence::DVEntry::EQ;
3228     if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
3229       NewDirection |= Dependence::DVEntry::LT;
3230     if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
3231       NewDirection |= Dependence::DVEntry::GT;
3232     Level.Direction &= NewDirection;
3233   }
3234   else if (CurConstraint.isLine()) {
3235     Level.Scalar = false;
3236     Level.Distance = nullptr;
3237     // direction should be accurate
3238   }
3239   else if (CurConstraint.isPoint()) {
3240     Level.Scalar = false;
3241     Level.Distance = nullptr;
3242     unsigned NewDirection = Dependence::DVEntry::NONE;
3243     if (!isKnownPredicate(CmpInst::ICMP_NE,
3244                           CurConstraint.getY(),
3245                           CurConstraint.getX()))
3246       // if X may be = Y
3247       NewDirection |= Dependence::DVEntry::EQ;
3248     if (!isKnownPredicate(CmpInst::ICMP_SLE,
3249                           CurConstraint.getY(),
3250                           CurConstraint.getX()))
3251       // if Y may be > X
3252       NewDirection |= Dependence::DVEntry::LT;
3253     if (!isKnownPredicate(CmpInst::ICMP_SGE,
3254                           CurConstraint.getY(),
3255                           CurConstraint.getX()))
3256       // if Y may be < X
3257       NewDirection |= Dependence::DVEntry::GT;
3258     Level.Direction &= NewDirection;
3259   }
3260   else
3261     llvm_unreachable("constraint has unexpected kind");
3262 }
3263 
3264 /// Check if we can delinearize the subscripts. If the SCEVs representing the
3265 /// source and destination array references are recurrences on a nested loop,
3266 /// this function flattens the nested recurrences into separate recurrences
3267 /// for each loop level.
3268 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
3269                                     SmallVectorImpl<Subscript> &Pair) {
3270   assert(isLoadOrStore(Src) && "instruction is not load or store");
3271   assert(isLoadOrStore(Dst) && "instruction is not load or store");
3272   Value *SrcPtr = getLoadStorePointerOperand(Src);
3273   Value *DstPtr = getLoadStorePointerOperand(Dst);
3274   Loop *SrcLoop = LI->getLoopFor(Src->getParent());
3275   Loop *DstLoop = LI->getLoopFor(Dst->getParent());
3276   const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
3277   const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
3278   const SCEVUnknown *SrcBase =
3279       dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3280   const SCEVUnknown *DstBase =
3281       dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3282 
3283   if (!SrcBase || !DstBase || SrcBase != DstBase)
3284     return false;
3285 
3286   SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
3287 
3288   if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
3289                                SrcSubscripts, DstSubscripts) &&
3290       !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
3291                                     SrcSubscripts, DstSubscripts))
3292     return false;
3293 
3294   int Size = SrcSubscripts.size();
3295   LLVM_DEBUG({
3296     dbgs() << "\nSrcSubscripts: ";
3297     for (int I = 0; I < Size; I++)
3298       dbgs() << *SrcSubscripts[I];
3299     dbgs() << "\nDstSubscripts: ";
3300     for (int I = 0; I < Size; I++)
3301       dbgs() << *DstSubscripts[I];
3302   });
3303 
3304   // The delinearization transforms a single-subscript MIV dependence test into
3305   // a multi-subscript SIV dependence test that is easier to compute. So we
3306   // resize Pair to contain as many pairs of subscripts as the delinearization
3307   // has found, and then initialize the pairs following the delinearization.
3308   Pair.resize(Size);
3309   for (int I = 0; I < Size; ++I) {
3310     Pair[I].Src = SrcSubscripts[I];
3311     Pair[I].Dst = DstSubscripts[I];
3312     unifySubscriptType(&Pair[I]);
3313   }
3314 
3315   return true;
3316 }
3317 
3318 bool DependenceInfo::tryDelinearizeFixedSize(
3319     Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3320     const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3321     SmallVectorImpl<const SCEV *> &DstSubscripts) {
3322 
3323   Value *SrcPtr = getLoadStorePointerOperand(Src);
3324   Value *DstPtr = getLoadStorePointerOperand(Dst);
3325   const SCEVUnknown *SrcBase =
3326       dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3327   const SCEVUnknown *DstBase =
3328       dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3329   assert(SrcBase && DstBase && SrcBase == DstBase &&
3330          "expected src and dst scev unknowns to be equal");
3331 
3332   // Check the simple case where the array dimensions are fixed size.
3333   auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
3334   auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr);
3335   if (!SrcGEP || !DstGEP)
3336     return false;
3337 
3338   SmallVector<int, 4> SrcSizes, DstSizes;
3339   getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes);
3340   getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes);
3341 
3342   // Check that the two size arrays are non-empty and equal in length and
3343   // value.
3344   if (SrcSizes.empty() || SrcSubscripts.size() <= 1 ||
3345       SrcSizes.size() != DstSizes.size() ||
3346       !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
3347     SrcSubscripts.clear();
3348     DstSubscripts.clear();
3349     return false;
3350   }
3351 
3352   Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts();
3353   Value *DstBasePtr = DstGEP->getOperand(0)->stripPointerCasts();
3354 
3355   // Check that for identical base pointers we do not miss index offsets
3356   // that have been added before this GEP is applied.
3357   if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) {
3358     SrcSubscripts.clear();
3359     DstSubscripts.clear();
3360     return false;
3361   }
3362 
3363   assert(SrcSubscripts.size() == DstSubscripts.size() &&
3364          SrcSubscripts.size() == SrcSizes.size() + 1 &&
3365          "Expected equal number of entries in the list of sizes and "
3366          "subscripts.");
3367 
3368   // In general we cannot safely assume that the subscripts recovered from GEPs
3369   // are in the range of values defined for their corresponding array
3370   // dimensions. For example some C language usage/interpretation make it
3371   // impossible to verify this at compile-time. As such we can only delinearize
3372   // iff the subscripts are positive and are less than the range of the
3373   // dimension.
3374   if (!DisableDelinearizationChecks) {
3375     auto AllIndiciesInRange = [&](SmallVector<int, 4> &DimensionSizes,
3376                                   SmallVectorImpl<const SCEV *> &Subscripts,
3377                                   Value *Ptr) {
3378       size_t SSize = Subscripts.size();
3379       for (size_t I = 1; I < SSize; ++I) {
3380         const SCEV *S = Subscripts[I];
3381         if (!isKnownNonNegative(S, Ptr))
3382           return false;
3383         if (auto *SType = dyn_cast<IntegerType>(S->getType())) {
3384           const SCEV *Range = SE->getConstant(
3385               ConstantInt::get(SType, DimensionSizes[I - 1], false));
3386           if (!isKnownLessThan(S, Range))
3387             return false;
3388         }
3389       }
3390       return true;
3391     };
3392 
3393     if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) ||
3394         !AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) {
3395       SrcSubscripts.clear();
3396       DstSubscripts.clear();
3397       return false;
3398     }
3399   }
3400   LLVM_DEBUG({
3401     dbgs() << "Delinearized subscripts of fixed-size array\n"
3402            << "SrcGEP:" << *SrcGEP << "\n"
3403            << "DstGEP:" << *DstGEP << "\n";
3404   });
3405   return true;
3406 }
3407 
3408 bool DependenceInfo::tryDelinearizeParametricSize(
3409     Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3410     const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3411     SmallVectorImpl<const SCEV *> &DstSubscripts) {
3412 
3413   Value *SrcPtr = getLoadStorePointerOperand(Src);
3414   Value *DstPtr = getLoadStorePointerOperand(Dst);
3415   const SCEVUnknown *SrcBase =
3416       dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3417   const SCEVUnknown *DstBase =
3418       dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3419   assert(SrcBase && DstBase && SrcBase == DstBase &&
3420          "expected src and dst scev unknowns to be equal");
3421 
3422   const SCEV *ElementSize = SE->getElementSize(Src);
3423   if (ElementSize != SE->getElementSize(Dst))
3424     return false;
3425 
3426   const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
3427   const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
3428 
3429   const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
3430   const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
3431   if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
3432     return false;
3433 
3434   // First step: collect parametric terms in both array references.
3435   SmallVector<const SCEV *, 4> Terms;
3436   collectParametricTerms(*SE, SrcAR, Terms);
3437   collectParametricTerms(*SE, DstAR, Terms);
3438 
3439   // Second step: find subscript sizes.
3440   SmallVector<const SCEV *, 4> Sizes;
3441   findArrayDimensions(*SE, Terms, Sizes, ElementSize);
3442 
3443   // Third step: compute the access functions for each subscript.
3444   computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);
3445   computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);
3446 
3447   // Fail when there is only a subscript: that's a linearized access function.
3448   if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
3449       SrcSubscripts.size() != DstSubscripts.size())
3450     return false;
3451 
3452   size_t Size = SrcSubscripts.size();
3453 
3454   // Statically check that the array bounds are in-range. The first subscript we
3455   // don't have a size for and it cannot overflow into another subscript, so is
3456   // always safe. The others need to be 0 <= subscript[i] < bound, for both src
3457   // and dst.
3458   // FIXME: It may be better to record these sizes and add them as constraints
3459   // to the dependency checks.
3460   if (!DisableDelinearizationChecks)
3461     for (size_t I = 1; I < Size; ++I) {
3462       if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))
3463         return false;
3464 
3465       if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))
3466         return false;
3467 
3468       if (!isKnownNonNegative(DstSubscripts[I], DstPtr))
3469         return false;
3470 
3471       if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))
3472         return false;
3473     }
3474 
3475   return true;
3476 }
3477 
3478 //===----------------------------------------------------------------------===//
3479 
3480 #ifndef NDEBUG
3481 // For debugging purposes, dump a small bit vector to dbgs().
3482 static void dumpSmallBitVector(SmallBitVector &BV) {
3483   dbgs() << "{";
3484   for (unsigned VI : BV.set_bits()) {
3485     dbgs() << VI;
3486     if (BV.find_next(VI) >= 0)
3487       dbgs() << ' ';
3488   }
3489   dbgs() << "}\n";
3490 }
3491 #endif
3492 
3493 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
3494                                 FunctionAnalysisManager::Invalidator &Inv) {
3495   // Check if the analysis itself has been invalidated.
3496   auto PAC = PA.getChecker<DependenceAnalysis>();
3497   if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3498     return true;
3499 
3500   // Check transitive dependencies.
3501   return Inv.invalidate<AAManager>(F, PA) ||
3502          Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
3503          Inv.invalidate<LoopAnalysis>(F, PA);
3504 }
3505 
3506 // depends -
3507 // Returns NULL if there is no dependence.
3508 // Otherwise, return a Dependence with as many details as possible.
3509 // Corresponds to Section 3.1 in the paper
3510 //
3511 //            Practical Dependence Testing
3512 //            Goff, Kennedy, Tseng
3513 //            PLDI 1991
3514 //
3515 // Care is required to keep the routine below, getSplitIteration(),
3516 // up to date with respect to this routine.
3517 std::unique_ptr<Dependence>
3518 DependenceInfo::depends(Instruction *Src, Instruction *Dst,
3519                         bool PossiblyLoopIndependent) {
3520   if (Src == Dst)
3521     PossiblyLoopIndependent = false;
3522 
3523   if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
3524     // if both instructions don't reference memory, there's no dependence
3525     return nullptr;
3526 
3527   if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
3528     // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3529     LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
3530     return std::make_unique<Dependence>(Src, Dst);
3531   }
3532 
3533   assert(isLoadOrStore(Src) && "instruction is not load or store");
3534   assert(isLoadOrStore(Dst) && "instruction is not load or store");
3535   Value *SrcPtr = getLoadStorePointerOperand(Src);
3536   Value *DstPtr = getLoadStorePointerOperand(Dst);
3537 
3538   switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(),
3539                                  MemoryLocation::get(Dst),
3540                                  MemoryLocation::get(Src))) {
3541   case AliasResult::MayAlias:
3542   case AliasResult::PartialAlias:
3543     // cannot analyse objects if we don't understand their aliasing.
3544     LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
3545     return std::make_unique<Dependence>(Src, Dst);
3546   case AliasResult::NoAlias:
3547     // If the objects noalias, they are distinct, accesses are independent.
3548     LLVM_DEBUG(dbgs() << "no alias\n");
3549     return nullptr;
3550   case AliasResult::MustAlias:
3551     break; // The underlying objects alias; test accesses for dependence.
3552   }
3553 
3554   // establish loop nesting levels
3555   establishNestingLevels(Src, Dst);
3556   LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
3557   LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
3558 
3559   FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
3560   ++TotalArrayPairs;
3561 
3562   unsigned Pairs = 1;
3563   SmallVector<Subscript, 2> Pair(Pairs);
3564   const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3565   const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3566   LLVM_DEBUG(dbgs() << "    SrcSCEV = " << *SrcSCEV << "\n");
3567   LLVM_DEBUG(dbgs() << "    DstSCEV = " << *DstSCEV << "\n");
3568   if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) {
3569     // If two pointers have different bases, trying to analyze indexes won't
3570     // work; we can't compare them to each other. This can happen, for example,
3571     // if one is produced by an LCSSA PHI node.
3572     //
3573     // We check this upfront so we don't crash in cases where getMinusSCEV()
3574     // returns a SCEVCouldNotCompute.
3575     LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
3576     return std::make_unique<Dependence>(Src, Dst);
3577   }
3578   Pair[0].Src = SrcSCEV;
3579   Pair[0].Dst = DstSCEV;
3580 
3581   if (Delinearize) {
3582     if (tryDelinearize(Src, Dst, Pair)) {
3583       LLVM_DEBUG(dbgs() << "    delinearized\n");
3584       Pairs = Pair.size();
3585     }
3586   }
3587 
3588   for (unsigned P = 0; P < Pairs; ++P) {
3589     Pair[P].Loops.resize(MaxLevels + 1);
3590     Pair[P].GroupLoops.resize(MaxLevels + 1);
3591     Pair[P].Group.resize(Pairs);
3592     removeMatchingExtensions(&Pair[P]);
3593     Pair[P].Classification =
3594       classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
3595                    Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
3596                    Pair[P].Loops);
3597     Pair[P].GroupLoops = Pair[P].Loops;
3598     Pair[P].Group.set(P);
3599     LLVM_DEBUG(dbgs() << "    subscript " << P << "\n");
3600     LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3601     LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3602     LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3603     LLVM_DEBUG(dbgs() << "\tloops = ");
3604     LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
3605   }
3606 
3607   SmallBitVector Separable(Pairs);
3608   SmallBitVector Coupled(Pairs);
3609 
3610   // Partition subscripts into separable and minimally-coupled groups
3611   // Algorithm in paper is algorithmically better;
3612   // this may be faster in practice. Check someday.
3613   //
3614   // Here's an example of how it works. Consider this code:
3615   //
3616   //   for (i = ...) {
3617   //     for (j = ...) {
3618   //       for (k = ...) {
3619   //         for (l = ...) {
3620   //           for (m = ...) {
3621   //             A[i][j][k][m] = ...;
3622   //             ... = A[0][j][l][i + j];
3623   //           }
3624   //         }
3625   //       }
3626   //     }
3627   //   }
3628   //
3629   // There are 4 subscripts here:
3630   //    0 [i] and [0]
3631   //    1 [j] and [j]
3632   //    2 [k] and [l]
3633   //    3 [m] and [i + j]
3634   //
3635   // We've already classified each subscript pair as ZIV, SIV, etc.,
3636   // and collected all the loops mentioned by pair P in Pair[P].Loops.
3637   // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
3638   // and set Pair[P].Group = {P}.
3639   //
3640   //      Src Dst    Classification Loops  GroupLoops Group
3641   //    0 [i] [0]         SIV       {1}      {1}        {0}
3642   //    1 [j] [j]         SIV       {2}      {2}        {1}
3643   //    2 [k] [l]         RDIV      {3,4}    {3,4}      {2}
3644   //    3 [m] [i + j]     MIV       {1,2,5}  {1,2,5}    {3}
3645   //
3646   // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
3647   // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
3648   //
3649   // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
3650   // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
3651   // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
3652   // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
3653   // to either Separable or Coupled).
3654   //
3655   // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
3656   // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
3657   // so Pair[3].Group = {0, 1, 3} and Done = false.
3658   //
3659   // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
3660   // Since Done remains true, we add 2 to the set of Separable pairs.
3661   //
3662   // Finally, we consider 3. There's nothing to compare it with,
3663   // so Done remains true and we add it to the Coupled set.
3664   // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
3665   //
3666   // In the end, we've got 1 separable subscript and 1 coupled group.
3667   for (unsigned SI = 0; SI < Pairs; ++SI) {
3668     if (Pair[SI].Classification == Subscript::NonLinear) {
3669       // ignore these, but collect loops for later
3670       ++NonlinearSubscriptPairs;
3671       collectCommonLoops(Pair[SI].Src,
3672                          LI->getLoopFor(Src->getParent()),
3673                          Pair[SI].Loops);
3674       collectCommonLoops(Pair[SI].Dst,
3675                          LI->getLoopFor(Dst->getParent()),
3676                          Pair[SI].Loops);
3677       Result.Consistent = false;
3678     } else if (Pair[SI].Classification == Subscript::ZIV) {
3679       // always separable
3680       Separable.set(SI);
3681     }
3682     else {
3683       // SIV, RDIV, or MIV, so check for coupled group
3684       bool Done = true;
3685       for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
3686         SmallBitVector Intersection = Pair[SI].GroupLoops;
3687         Intersection &= Pair[SJ].GroupLoops;
3688         if (Intersection.any()) {
3689           // accumulate set of all the loops in group
3690           Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
3691           // accumulate set of all subscripts in group
3692           Pair[SJ].Group |= Pair[SI].Group;
3693           Done = false;
3694         }
3695       }
3696       if (Done) {
3697         if (Pair[SI].Group.count() == 1) {
3698           Separable.set(SI);
3699           ++SeparableSubscriptPairs;
3700         }
3701         else {
3702           Coupled.set(SI);
3703           ++CoupledSubscriptPairs;
3704         }
3705       }
3706     }
3707   }
3708 
3709   LLVM_DEBUG(dbgs() << "    Separable = ");
3710   LLVM_DEBUG(dumpSmallBitVector(Separable));
3711   LLVM_DEBUG(dbgs() << "    Coupled = ");
3712   LLVM_DEBUG(dumpSmallBitVector(Coupled));
3713 
3714   Constraint NewConstraint;
3715   NewConstraint.setAny(SE);
3716 
3717   // test separable subscripts
3718   for (unsigned SI : Separable.set_bits()) {
3719     LLVM_DEBUG(dbgs() << "testing subscript " << SI);
3720     switch (Pair[SI].Classification) {
3721     case Subscript::ZIV:
3722       LLVM_DEBUG(dbgs() << ", ZIV\n");
3723       if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
3724         return nullptr;
3725       break;
3726     case Subscript::SIV: {
3727       LLVM_DEBUG(dbgs() << ", SIV\n");
3728       unsigned Level;
3729       const SCEV *SplitIter = nullptr;
3730       if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
3731                   SplitIter))
3732         return nullptr;
3733       break;
3734     }
3735     case Subscript::RDIV:
3736       LLVM_DEBUG(dbgs() << ", RDIV\n");
3737       if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
3738         return nullptr;
3739       break;
3740     case Subscript::MIV:
3741       LLVM_DEBUG(dbgs() << ", MIV\n");
3742       if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
3743         return nullptr;
3744       break;
3745     default:
3746       llvm_unreachable("subscript has unexpected classification");
3747     }
3748   }
3749 
3750   if (Coupled.count()) {
3751     // test coupled subscript groups
3752     LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");
3753     LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
3754     SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
3755     for (unsigned II = 0; II <= MaxLevels; ++II)
3756       Constraints[II].setAny(SE);
3757     for (unsigned SI : Coupled.set_bits()) {
3758       LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { ");
3759       SmallBitVector Group(Pair[SI].Group);
3760       SmallBitVector Sivs(Pairs);
3761       SmallBitVector Mivs(Pairs);
3762       SmallBitVector ConstrainedLevels(MaxLevels + 1);
3763       SmallVector<Subscript *, 4> PairsInGroup;
3764       for (unsigned SJ : Group.set_bits()) {
3765         LLVM_DEBUG(dbgs() << SJ << " ");
3766         if (Pair[SJ].Classification == Subscript::SIV)
3767           Sivs.set(SJ);
3768         else
3769           Mivs.set(SJ);
3770         PairsInGroup.push_back(&Pair[SJ]);
3771       }
3772       unifySubscriptType(PairsInGroup);
3773       LLVM_DEBUG(dbgs() << "}\n");
3774       while (Sivs.any()) {
3775         bool Changed = false;
3776         for (unsigned SJ : Sivs.set_bits()) {
3777           LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
3778           // SJ is an SIV subscript that's part of the current coupled group
3779           unsigned Level;
3780           const SCEV *SplitIter = nullptr;
3781           LLVM_DEBUG(dbgs() << "SIV\n");
3782           if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
3783                       SplitIter))
3784             return nullptr;
3785           ConstrainedLevels.set(Level);
3786           if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
3787             if (Constraints[Level].isEmpty()) {
3788               ++DeltaIndependence;
3789               return nullptr;
3790             }
3791             Changed = true;
3792           }
3793           Sivs.reset(SJ);
3794         }
3795         if (Changed) {
3796           // propagate, possibly creating new SIVs and ZIVs
3797           LLVM_DEBUG(dbgs() << "    propagating\n");
3798           LLVM_DEBUG(dbgs() << "\tMivs = ");
3799           LLVM_DEBUG(dumpSmallBitVector(Mivs));
3800           for (unsigned SJ : Mivs.set_bits()) {
3801             // SJ is an MIV subscript that's part of the current coupled group
3802             LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
3803             if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
3804                           Constraints, Result.Consistent)) {
3805               LLVM_DEBUG(dbgs() << "\t    Changed\n");
3806               ++DeltaPropagations;
3807               Pair[SJ].Classification =
3808                 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
3809                              Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
3810                              Pair[SJ].Loops);
3811               switch (Pair[SJ].Classification) {
3812               case Subscript::ZIV:
3813                 LLVM_DEBUG(dbgs() << "ZIV\n");
3814                 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3815                   return nullptr;
3816                 Mivs.reset(SJ);
3817                 break;
3818               case Subscript::SIV:
3819                 Sivs.set(SJ);
3820                 Mivs.reset(SJ);
3821                 break;
3822               case Subscript::RDIV:
3823               case Subscript::MIV:
3824                 break;
3825               default:
3826                 llvm_unreachable("bad subscript classification");
3827               }
3828             }
3829           }
3830         }
3831       }
3832 
3833       // test & propagate remaining RDIVs
3834       for (unsigned SJ : Mivs.set_bits()) {
3835         if (Pair[SJ].Classification == Subscript::RDIV) {
3836           LLVM_DEBUG(dbgs() << "RDIV test\n");
3837           if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3838             return nullptr;
3839           // I don't yet understand how to propagate RDIV results
3840           Mivs.reset(SJ);
3841         }
3842       }
3843 
3844       // test remaining MIVs
3845       // This code is temporary.
3846       // Better to somehow test all remaining subscripts simultaneously.
3847       for (unsigned SJ : Mivs.set_bits()) {
3848         if (Pair[SJ].Classification == Subscript::MIV) {
3849           LLVM_DEBUG(dbgs() << "MIV test\n");
3850           if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
3851             return nullptr;
3852         }
3853         else
3854           llvm_unreachable("expected only MIV subscripts at this point");
3855       }
3856 
3857       // update Result.DV from constraint vector
3858       LLVM_DEBUG(dbgs() << "    updating\n");
3859       for (unsigned SJ : ConstrainedLevels.set_bits()) {
3860         if (SJ > CommonLevels)
3861           break;
3862         updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
3863         if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
3864           return nullptr;
3865       }
3866     }
3867   }
3868 
3869   // Make sure the Scalar flags are set correctly.
3870   SmallBitVector CompleteLoops(MaxLevels + 1);
3871   for (unsigned SI = 0; SI < Pairs; ++SI)
3872     CompleteLoops |= Pair[SI].Loops;
3873   for (unsigned II = 1; II <= CommonLevels; ++II)
3874     if (CompleteLoops[II])
3875       Result.DV[II - 1].Scalar = false;
3876 
3877   if (PossiblyLoopIndependent) {
3878     // Make sure the LoopIndependent flag is set correctly.
3879     // All directions must include equal, otherwise no
3880     // loop-independent dependence is possible.
3881     for (unsigned II = 1; II <= CommonLevels; ++II) {
3882       if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
3883         Result.LoopIndependent = false;
3884         break;
3885       }
3886     }
3887   }
3888   else {
3889     // On the other hand, if all directions are equal and there's no
3890     // loop-independent dependence possible, then no dependence exists.
3891     bool AllEqual = true;
3892     for (unsigned II = 1; II <= CommonLevels; ++II) {
3893       if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
3894         AllEqual = false;
3895         break;
3896       }
3897     }
3898     if (AllEqual)
3899       return nullptr;
3900   }
3901 
3902   return std::make_unique<FullDependence>(std::move(Result));
3903 }
3904 
3905 //===----------------------------------------------------------------------===//
3906 // getSplitIteration -
3907 // Rather than spend rarely-used space recording the splitting iteration
3908 // during the Weak-Crossing SIV test, we re-compute it on demand.
3909 // The re-computation is basically a repeat of the entire dependence test,
3910 // though simplified since we know that the dependence exists.
3911 // It's tedious, since we must go through all propagations, etc.
3912 //
3913 // Care is required to keep this code up to date with respect to the routine
3914 // above, depends().
3915 //
3916 // Generally, the dependence analyzer will be used to build
3917 // a dependence graph for a function (basically a map from instructions
3918 // to dependences). Looking for cycles in the graph shows us loops
3919 // that cannot be trivially vectorized/parallelized.
3920 //
3921 // We can try to improve the situation by examining all the dependences
3922 // that make up the cycle, looking for ones we can break.
3923 // Sometimes, peeling the first or last iteration of a loop will break
3924 // dependences, and we've got flags for those possibilities.
3925 // Sometimes, splitting a loop at some other iteration will do the trick,
3926 // and we've got a flag for that case. Rather than waste the space to
3927 // record the exact iteration (since we rarely know), we provide
3928 // a method that calculates the iteration. It's a drag that it must work
3929 // from scratch, but wonderful in that it's possible.
3930 //
3931 // Here's an example:
3932 //
3933 //    for (i = 0; i < 10; i++)
3934 //        A[i] = ...
3935 //        ... = A[11 - i]
3936 //
3937 // There's a loop-carried flow dependence from the store to the load,
3938 // found by the weak-crossing SIV test. The dependence will have a flag,
3939 // indicating that the dependence can be broken by splitting the loop.
3940 // Calling getSplitIteration will return 5.
3941 // Splitting the loop breaks the dependence, like so:
3942 //
3943 //    for (i = 0; i <= 5; i++)
3944 //        A[i] = ...
3945 //        ... = A[11 - i]
3946 //    for (i = 6; i < 10; i++)
3947 //        A[i] = ...
3948 //        ... = A[11 - i]
3949 //
3950 // breaks the dependence and allows us to vectorize/parallelize
3951 // both loops.
3952 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
3953                                               unsigned SplitLevel) {
3954   assert(Dep.isSplitable(SplitLevel) &&
3955          "Dep should be splitable at SplitLevel");
3956   Instruction *Src = Dep.getSrc();
3957   Instruction *Dst = Dep.getDst();
3958   assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
3959   assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
3960   assert(isLoadOrStore(Src));
3961   assert(isLoadOrStore(Dst));
3962   Value *SrcPtr = getLoadStorePointerOperand(Src);
3963   Value *DstPtr = getLoadStorePointerOperand(Dst);
3964   assert(underlyingObjectsAlias(
3965              AA, F->getParent()->getDataLayout(), MemoryLocation::get(Dst),
3966              MemoryLocation::get(Src)) == AliasResult::MustAlias);
3967 
3968   // establish loop nesting levels
3969   establishNestingLevels(Src, Dst);
3970 
3971   FullDependence Result(Src, Dst, false, CommonLevels);
3972 
3973   unsigned Pairs = 1;
3974   SmallVector<Subscript, 2> Pair(Pairs);
3975   const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3976   const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3977   Pair[0].Src = SrcSCEV;
3978   Pair[0].Dst = DstSCEV;
3979 
3980   if (Delinearize) {
3981     if (tryDelinearize(Src, Dst, Pair)) {
3982       LLVM_DEBUG(dbgs() << "    delinearized\n");
3983       Pairs = Pair.size();
3984     }
3985   }
3986 
3987   for (unsigned P = 0; P < Pairs; ++P) {
3988     Pair[P].Loops.resize(MaxLevels + 1);
3989     Pair[P].GroupLoops.resize(MaxLevels + 1);
3990     Pair[P].Group.resize(Pairs);
3991     removeMatchingExtensions(&Pair[P]);
3992     Pair[P].Classification =
3993       classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
3994                    Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
3995                    Pair[P].Loops);
3996     Pair[P].GroupLoops = Pair[P].Loops;
3997     Pair[P].Group.set(P);
3998   }
3999 
4000   SmallBitVector Separable(Pairs);
4001   SmallBitVector Coupled(Pairs);
4002 
4003   // partition subscripts into separable and minimally-coupled groups
4004   for (unsigned SI = 0; SI < Pairs; ++SI) {
4005     if (Pair[SI].Classification == Subscript::NonLinear) {
4006       // ignore these, but collect loops for later
4007       collectCommonLoops(Pair[SI].Src,
4008                          LI->getLoopFor(Src->getParent()),
4009                          Pair[SI].Loops);
4010       collectCommonLoops(Pair[SI].Dst,
4011                          LI->getLoopFor(Dst->getParent()),
4012                          Pair[SI].Loops);
4013       Result.Consistent = false;
4014     }
4015     else if (Pair[SI].Classification == Subscript::ZIV)
4016       Separable.set(SI);
4017     else {
4018       // SIV, RDIV, or MIV, so check for coupled group
4019       bool Done = true;
4020       for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
4021         SmallBitVector Intersection = Pair[SI].GroupLoops;
4022         Intersection &= Pair[SJ].GroupLoops;
4023         if (Intersection.any()) {
4024           // accumulate set of all the loops in group
4025           Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
4026           // accumulate set of all subscripts in group
4027           Pair[SJ].Group |= Pair[SI].Group;
4028           Done = false;
4029         }
4030       }
4031       if (Done) {
4032         if (Pair[SI].Group.count() == 1)
4033           Separable.set(SI);
4034         else
4035           Coupled.set(SI);
4036       }
4037     }
4038   }
4039 
4040   Constraint NewConstraint;
4041   NewConstraint.setAny(SE);
4042 
4043   // test separable subscripts
4044   for (unsigned SI : Separable.set_bits()) {
4045     switch (Pair[SI].Classification) {
4046     case Subscript::SIV: {
4047       unsigned Level;
4048       const SCEV *SplitIter = nullptr;
4049       (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
4050                      Result, NewConstraint, SplitIter);
4051       if (Level == SplitLevel) {
4052         assert(SplitIter != nullptr);
4053         return SplitIter;
4054       }
4055       break;
4056     }
4057     case Subscript::ZIV:
4058     case Subscript::RDIV:
4059     case Subscript::MIV:
4060       break;
4061     default:
4062       llvm_unreachable("subscript has unexpected classification");
4063     }
4064   }
4065 
4066   if (Coupled.count()) {
4067     // test coupled subscript groups
4068     SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
4069     for (unsigned II = 0; II <= MaxLevels; ++II)
4070       Constraints[II].setAny(SE);
4071     for (unsigned SI : Coupled.set_bits()) {
4072       SmallBitVector Group(Pair[SI].Group);
4073       SmallBitVector Sivs(Pairs);
4074       SmallBitVector Mivs(Pairs);
4075       SmallBitVector ConstrainedLevels(MaxLevels + 1);
4076       for (unsigned SJ : Group.set_bits()) {
4077         if (Pair[SJ].Classification == Subscript::SIV)
4078           Sivs.set(SJ);
4079         else
4080           Mivs.set(SJ);
4081       }
4082       while (Sivs.any()) {
4083         bool Changed = false;
4084         for (unsigned SJ : Sivs.set_bits()) {
4085           // SJ is an SIV subscript that's part of the current coupled group
4086           unsigned Level;
4087           const SCEV *SplitIter = nullptr;
4088           (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
4089                          Result, NewConstraint, SplitIter);
4090           if (Level == SplitLevel && SplitIter)
4091             return SplitIter;
4092           ConstrainedLevels.set(Level);
4093           if (intersectConstraints(&Constraints[Level], &NewConstraint))
4094             Changed = true;
4095           Sivs.reset(SJ);
4096         }
4097         if (Changed) {
4098           // propagate, possibly creating new SIVs and ZIVs
4099           for (unsigned SJ : Mivs.set_bits()) {
4100             // SJ is an MIV subscript that's part of the current coupled group
4101             if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
4102                           Pair[SJ].Loops, Constraints, Result.Consistent)) {
4103               Pair[SJ].Classification =
4104                 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
4105                              Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
4106                              Pair[SJ].Loops);
4107               switch (Pair[SJ].Classification) {
4108               case Subscript::ZIV:
4109                 Mivs.reset(SJ);
4110                 break;
4111               case Subscript::SIV:
4112                 Sivs.set(SJ);
4113                 Mivs.reset(SJ);
4114                 break;
4115               case Subscript::RDIV:
4116               case Subscript::MIV:
4117                 break;
4118               default:
4119                 llvm_unreachable("bad subscript classification");
4120               }
4121             }
4122           }
4123         }
4124       }
4125     }
4126   }
4127   llvm_unreachable("somehow reached end of routine");
4128   return nullptr;
4129 }
4130