1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation -------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The implementation of the loop memory dependence analysis that was
11 // originally developed for the loop vectorizer.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Analysis/LoopAccessAnalysis.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/DepthFirstIterator.h"
19 #include "llvm/ADT/EquivalenceClasses.h"
20 #include "llvm/ADT/PointerIntPair.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/iterator_range.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/Analysis/AliasSetTracker.h"
29 #include "llvm/Analysis/LoopAnalysisManager.h"
30 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/Analysis/MemoryLocation.h"
32 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
33 #include "llvm/Analysis/ScalarEvolution.h"
34 #include "llvm/Analysis/ScalarEvolutionExpander.h"
35 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
36 #include "llvm/Analysis/TargetLibraryInfo.h"
37 #include "llvm/Analysis/ValueTracking.h"
38 #include "llvm/Analysis/VectorUtils.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/IRBuilder.h"
48 #include "llvm/IR/InstrTypes.h"
49 #include "llvm/IR/Instruction.h"
50 #include "llvm/IR/Instructions.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/PassManager.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/IR/ValueHandle.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/raw_ostream.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstdlib>
66 #include <iterator>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "loop-accesses"
73 
74 static cl::opt<unsigned, true>
75 VectorizationFactor("force-vector-width", cl::Hidden,
76                     cl::desc("Sets the SIMD width. Zero is autoselect."),
77                     cl::location(VectorizerParams::VectorizationFactor));
78 unsigned VectorizerParams::VectorizationFactor;
79 
80 static cl::opt<unsigned, true>
81 VectorizationInterleave("force-vector-interleave", cl::Hidden,
82                         cl::desc("Sets the vectorization interleave count. "
83                                  "Zero is autoselect."),
84                         cl::location(
85                             VectorizerParams::VectorizationInterleave));
86 unsigned VectorizerParams::VectorizationInterleave;
87 
88 static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
89     "runtime-memory-check-threshold", cl::Hidden,
90     cl::desc("When performing memory disambiguation checks at runtime do not "
91              "generate more than this number of comparisons (default = 8)."),
92     cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
93 unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
94 
95 /// \brief Maximum number of comparisons allowed while merging memory checks.
96 static cl::opt<unsigned> MemoryCheckMergeThreshold(
97     "memory-check-merge-threshold", cl::Hidden,
98     cl::desc("Maximum number of comparisons done when trying to merge "
99              "runtime memory checks. (default = 100)"),
100     cl::init(100));
101 
102 /// Maximum SIMD width.
103 const unsigned VectorizerParams::MaxVectorWidth = 64;
104 
105 /// \brief We collect dependences up to this threshold.
106 static cl::opt<unsigned>
107     MaxDependences("max-dependences", cl::Hidden,
108                    cl::desc("Maximum number of dependences collected by "
109                             "loop-access analysis (default = 100)"),
110                    cl::init(100));
111 
112 /// This enables versioning on the strides of symbolically striding memory
113 /// accesses in code like the following.
114 ///   for (i = 0; i < N; ++i)
115 ///     A[i * Stride1] += B[i * Stride2] ...
116 ///
117 /// Will be roughly translated to
118 ///    if (Stride1 == 1 && Stride2 == 1) {
119 ///      for (i = 0; i < N; i+=4)
120 ///       A[i:i+3] += ...
121 ///    } else
122 ///      ...
123 static cl::opt<bool> EnableMemAccessVersioning(
124     "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125     cl::desc("Enable symbolic stride memory access versioning"));
126 
127 /// \brief Enable store-to-load forwarding conflict detection. This option can
128 /// be disabled for correctness testing.
129 static cl::opt<bool> EnableForwardingConflictDetection(
130     "store-to-load-forwarding-conflict-detection", cl::Hidden,
131     cl::desc("Enable conflict detection in loop-access analysis"),
132     cl::init(true));
133 
134 bool VectorizerParams::isInterleaveForced() {
135   return ::VectorizationInterleave.getNumOccurrences() > 0;
136 }
137 
138 void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
139                                     const Loop *TheLoop, const char *PassName,
140                                     OptimizationRemarkEmitter &ORE) {
141   DebugLoc DL = TheLoop->getStartLoc();
142   const Value *V = TheLoop->getHeader();
143   if (const Instruction *I = Message.getInstr()) {
144     // If there is no debug location attached to the instruction, fall back to
145     // using the loop's.
146     if (I->getDebugLoc())
147       DL = I->getDebugLoc();
148     V = I->getParent();
149   }
150   ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str());
151 }
152 
153 Value *llvm::stripIntegerCast(Value *V) {
154   if (auto *CI = dyn_cast<CastInst>(V))
155     if (CI->getOperand(0)->getType()->isIntegerTy())
156       return CI->getOperand(0);
157   return V;
158 }
159 
160 const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
161                                             const ValueToValueMap &PtrToStride,
162                                             Value *Ptr, Value *OrigPtr) {
163   const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
164 
165   // If there is an entry in the map return the SCEV of the pointer with the
166   // symbolic stride replaced by one.
167   ValueToValueMap::const_iterator SI =
168       PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
169   if (SI != PtrToStride.end()) {
170     Value *StrideVal = SI->second;
171 
172     // Strip casts.
173     StrideVal = stripIntegerCast(StrideVal);
174 
175     ScalarEvolution *SE = PSE.getSE();
176     const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
177     const auto *CT =
178         static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
179 
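    // Record the assumption "Stride == 1"; under this predicate the SCEV of
    // Ptr, re-queried below, has the symbolic stride folded to a constant one.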
180     PSE.addPredicate(*SE->getEqualPredicate(U, CT));
181     auto *Expr = PSE.getSCEV(Ptr);
182 
183     DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
184                  << "\n");
185     return Expr;
186   }
187 
188   // Otherwise, just return the SCEV of the original pointer.
189   return OrigSCEV;
190 }
191 
192 /// Calculate Start and End points of memory access.
193 /// Let's assume A is the first access and B is a memory access on N-th loop
194 /// iteration. Then B is calculated as:
195 ///   B = A + Step*N .
196 /// Step value may be positive or negative.
197 /// N is a calculated back-edge taken count:
198 ///     N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
199 /// Start and End points are calculated in the following way:
200 /// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
201 /// where SizeOfElt is the size of single memory access in bytes.
202 ///
203 /// There is no conflict when the intervals are disjoint:
204 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
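/// For example, for an access A[i] over 4-byte elements with a unit stride and
/// a backedge-taken count N, Start = A and End = A + 4*N + 4, i.e. the
/// half-open byte range [Start, End).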
205 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
206                                     unsigned DepSetId, unsigned ASId,
207                                     const ValueToValueMap &Strides,
208                                     PredicatedScalarEvolution &PSE) {
209   // Get the stride replaced scev.
210   const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
211   ScalarEvolution *SE = PSE.getSE();
212 
213   const SCEV *ScStart;
214   const SCEV *ScEnd;
215 
216   if (SE->isLoopInvariant(Sc, Lp))
217     ScStart = ScEnd = Sc;
218   else {
219     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
220     assert(AR && "Invalid addrec expression");
221     const SCEV *Ex = PSE.getBackedgeTakenCount();
222 
223     ScStart = AR->getStart();
224     ScEnd = AR->evaluateAtIteration(Ex, *SE);
225     const SCEV *Step = AR->getStepRecurrence(*SE);
226 
227     // For expressions with negative step, the upper bound is ScStart and the
228     // lower bound is ScEnd.
229     if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
230       if (CStep->getValue()->isNegative())
231         std::swap(ScStart, ScEnd);
232     } else {
233       // Fallback case: the step is not constant, but we can still
234       // get the upper and lower bounds of the interval by using min/max
235       // expressions.
236       ScStart = SE->getUMinExpr(ScStart, ScEnd);
237       ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
238     }
239     // Add the size of the pointed element to ScEnd.
240     unsigned EltSize =
241       Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
242     const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
243     ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
244   }
245 
246   Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
247 }
248 
249 SmallVector<RuntimePointerChecking::PointerCheck, 4>
250 RuntimePointerChecking::generateChecks() const {
251   SmallVector<PointerCheck, 4> Checks;
252 
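  // Pair every two checking groups that still need a runtime check against
  // each other; each such pair becomes one emitted check.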
253   for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
254     for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
255       const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
256       const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
257 
258       if (needsChecking(CGI, CGJ))
259         Checks.push_back(std::make_pair(&CGI, &CGJ));
260     }
261   }
262   return Checks;
263 }
264 
265 void RuntimePointerChecking::generateChecks(
266     MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
267   assert(Checks.empty() && "Checks is not empty");
268   groupChecks(DepCands, UseDependencies);
269   Checks = generateChecks();
270 }
271 
272 bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
273                                            const CheckingPtrGroup &N) const {
274   for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
275     for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
276       if (needsChecking(M.Members[I], N.Members[J]))
277         return true;
278   return false;
279 }
280 
281 /// Compare \p I and \p J and return the minimum.
282 /// Return nullptr in case we couldn't find an answer.
283 static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
284                                    ScalarEvolution *SE) {
285   const SCEV *Diff = SE->getMinusSCEV(J, I);
286   const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
287 
288   if (!C)
289     return nullptr;
290   if (C->getValue()->isNegative())
291     return J;
292   return I;
293 }
294 
295 bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
296   const SCEV *Start = RtCheck.Pointers[Index].Start;
297   const SCEV *End = RtCheck.Pointers[Index].End;
298 
299   // Compare the starts and ends with the known minimum and maximum
300   // of this set. We need to know how we compare against the min/max
301   // of the set in order to be able to emit memchecks.
302   const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
303   if (!Min0)
304     return false;
305 
306   const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
307   if (!Min1)
308     return false;
309 
310   // Update the low bound expression if we've found a new min value.
311   if (Min0 == Start)
312     Low = Start;
313 
314   // Update the high bound expression if we've found a new max value.
315   if (Min1 != End)
316     High = End;
317 
318   Members.push_back(Index);
319   return true;
320 }
321 
322 void RuntimePointerChecking::groupChecks(
323     MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
324   // We build the groups from the dependency-candidate equivalence classes
325   // because:
326   //    - We know that pointers in the same equivalence class share
327   //      the same underlying object and therefore there is a chance
328   //      that we can compare pointers
329   //    - We wouldn't be able to merge two pointers for which we need
330   //      to emit a memcheck. The classes in DepCands are already
331   //      conveniently built such that no two pointers in the same
332   //      class need checking against each other.
333 
334   // We use the following (greedy) algorithm to construct the groups
335   // For every pointer in the equivalence class:
336   //   For each existing group:
337   //   - if the difference between this pointer and the min/max bounds
338   //     of the group is a constant, then make the pointer part of the
339   //     group and update the min/max bounds of that group as required.
340 
341   CheckingGroups.clear();
342 
343   // If we need to check two pointers to the same underlying object
344   // with a non-constant difference, we shouldn't perform any pointer
345   // grouping with those pointers. This is because we can easily get
346   // into cases where the resulting check would return false, even when
347   // the accesses are safe.
348   //
349   // The following example shows this:
350   // for (i = 0; i < 1000; ++i)
351   //   a[5000 + i * m] = a[i] + a[i + 9000]
352   //
353   // Here grouping gives a check of (5000, 5000 + 1000 * m) against
354   // (0, 10000) which is always false. However, if m is 1, there is no
355   // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
356   // us to perform an accurate check in this case.
357   //
358   // The above case requires that we have an UnknownDependence between
359   // accesses to the same underlying object. This cannot happen unless
360   // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
361   // is also false. In this case we will use the fallback path and create
362   // separate checking groups for all pointers.
363 
364   // If we don't have the dependency partitions, construct a new
365   // checking pointer group for each pointer. This is also required
366   // for correctness, because in this case we can have checking between
367   // pointers to the same underlying object.
368   if (!UseDependencies) {
369     for (unsigned I = 0; I < Pointers.size(); ++I)
370       CheckingGroups.push_back(CheckingPtrGroup(I, *this));
371     return;
372   }
373 
374   unsigned TotalComparisons = 0;
375 
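  // Map each pointer value to its index in Pointers so that members of the
  // dependence equivalence classes can be translated back to check indices.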
376   DenseMap<Value *, unsigned> PositionMap;
377   for (unsigned Index = 0; Index < Pointers.size(); ++Index)
378     PositionMap[Pointers[Index].PointerValue] = Index;
379 
380   // We need to keep track of what pointers we've already seen so we
381   // don't process them twice.
382   SmallSet<unsigned, 2> Seen;
383 
384   // Go through all equivalence classes, get the "pointer check groups"
385   // and add them to the overall solution. We use the order in which accesses
386   // appear in 'Pointers' to enforce determinism.
387   for (unsigned I = 0; I < Pointers.size(); ++I) {
388     // We've seen this pointer before, and therefore already processed
389     // its equivalence class.
390     if (Seen.count(I))
391       continue;
392 
393     MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
394                                            Pointers[I].IsWritePtr);
395 
396     SmallVector<CheckingPtrGroup, 2> Groups;
397     auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
398 
399     // Because DepCands is constructed by visiting accesses in the order in
400     // which they appear in alias sets (which is deterministic) and the
401     // iteration order within an equivalence class member is only dependent on
402     // the order in which unions and insertions are performed on the
403     // equivalence class, the iteration order is deterministic.
404     for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
405          MI != ME; ++MI) {
406       unsigned Pointer = PositionMap[MI->getPointer()];
407       bool Merged = false;
408       // Mark this pointer as seen.
409       Seen.insert(Pointer);
410 
411       // Go through all the existing sets and see if we can find one
412       // which can include this pointer.
413       for (CheckingPtrGroup &Group : Groups) {
414         // Don't perform more than a certain amount of comparisons.
415         // This should limit the cost of grouping the pointers to something
416         // reasonable.  If we do end up hitting this threshold, the algorithm
417         // will create separate groups for all remaining pointers.
418         if (TotalComparisons > MemoryCheckMergeThreshold)
419           break;
420 
421         TotalComparisons++;
422 
423         if (Group.addPointer(Pointer)) {
424           Merged = true;
425           break;
426         }
427       }
428 
429       if (!Merged)
430         // We couldn't add this pointer to any existing set or the threshold
431         // for the number of comparisons has been reached. Create a new group
432         // to hold the current pointer.
433         Groups.push_back(CheckingPtrGroup(Pointer, *this));
434     }
435 
436     // We've computed the grouped checks for this partition.
437     // Save the results and continue with the next one.
438     std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
439   }
440 }
441 
442 bool RuntimePointerChecking::arePointersInSamePartition(
443     const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
444     unsigned PtrIdx2) {
445   return (PtrToPartition[PtrIdx1] != -1 &&
446           PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
447 }
448 
449 bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
450   const PointerInfo &PointerI = Pointers[I];
451   const PointerInfo &PointerJ = Pointers[J];
452 
453   // No need to check if two readonly pointers intersect.
454   if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
455     return false;
456 
457   // Only need to check pointers between two different dependency sets.
458   if (PointerI.DependencySetId == PointerJ.DependencySetId)
459     return false;
460 
461   // Only need to check pointers in the same alias set.
462   if (PointerI.AliasSetId != PointerJ.AliasSetId)
463     return false;
464 
465   return true;
466 }
467 
468 void RuntimePointerChecking::printChecks(
469     raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
470     unsigned Depth) const {
471   unsigned N = 0;
472   for (const auto &Check : Checks) {
473     const auto &First = Check.first->Members, &Second = Check.second->Members;
474 
475     OS.indent(Depth) << "Check " << N++ << ":\n";
476 
477     OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
478     for (unsigned K = 0; K < First.size(); ++K)
479       OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
480 
481     OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
482     for (unsigned K = 0; K < Second.size(); ++K)
483       OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
484   }
485 }
486 
487 void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
488 
489   OS.indent(Depth) << "Run-time memory checks:\n";
490   printChecks(OS, Checks, Depth);
491 
492   OS.indent(Depth) << "Grouped accesses:\n";
493   for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
494     const auto &CG = CheckingGroups[I];
495 
496     OS.indent(Depth + 2) << "Group " << &CG << ":\n";
497     OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
498                          << ")\n";
499     for (unsigned J = 0; J < CG.Members.size(); ++J) {
500       OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
501                            << "\n";
502     }
503   }
504 }
505 
506 namespace {
507 
508 /// \brief Analyses memory accesses in a loop.
509 ///
510 /// Checks whether run time pointer checks are needed and builds sets for data
511 /// dependence checking.
512 class AccessAnalysis {
513 public:
514   /// \brief Read or write access location.
515   typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
516   typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
517 
518   AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
519                  MemoryDepChecker::DepCandidates &DA,
520                  PredicatedScalarEvolution &PSE)
521       : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
522         PSE(PSE) {}
523 
524   /// \brief Register a load and whether it is only read from.
525   void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
526     Value *Ptr = const_cast<Value*>(Loc.Ptr);
527     AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
528     Accesses.insert(MemAccessInfo(Ptr, false));
529     if (IsReadOnly)
530       ReadOnlyPtr.insert(Ptr);
531   }
532 
533   /// \brief Register a store.
534   void addStore(MemoryLocation &Loc) {
535     Value *Ptr = const_cast<Value*>(Loc.Ptr);
536     AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
537     Accesses.insert(MemAccessInfo(Ptr, true));
538   }
539 
540   /// \brief Check whether we can check the pointers at runtime for
541   /// non-intersection.
542   ///
543   /// Returns true if we need no check or if we do and we can generate them
544   /// (i.e. the pointers have computable bounds).
545   bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
546                        Loop *TheLoop, const ValueToValueMap &Strides,
547                        bool ShouldCheckWrap = false);
548 
549   /// \brief Goes over all memory accesses, checks whether a RT check is needed
550   /// and builds sets of dependent accesses.
551   void buildDependenceSets() {
552     processMemAccesses();
553   }
554 
555   /// \brief Initial processing of memory accesses determined that we need to
556   /// perform dependency checking.
557   ///
558   /// Note that this can later be cleared if we retry memcheck analysis without
559   /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
560   bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
561 
562   /// We decided that no dependence analysis would be used.  Reset the state.
563   void resetDepChecks(MemoryDepChecker &DepChecker) {
564     CheckDeps.clear();
565     DepChecker.clearDependences();
566   }
567 
568   MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
569 
570 private:
571   typedef SetVector<MemAccessInfo> PtrAccessSet;
572 
573   /// \brief Go over all memory accesses and check whether runtime pointer checks
574   /// are needed and build sets of dependency check candidates.
575   void processMemAccesses();
576 
577   /// Set of all accesses.
578   PtrAccessSet Accesses;
579 
580   const DataLayout &DL;
581 
582   /// Set of accesses that need a further dependence check.
583   MemAccessInfoSet CheckDeps;
584 
585   /// Set of pointers that are read only.
586   SmallPtrSet<Value*, 16> ReadOnlyPtr;
587 
588   /// An alias set tracker to partition the access set by underlying object and
589   /// intrinsic property (such as TBAA metadata).
590   AliasSetTracker AST;
591 
592   LoopInfo *LI;
593 
594   /// Sets of potentially dependent accesses - members of one set share an
595   /// underlying pointer. The set "CheckDeps" identifies which sets really need a
596   /// dependence check.
597   MemoryDepChecker::DepCandidates &DepCands;
598 
599   /// \brief Initial processing of memory accesses determined that we may need
600   /// to add memchecks.  Perform the analysis to determine the necessary checks.
601   ///
602   /// Note that this is different from isDependencyCheckNeeded.  When we retry
603   /// memcheck analysis without dependency checking
604   /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
605   /// while this remains set if we have potentially dependent accesses.
606   bool IsRTCheckAnalysisNeeded;
607 
608   /// The SCEV predicate containing all the SCEV-related assumptions.
609   PredicatedScalarEvolution &PSE;
610 };
611 
612 } // end anonymous namespace
613 
614 /// \brief Check whether a pointer can participate in a runtime bounds check.
615 static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
616                                 const ValueToValueMap &Strides, Value *Ptr,
617                                 Loop *L) {
618   const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
619 
620   // The bounds for a loop-invariant pointer are trivial.
621   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
622     return true;
623 
624   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
625   if (!AR)
626     return false;
627 
628   return AR->isAffine();
629 }
630 
631 /// \brief Check whether a pointer address cannot wrap.
632 static bool isNoWrap(PredicatedScalarEvolution &PSE,
633                      const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
634   const SCEV *PtrScev = PSE.getSCEV(Ptr);
635   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
636     return true;
637 
638   int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
639   return Stride == 1;
640 }
641 
642 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
643                                      ScalarEvolution *SE, Loop *TheLoop,
644                                      const ValueToValueMap &StridesMap,
645                                      bool ShouldCheckWrap) {
646   // Find pointers with computable bounds. We are going to use this information
647   // to place a runtime bound check.
648   bool CanDoRT = true;
649 
650   bool NeedRTCheck = false;
651   if (!IsRTCheckAnalysisNeeded) return true;
652 
653   bool IsDepCheckNeeded = isDependencyCheckNeeded();
654 
655   // We assign a consecutive id to accesses from different alias sets.
656   // Accesses in different alias sets need not be checked against each other.
657   unsigned ASId = 1;
658   for (auto &AS : AST) {
659     int NumReadPtrChecks = 0;
660     int NumWritePtrChecks = 0;
661 
662     // We assign a consecutive id to accesses from different dependence sets.
663     // Accesses within the same set don't need a runtime check.
664     unsigned RunningDepId = 1;
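    // Map the leader of each dependence set (equivalence class) to the id
    // assigned to that set.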
665     DenseMap<Value *, unsigned> DepSetId;
666 
667     for (auto A : AS) {
668       Value *Ptr = A.getValue();
669       bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
670       MemAccessInfo Access(Ptr, IsWrite);
671 
672       if (IsWrite)
673         ++NumWritePtrChecks;
674       else
675         ++NumReadPtrChecks;
676 
677       if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&
678           // When we run after a failing dependency check we have to make sure
679           // we don't have wrapping pointers.
680           (!ShouldCheckWrap || isNoWrap(PSE, StridesMap, Ptr, TheLoop))) {
681         // The id of the dependence set.
682         unsigned DepId;
683 
684         if (IsDepCheckNeeded) {
685           Value *Leader = DepCands.getLeaderValue(Access).getPointer();
686           unsigned &LeaderId = DepSetId[Leader];
687           if (!LeaderId)
688             LeaderId = RunningDepId++;
689           DepId = LeaderId;
690         } else
691           // Each access has its own dependence set.
692           DepId = RunningDepId++;
693 
694         RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
695 
696         DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
697       } else {
698         DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
699         CanDoRT = false;
700       }
701     }
702 
703     // If we have at least two writes or one write and a read then we need to
704     // check them.  But there is no need for checks if there is only one
705     // dependence set for this alias set.
706     //
707     // Note that this function computes CanDoRT and NeedRTCheck independently.
708     // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
709     // for which we couldn't find the bounds but we don't actually need to emit
710     // any checks so it does not matter.
711     if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
712       NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
713                                                  NumWritePtrChecks >= 1));
714 
715     ++ASId;
716   }
717 
718   // If the pointers that we would use for the bounds comparison have different
719   // address spaces, assume the values aren't directly comparable, so we can't
720   // use them for the runtime check. We also have to assume they could
721   // overlap. In the future there should be metadata for whether address spaces
722   // are disjoint.
723   unsigned NumPointers = RtCheck.Pointers.size();
724   for (unsigned i = 0; i < NumPointers; ++i) {
725     for (unsigned j = i + 1; j < NumPointers; ++j) {
726       // Only need to check pointers between two different dependency sets.
727       if (RtCheck.Pointers[i].DependencySetId ==
728           RtCheck.Pointers[j].DependencySetId)
729         continue;
730       // Only need to check pointers in the same alias set.
731       if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
732         continue;
733 
734       Value *PtrI = RtCheck.Pointers[i].PointerValue;
735       Value *PtrJ = RtCheck.Pointers[j].PointerValue;
736 
737       unsigned ASi = PtrI->getType()->getPointerAddressSpace();
738       unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
739       if (ASi != ASj) {
740         DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
741                        " different address spaces\n");
742         return false;
743       }
744     }
745   }
746 
747   if (NeedRTCheck && CanDoRT)
748     RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
749 
750   DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
751                << " pointer comparisons.\n");
752 
753   RtCheck.Need = NeedRTCheck;
754 
755   bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
756   if (!CanDoRTIfNeeded)
757     RtCheck.reset();
758   return CanDoRTIfNeeded;
759 }
760 
761 void AccessAnalysis::processMemAccesses() {
762   // We process the set twice: first we process read-write pointers, last we
763   // process read-only pointers. This allows us to skip dependence tests for
764   // read-only pointers.
765 
766   DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
767   DEBUG(dbgs() << "  AST: "; AST.dump());
768   DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
769   DEBUG({
770     for (auto A : Accesses)
771       dbgs() << "\t" << *A.getPointer() << " (" <<
772                 (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
773                                          "read-only" : "read")) << ")\n";
774   });
775 
776   // The AliasSetTracker has nicely partitioned our pointers by metadata
777   // compatibility and potential for underlying-object overlap. As a result, we
778   // only need to check for potential pointer dependencies within each alias
779   // set.
780   for (auto &AS : AST) {
781     // Note that both the alias-set tracker and the alias sets themselves use
782     // linked lists internally and so the iteration order here is deterministic
783     // (matching the original instruction order within each set).
784 
785     bool SetHasWrite = false;
786 
787     // Map of pointers to last access encountered.
788     typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
789     UnderlyingObjToAccessMap ObjToLastAccess;
790 
791     // Set of accesses to check after all writes have been processed.
792     PtrAccessSet DeferredAccesses;
793 
794     // Iterate over each alias set twice, once to process read/write pointers,
795     // and then to process read-only pointers.
796     for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
797       bool UseDeferred = SetIteration > 0;
798       PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
799 
800       for (auto AV : AS) {
801         Value *Ptr = AV.getValue();
802 
803         // For a single memory access in AliasSetTracker, Accesses may contain
804         // both a read and a write, and both need to be handled for CheckDeps.
805         for (auto AC : S) {
806           if (AC.getPointer() != Ptr)
807             continue;
808 
809           bool IsWrite = AC.getInt();
810 
811           // If we're using the deferred access set, then it contains only
812           // reads.
813           bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
814           if (UseDeferred && !IsReadOnlyPtr)
815             continue;
816           // Otherwise, the pointer must be in the PtrAccessSet, either as a
817           // read or a write.
818           assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
819                   S.count(MemAccessInfo(Ptr, false))) &&
820                  "Alias-set pointer not in the access set?");
821 
822           MemAccessInfo Access(Ptr, IsWrite);
823           DepCands.insert(Access);
824 
825           // Memorize read-only pointers for later processing and skip them in
826           // the first round (they need to be checked after we have seen all
827           // write pointers). Note: we also mark pointers that are not
828           // consecutive as "read-only" pointers (so that we check
829           // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
830           if (!UseDeferred && IsReadOnlyPtr) {
831             DeferredAccesses.insert(Access);
832             continue;
833           }
834 
835           // If this is a write, check other reads and writes for conflicts.  If
836           // this is a read, only check other writes for conflicts (but only if
837           // there is no other write to the ptr - this is an optimization to
838           // catch "a[i] = a[i] + " without having to do a dependence check).
839           if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
840             CheckDeps.insert(Access);
841             IsRTCheckAnalysisNeeded = true;
842           }
843 
844           if (IsWrite)
845             SetHasWrite = true;
846 
847           // Create sets of pointers connected by a shared alias set and
848           // underlying object.
849           typedef SmallVector<Value *, 16> ValueVector;
850           ValueVector TempObjects;
851 
852           GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
853           DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
854           for (Value *UnderlyingObj : TempObjects) {
855             // nullptr never aliases anything, so don't join sets for pointers
856             // that have "null" in their UnderlyingObjects list.
857             if (isa<ConstantPointerNull>(UnderlyingObj))
858               continue;
859 
860             UnderlyingObjToAccessMap::iterator Prev =
861                 ObjToLastAccess.find(UnderlyingObj);
862             if (Prev != ObjToLastAccess.end())
863               DepCands.unionSets(Access, Prev->second);
864 
865             ObjToLastAccess[UnderlyingObj] = Access;
866             DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
867           }
868         }
869       }
870     }
871   }
872 }
873 
874 static bool isInBoundsGep(Value *Ptr) {
875   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
876     return GEP->isInBounds();
877   return false;
878 }
879 
880 /// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
881 /// i.e. monotonically increasing/decreasing.
882 static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
883                            PredicatedScalarEvolution &PSE, const Loop *L) {
884   // FIXME: This should probably only return true for NUW.
885   if (AR->getNoWrapFlags(SCEV::NoWrapMask))
886     return true;
887 
888   // Scalar evolution does not propagate the non-wrapping flags to values that
889   // are derived from a non-wrapping induction variable because non-wrapping
890   // could be flow-sensitive.
891   //
892   // Look through the potentially overflowing instruction to try to prove
893   // non-wrapping for the *specific* value of Ptr.
894 
895   // The arithmetic implied by an inbounds GEP can't overflow.
896   auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
897   if (!GEP || !GEP->isInBounds())
898     return false;
899 
900   // Make sure there is only one non-const index and analyze that.
901   Value *NonConstIndex = nullptr;
902   for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
903     if (!isa<ConstantInt>(Index)) {
904       if (NonConstIndex)
905         return false;
906       NonConstIndex = Index;
907     }
908   if (!NonConstIndex)
909     // The recurrence is on the pointer, ignore for now.
910     return false;
911 
912   // The index in GEP is signed.  It is non-wrapping if it's derived from a NSW
913   // AddRec using a NSW operation.
914   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
915     if (OBO->hasNoSignedWrap() &&
916         // Assume the other operand is constant so that the AddRec can be
917         // easily found.
918         isa<ConstantInt>(OBO->getOperand(1))) {
919       auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
920 
921       if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
922         return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
923     }
924 
925   return false;
926 }
927 
928 /// \brief Check whether the access through \p Ptr has a constant stride.
929 int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
930                            const Loop *Lp, const ValueToValueMap &StridesMap,
931                            bool Assume, bool ShouldCheckWrap) {
932   Type *Ty = Ptr->getType();
933   assert(Ty->isPointerTy() && "Unexpected non-ptr");
934 
935   // Make sure that the pointer does not point to aggregate types.
936   auto *PtrTy = cast<PointerType>(Ty);
937   if (PtrTy->getElementType()->isAggregateType()) {
938     DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr
939                  << "\n");
940     return 0;
941   }
942 
943   const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
944 
945   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
946   if (Assume && !AR)
947     AR = PSE.getAsAddRec(Ptr);
948 
949   if (!AR) {
950     DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
951                  << " SCEV: " << *PtrScev << "\n");
952     return 0;
953   }
954 
955   // The access function must stride over the innermost loop.
956   if (Lp != AR->getLoop()) {
957     DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
958           *Ptr << " SCEV: " << *AR << "\n");
959     return 0;
960   }
961 
962   // The address calculation must not wrap. Otherwise, a dependence could be
963   // inverted.
964   // An inbounds getelementptr that is an AddRec with a unit stride
965   // cannot wrap by definition. The unit stride requirement is checked later.
966   // A getelementptr without an inbounds attribute and a unit stride would have
967   // to access the pointer value "0", which is undefined behavior in address
968   // space 0, so we can also vectorize this case.
969   bool IsInBoundsGEP = isInBoundsGep(Ptr);
970   bool IsNoWrapAddRec = !ShouldCheckWrap ||
971     PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
972     isNoWrapAddRec(Ptr, AR, PSE, Lp);
973   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
974   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
975     if (Assume) {
976       PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
977       IsNoWrapAddRec = true;
978       DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
979                    << "LAA:   Pointer: " << *Ptr << "\n"
980                    << "LAA:   SCEV: " << *AR << "\n"
981                    << "LAA:   Added an overflow assumption\n");
982     } else {
983       DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
984                    << *Ptr << " SCEV: " << *AR << "\n");
985       return 0;
986     }
987   }
988 
989   // Check the step is constant.
990   const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
991 
992   // Calculate the pointer stride and check if it is constant.
993   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
994   if (!C) {
995     DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
996           " SCEV: " << *AR << "\n");
997     return 0;
998   }
999 
1000   auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1001   int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
1002   const APInt &APStepVal = C->getAPInt();
1003 
1004   // Huge step value - give up.
1005   if (APStepVal.getBitWidth() > 64)
1006     return 0;
1007 
1008   int64_t StepVal = APStepVal.getSExtValue();
1009 
1010   // Strided access.
1011   int64_t Stride = StepVal / Size;
1012   int64_t Rem = StepVal % Size;
1013   if (Rem)
1014     return 0;
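  // For example, an i32 access whose address advances by 8 bytes per iteration
  // yields Stride == 2; a step that is not a multiple of the element size was
  // rejected above.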
1015 
1016   // If the SCEV could wrap but we have an inbounds gep with a unit stride we
1017   // know we can't "wrap around the address space". In case of address space
1018   // zero we know that this won't happen without triggering undefined behavior.
1019   if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
1020       Stride != 1 && Stride != -1) {
1021     if (Assume) {
1022       // We can avoid this case by adding a run-time check.
1023       DEBUG(dbgs() << "LAA: Non-unit strided pointer which is not either "
1024                    << "inbounds or in address space 0 may wrap:\n"
1025                    << "LAA:   Pointer: " << *Ptr << "\n"
1026                    << "LAA:   SCEV: " << *AR << "\n"
1027                    << "LAA:   Added an overflow assumption\n");
1028       PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1029     } else
1030       return 0;
1031   }
1032 
1033   return Stride;
1034 }
1035 
1036 /// Take the pointer operand from the Load/Store instruction.
1037 /// Returns NULL if this is not a valid Load/Store instruction.
1038 static Value *getPointerOperand(Value *I) {
1039   if (auto *LI = dyn_cast<LoadInst>(I))
1040     return LI->getPointerOperand();
1041   if (auto *SI = dyn_cast<StoreInst>(I))
1042     return SI->getPointerOperand();
1043   return nullptr;
1044 }
1045 
1046 /// Take the address space operand from the Load/Store instruction.
1047 /// Returns -1 if this is not a valid Load/Store instruction.
1048 static unsigned getAddressSpaceOperand(Value *I) {
1049   if (LoadInst *L = dyn_cast<LoadInst>(I))
1050     return L->getPointerAddressSpace();
1051   if (StoreInst *S = dyn_cast<StoreInst>(I))
1052     return S->getPointerAddressSpace();
1053   return -1;
1054 }
1055 
1056 bool llvm::sortMemAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
1057                            ScalarEvolution &SE,
1058                            SmallVectorImpl<Value *> &Sorted) {
1059   SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
1060   OffValPairs.reserve(VL.size());
1061   Sorted.reserve(VL.size());
1062 
1063   // Walk over the pointers, and map each of them to an offset relative to
1064   // the first pointer in the array.
1065   Value *Ptr0 = getPointerOperand(VL[0]);
1066   const SCEV *Scev0 = SE.getSCEV(Ptr0);
1067   Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
1068 
1069   for (auto *Val : VL) {
1070     Value *Ptr = getPointerOperand(Val);
1071 
1072     // If a pointer refers to a different underlying object, bail - the
1073     // pointers are by definition incomparable.
1074     Value *CurrObj = GetUnderlyingObject(Ptr, DL);
1075     if (CurrObj != Obj0)
1076       return false;
1077 
1078     const SCEVConstant *Diff =
1079         dyn_cast<SCEVConstant>(SE.getMinusSCEV(SE.getSCEV(Ptr), Scev0));
1080 
1081     // The pointers may not have a constant offset from each other, or SCEV
1082     // may just not be smart enough to figure out they do. Regardless,
1083     // there's nothing we can do.
1084     if (!Diff)
1085       return false;
1086 
1087     OffValPairs.emplace_back(Diff->getAPInt().getSExtValue(), Val);
1088   }
1089 
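  // Sort by the constant offset from the first pointer so that callers receive
  // the accesses in increasing address order.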
1090   std::sort(OffValPairs.begin(), OffValPairs.end(),
1091             [](const std::pair<int64_t, Value *> &Left,
1092                const std::pair<int64_t, Value *> &Right) {
1093               return Left.first < Right.first;
1094             });
1095 
1096   for (auto &it : OffValPairs)
1097     Sorted.push_back(it.second);
1098 
1099   return true;
1100 }
1101 
1102 /// Returns true if the memory operations \p A and \p B are consecutive.
1103 bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1104                                ScalarEvolution &SE, bool CheckType) {
1105   Value *PtrA = getPointerOperand(A);
1106   Value *PtrB = getPointerOperand(B);
1107   unsigned ASA = getAddressSpaceOperand(A);
1108   unsigned ASB = getAddressSpaceOperand(B);
1109 
1110   // Check that the address spaces match and that the pointers are valid.
1111   if (!PtrA || !PtrB || (ASA != ASB))
1112     return false;
1113 
1114   // Make sure that A and B are different pointers.
1115   if (PtrA == PtrB)
1116     return false;
1117 
1118   // Make sure that A and B have the same type if required.
1119   if (CheckType && PtrA->getType() != PtrB->getType())
1120     return false;
1121 
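  // A and B are consecutive when B starts exactly Size bytes after A, where
  // Size is the store size of the element type being accessed.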
1122   unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
1123   Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
1124   APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
1125 
1126   APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
1127   PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1128   PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1129 
1130   //  OffsetDelta = OffsetB - OffsetA;
1131   const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
1132   const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
1133   const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1134   const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
1135   const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
1136   // Check if they are based on the same pointer. That makes the offsets
1137   // sufficient.
1138   if (PtrA == PtrB)
1139     return OffsetDelta == Size;
1140 
1141   // Compute the necessary base pointer delta to have the necessary final delta
1142   // equal to the size.
1143   // BaseDelta = Size - OffsetDelta;
1144   const SCEV *SizeSCEV = SE.getConstant(Size);
1145   const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
1146 
1147   // Otherwise compute the distance with SCEV between the base pointers.
1148   const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1149   const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1150   const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
1151   return X == PtrSCEVB;
1152 }
1153 
1154 bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1155   switch (Type) {
1156   case NoDep:
1157   case Forward:
1158   case BackwardVectorizable:
1159     return true;
1160 
1161   case Unknown:
1162   case ForwardButPreventsForwarding:
1163   case Backward:
1164   case BackwardVectorizableButPreventsForwarding:
1165     return false;
1166   }
1167   llvm_unreachable("unexpected DepType!");
1168 }
1169 
1170 bool MemoryDepChecker::Dependence::isBackward() const {
1171   switch (Type) {
1172   case NoDep:
1173   case Forward:
1174   case ForwardButPreventsForwarding:
1175   case Unknown:
1176     return false;
1177 
1178   case BackwardVectorizable:
1179   case Backward:
1180   case BackwardVectorizableButPreventsForwarding:
1181     return true;
1182   }
1183   llvm_unreachable("unexpected DepType!");
1184 }
1185 
1186 bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1187   return isBackward() || Type == Unknown;
1188 }
1189 
1190 bool MemoryDepChecker::Dependence::isForward() const {
1191   switch (Type) {
1192   case Forward:
1193   case ForwardButPreventsForwarding:
1194     return true;
1195 
1196   case NoDep:
1197   case Unknown:
1198   case BackwardVectorizable:
1199   case Backward:
1200   case BackwardVectorizableButPreventsForwarding:
1201     return false;
1202   }
1203   llvm_unreachable("unexpected DepType!");
1204 }
1205 
1206 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1207                                                     uint64_t TypeByteSize) {
1208   // If loads occur at a distance that is not a multiple of a feasible vector
1209   // factor, store-load forwarding does not take place.
1210   // Positive dependences might cause trouble because vectorizing them might
1211   // prevent store-load forwarding, making vectorized code run a lot slower.
1212   //   a[i] = a[i-3] ^ a[i-8];
1213   //   The stores to a[i:i+1] don't align with the loads from a[i-3:i-2] and
1214   //   hence on a typical architecture store-load forwarding does not take
1215   //   place. Vectorizing in such cases does not make sense.
1216   // Store-load forwarding distance.
1217 
1218   // After this many iterations store-to-load forwarding conflicts should not
1219   // cause any slowdowns.
1220   const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1221   // Maximum vector factor.
1222   uint64_t MaxVFWithoutSLForwardIssues = std::min(
1223       VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1224 
1225   // Compute the smallest VF at which the store and load would be misaligned.
1226   for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1227        VF *= 2) {
1228     // If the number of vector iterations between the store and the load is
1229     // small, we could incur conflicts.
1230     if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
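      // Conflicts start at this VF, so the largest VF free of store-to-load
      // forwarding issues is half of it.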
1231       MaxVFWithoutSLForwardIssues = (VF >>= 1);
1232       break;
1233     }
1234   }
1235 
1236   if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1237     DEBUG(dbgs() << "LAA: Distance " << Distance
1238                  << " that could cause a store-load forwarding conflict\n");
1239     return true;
1240   }
1241 
1242   if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1243       MaxVFWithoutSLForwardIssues !=
1244           VectorizerParams::MaxVectorWidth * TypeByteSize)
1245     MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1246   return false;
1247 }
1248 
1249 /// Given a non-constant (unknown) dependence-distance \p Dist between two
1250 /// memory accesses that have the same stride, whose absolute value is given
1251 /// in \p Stride, and that have the same type size \p TypeByteSize,
1252 /// in a loop whose backedge-taken count is \p BackedgeTakenCount, check if it is
1253 /// possible to prove statically that the dependence distance is larger
1254 /// than the range that the accesses will travel through the execution of
1255 /// the loop. If so, return true; false otherwise. This is useful for
1256 /// example in loops such as the following (PR31098):
1257 ///     for (i = 0; i < D; ++i) {
1258 ///                = out[i];
1259 ///       out[i+D] =
1260 ///     }
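/// In this example the dependence distance is D * TypeByteSize, while
/// BackedgeTakenCount * Step is only (D - 1) * TypeByteSize, so the accesses
/// cannot overlap within a single execution of the loop.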
1261 static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1262                                      const SCEV &BackedgeTakenCount,
1263                                      const SCEV &Dist, uint64_t Stride,
1264                                      uint64_t TypeByteSize) {
1265 
1266   // If we can prove that
1267   //      (**) |Dist| > BackedgeTakenCount * Step
1268   // where Step is the absolute stride of the memory accesses in bytes,
1269   // then there is no dependence.
1270   //
1271   // Rationale:
1272   // We basically want to check if the absolute distance (|Dist/Step|)
1273   // is >= the loop iteration count (or > BackedgeTakenCount).
1274   // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1275   // Section 4.2.1). Note that for vectorization it is sufficient to prove
1276   // that the dependence distance is >= VF; this is checked elsewhere.
1277   // But in some cases we can prune unknown dependence distances early, and
1278   // even before selecting the VF, and without a runtime test, by comparing
1279   // the distance against the loop iteration count. Since the vectorized code
1280   // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1281   // also guarantees that distance >= VF.
1282   //
1283   const uint64_t ByteStride = Stride * TypeByteSize;
1284   const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1285   const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1286 
1287   const SCEV *CastedDist = &Dist;
1288   const SCEV *CastedProduct = Product;
1289   uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
1290   uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
1291 
1292   // The dependence distance can be positive/negative, so we sign extend Dist;
1293   // The multiplication of the absolute stride in bytes and the
1294   // BackedgeTakenCount is non-negative, so we zero extend Product.
1295   if (DistTypeSize > ProductTypeSize)
1296     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1297   else
1298     CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1299 
1300   // Is  Dist - (BackedgeTakenCount * Step) > 0 ?
1301   // (If so, then we have proven (**) because |Dist| >= Dist)
1302   const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1303   if (SE.isKnownPositive(Minus))
1304     return true;
1305 
1306   // Second try: Is  -Dist - (BackedgeTakenCount * Step) > 0 ?
1307   // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1308   const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1309   Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1310   if (SE.isKnownPositive(Minus))
1311     return true;
1312 
1313   return false;
1314 }
1315 
1316 /// \brief Check the dependence for two accesses with the same stride \p Stride.
1317 /// \p Distance is the positive distance and \p TypeByteSize is type size in
1318 /// bytes.
1319 ///
1320 /// \returns true if they are independent.
1321 static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1322                                           uint64_t TypeByteSize) {
1323   assert(Stride > 1 && "The stride must be greater than 1");
1324   assert(TypeByteSize > 0 && "The type size in bytes must be non-zero");
1325   assert(Distance > 0 && "The distance must be non-zero");
1326 
1327   // Skip if the distance is not a multiple of the type byte size.
1328   if (Distance % TypeByteSize)
1329     return false;
1330 
1331   uint64_t ScaledDist = Distance / TypeByteSize;
1332 
1333   // No dependence if the scaled distance is not a multiple of the stride.
1334   // E.g.
1335   //      for (i = 0; i < 1024 ; i += 4)
1336   //        A[i+2] = A[i] + 1;
1337   //
1338   // Two accesses in memory (scaled distance is 2, stride is 4):
1339   //     | A[0] |      |      |      | A[4] |      |      |      |
1340   //     |      |      | A[2] |      |      |      | A[6] |      |
1341   //
1342   // E.g.
1343   //      for (i = 0; i < 1024 ; i += 3)
1344   //        A[i+4] = A[i] + 1;
1345   //
1346   // Two accesses in memory (scaled distance is 4, stride is 3):
1347   //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
1348   //     |      |      |      |      | A[4] |      |      | A[7] |      |
1349   return ScaledDist % Stride;
1350 }
1351 
1352 MemoryDepChecker::Dependence::DepType
1353 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1354                               const MemAccessInfo &B, unsigned BIdx,
1355                               const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");
1357 
1358   Value *APtr = A.getPointer();
1359   Value *BPtr = B.getPointer();
1360   bool AIsWrite = A.getInt();
1361   bool BIsWrite = B.getInt();
1362 
1363   // Two reads are independent.
1364   if (!AIsWrite && !BIsWrite)
1365     return Dependence::NoDep;
1366 
1367   // We cannot check pointers in different address spaces.
1368   if (APtr->getType()->getPointerAddressSpace() !=
1369       BPtr->getType()->getPointerAddressSpace())
1370     return Dependence::Unknown;
1371 
1372   int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1373   int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1374 
1375   const SCEV *Src = PSE.getSCEV(APtr);
1376   const SCEV *Sink = PSE.getSCEV(BPtr);
1377 
1378   // If the induction step is negative we have to invert source and sink of the
1379   // dependence.
1380   if (StrideAPtr < 0) {
1381     std::swap(APtr, BPtr);
1382     std::swap(Src, Sink);
1383     std::swap(AIsWrite, BIsWrite);
1384     std::swap(AIdx, BIdx);
1385     std::swap(StrideAPtr, StrideBPtr);
1386   }
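  // E.g. (illustrative): a down-counting loop such as
  //      for (i = N; i > 0; --i)
  //        A[i-1] = A[i];
  // has a negative induction step; exchanging source and sink above lets the
  // rest of the analysis compute the distance as if the step were positive.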
1387 
1388   const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1389 
  DEBUG(dbgs() << "LAA: Src Scev: " << *Src << " Sink Scev: " << *Sink
               << " (Induction step: " << StrideAPtr << ")\n");
1392   DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
1393                << *InstMap[BIdx] << ": " << *Dist << "\n");
1394 
1395   // Need accesses with constant stride. We don't want to vectorize
1396   // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1397   // the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
1399     DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1400     return Dependence::Unknown;
1401   }
1402 
1403   Type *ATy = APtr->getType()->getPointerElementType();
1404   Type *BTy = BPtr->getType()->getPointerElementType();
1405   auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1406   uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1407   uint64_t Stride = std::abs(StrideAPtr);
1408   const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1409   if (!C) {
1410     if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1411         isSafeDependenceDistance(DL, *(PSE.getSE()),
1412                                  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1413                                  TypeByteSize))
1414       return Dependence::NoDep;
1415 
1416     DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1417     ShouldRetryWithRuntimeCheck = true;
1418     return Dependence::Unknown;
1419   }
1420 
1421   const APInt &Val = C->getAPInt();
1422   int64_t Distance = Val.getSExtValue();
1423 
1424   // Attempt to prove strided accesses independent.
1425   if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1426       areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1427     DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1428     return Dependence::NoDep;
1429   }
1430 
  // A negative distance means the sink accesses a lower address than the
  // source, i.e. the dependence is a forward dependence rather than a
  // vectorization-blocking backward one.
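  // For instance (illustrative):
  //      for (i = 0; i < n; ++i) {
  //        A[i + 2] = ...;   // source, earlier in program order
  //        ...      = A[i];  // sink
  //      }
  // has a distance of -2 elements: each load reads a value stored two
  // iterations earlier, and the widened store still precedes the widened
  // load after vectorization.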
1432   if (Val.isNegative()) {
1433     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1434     if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1435         (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1436          ATy != BTy)) {
1437       DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1438       return Dependence::ForwardButPreventsForwarding;
1439     }
1440 
1441     DEBUG(dbgs() << "LAA: Dependence is negative\n");
1442     return Dependence::Forward;
1443   }
1444 
1445   // Write to the same location with the same size.
1446   // Could be improved to assert type sizes are the same (i32 == float, etc).
1447   if (Val == 0) {
1448     if (ATy == BTy)
1449       return Dependence::Forward;
1450     DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
1451     return Dependence::Unknown;
1452   }
1453 
1454   assert(Val.isStrictlyPositive() && "Expect a positive value");
1455 
1456   if (ATy != BTy) {
1457     DEBUG(dbgs() <<
1458           "LAA: ReadWrite-Write positive dependency with different types\n");
1459     return Dependence::Unknown;
1460   }
1461 
1462   // Bail out early if passed-in parameters make vectorization not feasible.
1463   unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1464                            VectorizerParams::VectorizationFactor : 1);
1465   unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1466                            VectorizerParams::VectorizationInterleave : 1);
1467   // The minimum number of iterations for a vectorized/unrolled version.
1468   unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1469 
  // It's not vectorizable if the distance is smaller than the minimum distance
  // needed for a vectorized/unrolled version. Vectorizing one iteration in
  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
  // TypeByteSize (there is no need to add the trailing gap distance).
1474   //
1475   // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1476   //      foo(int *A) {
1477   //        int *B = (int *)((char *)A + 14);
1478   //        for (i = 0 ; i < 1024 ; i += 2)
1479   //          B[i] = A[i] + 1;
1480   //      }
1481   //
1482   // Two accesses in memory (stride is 2):
1483   //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
1484   //                              | B[0] |      | B[2] |      | B[4] |
1485   //
  // Distance needed for vectorizing every iteration except the last:
  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
  //
  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
  // 12, which is less than the distance of 14.
  //
  // If MinNumIter is 4 (say a user forces the vectorization factor to be 4),
  // the minimum distance needed is 28, which is greater than the distance of
  // 14, so it is not safe to vectorize.
1496   uint64_t MinDistanceNeeded =
1497       TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1498   if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1499     DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
1500                  << '\n');
1501     return Dependence::Backward;
1502   }
1503 
1504   // Unsafe if the minimum distance needed is greater than max safe distance.
1505   if (MinDistanceNeeded > MaxSafeDepDistBytes) {
    DEBUG(dbgs() << "LAA: Failure because it needs at least "
                 << MinDistanceNeeded << " bytes of dependence distance\n");
1508     return Dependence::Backward;
1509   }
1510 
1511   // Positive distance bigger than max vectorization factor.
  // FIXME: Should use the maximum factor instead of the maximum distance in
  // bytes, which cannot handle different types.
1514   // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1515   //      void foo (int *A, char *B) {
1516   //        for (unsigned i = 0; i < 1024; i++) {
1517   //          A[i+2] = A[i] + 1;
1518   //          B[i+2] = B[i] + 1;
1519   //        }
1520   //      }
1521   //
  // This case is currently unsafe according to the max safe distance. If we
  // analyze the two accesses on array B, the max safe dependence distance
  // is 2. Then when we analyze the accesses on array A, the minimum distance
  // needed is 8, which is greater than 2, so vectorization is forbidden. But
  // actually both A and B could safely be vectorized with a factor of 2.
1527   MaxSafeDepDistBytes =
1528       std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1529 
1530   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1531   if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1532       couldPreventStoreLoadForward(Distance, TypeByteSize))
1533     return Dependence::BackwardVectorizableButPreventsForwarding;
1534 
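  // The maximum safe VF reported below is roughly the number of iterations
  // whose accesses fit within MaxSafeDepDistBytes, i.e.
  // MaxSafeDepDistBytes / (TypeByteSize * Stride). E.g. (illustrative) with
  // a 16-byte distance, 4-byte elements and a stride of 1, the max VF is 4.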
1535   DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1536                << " with max VF = "
1537                << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n');
1538 
1539   return Dependence::BackwardVectorizable;
1540 }
1541 
1542 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1543                                    MemAccessInfoSet &CheckDeps,
1544                                    const ValueToValueMap &Strides) {
1545 
1546   MaxSafeDepDistBytes = -1;
1547   while (!CheckDeps.empty()) {
1548     MemAccessInfo CurAccess = *CheckDeps.begin();
1549 
1550     // Get the relevant memory access set.
1551     EquivalenceClasses<MemAccessInfo>::iterator I =
1552       AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1553 
1554     // Check accesses within this set.
1555     EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1556         AccessSets.member_begin(I);
1557     EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1558         AccessSets.member_end();
1559 
1560     // Check every access pair.
1561     while (AI != AE) {
1562       CheckDeps.erase(*AI);
1563       EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
1564       while (OI != AE) {
1565         // Check every accessing instruction pair in program order.
1566         for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1567              I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1568           for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
1569                I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
1570             auto A = std::make_pair(&*AI, *I1);
1571             auto B = std::make_pair(&*OI, *I2);
1572 
1573             assert(*I1 != *I2);
1574             if (*I1 > *I2)
1575               std::swap(A, B);
1576 
1577             Dependence::DepType Type =
1578                 isDependent(*A.first, A.second, *B.first, B.second, Strides);
1579             SafeForVectorization &= Dependence::isSafeForVectorization(Type);
1580 
1581             // Gather dependences unless we accumulated MaxDependences
1582             // dependences.  In that case return as soon as we find the first
1583             // unsafe dependence.  This puts a limit on this quadratic
1584             // algorithm.
1585             if (RecordDependences) {
1586               if (Type != Dependence::NoDep)
1587                 Dependences.push_back(Dependence(A.second, B.second, Type));
1588 
1589               if (Dependences.size() >= MaxDependences) {
1590                 RecordDependences = false;
1591                 Dependences.clear();
1592                 DEBUG(dbgs() << "Too many dependences, stopped recording\n");
1593               }
1594             }
1595             if (!RecordDependences && !SafeForVectorization)
1596               return false;
1597           }
1598         ++OI;
1599       }
      ++AI;
1601     }
1602   }
1603 
1604   DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1605   return SafeForVectorization;
1606 }
1607 
1608 SmallVector<Instruction *, 4>
1609 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1610   MemAccessInfo Access(Ptr, isWrite);
1611   auto &IndexVector = Accesses.find(Access)->second;
1612 
1613   SmallVector<Instruction *, 4> Insts;
  transform(IndexVector, std::back_inserter(Insts),
            [&](unsigned Idx) { return this->InstMap[Idx]; });
1617   return Insts;
1618 }
1619 
1620 const char *MemoryDepChecker::Dependence::DepName[] = {
1621     "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1622     "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1623 
1624 void MemoryDepChecker::Dependence::print(
1625     raw_ostream &OS, unsigned Depth,
1626     const SmallVectorImpl<Instruction *> &Instrs) const {
1627   OS.indent(Depth) << DepName[Type] << ":\n";
1628   OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1629   OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1630 }
1631 
1632 bool LoopAccessInfo::canAnalyzeLoop() {
1633   // We need to have a loop header.
1634   DEBUG(dbgs() << "LAA: Found a loop in "
1635                << TheLoop->getHeader()->getParent()->getName() << ": "
1636                << TheLoop->getHeader()->getName() << '\n');
1637 
1638   // We can only analyze innermost loops.
1639   if (!TheLoop->empty()) {
1640     DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1641     recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1642     return false;
1643   }
1644 
1645   // We must have a single backedge.
1646   if (TheLoop->getNumBackEdges() != 1) {
1647     DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1648     recordAnalysis("CFGNotUnderstood")
1649         << "loop control flow is not understood by analyzer";
1650     return false;
1651   }
1652 
1653   // We must have a single exiting block.
1654   if (!TheLoop->getExitingBlock()) {
1655     DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1656     recordAnalysis("CFGNotUnderstood")
1657         << "loop control flow is not understood by analyzer";
1658     return false;
1659   }
1660 
  // We only handle bottom-tested loops, i.e. loops in which the condition is
1662   // checked at the end of each iteration. With that we can assume that all
1663   // instructions in the loop are executed the same number of times.
1664   if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1665     DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1666     recordAnalysis("CFGNotUnderstood")
1667         << "loop control flow is not understood by analyzer";
1668     return false;
1669   }
1670 
1671   // ScalarEvolution needs to be able to find the exit count.
1672   const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1673   if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1674     recordAnalysis("CantComputeNumberOfIterations")
1675         << "could not determine number of loop iterations";
1676     DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1677     return false;
1678   }
1679 
1680   return true;
1681 }
1682 
1683 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1684                                  const TargetLibraryInfo *TLI,
1685                                  DominatorTree *DT) {
1686   typedef SmallPtrSet<Value*, 16> ValueSet;
1687 
1688   // Holds the Load and Store instructions.
1689   SmallVector<LoadInst *, 16> Loads;
1690   SmallVector<StoreInst *, 16> Stores;
1691 
1692   // Holds all the different accesses in the loop.
1693   unsigned NumReads = 0;
1694   unsigned NumReadWrites = 0;
1695 
1696   PtrRtChecking->Pointers.clear();
1697   PtrRtChecking->Need = false;
1698 
1699   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1700 
1701   // For each block.
1702   for (BasicBlock *BB : TheLoop->blocks()) {
1703     // Scan the BB and collect legal loads and stores.
1704     for (Instruction &I : *BB) {
1705       // If this is a load, save it. If this instruction can read from memory
1706       // but is not a load, then we quit. Notice that we don't handle function
1707       // calls that read or write.
1708       if (I.mayReadFromMemory()) {
1709         // Many math library functions read the rounding mode. We will only
1710         // vectorize a loop if it contains known function calls that don't set
1711         // the flag. Therefore, it is safe to ignore this read from memory.
1712         auto *Call = dyn_cast<CallInst>(&I);
1713         if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1714           continue;
1715 
1716         // If the function has an explicit vectorized counterpart, we can safely
1717         // assume that it can be vectorized.
1718         if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1719             TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1720           continue;
1721 
1722         auto *Ld = dyn_cast<LoadInst>(&I);
1723         if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1724           recordAnalysis("NonSimpleLoad", Ld)
1725               << "read with atomic ordering or volatile read";
1726           DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1727           CanVecMem = false;
1728           return;
1729         }
1730         NumLoads++;
1731         Loads.push_back(Ld);
1732         DepChecker->addAccess(Ld);
1733         if (EnableMemAccessVersioning)
1734           collectStridedAccess(Ld);
1735         continue;
1736       }
1737 
1738       // Save 'store' instructions. Abort if other instructions write to memory.
1739       if (I.mayWriteToMemory()) {
1740         auto *St = dyn_cast<StoreInst>(&I);
1741         if (!St) {
1742           recordAnalysis("CantVectorizeInstruction", St)
1743               << "instruction cannot be vectorized";
1744           CanVecMem = false;
1745           return;
1746         }
1747         if (!St->isSimple() && !IsAnnotatedParallel) {
1748           recordAnalysis("NonSimpleStore", St)
1749               << "write with atomic ordering or volatile write";
1750           DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1751           CanVecMem = false;
1752           return;
1753         }
1754         NumStores++;
1755         Stores.push_back(St);
1756         DepChecker->addAccess(St);
1757         if (EnableMemAccessVersioning)
1758           collectStridedAccess(St);
1759       }
1760     } // Next instr.
1761   } // Next block.
1762 
1763   // Now we have two lists that hold the loads and the stores.
1764   // Next, we find the pointers that they use.
1765 
1766   // Check if we see any stores. If there are no stores, then we don't
1767   // care if the pointers are *restrict*.
1768   if (!Stores.size()) {
1769     DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1770     CanVecMem = true;
1771     return;
1772   }
1773 
1774   MemoryDepChecker::DepCandidates DependentAccesses;
1775   AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1776                           AA, LI, DependentAccesses, *PSE);
1777 
1778   // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1779   // multiple times on the same object. If the ptr is accessed twice, once
1780   // for read and once for write, it will only appear once (on the write
1781   // list). This is okay, since we are going to check for conflicts between
1782   // writes and between reads and writes, but not between reads and reads.
1783   ValueSet Seen;
1784 
1785   for (StoreInst *ST : Stores) {
1786     Value *Ptr = ST->getPointerOperand();
1787     // Check for store to loop invariant address.
1788     StoreToLoopInvariantAddress |= isUniform(Ptr);
    // If we did *not* see this pointer before, insert it into the read-write
1790     // list. At this phase it is only a 'write' list.
1791     if (Seen.insert(Ptr).second) {
1792       ++NumReadWrites;
1793 
1794       MemoryLocation Loc = MemoryLocation::get(ST);
1795       // The TBAA metadata could have a control dependency on the predication
1796       // condition, so we cannot rely on it when determining whether or not we
1797       // need runtime pointer checks.
1798       if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1799         Loc.AATags.TBAA = nullptr;
1800 
1801       Accesses.addStore(Loc);
1802     }
1803   }
1804 
1805   if (IsAnnotatedParallel) {
    DEBUG(dbgs()
          << "LAA: Loop is annotated parallel; skipping memory dependency "
          << "checks.\n");
1809     CanVecMem = true;
1810     return;
1811   }
1812 
1813   for (LoadInst *LD : Loads) {
1814     Value *Ptr = LD->getPointerOperand();
    // If we did *not* see this pointer before, insert it into the
    // read list. If we *did* see it before, then it is already in
    // the read-write list. This allows us to vectorize expressions
    // such as A[i] += x, because the address of A[i] is a read-write
1819     // pointer. This only works if the index of A[i] is consecutive.
1820     // If the address of i is unknown (for example A[B[i]]) then we may
1821     // read a few words, modify, and write a few words, and some of the
1822     // words may be written to the same address.
1823     bool IsReadOnlyPtr = false;
1824     if (Seen.insert(Ptr).second ||
1825         !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1826       ++NumReads;
1827       IsReadOnlyPtr = true;
1828     }
1829 
1830     MemoryLocation Loc = MemoryLocation::get(LD);
1831     // The TBAA metadata could have a control dependency on the predication
1832     // condition, so we cannot rely on it when determining whether or not we
1833     // need runtime pointer checks.
1834     if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1835       Loc.AATags.TBAA = nullptr;
1836 
1837     Accesses.addLoad(Loc, IsReadOnlyPtr);
1838   }
1839 
1840   // If we write (or read-write) to a single destination and there are no
  // other reads in this loop, then it is safe to vectorize.
1842   if (NumReadWrites == 1 && NumReads == 0) {
1843     DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1844     CanVecMem = true;
1845     return;
1846   }
1847 
1848   // Build dependence sets and check whether we need a runtime pointer bounds
1849   // check.
1850   Accesses.buildDependenceSets();
1851 
1852   // Find pointers with computable bounds. We are going to use this information
1853   // to place a runtime bound check.
1854   bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1855                                                   TheLoop, SymbolicStrides);
1856   if (!CanDoRTIfNeeded) {
1857     recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
1858     DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
1859                  << "the array bounds.\n");
1860     CanVecMem = false;
1861     return;
1862   }
1863 
1864   DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
1865 
1866   CanVecMem = true;
1867   if (Accesses.isDependencyCheckNeeded()) {
1868     DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
1869     CanVecMem = DepChecker->areDepsSafe(
1870         DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1871     MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1872 
1873     if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1874       DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
1875 
1876       // Clear the dependency checks. We assume they are not needed.
1877       Accesses.resetDepChecks(*DepChecker);
1878 
1879       PtrRtChecking->reset();
1880       PtrRtChecking->Need = true;
1881 
1882       auto *SE = PSE->getSE();
1883       CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1884                                                  SymbolicStrides, true);
1885 
1886       // Check that we found the bounds for the pointer.
1887       if (!CanDoRTIfNeeded) {
1888         recordAnalysis("CantCheckMemDepsAtRunTime")
1889             << "cannot check memory dependencies at runtime";
1890         DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
1891         CanVecMem = false;
1892         return;
1893       }
1894 
1895       CanVecMem = true;
1896     }
1897   }
1898 
1899   if (CanVecMem)
1900     DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop.  We"
1901                  << (PtrRtChecking->Need ? "" : " don't")
1902                  << " need runtime memory checks.\n");
1903   else {
1904     recordAnalysis("UnsafeMemDep")
1905         << "unsafe dependent memory operations in loop. Use "
1906            "#pragma loop distribute(enable) to allow loop distribution "
1907            "to attempt to isolate the offending operations into a separate "
1908            "loop";
1909     DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
1910   }
1911 }
1912 
1913 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
1914                                            DominatorTree *DT)  {
1915   assert(TheLoop->contains(BB) && "Unknown block used");
1916 
1917   // Blocks that do not dominate the latch need predication.
1918   BasicBlock* Latch = TheLoop->getLoopLatch();
1919   return !DT->dominates(BB, Latch);
1920 }
1921 
1922 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
1923                                                            Instruction *I) {
1924   assert(!Report && "Multiple reports generated");
1925 
1926   Value *CodeRegion = TheLoop->getHeader();
1927   DebugLoc DL = TheLoop->getStartLoc();
1928 
1929   if (I) {
1930     CodeRegion = I->getParent();
    // If there is no debug location attached to the instruction, fall back
    // to using the loop's.
1933     if (I->getDebugLoc())
1934       DL = I->getDebugLoc();
1935   }
1936 
1937   Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
1938                                                    CodeRegion);
1939   return *Report;
1940 }
1941 
1942 bool LoopAccessInfo::isUniform(Value *V) const {
1943   auto *SE = PSE->getSE();
1944   // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
1945   // never considered uniform.
1946   // TODO: Is this really what we want? Even without FP SCEV, we may want some
1947   // trivially loop-invariant FP values to be considered uniform.
1948   if (!SE->isSCEVable(V->getType()))
1949     return false;
1950   return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
1951 }
1952 
1953 // FIXME: this function is currently a duplicate of the one in
1954 // LoopVectorize.cpp.
1955 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
1956                                  Instruction *Loc) {
1957   if (FirstInst)
1958     return FirstInst;
1959   if (Instruction *I = dyn_cast<Instruction>(V))
1960     return I->getParent() == Loc->getParent() ? I : nullptr;
1961   return nullptr;
1962 }
1963 
1964 namespace {
1965 
1966 /// \brief IR Values for the lower and upper bounds of a pointer evolution.  We
1967 /// need to use value-handles because SCEV expansion can invalidate previously
1968 /// expanded values.  Thus expansion of a pointer can invalidate the bounds for
1969 /// a previous one.
1970 struct PointerBounds {
1971   TrackingVH<Value> Start;
1972   TrackingVH<Value> End;
1973 };
1974 
1975 } // end anonymous namespace
1976 
1977 /// \brief Expand code for the lower and upper bound of the pointer group \p CG
1978 /// in \p TheLoop.  \return the values for the bounds.
1979 static PointerBounds
1980 expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
1981              Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
1982              const RuntimePointerChecking &PtrRtChecking) {
1983   Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
1984   const SCEV *Sc = SE->getSCEV(Ptr);
1985 
1986   unsigned AS = Ptr->getType()->getPointerAddressSpace();
1987   LLVMContext &Ctx = Loc->getContext();
1988 
1989   // Use this type for pointer arithmetic.
1990   Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
1991 
1992   if (SE->isLoopInvariant(Sc, TheLoop)) {
1993     DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
1994                  << "\n");
1995     // Ptr could be in the loop body. If so, expand a new one at the correct
1996     // location.
1997     Instruction *Inst = dyn_cast<Instruction>(Ptr);
1998     Value *NewPtr = (Inst && TheLoop->contains(Inst))
1999                         ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2000                         : Ptr;
2001     return {NewPtr, NewPtr};
2002   } else {
2003     Value *Start = nullptr, *End = nullptr;
2004     DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
2005     Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2006     End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2007     DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
2008     return {Start, End};
2009   }
2010 }
2011 
2012 /// \brief Turns a collection of checks into a collection of expanded upper and
2013 /// lower bounds for both pointers in the check.
2014 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
2015     const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2016     Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2017     const RuntimePointerChecking &PtrRtChecking) {
2018   SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2019 
2020   // Here we're relying on the SCEV Expander's cache to only emit code for the
2021   // same bounds once.
2022   transform(
2023       PointerChecks, std::back_inserter(ChecksWithBounds),
2024       [&](const RuntimePointerChecking::PointerCheck &Check) {
2025         PointerBounds
2026           First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2027           Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2028         return std::make_pair(First, Second);
2029       });
2030 
2031   return ChecksWithBounds;
2032 }
2033 
2034 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2035     Instruction *Loc,
2036     const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2037     const {
2038   const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2039   auto *SE = PSE->getSE();
2040   SCEVExpander Exp(*SE, DL, "induction");
2041   auto ExpandedChecks =
2042       expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2043 
2044   LLVMContext &Ctx = Loc->getContext();
2045   Instruction *FirstInst = nullptr;
2046   IRBuilder<> ChkBuilder(Loc);
2047   // Our instructions might fold to a constant.
2048   Value *MemoryRuntimeCheck = nullptr;
2049 
2050   for (const auto &Check : ExpandedChecks) {
2051     const PointerBounds &A = Check.first, &B = Check.second;
2052     // Check if two pointers (A and B) conflict where conflict is computed as:
2053     // start(A) <= end(B) && start(B) <= end(A)
2054     unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2055     unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2056 
2057     assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2058            (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2059            "Trying to bounds check pointers with different address spaces");
2060 
2061     Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2062     Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2063 
2064     Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2065     Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2066     Value *End0 =   ChkBuilder.CreateBitCast(A.End,   PtrArithTy1, "bc");
2067     Value *End1 =   ChkBuilder.CreateBitCast(B.End,   PtrArithTy0, "bc");
2068 
2069     // [A|B].Start points to the first accessed byte under base [A|B].
2070     // [A|B].End points to the last accessed byte, plus one.
2071     // There is no conflict when the intervals are disjoint:
2072     // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2073     //
2074     // bound0 = (B.Start < A.End)
2075     // bound1 = (A.Start < B.End)
2076     //  IsConflict = bound0 & bound1
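    // For example (illustrative addresses): with A = [0x100, 0x140) and
    // B = [0x140, 0x180), B.Start >= A.End, so bound0 is false and no
    // conflict is reported; shifting B down to [0x120, 0x160) makes both
    // bounds true and flags a conflict.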
2077     Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2078     FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2079     Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2080     FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2081     Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2082     FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2083     if (MemoryRuntimeCheck) {
2084       IsConflict =
2085           ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2086       FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2087     }
2088     MemoryRuntimeCheck = IsConflict;
2089   }
2090 
2091   if (!MemoryRuntimeCheck)
2092     return std::make_pair(nullptr, nullptr);
2093 
  // We have to do this trickery because the IRBuilder might fold the check to a
  // constant expression, in which case there is no Instruction anchored in
  // the block.
2097   Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2098                                                  ConstantInt::getTrue(Ctx));
2099   ChkBuilder.Insert(Check, "memcheck.conflict");
2100   FirstInst = getFirstInst(FirstInst, Check, Loc);
2101   return std::make_pair(FirstInst, Check);
2102 }
2103 
2104 std::pair<Instruction *, Instruction *>
2105 LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2106   if (!PtrRtChecking->Need)
2107     return std::make_pair(nullptr, nullptr);
2108 
2109   return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2110 }
2111 
2112 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2113   Value *Ptr = nullptr;
2114   if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2115     Ptr = LI->getPointerOperand();
2116   else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2117     Ptr = SI->getPointerOperand();
2118   else
2119     return;
2120 
2121   Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2122   if (!Stride)
2123     return;
2124 
  DEBUG(dbgs() << "LAA: Found a strided access that we can version\n");
2126   DEBUG(dbgs() << "  Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2127   SymbolicStrides[Ptr] = Stride;
2128   StrideSet.insert(Stride);
2129 }
2130 
2131 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2132                                const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2133                                DominatorTree *DT, LoopInfo *LI)
2134     : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2135       PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
2136       DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2137       NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2138       StoreToLoopInvariantAddress(false) {
2139   if (canAnalyzeLoop())
2140     analyzeLoop(AA, LI, TLI, DT);
2141 }
2142 
2143 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2144   if (CanVecMem) {
2145     OS.indent(Depth) << "Memory dependences are safe";
2146     if (MaxSafeDepDistBytes != -1ULL)
2147       OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2148          << " bytes";
2149     if (PtrRtChecking->Need)
2150       OS << " with run-time checks";
2151     OS << "\n";
2152   }
2153 
2154   if (Report)
2155     OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2156 
2157   if (auto *Dependences = DepChecker->getDependences()) {
2158     OS.indent(Depth) << "Dependences:\n";
2159     for (auto &Dep : *Dependences) {
2160       Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2161       OS << "\n";
2162     }
2163   } else
2164     OS.indent(Depth) << "Too many dependences, not recorded\n";
2165 
  // List the pairs of accesses that need run-time checks for independence.
2167   PtrRtChecking->print(OS, Depth);
2168   OS << "\n";
2169 
2170   OS.indent(Depth) << "Store to invariant address was "
2171                    << (StoreToLoopInvariantAddress ? "" : "not ")
2172                    << "found in loop.\n";
2173 
2174   OS.indent(Depth) << "SCEV assumptions:\n";
2175   PSE->getUnionPredicate().print(OS, Depth);
2176 
2177   OS << "\n";
2178 
2179   OS.indent(Depth) << "Expressions re-written:\n";
2180   PSE->print(OS, Depth);
2181 }
2182 
2183 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2184   auto &LAI = LoopAccessInfoMap[L];
2185 
2186   if (!LAI)
2187     LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2188 
2189   return *LAI.get();
2190 }
2191 
2192 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2193   LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2194 
2195   for (Loop *TopLevelLoop : *LI)
2196     for (Loop *L : depth_first(TopLevelLoop)) {
2197       OS.indent(2) << L->getHeader()->getName() << ":\n";
2198       auto &LAI = LAA.getInfo(L);
2199       LAI.print(OS, 4);
2200     }
2201 }
2202 
2203 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2204   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2205   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2206   TLI = TLIP ? &TLIP->getTLI() : nullptr;
2207   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2208   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2209   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2210 
2211   return false;
2212 }
2213 
2214 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();

  AU.setPreservesAll();
2221 }
2222 
2223 char LoopAccessLegacyAnalysis::ID = 0;
2224 static const char laa_name[] = "Loop Access Analysis";
2225 #define LAA_NAME "loop-accesses"
2226 
2227 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2228 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2229 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2230 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2231 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2232 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2233 
2234 AnalysisKey LoopAccessAnalysis::Key;
2235 
2236 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2237                                        LoopStandardAnalysisResults &AR) {
2238   return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2239 }
2240 
2241 namespace llvm {
2242 
2243   Pass *createLAAPass() {
2244     return new LoopAccessLegacyAnalysis();
2245   }
2246 
2247 } // end namespace llvm
2248