//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
20456327cSAdam Nemet //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60456327cSAdam Nemet //
70456327cSAdam Nemet //===----------------------------------------------------------------------===//
80456327cSAdam Nemet //
90456327cSAdam Nemet // The implementation for the loop memory dependence that was originally
100456327cSAdam Nemet // developed for the loop vectorizer.
110456327cSAdam Nemet //
120456327cSAdam Nemet //===----------------------------------------------------------------------===//
130456327cSAdam Nemet 
143bab7e1aSChandler Carruth #include "llvm/Analysis/LoopAccessAnalysis.h"
15a3fe70d2SEugene Zelenko #include "llvm/ADT/APInt.h"
16a3fe70d2SEugene Zelenko #include "llvm/ADT/DenseMap.h"
17a3fe70d2SEugene Zelenko #include "llvm/ADT/DepthFirstIterator.h"
18a3fe70d2SEugene Zelenko #include "llvm/ADT/EquivalenceClasses.h"
19a3fe70d2SEugene Zelenko #include "llvm/ADT/PointerIntPair.h"
203bab7e1aSChandler Carruth #include "llvm/ADT/STLExtras.h"
21a3fe70d2SEugene Zelenko #include "llvm/ADT/SetVector.h"
22a3fe70d2SEugene Zelenko #include "llvm/ADT/SmallPtrSet.h"
23a3fe70d2SEugene Zelenko #include "llvm/ADT/SmallSet.h"
24a3fe70d2SEugene Zelenko #include "llvm/ADT/SmallVector.h"
253bab7e1aSChandler Carruth #include "llvm/ADT/iterator_range.h"
26a3fe70d2SEugene Zelenko #include "llvm/Analysis/AliasAnalysis.h"
27a3fe70d2SEugene Zelenko #include "llvm/Analysis/AliasSetTracker.h"
283bab7e1aSChandler Carruth #include "llvm/Analysis/LoopAnalysisManager.h"
290456327cSAdam Nemet #include "llvm/Analysis/LoopInfo.h"
30a3fe70d2SEugene Zelenko #include "llvm/Analysis/MemoryLocation.h"
310965da20SAdam Nemet #include "llvm/Analysis/OptimizationRemarkEmitter.h"
32a3fe70d2SEugene Zelenko #include "llvm/Analysis/ScalarEvolution.h"
33a3fe70d2SEugene Zelenko #include "llvm/Analysis/ScalarEvolutionExpressions.h"
34799003bfSBenjamin Kramer #include "llvm/Analysis/TargetLibraryInfo.h"
350456327cSAdam Nemet #include "llvm/Analysis/ValueTracking.h"
36f45594c9SAdam Nemet #include "llvm/Analysis/VectorUtils.h"
37a3fe70d2SEugene Zelenko #include "llvm/IR/BasicBlock.h"
38a3fe70d2SEugene Zelenko #include "llvm/IR/Constants.h"
39a3fe70d2SEugene Zelenko #include "llvm/IR/DataLayout.h"
40a3fe70d2SEugene Zelenko #include "llvm/IR/DebugLoc.h"
41a3fe70d2SEugene Zelenko #include "llvm/IR/DerivedTypes.h"
42a3fe70d2SEugene Zelenko #include "llvm/IR/DiagnosticInfo.h"
430456327cSAdam Nemet #include "llvm/IR/Dominators.h"
44a3fe70d2SEugene Zelenko #include "llvm/IR/Function.h"
45a3fe70d2SEugene Zelenko #include "llvm/IR/InstrTypes.h"
46a3fe70d2SEugene Zelenko #include "llvm/IR/Instruction.h"
47a3fe70d2SEugene Zelenko #include "llvm/IR/Instructions.h"
48a3fe70d2SEugene Zelenko #include "llvm/IR/Operator.h"
498a021317SXinliang David Li #include "llvm/IR/PassManager.h"
50a3fe70d2SEugene Zelenko #include "llvm/IR/Type.h"
51a3fe70d2SEugene Zelenko #include "llvm/IR/Value.h"
52a3fe70d2SEugene Zelenko #include "llvm/IR/ValueHandle.h"
5305da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
54a3fe70d2SEugene Zelenko #include "llvm/Pass.h"
55a3fe70d2SEugene Zelenko #include "llvm/Support/Casting.h"
56a3fe70d2SEugene Zelenko #include "llvm/Support/CommandLine.h"
570456327cSAdam Nemet #include "llvm/Support/Debug.h"
58a3fe70d2SEugene Zelenko #include "llvm/Support/ErrorHandling.h"
59799003bfSBenjamin Kramer #include "llvm/Support/raw_ostream.h"
60a3fe70d2SEugene Zelenko #include <algorithm>
61a3fe70d2SEugene Zelenko #include <cassert>
62a3fe70d2SEugene Zelenko #include <cstdint>
63a3fe70d2SEugene Zelenko #include <iterator>
64a3fe70d2SEugene Zelenko #include <utility>
65a3fe70d2SEugene Zelenko #include <vector>
66a3fe70d2SEugene Zelenko 
using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

// The cl::opt<unsigned, true> options below store their value directly in the
// corresponding VectorizerParams static member via cl::location; the
// out-of-line definition of each member follows its option.

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum number of SCEV comparisons performed while trying to merge
/// runtime memory checks into checking groups (see
/// RuntimePointerChecking::groupChecks).
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i+=4)
///       A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

/// Returns true if an interleave count was explicitly requested on the
/// command line, i.e. -force-vector-interleave appeared at least once.
bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}
134f219c647SAdam Nemet 
1350456327cSAdam Nemet Value *llvm::stripIntegerCast(Value *V) {
1368b401013SDavid Majnemer   if (auto *CI = dyn_cast<CastInst>(V))
1370456327cSAdam Nemet     if (CI->getOperand(0)->getType()->isIntegerTy())
1380456327cSAdam Nemet       return CI->getOperand(0);
1390456327cSAdam Nemet   return V;
1400456327cSAdam Nemet }
1410456327cSAdam Nemet 
1429cd9a7e3SSilviu Baranga const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
1438bc61df9SAdam Nemet                                             const ValueToValueMap &PtrToStride,
144f4726e72SFlorian Hahn                                             Value *Ptr) {
1459cd9a7e3SSilviu Baranga   const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
1460456327cSAdam Nemet 
1470456327cSAdam Nemet   // If there is an entry in the map return the SCEV of the pointer with the
1480456327cSAdam Nemet   // symbolic stride replaced by one.
149f4726e72SFlorian Hahn   ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
150b3a8a153SPhilip Reames   if (SI == PtrToStride.end())
151b3a8a153SPhilip Reames     // For a non-symbolic stride, just return the original expression.
152b3a8a153SPhilip Reames     return OrigSCEV;
1530456327cSAdam Nemet 
154b3a8a153SPhilip Reames   Value *StrideVal = stripIntegerCast(SI->second);
1550456327cSAdam Nemet 
1569cd9a7e3SSilviu Baranga   ScalarEvolution *SE = PSE.getSE();
157e3c0534bSSilviu Baranga   const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
158e3c0534bSSilviu Baranga   const auto *CT =
159e3c0534bSSilviu Baranga     static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
160e3c0534bSSilviu Baranga 
1619cd9a7e3SSilviu Baranga   PSE.addPredicate(*SE->getEqualPredicate(U, CT));
1629cd9a7e3SSilviu Baranga   auto *Expr = PSE.getSCEV(Ptr);
163e3c0534bSSilviu Baranga 
164d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
165d34e60caSNicola Zaghen 	     << " by: " << *Expr << "\n");
1669cd9a7e3SSilviu Baranga   return Expr;
1670456327cSAdam Nemet }
1680456327cSAdam Nemet 
// Create a new checking group seeded with a single pointer: the group's
// Low/High bounds and address space are taken directly from that pointer's
// PointerInfo entry in RtCheck.Pointers.
RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, RuntimePointerChecking &RtCheck)
    : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
      AddressSpace(RtCheck.Pointers[Index]
                       .PointerValue->getType()
                       ->getPointerAddressSpace()) {
  Members.push_back(Index);
}
177616657b3SFlorian Hahn 
/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
///
/// The resulting [Start, End) interval, together with the dependency/alias
/// set ids, is appended to the Pointers list.
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, Type *AccessTy,
                                    bool WritePtr, unsigned DepSetId,
                                    unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp)) {
    // A loop-invariant pointer accesses a single location; use the degenerate
    // range [Sc, Sc] (the element size is added to the end below).
    ScStart = ScEnd = Sc;
  } else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    // Start at the first iteration, end at the last (back-edge-taken) one.
    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
  }
  // Add the size of the pointed element to ScEnd.
  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  Type *IdxTy = DL.getIndexType(Ptr->getType());
  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}
2351b6b50a9SSilviu Baranga 
236616657b3SFlorian Hahn SmallVector<RuntimePointerCheck, 4>
23738530887SAdam Nemet RuntimePointerChecking::generateChecks() const {
238616657b3SFlorian Hahn   SmallVector<RuntimePointerCheck, 4> Checks;
239bbe1f1deSAdam Nemet 
2407c52e052SAdam Nemet   for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
2417c52e052SAdam Nemet     for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
242616657b3SFlorian Hahn       const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
243616657b3SFlorian Hahn       const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];
244bbe1f1deSAdam Nemet 
24538530887SAdam Nemet       if (needsChecking(CGI, CGJ))
246bbe1f1deSAdam Nemet         Checks.push_back(std::make_pair(&CGI, &CGJ));
247bbe1f1deSAdam Nemet     }
248bbe1f1deSAdam Nemet   }
249bbe1f1deSAdam Nemet   return Checks;
250bbe1f1deSAdam Nemet }
251bbe1f1deSAdam Nemet 
/// Group the pointers (when possible) and then compute and cache the
/// pair-wise checks in Checks. Must only be called once per analysis, as
/// asserted by the empty-Checks precondition.
void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}
25815840393SAdam Nemet 
259616657b3SFlorian Hahn bool RuntimePointerChecking::needsChecking(
260616657b3SFlorian Hahn     const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
2611b6b50a9SSilviu Baranga   for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
2621b6b50a9SSilviu Baranga     for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
263651a5a24SAdam Nemet       if (needsChecking(M.Members[I], N.Members[J]))
2641b6b50a9SSilviu Baranga         return true;
2651b6b50a9SSilviu Baranga   return false;
2661b6b50a9SSilviu Baranga }
2671b6b50a9SSilviu Baranga 
2681b6b50a9SSilviu Baranga /// Compare \p I and \p J and return the minimum.
2691b6b50a9SSilviu Baranga /// Return nullptr in case we couldn't find an answer.
2701b6b50a9SSilviu Baranga static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
2711b6b50a9SSilviu Baranga                                    ScalarEvolution *SE) {
2721b6b50a9SSilviu Baranga   const SCEV *Diff = SE->getMinusSCEV(J, I);
2731b6b50a9SSilviu Baranga   const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
2741b6b50a9SSilviu Baranga 
2751b6b50a9SSilviu Baranga   if (!C)
2761b6b50a9SSilviu Baranga     return nullptr;
2771b6b50a9SSilviu Baranga   if (C->getValue()->isNegative())
2781b6b50a9SSilviu Baranga     return J;
2791b6b50a9SSilviu Baranga   return I;
2801b6b50a9SSilviu Baranga }
2811b6b50a9SSilviu Baranga 
2826d753b07SFlorian Hahn bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
2836d753b07SFlorian Hahn                                          RuntimePointerChecking &RtCheck) {
2846d753b07SFlorian Hahn   return addPointer(
2856d753b07SFlorian Hahn       Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
2866d753b07SFlorian Hahn       RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
2876d753b07SFlorian Hahn       *RtCheck.SE);
2886d753b07SFlorian Hahn }
2896d753b07SFlorian Hahn 
/// Try to add the pointer \p Index (with access range [Start, End)) to this
/// checking group, widening the group's [Low, High) bounds as needed.
/// Returns false — leaving the group untouched — when the distance between
/// the new bounds and the group's bounds is not a compile-time constant, so
/// no comparison is possible.
bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                         const SCEV *End, unsigned AS,
                                         ScalarEvolution &SE) {
  assert(AddressSpace == AS &&
         "all pointers in a checking group must be in the same address space");

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, &SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  // (Min1 != End means High < End, i.e. End is the new maximum.)
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}
3181b6b50a9SSilviu Baranga 
void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  // Map each pointer value back to its index in Pointers, so that members of
  // an equivalence class (MemAccessInfos) can be translated to positions in
  // the Pointers list.
  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      auto PointerI = PositionMap.find(MI->getPointer());
      assert(PointerI != PositionMap.end() &&
             "pointer in equivalence class not found in PositionMap");
      unsigned Pointer = PointerI->second;
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (RuntimeCheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable.  If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer, *this)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}
4410456327cSAdam Nemet 
442041e6debSAdam Nemet bool RuntimePointerChecking::arePointersInSamePartition(
443041e6debSAdam Nemet     const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
444041e6debSAdam Nemet     unsigned PtrIdx2) {
445041e6debSAdam Nemet   return (PtrToPartition[PtrIdx1] != -1 &&
446041e6debSAdam Nemet           PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
447041e6debSAdam Nemet }
448041e6debSAdam Nemet 
449651a5a24SAdam Nemet bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
4509f7dedc3SAdam Nemet   const PointerInfo &PointerI = Pointers[I];
4519f7dedc3SAdam Nemet   const PointerInfo &PointerJ = Pointers[J];
4529f7dedc3SAdam Nemet 
453a8945b77SAdam Nemet   // No need to check if two readonly pointers intersect.
4549f7dedc3SAdam Nemet   if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
455a8945b77SAdam Nemet     return false;
456a8945b77SAdam Nemet 
457a8945b77SAdam Nemet   // Only need to check pointers between two different dependency sets.
4589f7dedc3SAdam Nemet   if (PointerI.DependencySetId == PointerJ.DependencySetId)
459a8945b77SAdam Nemet     return false;
460a8945b77SAdam Nemet 
461a8945b77SAdam Nemet   // Only need to check pointers in the same alias set.
4629f7dedc3SAdam Nemet   if (PointerI.AliasSetId != PointerJ.AliasSetId)
463a8945b77SAdam Nemet     return false;
464a8945b77SAdam Nemet 
465a8945b77SAdam Nemet   return true;
466a8945b77SAdam Nemet }
467a8945b77SAdam Nemet 
46854f0b83eSAdam Nemet void RuntimePointerChecking::printChecks(
469616657b3SFlorian Hahn     raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
47054f0b83eSAdam Nemet     unsigned Depth) const {
47154f0b83eSAdam Nemet   unsigned N = 0;
47254f0b83eSAdam Nemet   for (const auto &Check : Checks) {
47354f0b83eSAdam Nemet     const auto &First = Check.first->Members, &Second = Check.second->Members;
47454f0b83eSAdam Nemet 
47554f0b83eSAdam Nemet     OS.indent(Depth) << "Check " << N++ << ":\n";
47654f0b83eSAdam Nemet 
47754f0b83eSAdam Nemet     OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
47854f0b83eSAdam Nemet     for (unsigned K = 0; K < First.size(); ++K)
47954f0b83eSAdam Nemet       OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
48054f0b83eSAdam Nemet 
48154f0b83eSAdam Nemet     OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
48254f0b83eSAdam Nemet     for (unsigned K = 0; K < Second.size(); ++K)
48354f0b83eSAdam Nemet       OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
48454f0b83eSAdam Nemet   }
48554f0b83eSAdam Nemet }
48654f0b83eSAdam Nemet 
4873a91e947SAdam Nemet void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
488e91cc6efSAdam Nemet 
489e91cc6efSAdam Nemet   OS.indent(Depth) << "Run-time memory checks:\n";
49015840393SAdam Nemet   printChecks(OS, Checks, Depth);
4911b6b50a9SSilviu Baranga 
4921b6b50a9SSilviu Baranga   OS.indent(Depth) << "Grouped accesses:\n";
4931b6b50a9SSilviu Baranga   for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
49454f0b83eSAdam Nemet     const auto &CG = CheckingGroups[I];
49554f0b83eSAdam Nemet 
49654f0b83eSAdam Nemet     OS.indent(Depth + 2) << "Group " << &CG << ":\n";
49754f0b83eSAdam Nemet     OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
49854f0b83eSAdam Nemet                          << ")\n";
49954f0b83eSAdam Nemet     for (unsigned J = 0; J < CG.Members.size(); ++J) {
50054f0b83eSAdam Nemet       OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
5011b6b50a9SSilviu Baranga                            << "\n";
5021b6b50a9SSilviu Baranga     }
503e91cc6efSAdam Nemet   }
504e91cc6efSAdam Nemet }
505e91cc6efSAdam Nemet 
5060456327cSAdam Nemet namespace {
507a3fe70d2SEugene Zelenko 
5085f8f34e4SAdrian Prantl /// Analyses memory accesses in a loop.
5090456327cSAdam Nemet ///
5100456327cSAdam Nemet /// Checks whether run time pointer checks are needed and builds sets for data
5110456327cSAdam Nemet /// dependence checking.
5120456327cSAdam Nemet class AccessAnalysis {
5130456327cSAdam Nemet public:
5145f8f34e4SAdrian Prantl   /// Read or write access location.
5150456327cSAdam Nemet   typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
5165448e989SAmjad Aboud   typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
5170456327cSAdam Nemet 
518b0eb40caSVitaly Buka   AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
519b0eb40caSVitaly Buka                  MemoryDepChecker::DepCandidates &DA,
5209cd9a7e3SSilviu Baranga                  PredicatedScalarEvolution &PSE)
521b752eb88SKazu Hirata       : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
5220456327cSAdam Nemet 
5235f8f34e4SAdrian Prantl   /// Register a load  and whether it is only read from.
524ff31020eSArthur Eubanks   void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
5250456327cSAdam Nemet     Value *Ptr = const_cast<Value*>(Loc.Ptr);
5264df8efceSNikita Popov     AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
527ff31020eSArthur Eubanks     Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
5280456327cSAdam Nemet     if (IsReadOnly)
5290456327cSAdam Nemet       ReadOnlyPtr.insert(Ptr);
5300456327cSAdam Nemet   }
5310456327cSAdam Nemet 
5325f8f34e4SAdrian Prantl   /// Register a store.
533ff31020eSArthur Eubanks   void addStore(MemoryLocation &Loc, Type *AccessTy) {
5340456327cSAdam Nemet     Value *Ptr = const_cast<Value*>(Loc.Ptr);
5354df8efceSNikita Popov     AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
536ff31020eSArthur Eubanks     Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
5370456327cSAdam Nemet   }
5380456327cSAdam Nemet 
5395f8f34e4SAdrian Prantl   /// Check if we can emit a run-time no-alias check for \p Access.
540ac920f77SSilviu Baranga   ///
541ac920f77SSilviu Baranga   /// Returns true if we can emit a run-time no alias check for \p Access.
542ac920f77SSilviu Baranga   /// If we can check this access, this also adds it to a dependence set and
543ac920f77SSilviu Baranga   /// adds a run-time to check for it to \p RtCheck. If \p Assume is true,
544ac920f77SSilviu Baranga   /// we will attempt to use additional run-time checks in order to get
545ac920f77SSilviu Baranga   /// the bounds of the pointer.
546ac920f77SSilviu Baranga   bool createCheckForAccess(RuntimePointerChecking &RtCheck,
547ff31020eSArthur Eubanks                             MemAccessInfo Access, Type *AccessTy,
548ac920f77SSilviu Baranga                             const ValueToValueMap &Strides,
549ac920f77SSilviu Baranga                             DenseMap<Value *, unsigned> &DepSetId,
550ac920f77SSilviu Baranga                             Loop *TheLoop, unsigned &RunningDepId,
551ff31020eSArthur Eubanks                             unsigned ASId, bool ShouldCheckStride, bool Assume);
552ac920f77SSilviu Baranga 
5535f8f34e4SAdrian Prantl   /// Check whether we can check the pointers at runtime for
554ee61474aSAdam Nemet   /// non-intersection.
555ee61474aSAdam Nemet   ///
556ee61474aSAdam Nemet   /// Returns true if we need no check or if we do and we can generate them
557ee61474aSAdam Nemet   /// (i.e. the pointers have computable bounds).
5587cdebac0SAdam Nemet   bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
5597cdebac0SAdam Nemet                        Loop *TheLoop, const ValueToValueMap &Strides,
5609f1c6fbfSMalhar Jajoo                        Value *&UncomputablePtr, bool ShouldCheckWrap = false);
5610456327cSAdam Nemet 
5625f8f34e4SAdrian Prantl   /// Goes over all memory accesses, checks whether a RT check is needed
5630456327cSAdam Nemet   /// and builds sets of dependent accesses.
5640456327cSAdam Nemet   void buildDependenceSets() {
5650456327cSAdam Nemet     processMemAccesses();
5660456327cSAdam Nemet   }
5670456327cSAdam Nemet 
5685f8f34e4SAdrian Prantl   /// Initial processing of memory accesses determined that we need to
5695dc3b2cfSAdam Nemet   /// perform dependency checking.
5705dc3b2cfSAdam Nemet   ///
5715dc3b2cfSAdam Nemet   /// Note that this can later be cleared if we retry memcheck analysis without
572ef307b8cSFlorian Hahn   /// dependency checking (i.e. FoundNonConstantDistanceDependence).
5730456327cSAdam Nemet   bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
574df3dc5b9SAdam Nemet 
575df3dc5b9SAdam Nemet   /// We decided that no dependence analysis would be used.  Reset the state.
576df3dc5b9SAdam Nemet   void resetDepChecks(MemoryDepChecker &DepChecker) {
577df3dc5b9SAdam Nemet     CheckDeps.clear();
578a2df750fSAdam Nemet     DepChecker.clearDependences();
579df3dc5b9SAdam Nemet   }
5800456327cSAdam Nemet 
5815448e989SAmjad Aboud   MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
5820456327cSAdam Nemet 
5830456327cSAdam Nemet private:
584ff31020eSArthur Eubanks   typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
5850456327cSAdam Nemet 
5865f8f34e4SAdrian Prantl   /// Go over all memory access and check whether runtime pointer checks
587b41d2d3fSAdam Nemet   /// are needed and build sets of dependency check candidates.
5880456327cSAdam Nemet   void processMemAccesses();
5890456327cSAdam Nemet 
590ff31020eSArthur Eubanks   /// Map of all accesses. Values are the types used to access memory pointed to
591ff31020eSArthur Eubanks   /// by the pointer.
592ff31020eSArthur Eubanks   PtrAccessMap Accesses;
5930456327cSAdam Nemet 
59477eeac3dSManoj Gupta   /// The loop being checked.
59577eeac3dSManoj Gupta   const Loop *TheLoop;
59677eeac3dSManoj Gupta 
5975448e989SAmjad Aboud   /// List of accesses that need a further dependence check.
5985448e989SAmjad Aboud   MemAccessInfoList CheckDeps;
5990456327cSAdam Nemet 
6000456327cSAdam Nemet   /// Set of pointers that are read only.
6010456327cSAdam Nemet   SmallPtrSet<Value*, 16> ReadOnlyPtr;
6020456327cSAdam Nemet 
6030456327cSAdam Nemet   /// An alias set tracker to partition the access set by underlying object and
6040456327cSAdam Nemet   //intrinsic property (such as TBAA metadata).
6050456327cSAdam Nemet   AliasSetTracker AST;
6060456327cSAdam Nemet 
607e2b885c4SAdam Nemet   LoopInfo *LI;
608e2b885c4SAdam Nemet 
6090456327cSAdam Nemet   /// Sets of potentially dependent accesses - members of one set share an
6100456327cSAdam Nemet   /// underlying pointer. The set "CheckDeps" identfies which sets really need a
6110456327cSAdam Nemet   /// dependence check.
612dee666bcSAdam Nemet   MemoryDepChecker::DepCandidates &DepCands;
6130456327cSAdam Nemet 
6145f8f34e4SAdrian Prantl   /// Initial processing of memory accesses determined that we may need
6155dc3b2cfSAdam Nemet   /// to add memchecks.  Perform the analysis to determine the necessary checks.
6165dc3b2cfSAdam Nemet   ///
6175dc3b2cfSAdam Nemet   /// Note that, this is different from isDependencyCheckNeeded.  When we retry
6185dc3b2cfSAdam Nemet   /// memcheck analysis without dependency checking
619ef307b8cSFlorian Hahn   /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
620ef307b8cSFlorian Hahn   /// cleared while this remains set if we have potentially dependent accesses.
621b752eb88SKazu Hirata   bool IsRTCheckAnalysisNeeded = false;
622e3c0534bSSilviu Baranga 
623e3c0534bSSilviu Baranga   /// The SCEV predicate containing all the SCEV-related assumptions.
6249cd9a7e3SSilviu Baranga   PredicatedScalarEvolution &PSE;
6250456327cSAdam Nemet };
6260456327cSAdam Nemet 
6270456327cSAdam Nemet } // end anonymous namespace
6280456327cSAdam Nemet 
6295f8f34e4SAdrian Prantl /// Check whether a pointer can participate in a runtime bounds check.
630ac920f77SSilviu Baranga /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
631ac920f77SSilviu Baranga /// by adding run-time checks (overflow checks) if necessary.
6329cd9a7e3SSilviu Baranga static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
633e3c0534bSSilviu Baranga                                 const ValueToValueMap &Strides, Value *Ptr,
634ac920f77SSilviu Baranga                                 Loop *L, bool Assume) {
6359cd9a7e3SSilviu Baranga   const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
636279784ffSAdam Nemet 
637279784ffSAdam Nemet   // The bounds for loop-invariant pointer is trivial.
638279784ffSAdam Nemet   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
639279784ffSAdam Nemet     return true;
640279784ffSAdam Nemet 
6410456327cSAdam Nemet   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
642ac920f77SSilviu Baranga 
643ac920f77SSilviu Baranga   if (!AR && Assume)
644ac920f77SSilviu Baranga     AR = PSE.getAsAddRec(Ptr);
645ac920f77SSilviu Baranga 
6460456327cSAdam Nemet   if (!AR)
6470456327cSAdam Nemet     return false;
6480456327cSAdam Nemet 
6490456327cSAdam Nemet   return AR->isAffine();
6500456327cSAdam Nemet }
6510456327cSAdam Nemet 
6525f8f34e4SAdrian Prantl /// Check whether a pointer address cannot wrap.
6539f02c586SAndrey Turetskiy static bool isNoWrap(PredicatedScalarEvolution &PSE,
654ff31020eSArthur Eubanks                      const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
655ff31020eSArthur Eubanks                      Loop *L) {
6569f02c586SAndrey Turetskiy   const SCEV *PtrScev = PSE.getSCEV(Ptr);
6579f02c586SAndrey Turetskiy   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
6589f02c586SAndrey Turetskiy     return true;
6599f02c586SAndrey Turetskiy 
66045c46734SNikita Popov   int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
661ac920f77SSilviu Baranga   if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
662ac920f77SSilviu Baranga     return true;
663ac920f77SSilviu Baranga 
664ac920f77SSilviu Baranga   return false;
665ac920f77SSilviu Baranga }
666ac920f77SSilviu Baranga 
66773a05cc8SFlorian Hahn static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
66873a05cc8SFlorian Hahn                           function_ref<void(Value *)> AddPointer) {
66973a05cc8SFlorian Hahn   SmallPtrSet<Value *, 8> Visited;
67073a05cc8SFlorian Hahn   SmallVector<Value *> WorkList;
67173a05cc8SFlorian Hahn   WorkList.push_back(StartPtr);
67273a05cc8SFlorian Hahn 
67373a05cc8SFlorian Hahn   while (!WorkList.empty()) {
67473a05cc8SFlorian Hahn     Value *Ptr = WorkList.pop_back_val();
67573a05cc8SFlorian Hahn     if (!Visited.insert(Ptr).second)
67673a05cc8SFlorian Hahn       continue;
67773a05cc8SFlorian Hahn     auto *PN = dyn_cast<PHINode>(Ptr);
67873a05cc8SFlorian Hahn     // SCEV does not look through non-header PHIs inside the loop. Such phis
67973a05cc8SFlorian Hahn     // can be analyzed by adding separate accesses for each incoming pointer
68073a05cc8SFlorian Hahn     // value.
68173a05cc8SFlorian Hahn     if (PN && InnermostLoop.contains(PN->getParent()) &&
68273a05cc8SFlorian Hahn         PN->getParent() != InnermostLoop.getHeader()) {
68373a05cc8SFlorian Hahn       for (const Use &Inc : PN->incoming_values())
68473a05cc8SFlorian Hahn         WorkList.push_back(Inc);
68573a05cc8SFlorian Hahn     } else
68673a05cc8SFlorian Hahn       AddPointer(Ptr);
68773a05cc8SFlorian Hahn   }
68873a05cc8SFlorian Hahn }
68973a05cc8SFlorian Hahn 
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access, Type *AccessTy,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  // Without computable bounds for Ptr we cannot emit a runtime check.
  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
    return false;

  // When we run after a failing dependency check we have to make sure
  // we don't have wrapping pointers.
  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
    auto *Expr = PSE.getSCEV(Ptr);
    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
      return false;
    // With Assume, record an NUSW predicate so the AddRec is known not to
    // wrap under the collected SCEV assumptions.
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  }

  // The id of the dependence set.
  unsigned DepId;

  if (isDependencyCheckNeeded()) {
    // Accesses that may depend on each other share a set: use the id of the
    // equivalence-class leader, allocating a fresh id on first use
    // (DepSetId entries default to 0, which is never a valid id).
    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
    unsigned &LeaderId = DepSetId[Leader];
    if (!LeaderId)
      LeaderId = RunningDepId++;
    DepId = LeaderId;
  } else
    // Each access has its own dependence set.
    DepId = RunningDepId++;

  bool IsWrite = Access.getInt();
  RtCheck.insert(TheLoop, Ptr, AccessTy, IsWrite, DepId, ASId, StridesMap, PSE);
  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

  return true;
}
7309f02c586SAndrey Turetskiy 
7317cdebac0SAdam Nemet bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
7327cdebac0SAdam Nemet                                      ScalarEvolution *SE, Loop *TheLoop,
7337cdebac0SAdam Nemet                                      const ValueToValueMap &StridesMap,
7349f1c6fbfSMalhar Jajoo                                      Value *&UncomputablePtr, bool ShouldCheckWrap) {
7350456327cSAdam Nemet   // Find pointers with computable bounds. We are going to use this information
7360456327cSAdam Nemet   // to place a runtime bound check.
7370456327cSAdam Nemet   bool CanDoRT = true;
7380456327cSAdam Nemet 
7399b507b21SFlorian Hahn   bool MayNeedRTCheck = false;
7402d038982SFlorian Hahn   if (!IsRTCheckAnalysisNeeded) return true;
74198a13719SSilviu Baranga 
7420456327cSAdam Nemet   bool IsDepCheckNeeded = isDependencyCheckNeeded();
7430456327cSAdam Nemet 
7440456327cSAdam Nemet   // We assign a consecutive id to access from different alias sets.
7450456327cSAdam Nemet   // Accesses between different groups doesn't need to be checked.
7466176f044SFlorian Hahn   unsigned ASId = 0;
7470456327cSAdam Nemet   for (auto &AS : AST) {
748424edc6cSAdam Nemet     int NumReadPtrChecks = 0;
749424edc6cSAdam Nemet     int NumWritePtrChecks = 0;
750ac920f77SSilviu Baranga     bool CanDoAliasSetRT = true;
7516176f044SFlorian Hahn     ++ASId;
752424edc6cSAdam Nemet 
7530456327cSAdam Nemet     // We assign consecutive id to access from different dependence sets.
7540456327cSAdam Nemet     // Accesses within the same set don't need a runtime check.
7550456327cSAdam Nemet     unsigned RunningDepId = 1;
7560456327cSAdam Nemet     DenseMap<Value *, unsigned> DepSetId;
7570456327cSAdam Nemet 
758ac920f77SSilviu Baranga     SmallVector<MemAccessInfo, 4> Retries;
759ac920f77SSilviu Baranga 
7602062b370SFlorian Hahn     // First, count how many write and read accesses are in the alias set. Also
7612062b370SFlorian Hahn     // collect MemAccessInfos for later.
7622062b370SFlorian Hahn     SmallVector<MemAccessInfo, 4> AccessInfos;
76371b89b14SSimon Pilgrim     for (const auto &A : AS) {
7640456327cSAdam Nemet       Value *Ptr = A.getValue();
7650456327cSAdam Nemet       bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
7660456327cSAdam Nemet 
767424edc6cSAdam Nemet       if (IsWrite)
768424edc6cSAdam Nemet         ++NumWritePtrChecks;
769424edc6cSAdam Nemet       else
770424edc6cSAdam Nemet         ++NumReadPtrChecks;
7712062b370SFlorian Hahn       AccessInfos.emplace_back(Ptr, IsWrite);
7720456327cSAdam Nemet     }
7730456327cSAdam Nemet 
7742062b370SFlorian Hahn     // We do not need runtime checks for this alias set, if there are no writes
7752062b370SFlorian Hahn     // or a single write and no reads.
7762062b370SFlorian Hahn     if (NumWritePtrChecks == 0 ||
7772062b370SFlorian Hahn         (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
7786176f044SFlorian Hahn       assert((AS.size() <= 1 ||
7796176f044SFlorian Hahn               all_of(AS,
7806176f044SFlorian Hahn                      [this](auto AC) {
7816176f044SFlorian Hahn                        MemAccessInfo AccessWrite(AC.getValue(), true);
7822062b370SFlorian Hahn                        return DepCands.findValue(AccessWrite) == DepCands.end();
7836176f044SFlorian Hahn                      })) &&
7846176f044SFlorian Hahn              "Can only skip updating CanDoRT below, if all entries in AS "
7856176f044SFlorian Hahn              "are reads or there is at most 1 entry");
7866176f044SFlorian Hahn       continue;
7876176f044SFlorian Hahn     }
7882062b370SFlorian Hahn 
7892062b370SFlorian Hahn     for (auto &Access : AccessInfos) {
790ff31020eSArthur Eubanks       for (auto &AccessTy : Accesses[Access]) {
791ff31020eSArthur Eubanks         if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
792ff31020eSArthur Eubanks                                   DepSetId, TheLoop, RunningDepId, ASId,
793ff31020eSArthur Eubanks                                   ShouldCheckWrap, false)) {
7942062b370SFlorian Hahn           LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
7952062b370SFlorian Hahn                             << *Access.getPointer() << '\n');
7962062b370SFlorian Hahn           Retries.push_back(Access);
7972062b370SFlorian Hahn           CanDoAliasSetRT = false;
7986176f044SFlorian Hahn         }
7992062b370SFlorian Hahn       }
800ff31020eSArthur Eubanks     }
8012062b370SFlorian Hahn 
8022062b370SFlorian Hahn     // Note that this function computes CanDoRT and MayNeedRTCheck
8032062b370SFlorian Hahn     // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
8042062b370SFlorian Hahn     // we have a pointer for which we couldn't find the bounds but we don't
8052062b370SFlorian Hahn     // actually need to emit any checks so it does not matter.
8062062b370SFlorian Hahn     //
8072062b370SFlorian Hahn     // We need runtime checks for this alias set, if there are at least 2
8082062b370SFlorian Hahn     // dependence sets (in which case RunningDepId > 2) or if we need to re-try
8092062b370SFlorian Hahn     // any bound checks (because in that case the number of dependence sets is
8102062b370SFlorian Hahn     // incomplete).
8112062b370SFlorian Hahn     bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
812424edc6cSAdam Nemet 
813ac920f77SSilviu Baranga     // We need to perform run-time alias checks, but some pointers had bounds
814ac920f77SSilviu Baranga     // that couldn't be checked.
815ac920f77SSilviu Baranga     if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
816ac920f77SSilviu Baranga       // Reset the CanDoSetRt flag and retry all accesses that have failed.
817ac920f77SSilviu Baranga       // We know that we need these checks, so we can now be more aggressive
818ac920f77SSilviu Baranga       // and add further checks if required (overflow checks).
819ac920f77SSilviu Baranga       CanDoAliasSetRT = true;
820ff31020eSArthur Eubanks       for (auto Access : Retries) {
821ff31020eSArthur Eubanks         for (auto &AccessTy : Accesses[Access]) {
822ff31020eSArthur Eubanks           if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
823ff31020eSArthur Eubanks                                     DepSetId, TheLoop, RunningDepId, ASId,
824ac920f77SSilviu Baranga                                     ShouldCheckWrap, /*Assume=*/true)) {
825ac920f77SSilviu Baranga             CanDoAliasSetRT = false;
8269f1c6fbfSMalhar Jajoo             UncomputablePtr = Access.getPointer();
827ac920f77SSilviu Baranga             break;
828ac920f77SSilviu Baranga           }
829ac920f77SSilviu Baranga         }
830ff31020eSArthur Eubanks       }
831ff31020eSArthur Eubanks     }
832ac920f77SSilviu Baranga 
833ac920f77SSilviu Baranga     CanDoRT &= CanDoAliasSetRT;
8349b507b21SFlorian Hahn     MayNeedRTCheck |= NeedsAliasSetRTCheck;
8350456327cSAdam Nemet     ++ASId;
8360456327cSAdam Nemet   }
8370456327cSAdam Nemet 
8380456327cSAdam Nemet   // If the pointers that we would use for the bounds comparison have different
8390456327cSAdam Nemet   // address spaces, assume the values aren't directly comparable, so we can't
8400456327cSAdam Nemet   // use them for the runtime check. We also have to assume they could
8410456327cSAdam Nemet   // overlap. In the future there should be metadata for whether address spaces
8420456327cSAdam Nemet   // are disjoint.
8430456327cSAdam Nemet   unsigned NumPointers = RtCheck.Pointers.size();
8440456327cSAdam Nemet   for (unsigned i = 0; i < NumPointers; ++i) {
8450456327cSAdam Nemet     for (unsigned j = i + 1; j < NumPointers; ++j) {
8460456327cSAdam Nemet       // Only need to check pointers between two different dependency sets.
8479f7dedc3SAdam Nemet       if (RtCheck.Pointers[i].DependencySetId ==
8489f7dedc3SAdam Nemet           RtCheck.Pointers[j].DependencySetId)
8490456327cSAdam Nemet        continue;
8500456327cSAdam Nemet       // Only need to check pointers in the same alias set.
8519f7dedc3SAdam Nemet       if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
8520456327cSAdam Nemet         continue;
8530456327cSAdam Nemet 
8549f7dedc3SAdam Nemet       Value *PtrI = RtCheck.Pointers[i].PointerValue;
8559f7dedc3SAdam Nemet       Value *PtrJ = RtCheck.Pointers[j].PointerValue;
8560456327cSAdam Nemet 
8570456327cSAdam Nemet       unsigned ASi = PtrI->getType()->getPointerAddressSpace();
8580456327cSAdam Nemet       unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
8590456327cSAdam Nemet       if (ASi != ASj) {
860d34e60caSNicola Zaghen         LLVM_DEBUG(
861d34e60caSNicola Zaghen             dbgs() << "LAA: Runtime check would require comparison between"
8620456327cSAdam Nemet                       " different address spaces\n");
8630456327cSAdam Nemet         return false;
8640456327cSAdam Nemet       }
8650456327cSAdam Nemet     }
8660456327cSAdam Nemet   }
8670456327cSAdam Nemet 
8689b507b21SFlorian Hahn   if (MayNeedRTCheck && CanDoRT)
86915840393SAdam Nemet     RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
8701b6b50a9SSilviu Baranga 
871d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
872ee61474aSAdam Nemet                     << " pointer comparisons.\n");
873ee61474aSAdam Nemet 
8749b507b21SFlorian Hahn   // If we can do run-time checks, but there are no checks, no runtime checks
8759b507b21SFlorian Hahn   // are needed. This can happen when all pointers point to the same underlying
8769b507b21SFlorian Hahn   // object for example.
8779b507b21SFlorian Hahn   RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;
878ee61474aSAdam Nemet 
8799b507b21SFlorian Hahn   bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
880ee61474aSAdam Nemet   if (!CanDoRTIfNeeded)
881ee61474aSAdam Nemet     RtCheck.reset();
882ee61474aSAdam Nemet   return CanDoRTIfNeeded;
8830456327cSAdam Nemet }
8840456327cSAdam Nemet 
void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.first.getPointer() << " ("
             << (A.first.getInt()
                     ? "write"
                     : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
                                                                : "read"))
             << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (const auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves used
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    // Whether a write to any pointer in this alias set has been seen yet.
    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of access to check after all writes have been processed.
    PtrAccessMap DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      // Second iteration draws from the deferred (read-only) accesses only.
      PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;

      for (const auto &AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (const auto &AC : S) {
          if (AC.first.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.first.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          // Register this access as a dependence-check candidate; accesses
          // sharing an underlying object get unioned into one set below.
          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointer that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            // We only use the pointer keys, the types vector values don't
            // matter.
            DeferredAccesses.insert({Access, {}});
            continue;
          }

          // If this is a write - check other reads and writes for conflicts. If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<const Value *, 16> ValueVector;
          ValueVector TempObjects;

          getUnderlyingObjects(Ptr, TempObjects, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (const Value *UnderlyingObj : TempObjects) {
            // nullptr never alias, don't join sets for pointer that have "null"
            // in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            // Union with the previous access to the same underlying object
            // (if any), then remember this access as the most recent one.
            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}
10060456327cSAdam Nemet 
10070456327cSAdam Nemet static bool isInBoundsGep(Value *Ptr) {
10080456327cSAdam Nemet   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
10090456327cSAdam Nemet     return GEP->isInBounds();
10100456327cSAdam Nemet   return false;
10110456327cSAdam Nemet }
10120456327cSAdam Nemet 
10135f8f34e4SAdrian Prantl /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
1014c4866d29SAdam Nemet /// i.e. monotonically increasing/decreasing.
1015c4866d29SAdam Nemet static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
1016ea63a7f5SSilviu Baranga                            PredicatedScalarEvolution &PSE, const Loop *L) {
1017c4866d29SAdam Nemet   // FIXME: This should probably only return true for NUW.
1018c4866d29SAdam Nemet   if (AR->getNoWrapFlags(SCEV::NoWrapMask))
1019c4866d29SAdam Nemet     return true;
1020c4866d29SAdam Nemet 
1021c4866d29SAdam Nemet   // Scalar evolution does not propagate the non-wrapping flags to values that
1022c4866d29SAdam Nemet   // are derived from a non-wrapping induction variable because non-wrapping
1023c4866d29SAdam Nemet   // could be flow-sensitive.
1024c4866d29SAdam Nemet   //
1025c4866d29SAdam Nemet   // Look through the potentially overflowing instruction to try to prove
1026c4866d29SAdam Nemet   // non-wrapping for the *specific* value of Ptr.
1027c4866d29SAdam Nemet 
1028c4866d29SAdam Nemet   // The arithmetic implied by an inbounds GEP can't overflow.
1029c4866d29SAdam Nemet   auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1030c4866d29SAdam Nemet   if (!GEP || !GEP->isInBounds())
1031c4866d29SAdam Nemet     return false;
1032c4866d29SAdam Nemet 
1033c4866d29SAdam Nemet   // Make sure there is only one non-const index and analyze that.
1034c4866d29SAdam Nemet   Value *NonConstIndex = nullptr;
10356a6e3821SKazu Hirata   for (Value *Index : GEP->indices())
10368b401013SDavid Majnemer     if (!isa<ConstantInt>(Index)) {
1037c4866d29SAdam Nemet       if (NonConstIndex)
1038c4866d29SAdam Nemet         return false;
10398b401013SDavid Majnemer       NonConstIndex = Index;
1040c4866d29SAdam Nemet     }
1041c4866d29SAdam Nemet   if (!NonConstIndex)
1042c4866d29SAdam Nemet     // The recurrence is on the pointer, ignore for now.
1043c4866d29SAdam Nemet     return false;
1044c4866d29SAdam Nemet 
1045c4866d29SAdam Nemet   // The index in GEP is signed.  It is non-wrapping if it's derived from a NSW
1046c4866d29SAdam Nemet   // AddRec using a NSW operation.
1047c4866d29SAdam Nemet   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
1048c4866d29SAdam Nemet     if (OBO->hasNoSignedWrap() &&
1049c4866d29SAdam Nemet         // Assume constant for other the operand so that the AddRec can be
1050c4866d29SAdam Nemet         // easily found.
1051c4866d29SAdam Nemet         isa<ConstantInt>(OBO->getOperand(1))) {
1052ea63a7f5SSilviu Baranga       auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
1053c4866d29SAdam Nemet 
1054c4866d29SAdam Nemet       if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
1055c4866d29SAdam Nemet         return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
1056c4866d29SAdam Nemet     }
1057c4866d29SAdam Nemet 
1058c4866d29SAdam Nemet   return false;
1059c4866d29SAdam Nemet }
1060c4866d29SAdam Nemet 
/// Check whether the access through \p Ptr has a constant stride.
///
/// \param PSE          predicated SCEV context; may receive extra no-overflow
///                     predicates when \p Assume is set.
/// \param AccessTy     the type of the accessed element (used for its alloc
///                     size when converting the byte step into a stride).
/// \param Ptr          the pointer operand of the access.
/// \param Lp           the loop the access must stride over.
/// \param StridesMap   symbolic strides to replace before SCEV analysis.
/// \param Assume       if true, add run-time overflow predicates via PSE
///                     instead of bailing out on possible wrapping.
/// \param ShouldCheckWrap if false, skip all wrap checking.
///
/// \returns the stride measured in elements of \p AccessTy (may be negative),
/// or 0 if no constant stride could be established.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
                           Value *Ptr, const Loop *Lp,
                           const ValueToValueMap &StridesMap, bool Assume,
                           bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Scalable types have no compile-time fixed size, so a constant element
  // stride cannot be computed for them.
  if (isa<ScalableVectorType>(AccessTy)) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
                      << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  // With Assume, PSE may be able to turn the SCEV into an AddRec by adding
  // run-time predicates.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                      << *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is a AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // An getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  unsigned AddrSpace = Ty->getPointerAddressSpace();
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = !ShouldCheckWrap ||
    PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
    isNoWrapAddRec(Ptr, AR, PSE, Lp);
  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
      NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
    if (Assume) {
      // Instead of giving up, record a run-time no-overflow predicate.
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
    } else {
      LLVM_DEBUG(
          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
                      << " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
  int64_t Size = AllocSize.getFixedSize();
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.  The byte step must divide evenly into elements,
  // otherwise there is no whole-element stride.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
                                              AddrSpace))) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                        << "inbounds or in address space 0 may wrap:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}
11710456327cSAdam Nemet 
/// Compute the distance, in elements of \p ElemTyA, from \p PtrA to \p PtrB.
///
/// The distance is computed either by stripping constant in-bounds offsets
/// from a common base, or — if the bases differ — by asking SCEV for the
/// constant difference of the two pointer SCEVs.
///
/// \param StrictCheck  if true, only return a result when the byte distance
///                     is an exact multiple of the element store size.
/// \param CheckType    if true, require \p ElemTyA == \p ElemTyB.
///
/// \returns the element distance (PtrB - PtrA), or None if it cannot be
/// determined or fails the requested checks.  Identical pointers yield 0.
Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
                                    Value *PtrB, const DataLayout &DL,
                                    ScalarEvolution &SE, bool StrictCheck,
                                    bool CheckType) {
  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
  assert(cast<PointerType>(PtrA->getType())
             ->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
  assert(cast<PointerType>(PtrB->getType())
             ->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return 0;

  // Make sure that the element types are the same if required.
  if (CheckType && ElemTyA != ElemTyB)
    return None;

  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
  unsigned ASB = PtrB->getType()->getPointerAddressSpace();

  // Check that the address spaces match.
  if (ASA != ASB)
    return None;
  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);

  // Strip constant in-bounds offsets, accumulating them per pointer.
  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  int Val;
  if (PtrA1 == PtrB1) {
    // Same base: the byte distance is just the difference of the offsets.
    // Retrieve the address space again as pointer stripping now tracks through
    // `addrspacecast`.
    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
    // Check that the address spaces match and that the pointers are valid.
    if (ASA != ASB)
      return None;

    IdxWidth = DL.getIndexSizeInBits(ASA);
    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
    OffsetB = OffsetB.sextOrTrunc(IdxWidth);

    OffsetB -= OffsetA;
    Val = OffsetB.getSExtValue();
  } else {
    // Otherwise compute the distance with SCEV between the base pointers.
    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
    const auto *Diff =
        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
    if (!Diff)
      return None;
    Val = Diff->getAPInt().getSExtValue();
  }
  // Convert the byte distance into an element distance.
  int Size = DL.getTypeStoreSize(ElemTyA);
  int Dist = Val / Size;

  // Ensure that the calculated distance matches the type-based one after all
  // the bitcasts removal in the provided pointers.
  if (!StrictCheck || Dist * Size == Val)
    return Dist;
  return None;
}
123799203f20SAlexey Bataev 
/// Try to establish a sorted-by-address order for the pointers in \p VL.
///
/// Each pointer's offset is computed relative to VL[0] via getPointersDiff.
/// On success \p SortedIndices holds the permutation that sorts the pointers
/// by increasing offset — or is left empty when the pointers are already in
/// consecutive (sorted) order.
///
/// \returns false if any offset cannot be computed or two pointers have the
/// same offset; true otherwise.
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
                           const DataLayout &DL, ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {
  assert(llvm::all_of(
             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
         "Expected list of pointer operands.");
  // Walk over the pointers, and map each of them to an offset relative to
  // first pointer in the array.
  Value *Ptr0 = VL[0];

  // Each entry pairs an offset with the original index of the pointer; the
  // std::set keeps entries sorted by offset.
  using DistOrdPair = std::pair<int64_t, int>;
  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
    return L.first < R.first;
  };
  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
  Offsets.emplace(0, 0);
  int Cnt = 1;
  bool IsConsecutive = true;
  for (auto *Ptr : VL.drop_front()) {
    Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
                                         /*StrictCheck=*/true);
    if (!Diff)
      return false;

    // Check if the pointer with the same offset is found.
    int64_t Offset = *Diff;
    auto Res = Offsets.emplace(Offset, Cnt);
    if (!Res.second)
      return false;
    // Consecutive order if the inserted element is the last one.
    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
    ++Cnt;
  }
  SortedIndices.clear();
  if (!IsConsecutive) {
    // Fill SortedIndices array only if it is non-consecutive.
    SortedIndices.resize(VL.size());
    Cnt = 0;
    for (const std::pair<int64_t, int> &Pair : Offsets) {
      SortedIndices[Cnt] = Pair.second;
      ++Cnt;
    }
  }
  return true;
}
1283f1b47ad2SAlexey Bataev 
1284f1c00a22SHaicheng Wu /// Returns true if the memory operations \p A and \p B are consecutive.
1285f1c00a22SHaicheng Wu bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1286f1c00a22SHaicheng Wu                                ScalarEvolution &SE, bool CheckType) {
1287038ede2aSRenato Golin   Value *PtrA = getLoadStorePointerOperand(A);
1288038ede2aSRenato Golin   Value *PtrB = getLoadStorePointerOperand(B);
128999203f20SAlexey Bataev   if (!PtrA || !PtrB)
1290f1c00a22SHaicheng Wu     return false;
129100d3f7ccSNikita Popov   Type *ElemTyA = getLoadStoreType(A);
129200d3f7ccSNikita Popov   Type *ElemTyB = getLoadStoreType(B);
129300d3f7ccSNikita Popov   Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
129400d3f7ccSNikita Popov                                        /*StrictCheck=*/true, CheckType);
129599203f20SAlexey Bataev   return Diff && *Diff == 1;
1296f1c00a22SHaicheng Wu }
1297f1c00a22SHaicheng Wu 
1298e248d690SFlorian Hahn void MemoryDepChecker::addAccess(StoreInst *SI) {
1299e248d690SFlorian Hahn   visitPointers(SI->getPointerOperand(), *InnermostLoop,
1300e248d690SFlorian Hahn                 [this, SI](Value *Ptr) {
1301e248d690SFlorian Hahn                   Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1302e248d690SFlorian Hahn                   InstMap.push_back(SI);
1303e248d690SFlorian Hahn                   ++AccessIdx;
1304e248d690SFlorian Hahn                 });
1305e248d690SFlorian Hahn }
1306e248d690SFlorian Hahn 
1307e248d690SFlorian Hahn void MemoryDepChecker::addAccess(LoadInst *LI) {
1308e248d690SFlorian Hahn   visitPointers(LI->getPointerOperand(), *InnermostLoop,
1309e248d690SFlorian Hahn                 [this, LI](Value *Ptr) {
1310e248d690SFlorian Hahn                   Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1311e248d690SFlorian Hahn                   InstMap.push_back(LI);
1312e248d690SFlorian Hahn                   ++AccessIdx;
1313e248d690SFlorian Hahn                 });
1314e248d690SFlorian Hahn }
1315e248d690SFlorian Hahn 
1316485f2826SFlorian Hahn MemoryDepChecker::VectorizationSafetyStatus
1317485f2826SFlorian Hahn MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
13189c926579SAdam Nemet   switch (Type) {
13199c926579SAdam Nemet   case NoDep:
13209c926579SAdam Nemet   case Forward:
13219c926579SAdam Nemet   case BackwardVectorizable:
1322485f2826SFlorian Hahn     return VectorizationSafetyStatus::Safe;
13239c926579SAdam Nemet 
13249c926579SAdam Nemet   case Unknown:
1325ef307b8cSFlorian Hahn     return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
13269c926579SAdam Nemet   case ForwardButPreventsForwarding:
13279c926579SAdam Nemet   case Backward:
13289c926579SAdam Nemet   case BackwardVectorizableButPreventsForwarding:
1329485f2826SFlorian Hahn     return VectorizationSafetyStatus::Unsafe;
13309c926579SAdam Nemet   }
1331d388e930SDavid Majnemer   llvm_unreachable("unexpected DepType!");
13329c926579SAdam Nemet }
13339c926579SAdam Nemet 
1334397f5829SAdam Nemet bool MemoryDepChecker::Dependence::isBackward() const {
13359c926579SAdam Nemet   switch (Type) {
13369c926579SAdam Nemet   case NoDep:
13379c926579SAdam Nemet   case Forward:
13389c926579SAdam Nemet   case ForwardButPreventsForwarding:
1339397f5829SAdam Nemet   case Unknown:
13409c926579SAdam Nemet     return false;
13419c926579SAdam Nemet 
13429c926579SAdam Nemet   case BackwardVectorizable:
13439c926579SAdam Nemet   case Backward:
13449c926579SAdam Nemet   case BackwardVectorizableButPreventsForwarding:
13459c926579SAdam Nemet     return true;
13469c926579SAdam Nemet   }
1347d388e930SDavid Majnemer   llvm_unreachable("unexpected DepType!");
13489c926579SAdam Nemet }
13499c926579SAdam Nemet 
1350397f5829SAdam Nemet bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1351397f5829SAdam Nemet   return isBackward() || Type == Unknown;
1352397f5829SAdam Nemet }
1353397f5829SAdam Nemet 
1354397f5829SAdam Nemet bool MemoryDepChecker::Dependence::isForward() const {
1355397f5829SAdam Nemet   switch (Type) {
1356397f5829SAdam Nemet   case Forward:
1357397f5829SAdam Nemet   case ForwardButPreventsForwarding:
1358397f5829SAdam Nemet     return true;
1359397f5829SAdam Nemet 
1360397f5829SAdam Nemet   case NoDep:
1361397f5829SAdam Nemet   case Unknown:
1362397f5829SAdam Nemet   case BackwardVectorizable:
1363397f5829SAdam Nemet   case Backward:
1364397f5829SAdam Nemet   case BackwardVectorizableButPreventsForwarding:
1365397f5829SAdam Nemet     return false;
1366397f5829SAdam Nemet   }
1367397f5829SAdam Nemet   llvm_unreachable("unexpected DepType!");
1368397f5829SAdam Nemet }
1369397f5829SAdam Nemet 
/// Return true if a store/load pair at byte distance \p Distance (element
/// size \p TypeByteSize) could defeat store-to-load forwarding when
/// vectorized.  As a side effect, may lower MaxSafeDepDistBytes to the
/// largest vector-width-in-bytes that avoids the conflict.
bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  // VF is measured in bytes here and doubles each step (power-of-two widths).
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iteration between the store and the load are
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      // Largest conflict-free VF is one halving below the current one.
      MaxVFWithoutSLForwardIssues = (VF >> 1);
      break;
    }
  }

  // No VF of at least two elements works: report a forwarding conflict.
  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    LLVM_DEBUG(
        dbgs() << "LAA: Distance " << Distance
               << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  // Tighten the safe dependence distance unless the loop above never found a
  // conflict (i.e. the value is still the untouched maximum).
  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}
14130456327cSAdam Nemet 
1414485f2826SFlorian Hahn void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1415485f2826SFlorian Hahn   if (Status < S)
1416485f2826SFlorian Hahn     Status = S;
1417485f2826SFlorian Hahn }
1418485f2826SFlorian Hahn 
1419eac89d73SDorit Nuzman /// Given a non-constant (unknown) dependence-distance \p Dist between two
1420eac89d73SDorit Nuzman /// memory accesses, that have the same stride whose absolute value is given
1421eac89d73SDorit Nuzman /// in \p Stride, and that have the same type size \p TypeByteSize,
1422eac89d73SDorit Nuzman /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
1423eac89d73SDorit Nuzman /// possible to prove statically that the dependence distance is larger
1424eac89d73SDorit Nuzman /// than the range that the accesses will travel through the execution of
1425eac89d73SDorit Nuzman /// the loop. If so, return true; false otherwise. This is useful for
1426eac89d73SDorit Nuzman /// example in loops such as the following (PR31098):
1427eac89d73SDorit Nuzman ///     for (i = 0; i < D; ++i) {
1428eac89d73SDorit Nuzman ///                = out[i];
1429eac89d73SDorit Nuzman ///       out[i+D] =
1430eac89d73SDorit Nuzman ///     }
1431eac89d73SDorit Nuzman static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1432eac89d73SDorit Nuzman                                      const SCEV &BackedgeTakenCount,
1433eac89d73SDorit Nuzman                                      const SCEV &Dist, uint64_t Stride,
1434eac89d73SDorit Nuzman                                      uint64_t TypeByteSize) {
1435eac89d73SDorit Nuzman 
1436eac89d73SDorit Nuzman   // If we can prove that
1437eac89d73SDorit Nuzman   //      (**) |Dist| > BackedgeTakenCount * Step
1438eac89d73SDorit Nuzman   // where Step is the absolute stride of the memory accesses in bytes,
1439eac89d73SDorit Nuzman   // then there is no dependence.
1440eac89d73SDorit Nuzman   //
1441c437f310SHiroshi Inoue   // Rationale:
1442eac89d73SDorit Nuzman   // We basically want to check if the absolute distance (|Dist/Step|)
1443eac89d73SDorit Nuzman   // is >= the loop iteration count (or > BackedgeTakenCount).
1444eac89d73SDorit Nuzman   // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1445eac89d73SDorit Nuzman   // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1446eac89d73SDorit Nuzman   // that the dependence distance is >= VF; This is checked elsewhere.
1447eac89d73SDorit Nuzman   // But in some cases we can prune unknown dependence distances early, and
1448eac89d73SDorit Nuzman   // even before selecting the VF, and without a runtime test, by comparing
1449eac89d73SDorit Nuzman   // the distance against the loop iteration count. Since the vectorized code
1450eac89d73SDorit Nuzman   // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1451eac89d73SDorit Nuzman   // also guarantees that distance >= VF.
1452eac89d73SDorit Nuzman   //
1453eac89d73SDorit Nuzman   const uint64_t ByteStride = Stride * TypeByteSize;
1454eac89d73SDorit Nuzman   const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1455eac89d73SDorit Nuzman   const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1456eac89d73SDorit Nuzman 
1457eac89d73SDorit Nuzman   const SCEV *CastedDist = &Dist;
1458eac89d73SDorit Nuzman   const SCEV *CastedProduct = Product;
14597ee30a0eSChang-Sun Lin Jr   uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
14607ee30a0eSChang-Sun Lin Jr   uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
1461eac89d73SDorit Nuzman 
1462eac89d73SDorit Nuzman   // The dependence distance can be positive/negative, so we sign extend Dist;
1463eac89d73SDorit Nuzman   // The multiplication of the absolute stride in bytes and the
1464c437f310SHiroshi Inoue   // backedgeTakenCount is non-negative, so we zero extend Product.
14657ee30a0eSChang-Sun Lin Jr   if (DistTypeSizeBits > ProductTypeSizeBits)
1466eac89d73SDorit Nuzman     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1467eac89d73SDorit Nuzman   else
1468eac89d73SDorit Nuzman     CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1469eac89d73SDorit Nuzman 
1470eac89d73SDorit Nuzman   // Is  Dist - (BackedgeTakenCount * Step) > 0 ?
1471eac89d73SDorit Nuzman   // (If so, then we have proven (**) because |Dist| >= Dist)
1472eac89d73SDorit Nuzman   const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1473eac89d73SDorit Nuzman   if (SE.isKnownPositive(Minus))
1474eac89d73SDorit Nuzman     return true;
1475eac89d73SDorit Nuzman 
1476eac89d73SDorit Nuzman   // Second try: Is  -Dist - (BackedgeTakenCount * Step) > 0 ?
1477eac89d73SDorit Nuzman   // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1478eac89d73SDorit Nuzman   const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1479eac89d73SDorit Nuzman   Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1480eac89d73SDorit Nuzman   if (SE.isKnownPositive(Minus))
1481eac89d73SDorit Nuzman     return true;
1482eac89d73SDorit Nuzman 
1483eac89d73SDorit Nuzman   return false;
1484eac89d73SDorit Nuzman }
1485eac89d73SDorit Nuzman 
/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance in bytes, and \p TypeByteSize is the
/// type size in bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not a multiple of the type byte size: the two
  // access streams are then misaligned relative to each other and we cannot
  // reason about them element-wise here.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not a multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  //
  // A non-zero remainder means the two access streams never touch the same
  // element; make the conversion to bool explicit rather than implicit.
  return ScaledDist % Stride != 0;
}
1521751004a6SHao Liu 
/// Classify the dependence between access \p A (instruction index \p AIdx in
/// InstMap) and access \p B (index \p BIdx).  Each MemAccessInfo is a
/// (pointer, is-write) pair; the indices must be in program order.
/// \p Strides is the symbolic-stride map forwarded to getPtrStride.
///
/// Side effects: for backward-vectorizable dependences this tightens
/// MaxSafeDepDistBytes and MaxSafeVectorWidthInBits, and it sets
/// FoundNonConstantDistanceDependence when the dependence distance is not a
/// compile-time constant.
MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert (AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();
  Type *ATy = getLoadStoreType(InstMap[AIdx]);
  Type *BTy = getLoadStoreType(InstMap[BIdx]);

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  int64_t StrideAPtr =
      getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
  int64_t StrideBPtr =
      getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);

  // If the induction step is negative we have to invert source and sink of the
  // dependence.  After the swap the remaining logic can assume a non-negative
  // stride for A.
  if (StrideAPtr < 0) {
    std::swap(APtr, BPtr);
    std::swap(ATy, BTy);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  // The dependence distance in address units: Sink - Src.
  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);

  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
                    << "(Induction step: " << StrideAPtr << ")\n");
  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
                    << *InstMap[BIdx] << ": " << *Dist << "\n");

  // Need accesses with constant stride. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
  // the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
    LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return Dependence::Unknown;
  }

  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
  // Accesses of different store sizes cannot be reasoned about with a single
  // element-granular distance below.
  bool HasSameSize =
      DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
  uint64_t Stride = std::abs(StrideAPtr);
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    // The distance is not a compile-time constant.  We may still be able to
    // prove independence using the backedge-taken count; otherwise record the
    // fact and give up on this pair.
    if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
        isSafeDependenceDistance(DL, *(PSE.getSE()),
                                 *(PSE.getBackedgeTakenCount()), *Dist, Stride,
                                 TypeByteSize))
      return Dependence::NoDep;

    LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    FoundNonConstantDistanceDependence = true;
    return Dependence::Unknown;
  }

  const APInt &Val = C->getAPInt();
  int64_t Distance = Val.getSExtValue();

  // Attempt to prove strided accesses independent.
  if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
      areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
    LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
    return Dependence::NoDep;
  }

  // Negative distances are not plausible dependencies.
  if (Val.isNegative()) {
    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    // A forward dependence is benign for vectorization, but a small forward
    // distance (or mismatched sizes) may defeat store-to-load forwarding in
    // hardware, which is a performance hazard worth reporting separately.
    if (IsTrueDataDependence && EnableForwardingConflictDetection &&
        (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
         !HasSameSize)) {
      LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
      return Dependence::ForwardButPreventsForwarding;
    }

    LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
    return Dependence::Forward;
  }

  // Write to the same location with the same size.
  if (Val == 0) {
    if (HasSameSize)
      return Dependence::Forward;
    LLVM_DEBUG(
        dbgs() << "LAA: Zero dependence difference but different type sizes\n");
    return Dependence::Unknown;
  }

  assert(Val.isStrictlyPositive() && "Expect a positive value");

  if (!HasSameSize) {
    LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
                         "different type sizes\n");
    return Dependence::Unknown;
  }

  // Bail out early if passed-in parameters make vectorization not feasible.
  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                           VectorizerParams::VectorizationFactor : 1);
  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
                           VectorizerParams::VectorizationInterleave : 1);
  // The minimum number of iterations for a vectorized/unrolled version.
  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);

  // It's not vectorizable if the distance is smaller than the minimum distance
  // needed for a vectorized/unrolled version. Vectorizing one iteration in
  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
  // TypeByteSize (no need to add the last gap distance).
  //
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      foo(int *A) {
  //        int *B = (int *)((char *)A + 14);
  //        for (i = 0 ; i < 1024 ; i += 2)
  //          B[i] = A[i] + 1;
  //      }
  //
  // Two accesses in memory (stride is 2):
  //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
  //                              | B[0] |      | B[2] |      | B[4] |
  //
  // Distance needed for vectorizing iterations except the last iteration:
  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
  //
  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
  // 12, which is less than the distance (14).
  //
  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
  // the minimum distance needed is 28, which is greater than the distance. It
  // is not safe to do vectorization.
  uint64_t MinDistanceNeeded =
      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
    LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
                      << Distance << '\n');
    return Dependence::Backward;
  }

  // Unsafe if the minimum distance needed is greater than max safe distance.
  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
    LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
                      << MinDistanceNeeded << " size in bytes");
    return Dependence::Backward;
  }

  // Positive distance bigger than max vectorization factor.
  // FIXME: Should use max factor instead of max distance in bytes, which could
  // not handle different types.
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      void foo (int *A, char *B) {
  //        for (unsigned i = 0; i < 1024; i++) {
  //          A[i+2] = A[i] + 1;
  //          B[i+2] = B[i] + 1;
  //        }
  //      }
  //
  // This case is currently unsafe according to the max safe distance. If we
  // analyze the two accesses on array B, the max safe dependence distance
  // is 2. Then we analyze the accesses on array A, whose minimum distance
  // needed is 8; that exceeds the max safe distance of 2 and so forbids
  // vectorization. But actually both A and B could be vectorized by
  // 2 iterations.
  MaxSafeDepDistBytes =
      std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);

  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
      couldPreventStoreLoadForward(Distance, TypeByteSize))
    return Dependence::BackwardVectorizableButPreventsForwarding;

  // Translate the byte distance limit into a vector-width limit for this
  // element type and stride.
  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
                    << " with max VF = " << MaxVF << '\n');
  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
  MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
  return Dependence::BackwardVectorizable;
}
17170456327cSAdam Nemet 
/// Check all access pairs reachable from \p CheckDeps for unsafe dependences,
/// walking each may-alias equivalence class in \p AccessSets.
///
/// \returns true if every dependence found is safe for vectorization (as
/// accumulated via mergeInStatus).  While RecordDependences is set, each
/// non-trivial dependence is appended to Dependences, capped at
/// MaxDependences to bound this quadratic walk.
bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                   MemAccessInfoList &CheckDeps,
                                   const ValueToValueMap &Strides) {

  // -1 on the unsigned field means "no byte-distance limit found yet";
  // isDependent tightens it as constant-distance dependences are seen.
  MaxSafeDepDistBytes = -1;
  SmallPtrSet<MemAccessInfo, 8> Visited;
  for (MemAccessInfo CurAccess : CheckDeps) {
    // Each equivalence class is processed once, when its first member is
    // encountered.
    if (Visited.count(CurAccess))
      continue;

    // Get the relevant memory access set.
    EquivalenceClasses<MemAccessInfo>::iterator I =
      AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));

    // Check accesses within this set.
    EquivalenceClasses<MemAccessInfo>::member_iterator AI =
        AccessSets.member_begin(I);
    EquivalenceClasses<MemAccessInfo>::member_iterator AE =
        AccessSets.member_end();

    // Check every access pair.
    while (AI != AE) {
      Visited.insert(*AI);
      bool AIIsWrite = AI->getInt();
      // Check loads only against next equivalent class, but stores also against
      // other stores in the same equivalence class - to the same address.
      EquivalenceClasses<MemAccessInfo>::member_iterator OI =
          (AIIsWrite ? AI : std::next(AI));
      while (OI != AE) {
        // Check every accessing instruction pair in program order.
        for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
             I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
          // Scan all accesses of another equivalence class, but only the next
          // accesses of the same equivalent class.
          for (std::vector<unsigned>::iterator
                   I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
                   I2E = (OI == AI ? I1E : Accesses[*OI].end());
               I2 != I2E; ++I2) {
            auto A = std::make_pair(&*AI, *I1);
            auto B = std::make_pair(&*OI, *I2);

            // isDependent requires its arguments in program order; swap the
            // pair if the instruction indices say otherwise.
            assert(*I1 != *I2);
            if (*I1 > *I2)
              std::swap(A, B);

            Dependence::DepType Type =
                isDependent(*A.first, A.second, *B.first, B.second, Strides);
            mergeInStatus(Dependence::isSafeForVectorization(Type));

            // Gather dependences unless we accumulated MaxDependences
            // dependences.  In that case return as soon as we find the first
            // unsafe dependence.  This puts a limit on this quadratic
            // algorithm.
            if (RecordDependences) {
              if (Type != Dependence::NoDep)
                Dependences.push_back(Dependence(A.second, B.second, Type));

              if (Dependences.size() >= MaxDependences) {
                RecordDependences = false;
                Dependences.clear();
                LLVM_DEBUG(dbgs()
                           << "Too many dependences, stopped recording\n");
              }
            }
            // Once recording is off, an unsafe status means no later pair can
            // change the outcome, so bail out immediately.
            if (!RecordDependences && !isSafeForVectorization())
              return false;
          }
        ++OI;
      }
      AI++;
    }
  }

  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
  return isSafeForVectorization();
}
17940456327cSAdam Nemet 
1795ec1e2bb6SAdam Nemet SmallVector<Instruction *, 4>
1796ec1e2bb6SAdam Nemet MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1797ec1e2bb6SAdam Nemet   MemAccessInfo Access(Ptr, isWrite);
1798ec1e2bb6SAdam Nemet   auto &IndexVector = Accesses.find(Access)->second;
1799ec1e2bb6SAdam Nemet 
1800ec1e2bb6SAdam Nemet   SmallVector<Instruction *, 4> Insts;
18012d006e76SDavid Majnemer   transform(IndexVector,
1802ec1e2bb6SAdam Nemet                  std::back_inserter(Insts),
1803ec1e2bb6SAdam Nemet                  [&](unsigned Idx) { return this->InstMap[Idx]; });
1804ec1e2bb6SAdam Nemet   return Insts;
1805ec1e2bb6SAdam Nemet }
1806ec1e2bb6SAdam Nemet 
// Printable names for Dependence::DepType values, indexed directly by the
// enum (used by Dependence::print); keep the order in sync with DepType.
const char *MemoryDepChecker::Dependence::DepName[] = {
    "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
    "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
181058913d65SAdam Nemet 
181158913d65SAdam Nemet void MemoryDepChecker::Dependence::print(
181258913d65SAdam Nemet     raw_ostream &OS, unsigned Depth,
181358913d65SAdam Nemet     const SmallVectorImpl<Instruction *> &Instrs) const {
181458913d65SAdam Nemet   OS.indent(Depth) << DepName[Type] << ":\n";
181558913d65SAdam Nemet   OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
181658913d65SAdam Nemet   OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
181758913d65SAdam Nemet }
181858913d65SAdam Nemet 
/// Check that the loop has a shape this analysis can handle: it must be an
/// innermost loop with a single backedge whose backedge-taken count
/// ScalarEvolution can compute.
///
/// \returns true if analysis can proceed; on failure, emits a remark via
/// recordAnalysis() naming the reason the loop was rejected.
bool LoopAccessInfo::canAnalyzeLoop() {
  // Report which loop is being analyzed (function : header block).
  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
                    << TheLoop->getHeader()->getParent()->getName() << ": "
                    << TheLoop->getHeader()->getName() << '\n');

  // We can only analyze innermost loops.
  if (!TheLoop->isInnermost()) {
    LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
    recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
    return false;
  }

  // We must have a single backedge.
  if (TheLoop->getNumBackEdges() != 1) {
    LLVM_DEBUG(
        dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    recordAnalysis("CFGNotUnderstood")
        << "loop control flow is not understood by analyzer";
    return false;
  }

  // ScalarEvolution needs to be able to find the exit count.
  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
  if (isa<SCEVCouldNotCompute>(ExitCount)) {
    recordAnalysis("CantComputeNumberOfIterations")
        << "could not determine number of loop iterations";
    LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
    return false;
  }

  return true;
}
1852929c38e8SAdam Nemet 
1853db69b174SSimon Pilgrim void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
18547da74abfSAdam Nemet                                  const TargetLibraryInfo *TLI,
18557da74abfSAdam Nemet                                  DominatorTree *DT) {
1856e3e3b994SMatthew Simpson   // Holds the Load and Store instructions.
1857e3e3b994SMatthew Simpson   SmallVector<LoadInst *, 16> Loads;
1858e3e3b994SMatthew Simpson   SmallVector<StoreInst *, 16> Stores;
18590456327cSAdam Nemet 
18600456327cSAdam Nemet   // Holds all the different accesses in the loop.
18610456327cSAdam Nemet   unsigned NumReads = 0;
18620456327cSAdam Nemet   unsigned NumReadWrites = 0;
18630456327cSAdam Nemet 
18642466ba97SMatt Arsenault   bool HasComplexMemInst = false;
18652466ba97SMatt Arsenault 
18662466ba97SMatt Arsenault   // A runtime check is only legal to insert if there are no convergent calls.
18672466ba97SMatt Arsenault   HasConvergentOp = false;
18682466ba97SMatt Arsenault 
1869ce030acbSXinliang David Li   PtrRtChecking->Pointers.clear();
1870ce030acbSXinliang David Li   PtrRtChecking->Need = false;
18710456327cSAdam Nemet 
18720456327cSAdam Nemet   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
18730456327cSAdam Nemet 
18747bf299c8SAyal Zaks   const bool EnableMemAccessVersioningOfLoop =
18757bf299c8SAyal Zaks       EnableMemAccessVersioning &&
18767bf299c8SAyal Zaks       !TheLoop->getHeader()->getParent()->hasOptSize();
18777bf299c8SAyal Zaks 
18780456327cSAdam Nemet   // For each block.
18798b401013SDavid Majnemer   for (BasicBlock *BB : TheLoop->blocks()) {
18802466ba97SMatt Arsenault     // Scan the BB and collect legal loads and stores. Also detect any
18812466ba97SMatt Arsenault     // convergent instructions.
18828b401013SDavid Majnemer     for (Instruction &I : *BB) {
18832466ba97SMatt Arsenault       if (auto *Call = dyn_cast<CallBase>(&I)) {
18842466ba97SMatt Arsenault         if (Call->isConvergent())
18852466ba97SMatt Arsenault           HasConvergentOp = true;
18862466ba97SMatt Arsenault       }
18872466ba97SMatt Arsenault 
18882466ba97SMatt Arsenault       // With both a non-vectorizable memory instruction and a convergent
18892466ba97SMatt Arsenault       // operation, found in this loop, no reason to continue the search.
18902466ba97SMatt Arsenault       if (HasComplexMemInst && HasConvergentOp) {
18912466ba97SMatt Arsenault         CanVecMem = false;
18922466ba97SMatt Arsenault         return;
18932466ba97SMatt Arsenault       }
18942466ba97SMatt Arsenault 
18952466ba97SMatt Arsenault       // Avoid hitting recordAnalysis multiple times.
18962466ba97SMatt Arsenault       if (HasComplexMemInst)
18972466ba97SMatt Arsenault         continue;
18982466ba97SMatt Arsenault 
18990456327cSAdam Nemet       // If this is a load, save it. If this instruction can read from memory
19000456327cSAdam Nemet       // but is not a load, then we quit. Notice that we don't handle function
19010456327cSAdam Nemet       // calls that read or write.
19028b401013SDavid Majnemer       if (I.mayReadFromMemory()) {
19030456327cSAdam Nemet         // Many math library functions read the rounding mode. We will only
19040456327cSAdam Nemet         // vectorize a loop if it contains known function calls that don't set
19050456327cSAdam Nemet         // the flag. Therefore, it is safe to ignore this read from memory.
19068b401013SDavid Majnemer         auto *Call = dyn_cast<CallInst>(&I);
1907b4b27230SDavid Majnemer         if (Call && getVectorIntrinsicIDForCall(Call, TLI))
19080456327cSAdam Nemet           continue;
19090456327cSAdam Nemet 
19109b3cf604SMichael Zolotukhin         // If the function has an explicit vectorized counterpart, we can safely
19119b3cf604SMichael Zolotukhin         // assume that it can be vectorized.
19129b3cf604SMichael Zolotukhin         if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
191366c120f0SFrancesco Petrogalli             !VFDatabase::getMappings(*Call).empty())
19149b3cf604SMichael Zolotukhin           continue;
19159b3cf604SMichael Zolotukhin 
19168b401013SDavid Majnemer         auto *Ld = dyn_cast<LoadInst>(&I);
19172466ba97SMatt Arsenault         if (!Ld) {
19182466ba97SMatt Arsenault           recordAnalysis("CantVectorizeInstruction", Ld)
19192466ba97SMatt Arsenault             << "instruction cannot be vectorized";
19202466ba97SMatt Arsenault           HasComplexMemInst = true;
19212466ba97SMatt Arsenault           continue;
19222466ba97SMatt Arsenault         }
19232466ba97SMatt Arsenault         if (!Ld->isSimple() && !IsAnnotatedParallel) {
1924877ccee8SAdam Nemet           recordAnalysis("NonSimpleLoad", Ld)
1925877ccee8SAdam Nemet               << "read with atomic ordering or volatile read";
1926d34e60caSNicola Zaghen           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
19272466ba97SMatt Arsenault           HasComplexMemInst = true;
19282466ba97SMatt Arsenault           continue;
19290456327cSAdam Nemet         }
19300456327cSAdam Nemet         NumLoads++;
19310456327cSAdam Nemet         Loads.push_back(Ld);
1932ce030acbSXinliang David Li         DepChecker->addAccess(Ld);
19337bf299c8SAyal Zaks         if (EnableMemAccessVersioningOfLoop)
1934c953bb99SAdam Nemet           collectStridedAccess(Ld);
19350456327cSAdam Nemet         continue;
19360456327cSAdam Nemet       }
19370456327cSAdam Nemet 
19380456327cSAdam Nemet       // Save 'store' instructions. Abort if other instructions write to memory.
19398b401013SDavid Majnemer       if (I.mayWriteToMemory()) {
19408b401013SDavid Majnemer         auto *St = dyn_cast<StoreInst>(&I);
19410456327cSAdam Nemet         if (!St) {
1942877ccee8SAdam Nemet           recordAnalysis("CantVectorizeInstruction", St)
1943877ccee8SAdam Nemet               << "instruction cannot be vectorized";
19442466ba97SMatt Arsenault           HasComplexMemInst = true;
19452466ba97SMatt Arsenault           continue;
19460456327cSAdam Nemet         }
19470456327cSAdam Nemet         if (!St->isSimple() && !IsAnnotatedParallel) {
1948877ccee8SAdam Nemet           recordAnalysis("NonSimpleStore", St)
1949877ccee8SAdam Nemet               << "write with atomic ordering or volatile write";
1950d34e60caSNicola Zaghen           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
19512466ba97SMatt Arsenault           HasComplexMemInst = true;
19522466ba97SMatt Arsenault           continue;
19530456327cSAdam Nemet         }
19540456327cSAdam Nemet         NumStores++;
19550456327cSAdam Nemet         Stores.push_back(St);
1956ce030acbSXinliang David Li         DepChecker->addAccess(St);
19577bf299c8SAyal Zaks         if (EnableMemAccessVersioningOfLoop)
1958c953bb99SAdam Nemet           collectStridedAccess(St);
19590456327cSAdam Nemet       }
19600456327cSAdam Nemet     } // Next instr.
19610456327cSAdam Nemet   } // Next block.
19620456327cSAdam Nemet 
19632466ba97SMatt Arsenault   if (HasComplexMemInst) {
19642466ba97SMatt Arsenault     CanVecMem = false;
19652466ba97SMatt Arsenault     return;
19662466ba97SMatt Arsenault   }
19672466ba97SMatt Arsenault 
19680456327cSAdam Nemet   // Now we have two lists that hold the loads and the stores.
19690456327cSAdam Nemet   // Next, we find the pointers that they use.
19700456327cSAdam Nemet 
19710456327cSAdam Nemet   // Check if we see any stores. If there are no stores, then we don't
19720456327cSAdam Nemet   // care if the pointers are *restrict*.
19730456327cSAdam Nemet   if (!Stores.size()) {
1974d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1975436018c3SAdam Nemet     CanVecMem = true;
1976436018c3SAdam Nemet     return;
19770456327cSAdam Nemet   }
19780456327cSAdam Nemet 
1979dee666bcSAdam Nemet   MemoryDepChecker::DepCandidates DependentAccesses;
1980b0eb40caSVitaly Buka   AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
19810456327cSAdam Nemet 
198289051ebaSVitaly Buka   // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
19830456327cSAdam Nemet   // multiple times on the same object. If the ptr is accessed twice, once
19840456327cSAdam Nemet   // for read and once for write, it will only appear once (on the write
19850456327cSAdam Nemet   // list). This is okay, since we are going to check for conflicts between
19860456327cSAdam Nemet   // writes and between reads and writes, but not between reads and reads.
1987ff31020eSArthur Eubanks   SmallSet<std::pair<Value *, Type *>, 16> Seen;
19880456327cSAdam Nemet 
1989b1e3d453SAnna Thomas   // Record uniform store addresses to identify if we have multiple stores
1990b1e3d453SAnna Thomas   // to the same address.
1991ff31020eSArthur Eubanks   SmallPtrSet<Value *, 16> UniformStores;
1992b1e3d453SAnna Thomas 
1993e3e3b994SMatthew Simpson   for (StoreInst *ST : Stores) {
19940456327cSAdam Nemet     Value *Ptr = ST->getPointerOperand();
1995b1e3d453SAnna Thomas 
1996*4e5e042dSIgor Kirillov     if (isUniform(Ptr)) {
1997*4e5e042dSIgor Kirillov       // Record store instructions to loop invariant addresses
1998*4e5e042dSIgor Kirillov       StoresToInvariantAddresses.push_back(ST);
19995e9215f0SAnna Thomas       HasDependenceInvolvingLoopInvariantAddress |=
20006f732bfbSAnna Thomas           !UniformStores.insert(Ptr).second;
2001*4e5e042dSIgor Kirillov     }
2002b1e3d453SAnna Thomas 
20030456327cSAdam Nemet     // If we did *not* see this pointer before, insert it to  the read-write
20040456327cSAdam Nemet     // list. At this phase it is only a 'write' list.
2005ff31020eSArthur Eubanks     Type *AccessTy = getLoadStoreType(ST);
2006ff31020eSArthur Eubanks     if (Seen.insert({Ptr, AccessTy}).second) {
20070456327cSAdam Nemet       ++NumReadWrites;
20080456327cSAdam Nemet 
2009ac80dc75SChandler Carruth       MemoryLocation Loc = MemoryLocation::get(ST);
20100456327cSAdam Nemet       // The TBAA metadata could have a control dependency on the predication
20110456327cSAdam Nemet       // condition, so we cannot rely on it when determining whether or not we
20120456327cSAdam Nemet       // need runtime pointer checks.
201301abb2c3SAdam Nemet       if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
20140456327cSAdam Nemet         Loc.AATags.TBAA = nullptr;
20150456327cSAdam Nemet 
2016e248d690SFlorian Hahn       visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2017ff31020eSArthur Eubanks                     [&Accesses, AccessTy, Loc](Value *Ptr) {
2018e248d690SFlorian Hahn                       MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2019ff31020eSArthur Eubanks                       Accesses.addStore(NewLoc, AccessTy);
2020e248d690SFlorian Hahn                     });
20210456327cSAdam Nemet     }
20220456327cSAdam Nemet   }
20230456327cSAdam Nemet 
20240456327cSAdam Nemet   if (IsAnnotatedParallel) {
2025d34e60caSNicola Zaghen     LLVM_DEBUG(
2026d34e60caSNicola Zaghen         dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
20270456327cSAdam Nemet                << "checks.\n");
2028436018c3SAdam Nemet     CanVecMem = true;
2029436018c3SAdam Nemet     return;
20300456327cSAdam Nemet   }
20310456327cSAdam Nemet 
2032e3e3b994SMatthew Simpson   for (LoadInst *LD : Loads) {
20330456327cSAdam Nemet     Value *Ptr = LD->getPointerOperand();
20340456327cSAdam Nemet     // If we did *not* see this pointer before, insert it to the
20350456327cSAdam Nemet     // read list. If we *did* see it before, then it is already in
20360456327cSAdam Nemet     // the read-write list. This allows us to vectorize expressions
20370456327cSAdam Nemet     // such as A[i] += x;  Because the address of A[i] is a read-write
20380456327cSAdam Nemet     // pointer. This only works if the index of A[i] is consecutive.
20390456327cSAdam Nemet     // If the address of i is unknown (for example A[B[i]]) then we may
20400456327cSAdam Nemet     // read a few words, modify, and write a few words, and some of the
20410456327cSAdam Nemet     // words may be written to the same address.
20420456327cSAdam Nemet     bool IsReadOnlyPtr = false;
2043ff31020eSArthur Eubanks     Type *AccessTy = getLoadStoreType(LD);
2044ff31020eSArthur Eubanks     if (Seen.insert({Ptr, AccessTy}).second ||
204545c46734SNikita Popov         !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
20460456327cSAdam Nemet       ++NumReads;
20470456327cSAdam Nemet       IsReadOnlyPtr = true;
20480456327cSAdam Nemet     }
20490456327cSAdam Nemet 
20505e9215f0SAnna Thomas     // See if there is an unsafe dependency between a load to a uniform address and
20515e9215f0SAnna Thomas     // store to the same uniform address.
20525e9215f0SAnna Thomas     if (UniformStores.count(Ptr)) {
20535e9215f0SAnna Thomas       LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
20545e9215f0SAnna Thomas                            "load and uniform store to the same address!\n");
20555e9215f0SAnna Thomas       HasDependenceInvolvingLoopInvariantAddress = true;
20565e9215f0SAnna Thomas     }
20575e9215f0SAnna Thomas 
2058ac80dc75SChandler Carruth     MemoryLocation Loc = MemoryLocation::get(LD);
20590456327cSAdam Nemet     // The TBAA metadata could have a control dependency on the predication
20600456327cSAdam Nemet     // condition, so we cannot rely on it when determining whether or not we
20610456327cSAdam Nemet     // need runtime pointer checks.
206201abb2c3SAdam Nemet     if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
20630456327cSAdam Nemet       Loc.AATags.TBAA = nullptr;
20640456327cSAdam Nemet 
2065e248d690SFlorian Hahn     visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2066ff31020eSArthur Eubanks                   [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2067e248d690SFlorian Hahn                     MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2068ff31020eSArthur Eubanks                     Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2069e248d690SFlorian Hahn                   });
20700456327cSAdam Nemet   }
20710456327cSAdam Nemet 
20720456327cSAdam Nemet   // If we write (or read-write) to a single destination and there are no
20730456327cSAdam Nemet   // other reads in this loop then is it safe to vectorize.
20740456327cSAdam Nemet   if (NumReadWrites == 1 && NumReads == 0) {
2075d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2076436018c3SAdam Nemet     CanVecMem = true;
2077436018c3SAdam Nemet     return;
20780456327cSAdam Nemet   }
20790456327cSAdam Nemet 
20800456327cSAdam Nemet   // Build dependence sets and check whether we need a runtime pointer bounds
20810456327cSAdam Nemet   // check.
20820456327cSAdam Nemet   Accesses.buildDependenceSets();
20830456327cSAdam Nemet 
20840456327cSAdam Nemet   // Find pointers with computable bounds. We are going to use this information
20850456327cSAdam Nemet   // to place a runtime bound check.
20869f1c6fbfSMalhar Jajoo   Value *UncomputablePtr = nullptr;
20879f1c6fbfSMalhar Jajoo   bool CanDoRTIfNeeded =
20889f1c6fbfSMalhar Jajoo       Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
20899f1c6fbfSMalhar Jajoo                                SymbolicStrides, UncomputablePtr, false);
2090ee61474aSAdam Nemet   if (!CanDoRTIfNeeded) {
20919f1c6fbfSMalhar Jajoo     auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
20929f1c6fbfSMalhar Jajoo     recordAnalysis("CantIdentifyArrayBounds", I)
20939f1c6fbfSMalhar Jajoo         << "cannot identify array bounds";
2094d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2095ee61474aSAdam Nemet                       << "the array bounds.\n");
2096436018c3SAdam Nemet     CanVecMem = false;
2097436018c3SAdam Nemet     return;
20980456327cSAdam Nemet   }
20990456327cSAdam Nemet 
2100d34e60caSNicola Zaghen   LLVM_DEBUG(
21012466ba97SMatt Arsenault     dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
21020456327cSAdam Nemet 
2103436018c3SAdam Nemet   CanVecMem = true;
21040456327cSAdam Nemet   if (Accesses.isDependencyCheckNeeded()) {
2105d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2106ce030acbSXinliang David Li     CanVecMem = DepChecker->areDepsSafe(
2107139ffba3SAdam Nemet         DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2108ce030acbSXinliang David Li     MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
21090456327cSAdam Nemet 
2110ce030acbSXinliang David Li     if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2111d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
21120456327cSAdam Nemet 
21130456327cSAdam Nemet       // Clear the dependency checks. We assume they are not needed.
2114ce030acbSXinliang David Li       Accesses.resetDepChecks(*DepChecker);
21150456327cSAdam Nemet 
2116ce030acbSXinliang David Li       PtrRtChecking->reset();
2117ce030acbSXinliang David Li       PtrRtChecking->Need = true;
21180456327cSAdam Nemet 
211994734eefSXinliang David Li       auto *SE = PSE->getSE();
21209f1c6fbfSMalhar Jajoo       UncomputablePtr = nullptr;
21219f1c6fbfSMalhar Jajoo       CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
21229f1c6fbfSMalhar Jajoo           *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
212398a13719SSilviu Baranga 
2124949e91a6SAdam Nemet       // Check that we found the bounds for the pointer.
2125ee61474aSAdam Nemet       if (!CanDoRTIfNeeded) {
21269f1c6fbfSMalhar Jajoo         auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
21279f1c6fbfSMalhar Jajoo         recordAnalysis("CantCheckMemDepsAtRunTime", I)
2128877ccee8SAdam Nemet             << "cannot check memory dependencies at runtime";
2129d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2130b6dc76ffSAdam Nemet         CanVecMem = false;
2131b6dc76ffSAdam Nemet         return;
2132b6dc76ffSAdam Nemet       }
2133b6dc76ffSAdam Nemet 
21340456327cSAdam Nemet       CanVecMem = true;
21350456327cSAdam Nemet     }
21360456327cSAdam Nemet   }
21370456327cSAdam Nemet 
21382466ba97SMatt Arsenault   if (HasConvergentOp) {
21392466ba97SMatt Arsenault     recordAnalysis("CantInsertRuntimeCheckWithConvergent")
21402466ba97SMatt Arsenault       << "cannot add control dependency to convergent operation";
21412466ba97SMatt Arsenault     LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
21422466ba97SMatt Arsenault                          "would be needed with a convergent operation\n");
21432466ba97SMatt Arsenault     CanVecMem = false;
21442466ba97SMatt Arsenault     return;
21452466ba97SMatt Arsenault   }
21462466ba97SMatt Arsenault 
21474bb90a71SAdam Nemet   if (CanVecMem)
2148d34e60caSNicola Zaghen     LLVM_DEBUG(
2149d34e60caSNicola Zaghen         dbgs() << "LAA: No unsafe dependent memory operations in loop.  We"
2150ce030acbSXinliang David Li                << (PtrRtChecking->Need ? "" : " don't")
21510f67c6c1SAdam Nemet                << " need runtime memory checks.\n");
2152778b455dSMalhar Jajoo   else
2153778b455dSMalhar Jajoo     emitUnsafeDependenceRemark();
2154778b455dSMalhar Jajoo }
2155778b455dSMalhar Jajoo 
2156778b455dSMalhar Jajoo void LoopAccessInfo::emitUnsafeDependenceRemark() {
2157778b455dSMalhar Jajoo   auto Deps = getDepChecker().getDependences();
2158778b455dSMalhar Jajoo   if (!Deps)
2159778b455dSMalhar Jajoo     return;
2160778b455dSMalhar Jajoo   auto Found = std::find_if(
2161778b455dSMalhar Jajoo       Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) {
2162778b455dSMalhar Jajoo         return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
2163778b455dSMalhar Jajoo                MemoryDepChecker::VectorizationSafetyStatus::Safe;
2164778b455dSMalhar Jajoo       });
2165778b455dSMalhar Jajoo   if (Found == Deps->end())
2166778b455dSMalhar Jajoo     return;
2167778b455dSMalhar Jajoo   MemoryDepChecker::Dependence Dep = *Found;
2168778b455dSMalhar Jajoo 
2169778b455dSMalhar Jajoo   LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2170778b455dSMalhar Jajoo 
2171778b455dSMalhar Jajoo   // Emit remark for first unsafe dependence
2172778b455dSMalhar Jajoo   OptimizationRemarkAnalysis &R =
2173778b455dSMalhar Jajoo       recordAnalysis("UnsafeDep", Dep.getDestination(*this))
21740a77dfadSAdam Nemet       << "unsafe dependent memory operations in loop. Use "
21750a77dfadSAdam Nemet          "#pragma loop distribute(enable) to allow loop distribution "
21760a77dfadSAdam Nemet          "to attempt to isolate the offending operations into a separate "
2177877ccee8SAdam Nemet          "loop";
2178778b455dSMalhar Jajoo 
2179778b455dSMalhar Jajoo   switch (Dep.Type) {
2180778b455dSMalhar Jajoo   case MemoryDepChecker::Dependence::NoDep:
2181778b455dSMalhar Jajoo   case MemoryDepChecker::Dependence::Forward:
2182778b455dSMalhar Jajoo   case MemoryDepChecker::Dependence::BackwardVectorizable:
2183778b455dSMalhar Jajoo     llvm_unreachable("Unexpected dependence");
2184778b455dSMalhar Jajoo   case MemoryDepChecker::Dependence::Backward:
2185778b455dSMalhar Jajoo     R << "\nBackward loop carried data dependence.";
2186778b455dSMalhar Jajoo     break;
2187778b455dSMalhar Jajoo   case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
2188778b455dSMalhar Jajoo     R << "\nForward loop carried data dependence that prevents "
2189778b455dSMalhar Jajoo          "store-to-load forwarding.";
2190778b455dSMalhar Jajoo     break;
2191778b455dSMalhar Jajoo   case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
2192778b455dSMalhar Jajoo     R << "\nBackward loop carried data dependence that prevents "
2193778b455dSMalhar Jajoo          "store-to-load forwarding.";
2194778b455dSMalhar Jajoo     break;
2195778b455dSMalhar Jajoo   case MemoryDepChecker::Dependence::Unknown:
2196778b455dSMalhar Jajoo     R << "\nUnknown data dependence.";
2197778b455dSMalhar Jajoo     break;
2198778b455dSMalhar Jajoo   }
2199778b455dSMalhar Jajoo 
2200778b455dSMalhar Jajoo   if (Instruction *I = Dep.getSource(*this)) {
2201778b455dSMalhar Jajoo     DebugLoc SourceLoc = I->getDebugLoc();
2202778b455dSMalhar Jajoo     if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
2203778b455dSMalhar Jajoo       SourceLoc = DD->getDebugLoc();
2204778b455dSMalhar Jajoo     if (SourceLoc)
2205778b455dSMalhar Jajoo       R << " Memory location is the same as accessed at "
2206778b455dSMalhar Jajoo         << ore::NV("Location", SourceLoc);
22074bb90a71SAdam Nemet   }
22080456327cSAdam Nemet }
22090456327cSAdam Nemet 
221001abb2c3SAdam Nemet bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
221101abb2c3SAdam Nemet                                            DominatorTree *DT)  {
22120456327cSAdam Nemet   assert(TheLoop->contains(BB) && "Unknown block used");
22130456327cSAdam Nemet 
22140456327cSAdam Nemet   // Blocks that do not dominate the latch need predication.
22150456327cSAdam Nemet   BasicBlock* Latch = TheLoop->getLoopLatch();
22160456327cSAdam Nemet   return !DT->dominates(BB, Latch);
22170456327cSAdam Nemet }
22180456327cSAdam Nemet 
2219877ccee8SAdam Nemet OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2220877ccee8SAdam Nemet                                                            Instruction *I) {
2221c922853bSAdam Nemet   assert(!Report && "Multiple reports generated");
2222877ccee8SAdam Nemet 
2223877ccee8SAdam Nemet   Value *CodeRegion = TheLoop->getHeader();
2224877ccee8SAdam Nemet   DebugLoc DL = TheLoop->getStartLoc();
2225877ccee8SAdam Nemet 
2226877ccee8SAdam Nemet   if (I) {
2227877ccee8SAdam Nemet     CodeRegion = I->getParent();
2228877ccee8SAdam Nemet     // If there is no debug location attached to the instruction, revert back to
2229877ccee8SAdam Nemet     // using the loop's.
2230877ccee8SAdam Nemet     if (I->getDebugLoc())
2231877ccee8SAdam Nemet       DL = I->getDebugLoc();
2232877ccee8SAdam Nemet   }
2233877ccee8SAdam Nemet 
22340eaee545SJonas Devlieghere   Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2235877ccee8SAdam Nemet                                                    CodeRegion);
2236877ccee8SAdam Nemet   return *Report;
22370456327cSAdam Nemet }
22380456327cSAdam Nemet 
223957ac766eSAdam Nemet bool LoopAccessInfo::isUniform(Value *V) const {
22403ceac2bbSMichael Kuperstein   auto *SE = PSE->getSE();
22413ceac2bbSMichael Kuperstein   // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
22423ceac2bbSMichael Kuperstein   // never considered uniform.
22433ceac2bbSMichael Kuperstein   // TODO: Is this really what we want? Even without FP SCEV, we may want some
22443ceac2bbSMichael Kuperstein   // trivially loop-invariant FP values to be considered uniform.
22453ceac2bbSMichael Kuperstein   if (!SE->isSCEVable(V->getType()))
22463ceac2bbSMichael Kuperstein     return false;
22473ceac2bbSMichael Kuperstein   return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
22480456327cSAdam Nemet }
22497206d7a5SAdam Nemet 
/// If \p MemAccess is a load or store whose pointer is strided by a
/// loop-invariant symbolic value, record the stride so callers can version
/// the loop under a runtime "Stride == 1" predicate.
void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
  Value *Ptr = getLoadStorePointerOperand(MemAccess);
  if (!Ptr)
    return;

  // Recover the symbolic stride (if any) feeding this pointer's address
  // computation; non-strided accesses are simply ignored.
  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
  if (!Stride)
    return;

  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
                       "versioning:");
  LLVM_DEBUG(dbgs() << "  Ptr: " << *Ptr << " Stride: " << *Stride << "\n");

  // Avoid adding the "Stride == 1" predicate when we know that
  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
  // or zero iteration loop, as Trip-Count <= Stride == 1.
  //
  // TODO: We are currently not making a very informed decision on when it is
  // beneficial to apply stride versioning. It might make more sense that the
  // users of this analysis (such as the vectorizer) will trigger it, based on
  // their specific cost considerations; For example, in cases where stride
  // versioning does  not help resolving memory accesses/dependences, the
  // vectorizer should evaluate the cost of the runtime test, and the benefit
  // of various possible stride specializations, considering the alternatives
  // of using gather/scatters (if available).

  const SCEV *StrideExpr = PSE->getSCEV(Stride);
  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();

  // Match the types so we can compare the stride and the BETakenCount.
  // The Stride can be positive/negative, so we sign extend Stride;
  // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
  uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
  uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
  const SCEV *CastedStride = StrideExpr;
  const SCEV *CastedBECount = BETakenCount;
  ScalarEvolution *SE = PSE->getSE();
  // Extend the narrower of the two expressions to the wider type.
  if (BETypeSizeBits >= StrideTypeSizeBits)
    CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
  else
    CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
  // Since TripCount == BackEdgeTakenCount + 1, checking:
  // "Stride >= TripCount" is equivalent to checking:
  // Stride - BETakenCount > 0
  if (SE->isKnownPositive(StrideMinusBETaken)) {
    LLVM_DEBUG(
        dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
                  "Stride==1 predicate will imply that the loop executes "
                  "at most once.\n");
    return;
  }
  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");

  // Map the pointer to its symbolic stride, and remember the stride value
  // itself so the versioning check can be emitted later.
  SymbolicStrides[Ptr] = Stride;
  StrideSet.insert(Stride);
}
2308c953bb99SAdam Nemet 
/// Construct the analysis result for loop \p L, running the full memory
/// dependence analysis when the loop is in analyzable form.
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const TargetLibraryInfo *TLI, AAResults *AA,
                               DominatorTree *DT, LoopInfo *LI)
    // NOTE(review): DepChecker's initializer dereferences *PSE, so this
    // relies on PSE being declared before DepChecker in the class — confirm
    // the member order in the header before reordering anything here.
    : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
      PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
      DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
  // Only run the expensive analysis on loops we can handle at all.
  if (canAnalyzeLoop())
    analyzeLoop(AA, LI, TLI, DT);
}
23183bfd93d7SAdam Nemet 
2319e91cc6efSAdam Nemet void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2320e91cc6efSAdam Nemet   if (CanVecMem) {
23214ad38b63SAdam Nemet     OS.indent(Depth) << "Memory dependences are safe";
23227afb46d3SDavid Majnemer     if (MaxSafeDepDistBytes != -1ULL)
2323c62e554eSAdam Nemet       OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2324c62e554eSAdam Nemet          << " bytes";
2325ce030acbSXinliang David Li     if (PtrRtChecking->Need)
23264ad38b63SAdam Nemet       OS << " with run-time checks";
23274ad38b63SAdam Nemet     OS << "\n";
2328e91cc6efSAdam Nemet   }
2329e91cc6efSAdam Nemet 
23302466ba97SMatt Arsenault   if (HasConvergentOp)
23312466ba97SMatt Arsenault     OS.indent(Depth) << "Has convergent operation in loop\n";
23322466ba97SMatt Arsenault 
2333e91cc6efSAdam Nemet   if (Report)
2334877ccee8SAdam Nemet     OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2335e91cc6efSAdam Nemet 
2336ce030acbSXinliang David Li   if (auto *Dependences = DepChecker->getDependences()) {
2337a2df750fSAdam Nemet     OS.indent(Depth) << "Dependences:\n";
2338a2df750fSAdam Nemet     for (auto &Dep : *Dependences) {
2339ce030acbSXinliang David Li       Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
234058913d65SAdam Nemet       OS << "\n";
234158913d65SAdam Nemet     }
234258913d65SAdam Nemet   } else
2343a2df750fSAdam Nemet     OS.indent(Depth) << "Too many dependences, not recorded\n";
2344e91cc6efSAdam Nemet 
2345e91cc6efSAdam Nemet   // List the pair of accesses need run-time checks to prove independence.
2346ce030acbSXinliang David Li   PtrRtChecking->print(OS, Depth);
2347e91cc6efSAdam Nemet   OS << "\n";
2348c3384320SAdam Nemet 
23495e9215f0SAnna Thomas   OS.indent(Depth) << "Non vectorizable stores to invariant address were "
23505e9215f0SAnna Thomas                    << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2351c3384320SAdam Nemet                    << "found in loop.\n";
2352e3c0534bSSilviu Baranga 
2353e3c0534bSSilviu Baranga   OS.indent(Depth) << "SCEV assumptions:\n";
23545ba11503SPhilip Reames   PSE->getPredicate().print(OS, Depth);
2355b77365b5SSilviu Baranga 
2356b77365b5SSilviu Baranga   OS << "\n";
2357b77365b5SSilviu Baranga 
2358b77365b5SSilviu Baranga   OS.indent(Depth) << "Expressions re-written:\n";
235994734eefSXinliang David Li   PSE->print(OS, Depth);
2360e91cc6efSAdam Nemet }
2361e91cc6efSAdam Nemet 
/// Legacy pass-manager wrapper; registers the pass on construction.
LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
  initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
}
236505da2fe5SReid Kleckner 
23667853c1ddSXinliang David Li const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
23673bfd93d7SAdam Nemet   auto &LAI = LoopAccessInfoMap[L];
23683bfd93d7SAdam Nemet 
23691824e411SAdam Nemet   if (!LAI)
23700eaee545SJonas Devlieghere     LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
23711824e411SAdam Nemet 
23729aa52ba5SKazu Hirata   return *LAI;
23733bfd93d7SAdam Nemet }
23743bfd93d7SAdam Nemet 
23757853c1ddSXinliang David Li void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
23767853c1ddSXinliang David Li   LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2377ecde1c7fSXinliang David Li 
2378e91cc6efSAdam Nemet   for (Loop *TopLevelLoop : *LI)
2379e91cc6efSAdam Nemet     for (Loop *L : depth_first(TopLevelLoop)) {
2380e91cc6efSAdam Nemet       OS.indent(2) << L->getHeader()->getName() << ":\n";
2381bdbc5227SAdam Nemet       auto &LAI = LAA.getInfo(L);
2382e91cc6efSAdam Nemet       LAI.print(OS, 4);
2383e91cc6efSAdam Nemet     }
2384e91cc6efSAdam Nemet }
2385e91cc6efSAdam Nemet 
23867853c1ddSXinliang David Li bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2387ecde1c7fSXinliang David Li   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
23883bfd93d7SAdam Nemet   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
23899c27b59cSTeresa Johnson   TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2390ecde1c7fSXinliang David Li   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2391ecde1c7fSXinliang David Li   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2392ecde1c7fSXinliang David Li   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
23933bfd93d7SAdam Nemet 
23943bfd93d7SAdam Nemet   return false;
23953bfd93d7SAdam Nemet }
23963bfd93d7SAdam Nemet 
/// Declare the analyses this pass depends on. They are required
/// transitively because LoopAccessInfo objects may be built on demand
/// after this pass itself has run.
void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<LoopInfoWrapperPass>();

  // Pure analysis: nothing is invalidated.
  AU.setPreservesAll();
}
24053bfd93d7SAdam Nemet 
char LoopAccessLegacyAnalysis::ID = 0;
static const char laa_name[] = "Loop Access Analysis";
#define LAA_NAME "loop-accesses"

// Register the legacy pass and record its dependencies so the legacy pass
// manager schedules them ahead of this pass.
INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
24163bfd93d7SAdam Nemet 
AnalysisKey LoopAccessAnalysis::Key;

/// New pass-manager entry point: build a LoopAccessInfo for \p L from the
/// standard function-level analyses cached in \p AR.
LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
                                       LoopStandardAnalysisResults &AR) {
  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
}
24238a021317SXinliang David Li 
namespace llvm {

  /// Factory used by the legacy pass pipeline to create this analysis pass.
  Pass *createLAAPass() {
    return new LoopAccessLegacyAnalysis();
  }

} // end namespace llvm
2431