10456327cSAdam Nemet //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
20456327cSAdam Nemet //
30456327cSAdam Nemet //                     The LLVM Compiler Infrastructure
40456327cSAdam Nemet //
50456327cSAdam Nemet // This file is distributed under the University of Illinois Open Source
60456327cSAdam Nemet // License. See LICENSE.TXT for details.
70456327cSAdam Nemet //
80456327cSAdam Nemet //===----------------------------------------------------------------------===//
90456327cSAdam Nemet //
100456327cSAdam Nemet // The implementation for the loop memory dependence that was originally
110456327cSAdam Nemet // developed for the loop vectorizer.
120456327cSAdam Nemet //
130456327cSAdam Nemet //===----------------------------------------------------------------------===//
140456327cSAdam Nemet 
150456327cSAdam Nemet #include "llvm/Analysis/LoopAccessAnalysis.h"
160456327cSAdam Nemet #include "llvm/Analysis/LoopInfo.h"
177206d7a5SAdam Nemet #include "llvm/Analysis/ScalarEvolutionExpander.h"
180456327cSAdam Nemet #include "llvm/Analysis/ValueTracking.h"
190456327cSAdam Nemet #include "llvm/IR/DiagnosticInfo.h"
200456327cSAdam Nemet #include "llvm/IR/Dominators.h"
217206d7a5SAdam Nemet #include "llvm/IR/IRBuilder.h"
220456327cSAdam Nemet #include "llvm/Support/Debug.h"
230456327cSAdam Nemet #include "llvm/Transforms/Utils/VectorUtils.h"
240456327cSAdam Nemet using namespace llvm;
250456327cSAdam Nemet 
260456327cSAdam Nemet #define DEBUG_TYPE "loop-vectorize"
270456327cSAdam Nemet 
280456327cSAdam Nemet void VectorizationReport::emitAnalysis(VectorizationReport &Message,
290456327cSAdam Nemet                                        const Function *TheFunction,
300456327cSAdam Nemet                                        const Loop *TheLoop) {
310456327cSAdam Nemet   DebugLoc DL = TheLoop->getStartLoc();
320456327cSAdam Nemet   if (Instruction *I = Message.getInstr())
330456327cSAdam Nemet     DL = I->getDebugLoc();
340456327cSAdam Nemet   emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
350456327cSAdam Nemet                                  *TheFunction, DL, Message.str());
360456327cSAdam Nemet }
370456327cSAdam Nemet 
380456327cSAdam Nemet Value *llvm::stripIntegerCast(Value *V) {
390456327cSAdam Nemet   if (CastInst *CI = dyn_cast<CastInst>(V))
400456327cSAdam Nemet     if (CI->getOperand(0)->getType()->isIntegerTy())
410456327cSAdam Nemet       return CI->getOperand(0);
420456327cSAdam Nemet   return V;
430456327cSAdam Nemet }
440456327cSAdam Nemet 
450456327cSAdam Nemet const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
460456327cSAdam Nemet                                             ValueToValueMap &PtrToStride,
470456327cSAdam Nemet                                             Value *Ptr, Value *OrigPtr) {
480456327cSAdam Nemet 
490456327cSAdam Nemet   const SCEV *OrigSCEV = SE->getSCEV(Ptr);
500456327cSAdam Nemet 
510456327cSAdam Nemet   // If there is an entry in the map return the SCEV of the pointer with the
520456327cSAdam Nemet   // symbolic stride replaced by one.
530456327cSAdam Nemet   ValueToValueMap::iterator SI = PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
540456327cSAdam Nemet   if (SI != PtrToStride.end()) {
550456327cSAdam Nemet     Value *StrideVal = SI->second;
560456327cSAdam Nemet 
570456327cSAdam Nemet     // Strip casts.
580456327cSAdam Nemet     StrideVal = stripIntegerCast(StrideVal);
590456327cSAdam Nemet 
600456327cSAdam Nemet     // Replace symbolic stride by one.
610456327cSAdam Nemet     Value *One = ConstantInt::get(StrideVal->getType(), 1);
620456327cSAdam Nemet     ValueToValueMap RewriteMap;
630456327cSAdam Nemet     RewriteMap[StrideVal] = One;
640456327cSAdam Nemet 
650456327cSAdam Nemet     const SCEV *ByOne =
660456327cSAdam Nemet         SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
670456327cSAdam Nemet     DEBUG(dbgs() << "LV: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
680456327cSAdam Nemet                  << "\n");
690456327cSAdam Nemet     return ByOne;
700456327cSAdam Nemet   }
710456327cSAdam Nemet 
720456327cSAdam Nemet   // Otherwise, just return the SCEV of the original pointer.
730456327cSAdam Nemet   return SE->getSCEV(Ptr);
740456327cSAdam Nemet }
750456327cSAdam Nemet 
76*30f16e16SAdam Nemet void LoopAccessInfo::RuntimePointerCheck::insert(ScalarEvolution *SE, Loop *Lp,
77*30f16e16SAdam Nemet                                                  Value *Ptr, bool WritePtr,
780456327cSAdam Nemet                                                  unsigned DepSetId,
790456327cSAdam Nemet                                                  unsigned ASId,
800456327cSAdam Nemet                                                  ValueToValueMap &Strides) {
810456327cSAdam Nemet   // Get the stride replaced scev.
820456327cSAdam Nemet   const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
830456327cSAdam Nemet   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
840456327cSAdam Nemet   assert(AR && "Invalid addrec expression");
850456327cSAdam Nemet   const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
860456327cSAdam Nemet   const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
870456327cSAdam Nemet   Pointers.push_back(Ptr);
880456327cSAdam Nemet   Starts.push_back(AR->getStart());
890456327cSAdam Nemet   Ends.push_back(ScEnd);
900456327cSAdam Nemet   IsWritePtr.push_back(WritePtr);
910456327cSAdam Nemet   DependencySetId.push_back(DepSetId);
920456327cSAdam Nemet   AliasSetId.push_back(ASId);
930456327cSAdam Nemet }
940456327cSAdam Nemet 
950456327cSAdam Nemet namespace {
960456327cSAdam Nemet /// \brief Analyses memory accesses in a loop.
970456327cSAdam Nemet ///
980456327cSAdam Nemet /// Checks whether run time pointer checks are needed and builds sets for data
990456327cSAdam Nemet /// dependence checking.
1000456327cSAdam Nemet class AccessAnalysis {
1010456327cSAdam Nemet public:
1020456327cSAdam Nemet   /// \brief Read or write access location.
1030456327cSAdam Nemet   typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
1040456327cSAdam Nemet   typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
1050456327cSAdam Nemet 
1060456327cSAdam Nemet   /// \brief Set of potential dependent memory accesses.
1070456327cSAdam Nemet   typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
1080456327cSAdam Nemet 
1090456327cSAdam Nemet   AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) :
1100456327cSAdam Nemet     DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
1110456327cSAdam Nemet 
1120456327cSAdam Nemet   /// \brief Register a load  and whether it is only read from.
1130456327cSAdam Nemet   void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
1140456327cSAdam Nemet     Value *Ptr = const_cast<Value*>(Loc.Ptr);
1150456327cSAdam Nemet     AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
1160456327cSAdam Nemet     Accesses.insert(MemAccessInfo(Ptr, false));
1170456327cSAdam Nemet     if (IsReadOnly)
1180456327cSAdam Nemet       ReadOnlyPtr.insert(Ptr);
1190456327cSAdam Nemet   }
1200456327cSAdam Nemet 
1210456327cSAdam Nemet   /// \brief Register a store.
1220456327cSAdam Nemet   void addStore(AliasAnalysis::Location &Loc) {
1230456327cSAdam Nemet     Value *Ptr = const_cast<Value*>(Loc.Ptr);
1240456327cSAdam Nemet     AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
1250456327cSAdam Nemet     Accesses.insert(MemAccessInfo(Ptr, true));
1260456327cSAdam Nemet   }
1270456327cSAdam Nemet 
1280456327cSAdam Nemet   /// \brief Check whether we can check the pointers at runtime for
1290456327cSAdam Nemet   /// non-intersection.
130*30f16e16SAdam Nemet   bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
1310456327cSAdam Nemet                        unsigned &NumComparisons,
1320456327cSAdam Nemet                        ScalarEvolution *SE, Loop *TheLoop,
1330456327cSAdam Nemet                        ValueToValueMap &Strides,
1340456327cSAdam Nemet                        bool ShouldCheckStride = false);
1350456327cSAdam Nemet 
1360456327cSAdam Nemet   /// \brief Goes over all memory accesses, checks whether a RT check is needed
1370456327cSAdam Nemet   /// and builds sets of dependent accesses.
1380456327cSAdam Nemet   void buildDependenceSets() {
1390456327cSAdam Nemet     processMemAccesses();
1400456327cSAdam Nemet   }
1410456327cSAdam Nemet 
1420456327cSAdam Nemet   bool isRTCheckNeeded() { return IsRTCheckNeeded; }
1430456327cSAdam Nemet 
1440456327cSAdam Nemet   bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
1450456327cSAdam Nemet   void resetDepChecks() { CheckDeps.clear(); }
1460456327cSAdam Nemet 
1470456327cSAdam Nemet   MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
1480456327cSAdam Nemet 
1490456327cSAdam Nemet private:
1500456327cSAdam Nemet   typedef SetVector<MemAccessInfo> PtrAccessSet;
1510456327cSAdam Nemet 
1520456327cSAdam Nemet   /// \brief Go over all memory access and check whether runtime pointer checks
1530456327cSAdam Nemet   /// are needed /// and build sets of dependency check candidates.
1540456327cSAdam Nemet   void processMemAccesses();
1550456327cSAdam Nemet 
1560456327cSAdam Nemet   /// Set of all accesses.
1570456327cSAdam Nemet   PtrAccessSet Accesses;
1580456327cSAdam Nemet 
1590456327cSAdam Nemet   /// Set of accesses that need a further dependence check.
1600456327cSAdam Nemet   MemAccessInfoSet CheckDeps;
1610456327cSAdam Nemet 
1620456327cSAdam Nemet   /// Set of pointers that are read only.
1630456327cSAdam Nemet   SmallPtrSet<Value*, 16> ReadOnlyPtr;
1640456327cSAdam Nemet 
1650456327cSAdam Nemet   const DataLayout *DL;
1660456327cSAdam Nemet 
1670456327cSAdam Nemet   /// An alias set tracker to partition the access set by underlying object and
1680456327cSAdam Nemet   //intrinsic property (such as TBAA metadata).
1690456327cSAdam Nemet   AliasSetTracker AST;
1700456327cSAdam Nemet 
1710456327cSAdam Nemet   /// Sets of potentially dependent accesses - members of one set share an
1720456327cSAdam Nemet   /// underlying pointer. The set "CheckDeps" identfies which sets really need a
1730456327cSAdam Nemet   /// dependence check.
1740456327cSAdam Nemet   DepCandidates &DepCands;
1750456327cSAdam Nemet 
1760456327cSAdam Nemet   bool IsRTCheckNeeded;
1770456327cSAdam Nemet };
1780456327cSAdam Nemet 
1790456327cSAdam Nemet } // end anonymous namespace
1800456327cSAdam Nemet 
1810456327cSAdam Nemet /// \brief Check whether a pointer can participate in a runtime bounds check.
1820456327cSAdam Nemet static bool hasComputableBounds(ScalarEvolution *SE, ValueToValueMap &Strides,
1830456327cSAdam Nemet                                 Value *Ptr) {
1840456327cSAdam Nemet   const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
1850456327cSAdam Nemet   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1860456327cSAdam Nemet   if (!AR)
1870456327cSAdam Nemet     return false;
1880456327cSAdam Nemet 
1890456327cSAdam Nemet   return AR->isAffine();
1900456327cSAdam Nemet }
1910456327cSAdam Nemet 
1920456327cSAdam Nemet /// \brief Check the stride of the pointer and ensure that it does not wrap in
1930456327cSAdam Nemet /// the address space.
1940456327cSAdam Nemet static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
1950456327cSAdam Nemet                         const Loop *Lp, ValueToValueMap &StridesMap);
1960456327cSAdam Nemet 
1970456327cSAdam Nemet bool AccessAnalysis::canCheckPtrAtRT(
198*30f16e16SAdam Nemet     LoopAccessInfo::RuntimePointerCheck &RtCheck,
1990456327cSAdam Nemet     unsigned &NumComparisons, ScalarEvolution *SE, Loop *TheLoop,
2000456327cSAdam Nemet     ValueToValueMap &StridesMap, bool ShouldCheckStride) {
2010456327cSAdam Nemet   // Find pointers with computable bounds. We are going to use this information
2020456327cSAdam Nemet   // to place a runtime bound check.
2030456327cSAdam Nemet   bool CanDoRT = true;
2040456327cSAdam Nemet 
2050456327cSAdam Nemet   bool IsDepCheckNeeded = isDependencyCheckNeeded();
2060456327cSAdam Nemet   NumComparisons = 0;
2070456327cSAdam Nemet 
2080456327cSAdam Nemet   // We assign a consecutive id to access from different alias sets.
2090456327cSAdam Nemet   // Accesses between different groups doesn't need to be checked.
2100456327cSAdam Nemet   unsigned ASId = 1;
2110456327cSAdam Nemet   for (auto &AS : AST) {
2120456327cSAdam Nemet     unsigned NumReadPtrChecks = 0;
2130456327cSAdam Nemet     unsigned NumWritePtrChecks = 0;
2140456327cSAdam Nemet 
2150456327cSAdam Nemet     // We assign consecutive id to access from different dependence sets.
2160456327cSAdam Nemet     // Accesses within the same set don't need a runtime check.
2170456327cSAdam Nemet     unsigned RunningDepId = 1;
2180456327cSAdam Nemet     DenseMap<Value *, unsigned> DepSetId;
2190456327cSAdam Nemet 
2200456327cSAdam Nemet     for (auto A : AS) {
2210456327cSAdam Nemet       Value *Ptr = A.getValue();
2220456327cSAdam Nemet       bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
2230456327cSAdam Nemet       MemAccessInfo Access(Ptr, IsWrite);
2240456327cSAdam Nemet 
2250456327cSAdam Nemet       if (IsWrite)
2260456327cSAdam Nemet         ++NumWritePtrChecks;
2270456327cSAdam Nemet       else
2280456327cSAdam Nemet         ++NumReadPtrChecks;
2290456327cSAdam Nemet 
2300456327cSAdam Nemet       if (hasComputableBounds(SE, StridesMap, Ptr) &&
2310456327cSAdam Nemet           // When we run after a failing dependency check we have to make sure we
2320456327cSAdam Nemet           // don't have wrapping pointers.
2330456327cSAdam Nemet           (!ShouldCheckStride ||
2340456327cSAdam Nemet            isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
2350456327cSAdam Nemet         // The id of the dependence set.
2360456327cSAdam Nemet         unsigned DepId;
2370456327cSAdam Nemet 
2380456327cSAdam Nemet         if (IsDepCheckNeeded) {
2390456327cSAdam Nemet           Value *Leader = DepCands.getLeaderValue(Access).getPointer();
2400456327cSAdam Nemet           unsigned &LeaderId = DepSetId[Leader];
2410456327cSAdam Nemet           if (!LeaderId)
2420456327cSAdam Nemet             LeaderId = RunningDepId++;
2430456327cSAdam Nemet           DepId = LeaderId;
2440456327cSAdam Nemet         } else
2450456327cSAdam Nemet           // Each access has its own dependence set.
2460456327cSAdam Nemet           DepId = RunningDepId++;
2470456327cSAdam Nemet 
2480456327cSAdam Nemet         RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
2490456327cSAdam Nemet 
2500456327cSAdam Nemet         DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
2510456327cSAdam Nemet       } else {
2520456327cSAdam Nemet         CanDoRT = false;
2530456327cSAdam Nemet       }
2540456327cSAdam Nemet     }
2550456327cSAdam Nemet 
2560456327cSAdam Nemet     if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
2570456327cSAdam Nemet       NumComparisons += 0; // Only one dependence set.
2580456327cSAdam Nemet     else {
2590456327cSAdam Nemet       NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
2600456327cSAdam Nemet                                               NumWritePtrChecks - 1));
2610456327cSAdam Nemet     }
2620456327cSAdam Nemet 
2630456327cSAdam Nemet     ++ASId;
2640456327cSAdam Nemet   }
2650456327cSAdam Nemet 
2660456327cSAdam Nemet   // If the pointers that we would use for the bounds comparison have different
2670456327cSAdam Nemet   // address spaces, assume the values aren't directly comparable, so we can't
2680456327cSAdam Nemet   // use them for the runtime check. We also have to assume they could
2690456327cSAdam Nemet   // overlap. In the future there should be metadata for whether address spaces
2700456327cSAdam Nemet   // are disjoint.
2710456327cSAdam Nemet   unsigned NumPointers = RtCheck.Pointers.size();
2720456327cSAdam Nemet   for (unsigned i = 0; i < NumPointers; ++i) {
2730456327cSAdam Nemet     for (unsigned j = i + 1; j < NumPointers; ++j) {
2740456327cSAdam Nemet       // Only need to check pointers between two different dependency sets.
2750456327cSAdam Nemet       if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
2760456327cSAdam Nemet        continue;
2770456327cSAdam Nemet       // Only need to check pointers in the same alias set.
2780456327cSAdam Nemet       if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
2790456327cSAdam Nemet         continue;
2800456327cSAdam Nemet 
2810456327cSAdam Nemet       Value *PtrI = RtCheck.Pointers[i];
2820456327cSAdam Nemet       Value *PtrJ = RtCheck.Pointers[j];
2830456327cSAdam Nemet 
2840456327cSAdam Nemet       unsigned ASi = PtrI->getType()->getPointerAddressSpace();
2850456327cSAdam Nemet       unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
2860456327cSAdam Nemet       if (ASi != ASj) {
2870456327cSAdam Nemet         DEBUG(dbgs() << "LV: Runtime check would require comparison between"
2880456327cSAdam Nemet                        " different address spaces\n");
2890456327cSAdam Nemet         return false;
2900456327cSAdam Nemet       }
2910456327cSAdam Nemet     }
2920456327cSAdam Nemet   }
2930456327cSAdam Nemet 
2940456327cSAdam Nemet   return CanDoRT;
2950456327cSAdam Nemet }
2960456327cSAdam Nemet 
2970456327cSAdam Nemet void AccessAnalysis::processMemAccesses() {
2980456327cSAdam Nemet   // We process the set twice: first we process read-write pointers, last we
2990456327cSAdam Nemet   // process read-only pointers. This allows us to skip dependence tests for
3000456327cSAdam Nemet   // read-only pointers.
3010456327cSAdam Nemet 
3020456327cSAdam Nemet   DEBUG(dbgs() << "LV: Processing memory accesses...\n");
3030456327cSAdam Nemet   DEBUG(dbgs() << "  AST: "; AST.dump());
3040456327cSAdam Nemet   DEBUG(dbgs() << "LV:   Accesses:\n");
3050456327cSAdam Nemet   DEBUG({
3060456327cSAdam Nemet     for (auto A : Accesses)
3070456327cSAdam Nemet       dbgs() << "\t" << *A.getPointer() << " (" <<
3080456327cSAdam Nemet                 (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
3090456327cSAdam Nemet                                          "read-only" : "read")) << ")\n";
3100456327cSAdam Nemet   });
3110456327cSAdam Nemet 
3120456327cSAdam Nemet   // The AliasSetTracker has nicely partitioned our pointers by metadata
3130456327cSAdam Nemet   // compatibility and potential for underlying-object overlap. As a result, we
3140456327cSAdam Nemet   // only need to check for potential pointer dependencies within each alias
3150456327cSAdam Nemet   // set.
3160456327cSAdam Nemet   for (auto &AS : AST) {
3170456327cSAdam Nemet     // Note that both the alias-set tracker and the alias sets themselves used
3180456327cSAdam Nemet     // linked lists internally and so the iteration order here is deterministic
3190456327cSAdam Nemet     // (matching the original instruction order within each set).
3200456327cSAdam Nemet 
3210456327cSAdam Nemet     bool SetHasWrite = false;
3220456327cSAdam Nemet 
3230456327cSAdam Nemet     // Map of pointers to last access encountered.
3240456327cSAdam Nemet     typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
3250456327cSAdam Nemet     UnderlyingObjToAccessMap ObjToLastAccess;
3260456327cSAdam Nemet 
3270456327cSAdam Nemet     // Set of access to check after all writes have been processed.
3280456327cSAdam Nemet     PtrAccessSet DeferredAccesses;
3290456327cSAdam Nemet 
3300456327cSAdam Nemet     // Iterate over each alias set twice, once to process read/write pointers,
3310456327cSAdam Nemet     // and then to process read-only pointers.
3320456327cSAdam Nemet     for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
3330456327cSAdam Nemet       bool UseDeferred = SetIteration > 0;
3340456327cSAdam Nemet       PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
3350456327cSAdam Nemet 
3360456327cSAdam Nemet       for (auto AV : AS) {
3370456327cSAdam Nemet         Value *Ptr = AV.getValue();
3380456327cSAdam Nemet 
3390456327cSAdam Nemet         // For a single memory access in AliasSetTracker, Accesses may contain
3400456327cSAdam Nemet         // both read and write, and they both need to be handled for CheckDeps.
3410456327cSAdam Nemet         for (auto AC : S) {
3420456327cSAdam Nemet           if (AC.getPointer() != Ptr)
3430456327cSAdam Nemet             continue;
3440456327cSAdam Nemet 
3450456327cSAdam Nemet           bool IsWrite = AC.getInt();
3460456327cSAdam Nemet 
3470456327cSAdam Nemet           // If we're using the deferred access set, then it contains only
3480456327cSAdam Nemet           // reads.
3490456327cSAdam Nemet           bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
3500456327cSAdam Nemet           if (UseDeferred && !IsReadOnlyPtr)
3510456327cSAdam Nemet             continue;
3520456327cSAdam Nemet           // Otherwise, the pointer must be in the PtrAccessSet, either as a
3530456327cSAdam Nemet           // read or a write.
3540456327cSAdam Nemet           assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
3550456327cSAdam Nemet                   S.count(MemAccessInfo(Ptr, false))) &&
3560456327cSAdam Nemet                  "Alias-set pointer not in the access set?");
3570456327cSAdam Nemet 
3580456327cSAdam Nemet           MemAccessInfo Access(Ptr, IsWrite);
3590456327cSAdam Nemet           DepCands.insert(Access);
3600456327cSAdam Nemet 
3610456327cSAdam Nemet           // Memorize read-only pointers for later processing and skip them in
3620456327cSAdam Nemet           // the first round (they need to be checked after we have seen all
3630456327cSAdam Nemet           // write pointers). Note: we also mark pointer that are not
3640456327cSAdam Nemet           // consecutive as "read-only" pointers (so that we check
3650456327cSAdam Nemet           // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
3660456327cSAdam Nemet           if (!UseDeferred && IsReadOnlyPtr) {
3670456327cSAdam Nemet             DeferredAccesses.insert(Access);
3680456327cSAdam Nemet             continue;
3690456327cSAdam Nemet           }
3700456327cSAdam Nemet 
3710456327cSAdam Nemet           // If this is a write - check other reads and writes for conflicts. If
3720456327cSAdam Nemet           // this is a read only check other writes for conflicts (but only if
3730456327cSAdam Nemet           // there is no other write to the ptr - this is an optimization to
3740456327cSAdam Nemet           // catch "a[i] = a[i] + " without having to do a dependence check).
3750456327cSAdam Nemet           if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
3760456327cSAdam Nemet             CheckDeps.insert(Access);
3770456327cSAdam Nemet             IsRTCheckNeeded = true;
3780456327cSAdam Nemet           }
3790456327cSAdam Nemet 
3800456327cSAdam Nemet           if (IsWrite)
3810456327cSAdam Nemet             SetHasWrite = true;
3820456327cSAdam Nemet 
3830456327cSAdam Nemet           // Create sets of pointers connected by a shared alias set and
3840456327cSAdam Nemet           // underlying object.
3850456327cSAdam Nemet           typedef SmallVector<Value *, 16> ValueVector;
3860456327cSAdam Nemet           ValueVector TempObjects;
3870456327cSAdam Nemet           GetUnderlyingObjects(Ptr, TempObjects, DL);
3880456327cSAdam Nemet           for (Value *UnderlyingObj : TempObjects) {
3890456327cSAdam Nemet             UnderlyingObjToAccessMap::iterator Prev =
3900456327cSAdam Nemet                 ObjToLastAccess.find(UnderlyingObj);
3910456327cSAdam Nemet             if (Prev != ObjToLastAccess.end())
3920456327cSAdam Nemet               DepCands.unionSets(Access, Prev->second);
3930456327cSAdam Nemet 
3940456327cSAdam Nemet             ObjToLastAccess[UnderlyingObj] = Access;
3950456327cSAdam Nemet           }
3960456327cSAdam Nemet         }
3970456327cSAdam Nemet       }
3980456327cSAdam Nemet     }
3990456327cSAdam Nemet   }
4000456327cSAdam Nemet }
4010456327cSAdam Nemet 
4020456327cSAdam Nemet namespace {
4030456327cSAdam Nemet /// \brief Checks memory dependences among accesses to the same underlying
4040456327cSAdam Nemet /// object to determine whether there vectorization is legal or not (and at
4050456327cSAdam Nemet /// which vectorization factor).
4060456327cSAdam Nemet ///
4070456327cSAdam Nemet /// This class works under the assumption that we already checked that memory
4080456327cSAdam Nemet /// locations with different underlying pointers are "must-not alias".
4090456327cSAdam Nemet /// We use the ScalarEvolution framework to symbolically evalutate access
4100456327cSAdam Nemet /// functions pairs. Since we currently don't restructure the loop we can rely
4110456327cSAdam Nemet /// on the program order of memory accesses to determine their safety.
4120456327cSAdam Nemet /// At the moment we will only deem accesses as safe for:
4130456327cSAdam Nemet ///  * A negative constant distance assuming program order.
4140456327cSAdam Nemet ///
4150456327cSAdam Nemet ///      Safe: tmp = a[i + 1];     OR     a[i + 1] = x;
4160456327cSAdam Nemet ///            a[i] = tmp;                y = a[i];
4170456327cSAdam Nemet ///
4180456327cSAdam Nemet ///   The latter case is safe because later checks guarantuee that there can't
4190456327cSAdam Nemet ///   be a cycle through a phi node (that is, we check that "x" and "y" is not
4200456327cSAdam Nemet ///   the same variable: a header phi can only be an induction or a reduction, a
4210456327cSAdam Nemet ///   reduction can't have a memory sink, an induction can't have a memory
4220456327cSAdam Nemet ///   source). This is important and must not be violated (or we have to
4230456327cSAdam Nemet ///   resort to checking for cycles through memory).
4240456327cSAdam Nemet ///
4250456327cSAdam Nemet ///  * A positive constant distance assuming program order that is bigger
4260456327cSAdam Nemet ///    than the biggest memory access.
4270456327cSAdam Nemet ///
4280456327cSAdam Nemet ///     tmp = a[i]        OR              b[i] = x
4290456327cSAdam Nemet ///     a[i+2] = tmp                      y = b[i+2];
4300456327cSAdam Nemet ///
4310456327cSAdam Nemet ///     Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
4320456327cSAdam Nemet ///
4330456327cSAdam Nemet ///  * Zero distances and all accesses have the same size.
4340456327cSAdam Nemet ///
4350456327cSAdam Nemet class MemoryDepChecker {
4360456327cSAdam Nemet public:
4370456327cSAdam Nemet   typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
4380456327cSAdam Nemet   typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
4390456327cSAdam Nemet 
4400456327cSAdam Nemet   MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L,
441*30f16e16SAdam Nemet                    const LoopAccessInfo::VectorizerParams &VectParams)
4420456327cSAdam Nemet       : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
4430456327cSAdam Nemet         ShouldRetryWithRuntimeCheck(false), VectParams(VectParams) {}
4440456327cSAdam Nemet 
4450456327cSAdam Nemet   /// \brief Register the location (instructions are given increasing numbers)
4460456327cSAdam Nemet   /// of a write access.
4470456327cSAdam Nemet   void addAccess(StoreInst *SI) {
4480456327cSAdam Nemet     Value *Ptr = SI->getPointerOperand();
4490456327cSAdam Nemet     Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
4500456327cSAdam Nemet     InstMap.push_back(SI);
4510456327cSAdam Nemet     ++AccessIdx;
4520456327cSAdam Nemet   }
4530456327cSAdam Nemet 
4540456327cSAdam Nemet   /// \brief Register the location (instructions are given increasing numbers)
4550456327cSAdam Nemet   /// of a write access.
4560456327cSAdam Nemet   void addAccess(LoadInst *LI) {
4570456327cSAdam Nemet     Value *Ptr = LI->getPointerOperand();
4580456327cSAdam Nemet     Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
4590456327cSAdam Nemet     InstMap.push_back(LI);
4600456327cSAdam Nemet     ++AccessIdx;
4610456327cSAdam Nemet   }
4620456327cSAdam Nemet 
4630456327cSAdam Nemet   /// \brief Check whether the dependencies between the accesses are safe.
4640456327cSAdam Nemet   ///
4650456327cSAdam Nemet   /// Only checks sets with elements in \p CheckDeps.
4660456327cSAdam Nemet   bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
4670456327cSAdam Nemet                    MemAccessInfoSet &CheckDeps, ValueToValueMap &Strides);
4680456327cSAdam Nemet 
4690456327cSAdam Nemet   /// \brief The maximum number of bytes of a vector register we can vectorize
4700456327cSAdam Nemet   /// the accesses safely with.
4710456327cSAdam Nemet   unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
4720456327cSAdam Nemet 
4730456327cSAdam Nemet   /// \brief In same cases when the dependency check fails we can still
4740456327cSAdam Nemet   /// vectorize the loop with a dynamic array access check.
4750456327cSAdam Nemet   bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
4760456327cSAdam Nemet 
4770456327cSAdam Nemet private:
4780456327cSAdam Nemet   ScalarEvolution *SE;
4790456327cSAdam Nemet   const DataLayout *DL;
4800456327cSAdam Nemet   const Loop *InnermostLoop;
4810456327cSAdam Nemet 
4820456327cSAdam Nemet   /// \brief Maps access locations (ptr, read/write) to program order.
4830456327cSAdam Nemet   DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
4840456327cSAdam Nemet 
4850456327cSAdam Nemet   /// \brief Memory access instructions in program order.
4860456327cSAdam Nemet   SmallVector<Instruction *, 16> InstMap;
4870456327cSAdam Nemet 
4880456327cSAdam Nemet   /// \brief The program order index to be used for the next instruction.
4890456327cSAdam Nemet   unsigned AccessIdx;
4900456327cSAdam Nemet 
4910456327cSAdam Nemet   // We can access this many bytes in parallel safely.
4920456327cSAdam Nemet   unsigned MaxSafeDepDistBytes;
4930456327cSAdam Nemet 
4940456327cSAdam Nemet   /// \brief If we see a non-constant dependence distance we can still try to
4950456327cSAdam Nemet   /// vectorize this loop with runtime checks.
4960456327cSAdam Nemet   bool ShouldRetryWithRuntimeCheck;
4970456327cSAdam Nemet 
4980456327cSAdam Nemet   /// \brief Vectorizer parameters used by the analysis.
499*30f16e16SAdam Nemet   LoopAccessInfo::VectorizerParams VectParams;
5000456327cSAdam Nemet 
5010456327cSAdam Nemet   /// \brief Check whether there is a plausible dependence between the two
5020456327cSAdam Nemet   /// accesses.
5030456327cSAdam Nemet   ///
5040456327cSAdam Nemet   /// Access \p A must happen before \p B in program order. The two indices
5050456327cSAdam Nemet   /// identify the index into the program order map.
5060456327cSAdam Nemet   ///
5070456327cSAdam Nemet   /// This function checks  whether there is a plausible dependence (or the
5080456327cSAdam Nemet   /// absence of such can't be proved) between the two accesses. If there is a
5090456327cSAdam Nemet   /// plausible dependence but the dependence distance is bigger than one
5100456327cSAdam Nemet   /// element access it records this distance in \p MaxSafeDepDistBytes (if this
5110456327cSAdam Nemet   /// distance is smaller than any other distance encountered so far).
5120456327cSAdam Nemet   /// Otherwise, this function returns true signaling a possible dependence.
5130456327cSAdam Nemet   bool isDependent(const MemAccessInfo &A, unsigned AIdx,
5140456327cSAdam Nemet                    const MemAccessInfo &B, unsigned BIdx,
5150456327cSAdam Nemet                    ValueToValueMap &Strides);
5160456327cSAdam Nemet 
5170456327cSAdam Nemet   /// \brief Check whether the data dependence could prevent store-load
5180456327cSAdam Nemet   /// forwarding.
5190456327cSAdam Nemet   bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
5200456327cSAdam Nemet };
5210456327cSAdam Nemet 
5220456327cSAdam Nemet } // end anonymous namespace
5230456327cSAdam Nemet 
5240456327cSAdam Nemet static bool isInBoundsGep(Value *Ptr) {
5250456327cSAdam Nemet   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
5260456327cSAdam Nemet     return GEP->isInBounds();
5270456327cSAdam Nemet   return false;
5280456327cSAdam Nemet }
5290456327cSAdam Nemet 
5300456327cSAdam Nemet /// \brief Check whether the access through \p Ptr has a constant stride.
5310456327cSAdam Nemet static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
5320456327cSAdam Nemet                         const Loop *Lp, ValueToValueMap &StridesMap) {
5330456327cSAdam Nemet   const Type *Ty = Ptr->getType();
5340456327cSAdam Nemet   assert(Ty->isPointerTy() && "Unexpected non-ptr");
5350456327cSAdam Nemet 
5360456327cSAdam Nemet   // Make sure that the pointer does not point to aggregate types.
5370456327cSAdam Nemet   const PointerType *PtrTy = cast<PointerType>(Ty);
5380456327cSAdam Nemet   if (PtrTy->getElementType()->isAggregateType()) {
5390456327cSAdam Nemet     DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
5400456327cSAdam Nemet           "\n");
5410456327cSAdam Nemet     return 0;
5420456327cSAdam Nemet   }
5430456327cSAdam Nemet 
5440456327cSAdam Nemet   const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
5450456327cSAdam Nemet 
5460456327cSAdam Nemet   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
5470456327cSAdam Nemet   if (!AR) {
5480456327cSAdam Nemet     DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
5490456327cSAdam Nemet           << *Ptr << " SCEV: " << *PtrScev << "\n");
5500456327cSAdam Nemet     return 0;
5510456327cSAdam Nemet   }
5520456327cSAdam Nemet 
5530456327cSAdam Nemet   // The accesss function must stride over the innermost loop.
5540456327cSAdam Nemet   if (Lp != AR->getLoop()) {
5550456327cSAdam Nemet     DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
5560456327cSAdam Nemet           *Ptr << " SCEV: " << *PtrScev << "\n");
5570456327cSAdam Nemet   }
5580456327cSAdam Nemet 
5590456327cSAdam Nemet   // The address calculation must not wrap. Otherwise, a dependence could be
5600456327cSAdam Nemet   // inverted.
5610456327cSAdam Nemet   // An inbounds getelementptr that is a AddRec with a unit stride
5620456327cSAdam Nemet   // cannot wrap per definition. The unit stride requirement is checked later.
5630456327cSAdam Nemet   // An getelementptr without an inbounds attribute and unit stride would have
5640456327cSAdam Nemet   // to access the pointer value "0" which is undefined behavior in address
5650456327cSAdam Nemet   // space 0, therefore we can also vectorize this case.
5660456327cSAdam Nemet   bool IsInBoundsGEP = isInBoundsGep(Ptr);
5670456327cSAdam Nemet   bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
5680456327cSAdam Nemet   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
5690456327cSAdam Nemet   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
5700456327cSAdam Nemet     DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
5710456327cSAdam Nemet           << *Ptr << " SCEV: " << *PtrScev << "\n");
5720456327cSAdam Nemet     return 0;
5730456327cSAdam Nemet   }
5740456327cSAdam Nemet 
5750456327cSAdam Nemet   // Check the step is constant.
5760456327cSAdam Nemet   const SCEV *Step = AR->getStepRecurrence(*SE);
5770456327cSAdam Nemet 
5780456327cSAdam Nemet   // Calculate the pointer stride and check if it is consecutive.
5790456327cSAdam Nemet   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
5800456327cSAdam Nemet   if (!C) {
5810456327cSAdam Nemet     DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
5820456327cSAdam Nemet           " SCEV: " << *PtrScev << "\n");
5830456327cSAdam Nemet     return 0;
5840456327cSAdam Nemet   }
5850456327cSAdam Nemet 
5860456327cSAdam Nemet   int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
5870456327cSAdam Nemet   const APInt &APStepVal = C->getValue()->getValue();
5880456327cSAdam Nemet 
5890456327cSAdam Nemet   // Huge step value - give up.
5900456327cSAdam Nemet   if (APStepVal.getBitWidth() > 64)
5910456327cSAdam Nemet     return 0;
5920456327cSAdam Nemet 
5930456327cSAdam Nemet   int64_t StepVal = APStepVal.getSExtValue();
5940456327cSAdam Nemet 
5950456327cSAdam Nemet   // Strided access.
5960456327cSAdam Nemet   int64_t Stride = StepVal / Size;
5970456327cSAdam Nemet   int64_t Rem = StepVal % Size;
5980456327cSAdam Nemet   if (Rem)
5990456327cSAdam Nemet     return 0;
6000456327cSAdam Nemet 
6010456327cSAdam Nemet   // If the SCEV could wrap but we have an inbounds gep with a unit stride we
6020456327cSAdam Nemet   // know we can't "wrap around the address space". In case of address space
6030456327cSAdam Nemet   // zero we know that this won't happen without triggering undefined behavior.
6040456327cSAdam Nemet   if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
6050456327cSAdam Nemet       Stride != 1 && Stride != -1)
6060456327cSAdam Nemet     return 0;
6070456327cSAdam Nemet 
6080456327cSAdam Nemet   return Stride;
6090456327cSAdam Nemet }
6100456327cSAdam Nemet 
6110456327cSAdam Nemet bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
6120456327cSAdam Nemet                                                     unsigned TypeByteSize) {
6130456327cSAdam Nemet   // If loads occur at a distance that is not a multiple of a feasible vector
6140456327cSAdam Nemet   // factor store-load forwarding does not take place.
6150456327cSAdam Nemet   // Positive dependences might cause troubles because vectorizing them might
6160456327cSAdam Nemet   // prevent store-load forwarding making vectorized code run a lot slower.
6170456327cSAdam Nemet   //   a[i] = a[i-3] ^ a[i-8];
6180456327cSAdam Nemet   //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
6190456327cSAdam Nemet   //   hence on your typical architecture store-load forwarding does not take
6200456327cSAdam Nemet   //   place. Vectorizing in such cases does not make sense.
6210456327cSAdam Nemet   // Store-load forwarding distance.
6220456327cSAdam Nemet   const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
6230456327cSAdam Nemet   // Maximum vector factor.
6240456327cSAdam Nemet   unsigned MaxVFWithoutSLForwardIssues = VectParams.MaxVectorWidth*TypeByteSize;
6250456327cSAdam Nemet   if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
6260456327cSAdam Nemet     MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
6270456327cSAdam Nemet 
6280456327cSAdam Nemet   for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
6290456327cSAdam Nemet        vf *= 2) {
6300456327cSAdam Nemet     if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
6310456327cSAdam Nemet       MaxVFWithoutSLForwardIssues = (vf >>=1);
6320456327cSAdam Nemet       break;
6330456327cSAdam Nemet     }
6340456327cSAdam Nemet   }
6350456327cSAdam Nemet 
6360456327cSAdam Nemet   if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
6370456327cSAdam Nemet     DEBUG(dbgs() << "LV: Distance " << Distance <<
6380456327cSAdam Nemet           " that could cause a store-load forwarding conflict\n");
6390456327cSAdam Nemet     return true;
6400456327cSAdam Nemet   }
6410456327cSAdam Nemet 
6420456327cSAdam Nemet   if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
6430456327cSAdam Nemet       MaxVFWithoutSLForwardIssues != VectParams.MaxVectorWidth*TypeByteSize)
6440456327cSAdam Nemet     MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
6450456327cSAdam Nemet   return false;
6460456327cSAdam Nemet }
6470456327cSAdam Nemet 
6480456327cSAdam Nemet bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
6490456327cSAdam Nemet                                    const MemAccessInfo &B, unsigned BIdx,
6500456327cSAdam Nemet                                    ValueToValueMap &Strides) {
6510456327cSAdam Nemet   assert (AIdx < BIdx && "Must pass arguments in program order");
6520456327cSAdam Nemet 
6530456327cSAdam Nemet   Value *APtr = A.getPointer();
6540456327cSAdam Nemet   Value *BPtr = B.getPointer();
6550456327cSAdam Nemet   bool AIsWrite = A.getInt();
6560456327cSAdam Nemet   bool BIsWrite = B.getInt();
6570456327cSAdam Nemet 
6580456327cSAdam Nemet   // Two reads are independent.
6590456327cSAdam Nemet   if (!AIsWrite && !BIsWrite)
6600456327cSAdam Nemet     return false;
6610456327cSAdam Nemet 
6620456327cSAdam Nemet   // We cannot check pointers in different address spaces.
6630456327cSAdam Nemet   if (APtr->getType()->getPointerAddressSpace() !=
6640456327cSAdam Nemet       BPtr->getType()->getPointerAddressSpace())
6650456327cSAdam Nemet     return true;
6660456327cSAdam Nemet 
6670456327cSAdam Nemet   const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
6680456327cSAdam Nemet   const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
6690456327cSAdam Nemet 
6700456327cSAdam Nemet   int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides);
6710456327cSAdam Nemet   int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides);
6720456327cSAdam Nemet 
6730456327cSAdam Nemet   const SCEV *Src = AScev;
6740456327cSAdam Nemet   const SCEV *Sink = BScev;
6750456327cSAdam Nemet 
6760456327cSAdam Nemet   // If the induction step is negative we have to invert source and sink of the
6770456327cSAdam Nemet   // dependence.
6780456327cSAdam Nemet   if (StrideAPtr < 0) {
6790456327cSAdam Nemet     //Src = BScev;
6800456327cSAdam Nemet     //Sink = AScev;
6810456327cSAdam Nemet     std::swap(APtr, BPtr);
6820456327cSAdam Nemet     std::swap(Src, Sink);
6830456327cSAdam Nemet     std::swap(AIsWrite, BIsWrite);
6840456327cSAdam Nemet     std::swap(AIdx, BIdx);
6850456327cSAdam Nemet     std::swap(StrideAPtr, StrideBPtr);
6860456327cSAdam Nemet   }
6870456327cSAdam Nemet 
6880456327cSAdam Nemet   const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
6890456327cSAdam Nemet 
6900456327cSAdam Nemet   DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
6910456327cSAdam Nemet         << "(Induction step: " << StrideAPtr <<  ")\n");
6920456327cSAdam Nemet   DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
6930456327cSAdam Nemet         << *InstMap[BIdx] << ": " << *Dist << "\n");
6940456327cSAdam Nemet 
6950456327cSAdam Nemet   // Need consecutive accesses. We don't want to vectorize
6960456327cSAdam Nemet   // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
6970456327cSAdam Nemet   // the address space.
6980456327cSAdam Nemet   if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
6990456327cSAdam Nemet     DEBUG(dbgs() << "Non-consecutive pointer access\n");
7000456327cSAdam Nemet     return true;
7010456327cSAdam Nemet   }
7020456327cSAdam Nemet 
7030456327cSAdam Nemet   const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
7040456327cSAdam Nemet   if (!C) {
7050456327cSAdam Nemet     DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n");
7060456327cSAdam Nemet     ShouldRetryWithRuntimeCheck = true;
7070456327cSAdam Nemet     return true;
7080456327cSAdam Nemet   }
7090456327cSAdam Nemet 
7100456327cSAdam Nemet   Type *ATy = APtr->getType()->getPointerElementType();
7110456327cSAdam Nemet   Type *BTy = BPtr->getType()->getPointerElementType();
7120456327cSAdam Nemet   unsigned TypeByteSize = DL->getTypeAllocSize(ATy);
7130456327cSAdam Nemet 
7140456327cSAdam Nemet   // Negative distances are not plausible dependencies.
7150456327cSAdam Nemet   const APInt &Val = C->getValue()->getValue();
7160456327cSAdam Nemet   if (Val.isNegative()) {
7170456327cSAdam Nemet     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
7180456327cSAdam Nemet     if (IsTrueDataDependence &&
7190456327cSAdam Nemet         (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
7200456327cSAdam Nemet          ATy != BTy))
7210456327cSAdam Nemet       return true;
7220456327cSAdam Nemet 
7230456327cSAdam Nemet     DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n");
7240456327cSAdam Nemet     return false;
7250456327cSAdam Nemet   }
7260456327cSAdam Nemet 
7270456327cSAdam Nemet   // Write to the same location with the same size.
7280456327cSAdam Nemet   // Could be improved to assert type sizes are the same (i32 == float, etc).
7290456327cSAdam Nemet   if (Val == 0) {
7300456327cSAdam Nemet     if (ATy == BTy)
7310456327cSAdam Nemet       return false;
7320456327cSAdam Nemet     DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
7330456327cSAdam Nemet     return true;
7340456327cSAdam Nemet   }
7350456327cSAdam Nemet 
7360456327cSAdam Nemet   assert(Val.isStrictlyPositive() && "Expect a positive value");
7370456327cSAdam Nemet 
7380456327cSAdam Nemet   // Positive distance bigger than max vectorization factor.
7390456327cSAdam Nemet   if (ATy != BTy) {
7400456327cSAdam Nemet     DEBUG(dbgs() <<
7410456327cSAdam Nemet           "LV: ReadWrite-Write positive dependency with different types\n");
7420456327cSAdam Nemet     return false;
7430456327cSAdam Nemet   }
7440456327cSAdam Nemet 
7450456327cSAdam Nemet   unsigned Distance = (unsigned) Val.getZExtValue();
7460456327cSAdam Nemet 
7470456327cSAdam Nemet   // Bail out early if passed-in parameters make vectorization not feasible.
7480456327cSAdam Nemet   unsigned ForcedFactor = (VectParams.VectorizationFactor ?
7490456327cSAdam Nemet                            VectParams.VectorizationFactor : 1);
7500456327cSAdam Nemet   unsigned ForcedUnroll = (VectParams.VectorizationInterleave ?
7510456327cSAdam Nemet                            VectParams.VectorizationInterleave : 1);
7520456327cSAdam Nemet 
7530456327cSAdam Nemet   // The distance must be bigger than the size needed for a vectorized version
7540456327cSAdam Nemet   // of the operation and the size of the vectorized operation must not be
7550456327cSAdam Nemet   // bigger than the currrent maximum size.
7560456327cSAdam Nemet   if (Distance < 2*TypeByteSize ||
7570456327cSAdam Nemet       2*TypeByteSize > MaxSafeDepDistBytes ||
7580456327cSAdam Nemet       Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
7590456327cSAdam Nemet     DEBUG(dbgs() << "LV: Failure because of Positive distance "
7600456327cSAdam Nemet         << Val.getSExtValue() << '\n');
7610456327cSAdam Nemet     return true;
7620456327cSAdam Nemet   }
7630456327cSAdam Nemet 
7640456327cSAdam Nemet   MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
7650456327cSAdam Nemet     Distance : MaxSafeDepDistBytes;
7660456327cSAdam Nemet 
7670456327cSAdam Nemet   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
7680456327cSAdam Nemet   if (IsTrueDataDependence &&
7690456327cSAdam Nemet       couldPreventStoreLoadForward(Distance, TypeByteSize))
7700456327cSAdam Nemet      return true;
7710456327cSAdam Nemet 
7720456327cSAdam Nemet   DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
7730456327cSAdam Nemet         " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
7740456327cSAdam Nemet 
7750456327cSAdam Nemet   return false;
7760456327cSAdam Nemet }
7770456327cSAdam Nemet 
7780456327cSAdam Nemet bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
7790456327cSAdam Nemet                                    MemAccessInfoSet &CheckDeps,
7800456327cSAdam Nemet                                    ValueToValueMap &Strides) {
7810456327cSAdam Nemet 
7820456327cSAdam Nemet   MaxSafeDepDistBytes = -1U;
7830456327cSAdam Nemet   while (!CheckDeps.empty()) {
7840456327cSAdam Nemet     MemAccessInfo CurAccess = *CheckDeps.begin();
7850456327cSAdam Nemet 
7860456327cSAdam Nemet     // Get the relevant memory access set.
7870456327cSAdam Nemet     EquivalenceClasses<MemAccessInfo>::iterator I =
7880456327cSAdam Nemet       AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
7890456327cSAdam Nemet 
7900456327cSAdam Nemet     // Check accesses within this set.
7910456327cSAdam Nemet     EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
7920456327cSAdam Nemet     AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
7930456327cSAdam Nemet 
7940456327cSAdam Nemet     // Check every access pair.
7950456327cSAdam Nemet     while (AI != AE) {
7960456327cSAdam Nemet       CheckDeps.erase(*AI);
7970456327cSAdam Nemet       EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
7980456327cSAdam Nemet       while (OI != AE) {
7990456327cSAdam Nemet         // Check every accessing instruction pair in program order.
8000456327cSAdam Nemet         for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
8010456327cSAdam Nemet              I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
8020456327cSAdam Nemet           for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
8030456327cSAdam Nemet                I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
8040456327cSAdam Nemet             if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides))
8050456327cSAdam Nemet               return false;
8060456327cSAdam Nemet             if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides))
8070456327cSAdam Nemet               return false;
8080456327cSAdam Nemet           }
8090456327cSAdam Nemet         ++OI;
8100456327cSAdam Nemet       }
8110456327cSAdam Nemet       AI++;
8120456327cSAdam Nemet     }
8130456327cSAdam Nemet   }
8140456327cSAdam Nemet   return true;
8150456327cSAdam Nemet }
8160456327cSAdam Nemet 
817*30f16e16SAdam Nemet bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) {
8180456327cSAdam Nemet 
8190456327cSAdam Nemet   typedef SmallVector<Value*, 16> ValueVector;
8200456327cSAdam Nemet   typedef SmallPtrSet<Value*, 16> ValueSet;
8210456327cSAdam Nemet 
8220456327cSAdam Nemet   // Holds the Load and Store *instructions*.
8230456327cSAdam Nemet   ValueVector Loads;
8240456327cSAdam Nemet   ValueVector Stores;
8250456327cSAdam Nemet 
8260456327cSAdam Nemet   // Holds all the different accesses in the loop.
8270456327cSAdam Nemet   unsigned NumReads = 0;
8280456327cSAdam Nemet   unsigned NumReadWrites = 0;
8290456327cSAdam Nemet 
8300456327cSAdam Nemet   PtrRtCheck.Pointers.clear();
8310456327cSAdam Nemet   PtrRtCheck.Need = false;
8320456327cSAdam Nemet 
8330456327cSAdam Nemet   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
8340456327cSAdam Nemet   MemoryDepChecker DepChecker(SE, DL, TheLoop, VectParams);
8350456327cSAdam Nemet 
8360456327cSAdam Nemet   // For each block.
8370456327cSAdam Nemet   for (Loop::block_iterator bb = TheLoop->block_begin(),
8380456327cSAdam Nemet        be = TheLoop->block_end(); bb != be; ++bb) {
8390456327cSAdam Nemet 
8400456327cSAdam Nemet     // Scan the BB and collect legal loads and stores.
8410456327cSAdam Nemet     for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
8420456327cSAdam Nemet          ++it) {
8430456327cSAdam Nemet 
8440456327cSAdam Nemet       // If this is a load, save it. If this instruction can read from memory
8450456327cSAdam Nemet       // but is not a load, then we quit. Notice that we don't handle function
8460456327cSAdam Nemet       // calls that read or write.
8470456327cSAdam Nemet       if (it->mayReadFromMemory()) {
8480456327cSAdam Nemet         // Many math library functions read the rounding mode. We will only
8490456327cSAdam Nemet         // vectorize a loop if it contains known function calls that don't set
8500456327cSAdam Nemet         // the flag. Therefore, it is safe to ignore this read from memory.
8510456327cSAdam Nemet         CallInst *Call = dyn_cast<CallInst>(it);
8520456327cSAdam Nemet         if (Call && getIntrinsicIDForCall(Call, TLI))
8530456327cSAdam Nemet           continue;
8540456327cSAdam Nemet 
8550456327cSAdam Nemet         LoadInst *Ld = dyn_cast<LoadInst>(it);
8560456327cSAdam Nemet         if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
8570456327cSAdam Nemet           emitAnalysis(VectorizationReport(Ld)
8580456327cSAdam Nemet                        << "read with atomic ordering or volatile read");
8590456327cSAdam Nemet           DEBUG(dbgs() << "LV: Found a non-simple load.\n");
8600456327cSAdam Nemet           return false;
8610456327cSAdam Nemet         }
8620456327cSAdam Nemet         NumLoads++;
8630456327cSAdam Nemet         Loads.push_back(Ld);
8640456327cSAdam Nemet         DepChecker.addAccess(Ld);
8650456327cSAdam Nemet         continue;
8660456327cSAdam Nemet       }
8670456327cSAdam Nemet 
8680456327cSAdam Nemet       // Save 'store' instructions. Abort if other instructions write to memory.
8690456327cSAdam Nemet       if (it->mayWriteToMemory()) {
8700456327cSAdam Nemet         StoreInst *St = dyn_cast<StoreInst>(it);
8710456327cSAdam Nemet         if (!St) {
8720456327cSAdam Nemet           emitAnalysis(VectorizationReport(it) <<
8730456327cSAdam Nemet                        "instruction cannot be vectorized");
8740456327cSAdam Nemet           return false;
8750456327cSAdam Nemet         }
8760456327cSAdam Nemet         if (!St->isSimple() && !IsAnnotatedParallel) {
8770456327cSAdam Nemet           emitAnalysis(VectorizationReport(St)
8780456327cSAdam Nemet                        << "write with atomic ordering or volatile write");
8790456327cSAdam Nemet           DEBUG(dbgs() << "LV: Found a non-simple store.\n");
8800456327cSAdam Nemet           return false;
8810456327cSAdam Nemet         }
8820456327cSAdam Nemet         NumStores++;
8830456327cSAdam Nemet         Stores.push_back(St);
8840456327cSAdam Nemet         DepChecker.addAccess(St);
8850456327cSAdam Nemet       }
8860456327cSAdam Nemet     } // Next instr.
8870456327cSAdam Nemet   } // Next block.
8880456327cSAdam Nemet 
8890456327cSAdam Nemet   // Now we have two lists that hold the loads and the stores.
8900456327cSAdam Nemet   // Next, we find the pointers that they use.
8910456327cSAdam Nemet 
8920456327cSAdam Nemet   // Check if we see any stores. If there are no stores, then we don't
8930456327cSAdam Nemet   // care if the pointers are *restrict*.
8940456327cSAdam Nemet   if (!Stores.size()) {
8950456327cSAdam Nemet     DEBUG(dbgs() << "LV: Found a read-only loop!\n");
8960456327cSAdam Nemet     return true;
8970456327cSAdam Nemet   }
8980456327cSAdam Nemet 
8990456327cSAdam Nemet   AccessAnalysis::DepCandidates DependentAccesses;
9000456327cSAdam Nemet   AccessAnalysis Accesses(DL, AA, DependentAccesses);
9010456327cSAdam Nemet 
9020456327cSAdam Nemet   // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
9030456327cSAdam Nemet   // multiple times on the same object. If the ptr is accessed twice, once
9040456327cSAdam Nemet   // for read and once for write, it will only appear once (on the write
9050456327cSAdam Nemet   // list). This is okay, since we are going to check for conflicts between
9060456327cSAdam Nemet   // writes and between reads and writes, but not between reads and reads.
9070456327cSAdam Nemet   ValueSet Seen;
9080456327cSAdam Nemet 
9090456327cSAdam Nemet   ValueVector::iterator I, IE;
9100456327cSAdam Nemet   for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
9110456327cSAdam Nemet     StoreInst *ST = cast<StoreInst>(*I);
9120456327cSAdam Nemet     Value* Ptr = ST->getPointerOperand();
9130456327cSAdam Nemet 
9140456327cSAdam Nemet     if (isUniform(Ptr)) {
9150456327cSAdam Nemet       emitAnalysis(
9160456327cSAdam Nemet           VectorizationReport(ST)
9170456327cSAdam Nemet           << "write to a loop invariant address could not be vectorized");
9180456327cSAdam Nemet       DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
9190456327cSAdam Nemet       return false;
9200456327cSAdam Nemet     }
9210456327cSAdam Nemet 
9220456327cSAdam Nemet     // If we did *not* see this pointer before, insert it to  the read-write
9230456327cSAdam Nemet     // list. At this phase it is only a 'write' list.
9240456327cSAdam Nemet     if (Seen.insert(Ptr).second) {
9250456327cSAdam Nemet       ++NumReadWrites;
9260456327cSAdam Nemet 
9270456327cSAdam Nemet       AliasAnalysis::Location Loc = AA->getLocation(ST);
9280456327cSAdam Nemet       // The TBAA metadata could have a control dependency on the predication
9290456327cSAdam Nemet       // condition, so we cannot rely on it when determining whether or not we
9300456327cSAdam Nemet       // need runtime pointer checks.
9310456327cSAdam Nemet       if (blockNeedsPredication(ST->getParent()))
9320456327cSAdam Nemet         Loc.AATags.TBAA = nullptr;
9330456327cSAdam Nemet 
9340456327cSAdam Nemet       Accesses.addStore(Loc);
9350456327cSAdam Nemet     }
9360456327cSAdam Nemet   }
9370456327cSAdam Nemet 
9380456327cSAdam Nemet   if (IsAnnotatedParallel) {
9390456327cSAdam Nemet     DEBUG(dbgs()
9400456327cSAdam Nemet           << "LV: A loop annotated parallel, ignore memory dependency "
9410456327cSAdam Nemet           << "checks.\n");
9420456327cSAdam Nemet     return true;
9430456327cSAdam Nemet   }
9440456327cSAdam Nemet 
9450456327cSAdam Nemet   for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
9460456327cSAdam Nemet     LoadInst *LD = cast<LoadInst>(*I);
9470456327cSAdam Nemet     Value* Ptr = LD->getPointerOperand();
9480456327cSAdam Nemet     // If we did *not* see this pointer before, insert it to the
9490456327cSAdam Nemet     // read list. If we *did* see it before, then it is already in
9500456327cSAdam Nemet     // the read-write list. This allows us to vectorize expressions
9510456327cSAdam Nemet     // such as A[i] += x;  Because the address of A[i] is a read-write
9520456327cSAdam Nemet     // pointer. This only works if the index of A[i] is consecutive.
9530456327cSAdam Nemet     // If the address of i is unknown (for example A[B[i]]) then we may
9540456327cSAdam Nemet     // read a few words, modify, and write a few words, and some of the
9550456327cSAdam Nemet     // words may be written to the same address.
9560456327cSAdam Nemet     bool IsReadOnlyPtr = false;
9570456327cSAdam Nemet     if (Seen.insert(Ptr).second ||
9580456327cSAdam Nemet         !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
9590456327cSAdam Nemet       ++NumReads;
9600456327cSAdam Nemet       IsReadOnlyPtr = true;
9610456327cSAdam Nemet     }
9620456327cSAdam Nemet 
9630456327cSAdam Nemet     AliasAnalysis::Location Loc = AA->getLocation(LD);
9640456327cSAdam Nemet     // The TBAA metadata could have a control dependency on the predication
9650456327cSAdam Nemet     // condition, so we cannot rely on it when determining whether or not we
9660456327cSAdam Nemet     // need runtime pointer checks.
9670456327cSAdam Nemet     if (blockNeedsPredication(LD->getParent()))
9680456327cSAdam Nemet       Loc.AATags.TBAA = nullptr;
9690456327cSAdam Nemet 
9700456327cSAdam Nemet     Accesses.addLoad(Loc, IsReadOnlyPtr);
9710456327cSAdam Nemet   }
9720456327cSAdam Nemet 
9730456327cSAdam Nemet   // If we write (or read-write) to a single destination and there are no
9740456327cSAdam Nemet   // other reads in this loop then is it safe to vectorize.
9750456327cSAdam Nemet   if (NumReadWrites == 1 && NumReads == 0) {
9760456327cSAdam Nemet     DEBUG(dbgs() << "LV: Found a write-only loop!\n");
9770456327cSAdam Nemet     return true;
9780456327cSAdam Nemet   }
9790456327cSAdam Nemet 
9800456327cSAdam Nemet   // Build dependence sets and check whether we need a runtime pointer bounds
9810456327cSAdam Nemet   // check.
9820456327cSAdam Nemet   Accesses.buildDependenceSets();
9830456327cSAdam Nemet   bool NeedRTCheck = Accesses.isRTCheckNeeded();
9840456327cSAdam Nemet 
9850456327cSAdam Nemet   // Find pointers with computable bounds. We are going to use this information
9860456327cSAdam Nemet   // to place a runtime bound check.
9870456327cSAdam Nemet   unsigned NumComparisons = 0;
9880456327cSAdam Nemet   bool CanDoRT = false;
9890456327cSAdam Nemet   if (NeedRTCheck)
9900456327cSAdam Nemet     CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
9910456327cSAdam Nemet                                        Strides);
9920456327cSAdam Nemet 
9930456327cSAdam Nemet   DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
9940456327cSAdam Nemet         " pointer comparisons.\n");
9950456327cSAdam Nemet 
9960456327cSAdam Nemet   // If we only have one set of dependences to check pointers among we don't
9970456327cSAdam Nemet   // need a runtime check.
9980456327cSAdam Nemet   if (NumComparisons == 0 && NeedRTCheck)
9990456327cSAdam Nemet     NeedRTCheck = false;
10000456327cSAdam Nemet 
10010456327cSAdam Nemet   // Check that we did not collect too many pointers or found an unsizeable
10020456327cSAdam Nemet   // pointer.
10030456327cSAdam Nemet   if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) {
10040456327cSAdam Nemet     PtrRtCheck.reset();
10050456327cSAdam Nemet     CanDoRT = false;
10060456327cSAdam Nemet   }
10070456327cSAdam Nemet 
10080456327cSAdam Nemet   if (CanDoRT) {
10090456327cSAdam Nemet     DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
10100456327cSAdam Nemet   }
10110456327cSAdam Nemet 
10120456327cSAdam Nemet   if (NeedRTCheck && !CanDoRT) {
10130456327cSAdam Nemet     emitAnalysis(VectorizationReport() << "cannot identify array bounds");
10140456327cSAdam Nemet     DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
10150456327cSAdam Nemet           "the array bounds.\n");
10160456327cSAdam Nemet     PtrRtCheck.reset();
10170456327cSAdam Nemet     return false;
10180456327cSAdam Nemet   }
10190456327cSAdam Nemet 
10200456327cSAdam Nemet   PtrRtCheck.Need = NeedRTCheck;
10210456327cSAdam Nemet 
10220456327cSAdam Nemet   bool CanVecMem = true;
10230456327cSAdam Nemet   if (Accesses.isDependencyCheckNeeded()) {
10240456327cSAdam Nemet     DEBUG(dbgs() << "LV: Checking memory dependencies\n");
10250456327cSAdam Nemet     CanVecMem = DepChecker.areDepsSafe(
10260456327cSAdam Nemet         DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
10270456327cSAdam Nemet     MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
10280456327cSAdam Nemet 
10290456327cSAdam Nemet     if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
10300456327cSAdam Nemet       DEBUG(dbgs() << "LV: Retrying with memory checks\n");
10310456327cSAdam Nemet       NeedRTCheck = true;
10320456327cSAdam Nemet 
10330456327cSAdam Nemet       // Clear the dependency checks. We assume they are not needed.
10340456327cSAdam Nemet       Accesses.resetDepChecks();
10350456327cSAdam Nemet 
10360456327cSAdam Nemet       PtrRtCheck.reset();
10370456327cSAdam Nemet       PtrRtCheck.Need = true;
10380456327cSAdam Nemet 
10390456327cSAdam Nemet       CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
10400456327cSAdam Nemet                                          TheLoop, Strides, true);
10410456327cSAdam Nemet       // Check that we did not collect too many pointers or found an unsizeable
10420456327cSAdam Nemet       // pointer.
10430456327cSAdam Nemet       if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) {
10440456327cSAdam Nemet         if (!CanDoRT && NumComparisons > 0)
10450456327cSAdam Nemet           emitAnalysis(VectorizationReport()
10460456327cSAdam Nemet                        << "cannot check memory dependencies at runtime");
10470456327cSAdam Nemet         else
10480456327cSAdam Nemet           emitAnalysis(VectorizationReport()
10490456327cSAdam Nemet                        << NumComparisons << " exceeds limit of "
10500456327cSAdam Nemet                        << VectParams.RuntimeMemoryCheckThreshold
10510456327cSAdam Nemet                        << " dependent memory operations checked at runtime");
10520456327cSAdam Nemet         DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
10530456327cSAdam Nemet         PtrRtCheck.reset();
10540456327cSAdam Nemet         return false;
10550456327cSAdam Nemet       }
10560456327cSAdam Nemet 
10570456327cSAdam Nemet       CanVecMem = true;
10580456327cSAdam Nemet     }
10590456327cSAdam Nemet   }
10600456327cSAdam Nemet 
10610456327cSAdam Nemet   if (!CanVecMem)
10620456327cSAdam Nemet     emitAnalysis(VectorizationReport() <<
10630456327cSAdam Nemet                  "unsafe dependent memory operations in loop");
10640456327cSAdam Nemet 
10650456327cSAdam Nemet   DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
10660456327cSAdam Nemet         " need a runtime memory check.\n");
10670456327cSAdam Nemet 
10680456327cSAdam Nemet   return CanVecMem;
10690456327cSAdam Nemet }
10700456327cSAdam Nemet 
1071*30f16e16SAdam Nemet bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB)  {
10720456327cSAdam Nemet   assert(TheLoop->contains(BB) && "Unknown block used");
10730456327cSAdam Nemet 
10740456327cSAdam Nemet   // Blocks that do not dominate the latch need predication.
10750456327cSAdam Nemet   BasicBlock* Latch = TheLoop->getLoopLatch();
10760456327cSAdam Nemet   return !DT->dominates(BB, Latch);
10770456327cSAdam Nemet }
10780456327cSAdam Nemet 
1079*30f16e16SAdam Nemet void LoopAccessInfo::emitAnalysis(VectorizationReport &Message) {
10800456327cSAdam Nemet   VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
10810456327cSAdam Nemet }
10820456327cSAdam Nemet 
1083*30f16e16SAdam Nemet bool LoopAccessInfo::isUniform(Value *V) {
10840456327cSAdam Nemet   return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
10850456327cSAdam Nemet }
10867206d7a5SAdam Nemet 
10877206d7a5SAdam Nemet // FIXME: this function is currently a duplicate of the one in
10887206d7a5SAdam Nemet // LoopVectorize.cpp.
10897206d7a5SAdam Nemet static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
10907206d7a5SAdam Nemet                                  Instruction *Loc) {
10917206d7a5SAdam Nemet   if (FirstInst)
10927206d7a5SAdam Nemet     return FirstInst;
10937206d7a5SAdam Nemet   if (Instruction *I = dyn_cast<Instruction>(V))
10947206d7a5SAdam Nemet     return I->getParent() == Loc->getParent() ? I : nullptr;
10957206d7a5SAdam Nemet   return nullptr;
10967206d7a5SAdam Nemet }
10977206d7a5SAdam Nemet 
10987206d7a5SAdam Nemet std::pair<Instruction *, Instruction *>
1099*30f16e16SAdam Nemet LoopAccessInfo::addRuntimeCheck(Instruction *Loc) {
11007206d7a5SAdam Nemet   Instruction *tnullptr = nullptr;
11017206d7a5SAdam Nemet   if (!PtrRtCheck.Need)
11027206d7a5SAdam Nemet     return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
11037206d7a5SAdam Nemet 
11047206d7a5SAdam Nemet   unsigned NumPointers = PtrRtCheck.Pointers.size();
11057206d7a5SAdam Nemet   SmallVector<TrackingVH<Value> , 2> Starts;
11067206d7a5SAdam Nemet   SmallVector<TrackingVH<Value> , 2> Ends;
11077206d7a5SAdam Nemet 
11087206d7a5SAdam Nemet   LLVMContext &Ctx = Loc->getContext();
11097206d7a5SAdam Nemet   SCEVExpander Exp(*SE, "induction");
11107206d7a5SAdam Nemet   Instruction *FirstInst = nullptr;
11117206d7a5SAdam Nemet 
11127206d7a5SAdam Nemet   for (unsigned i = 0; i < NumPointers; ++i) {
11137206d7a5SAdam Nemet     Value *Ptr = PtrRtCheck.Pointers[i];
11147206d7a5SAdam Nemet     const SCEV *Sc = SE->getSCEV(Ptr);
11157206d7a5SAdam Nemet 
11167206d7a5SAdam Nemet     if (SE->isLoopInvariant(Sc, TheLoop)) {
11177206d7a5SAdam Nemet       DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
11187206d7a5SAdam Nemet             *Ptr <<"\n");
11197206d7a5SAdam Nemet       Starts.push_back(Ptr);
11207206d7a5SAdam Nemet       Ends.push_back(Ptr);
11217206d7a5SAdam Nemet     } else {
11227206d7a5SAdam Nemet       DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
11237206d7a5SAdam Nemet       unsigned AS = Ptr->getType()->getPointerAddressSpace();
11247206d7a5SAdam Nemet 
11257206d7a5SAdam Nemet       // Use this type for pointer arithmetic.
11267206d7a5SAdam Nemet       Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
11277206d7a5SAdam Nemet 
11287206d7a5SAdam Nemet       Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
11297206d7a5SAdam Nemet       Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
11307206d7a5SAdam Nemet       Starts.push_back(Start);
11317206d7a5SAdam Nemet       Ends.push_back(End);
11327206d7a5SAdam Nemet     }
11337206d7a5SAdam Nemet   }
11347206d7a5SAdam Nemet 
11357206d7a5SAdam Nemet   IRBuilder<> ChkBuilder(Loc);
11367206d7a5SAdam Nemet   // Our instructions might fold to a constant.
11377206d7a5SAdam Nemet   Value *MemoryRuntimeCheck = nullptr;
11387206d7a5SAdam Nemet   for (unsigned i = 0; i < NumPointers; ++i) {
11397206d7a5SAdam Nemet     for (unsigned j = i+1; j < NumPointers; ++j) {
11407206d7a5SAdam Nemet       // No need to check if two readonly pointers intersect.
11417206d7a5SAdam Nemet       if (!PtrRtCheck.IsWritePtr[i] && !PtrRtCheck.IsWritePtr[j])
11427206d7a5SAdam Nemet         continue;
11437206d7a5SAdam Nemet 
11447206d7a5SAdam Nemet       // Only need to check pointers between two different dependency sets.
11457206d7a5SAdam Nemet       if (PtrRtCheck.DependencySetId[i] == PtrRtCheck.DependencySetId[j])
11467206d7a5SAdam Nemet        continue;
11477206d7a5SAdam Nemet       // Only need to check pointers in the same alias set.
11487206d7a5SAdam Nemet       if (PtrRtCheck.AliasSetId[i] != PtrRtCheck.AliasSetId[j])
11497206d7a5SAdam Nemet         continue;
11507206d7a5SAdam Nemet 
11517206d7a5SAdam Nemet       unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
11527206d7a5SAdam Nemet       unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
11537206d7a5SAdam Nemet 
11547206d7a5SAdam Nemet       assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
11557206d7a5SAdam Nemet              (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
11567206d7a5SAdam Nemet              "Trying to bounds check pointers with different address spaces");
11577206d7a5SAdam Nemet 
11587206d7a5SAdam Nemet       Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
11597206d7a5SAdam Nemet       Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
11607206d7a5SAdam Nemet 
11617206d7a5SAdam Nemet       Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
11627206d7a5SAdam Nemet       Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
11637206d7a5SAdam Nemet       Value *End0 =   ChkBuilder.CreateBitCast(Ends[i],   PtrArithTy1, "bc");
11647206d7a5SAdam Nemet       Value *End1 =   ChkBuilder.CreateBitCast(Ends[j],   PtrArithTy0, "bc");
11657206d7a5SAdam Nemet 
11667206d7a5SAdam Nemet       Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
11677206d7a5SAdam Nemet       FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
11687206d7a5SAdam Nemet       Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
11697206d7a5SAdam Nemet       FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
11707206d7a5SAdam Nemet       Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
11717206d7a5SAdam Nemet       FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
11727206d7a5SAdam Nemet       if (MemoryRuntimeCheck) {
11737206d7a5SAdam Nemet         IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
11747206d7a5SAdam Nemet                                          "conflict.rdx");
11757206d7a5SAdam Nemet         FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
11767206d7a5SAdam Nemet       }
11777206d7a5SAdam Nemet       MemoryRuntimeCheck = IsConflict;
11787206d7a5SAdam Nemet     }
11797206d7a5SAdam Nemet   }
11807206d7a5SAdam Nemet 
11817206d7a5SAdam Nemet   // We have to do this trickery because the IRBuilder might fold the check to a
11827206d7a5SAdam Nemet   // constant expression in which case there is no Instruction anchored in a
11837206d7a5SAdam Nemet   // the block.
11847206d7a5SAdam Nemet   Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
11857206d7a5SAdam Nemet                                                  ConstantInt::getTrue(Ctx));
11867206d7a5SAdam Nemet   ChkBuilder.Insert(Check, "memcheck.conflict");
11877206d7a5SAdam Nemet   FirstInst = getFirstInst(FirstInst, Check, Loc);
11887206d7a5SAdam Nemet   return std::make_pair(FirstInst, Check);
11897206d7a5SAdam Nemet }
1190