10456327cSAdam Nemet //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==// 20456327cSAdam Nemet // 30456327cSAdam Nemet // The LLVM Compiler Infrastructure 40456327cSAdam Nemet // 50456327cSAdam Nemet // This file is distributed under the University of Illinois Open Source 60456327cSAdam Nemet // License. See LICENSE.TXT for details. 70456327cSAdam Nemet // 80456327cSAdam Nemet //===----------------------------------------------------------------------===// 90456327cSAdam Nemet // 100456327cSAdam Nemet // The implementation for the loop memory dependence that was originally 110456327cSAdam Nemet // developed for the loop vectorizer. 120456327cSAdam Nemet // 130456327cSAdam Nemet //===----------------------------------------------------------------------===// 140456327cSAdam Nemet 150456327cSAdam Nemet #include "llvm/Analysis/LoopAccessAnalysis.h" 160456327cSAdam Nemet #include "llvm/Analysis/LoopInfo.h" 177206d7a5SAdam Nemet #include "llvm/Analysis/ScalarEvolutionExpander.h" 180456327cSAdam Nemet #include "llvm/Analysis/ValueTracking.h" 190456327cSAdam Nemet #include "llvm/IR/DiagnosticInfo.h" 200456327cSAdam Nemet #include "llvm/IR/Dominators.h" 217206d7a5SAdam Nemet #include "llvm/IR/IRBuilder.h" 220456327cSAdam Nemet #include "llvm/Support/Debug.h" 230456327cSAdam Nemet #include "llvm/Transforms/Utils/VectorUtils.h" 240456327cSAdam Nemet using namespace llvm; 250456327cSAdam Nemet 260456327cSAdam Nemet #define DEBUG_TYPE "loop-vectorize" 270456327cSAdam Nemet 280456327cSAdam Nemet void VectorizationReport::emitAnalysis(VectorizationReport &Message, 290456327cSAdam Nemet const Function *TheFunction, 300456327cSAdam Nemet const Loop *TheLoop) { 310456327cSAdam Nemet DebugLoc DL = TheLoop->getStartLoc(); 320456327cSAdam Nemet if (Instruction *I = Message.getInstr()) 330456327cSAdam Nemet DL = I->getDebugLoc(); 340456327cSAdam Nemet emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE, 350456327cSAdam Nemet *TheFunction, DL, Message.str()); 360456327cSAdam Nemet } 370456327cSAdam Nemet 380456327cSAdam Nemet Value *llvm::stripIntegerCast(Value *V) { 390456327cSAdam Nemet if (CastInst *CI = dyn_cast<CastInst>(V)) 400456327cSAdam Nemet if (CI->getOperand(0)->getType()->isIntegerTy()) 410456327cSAdam Nemet return CI->getOperand(0); 420456327cSAdam Nemet return V; 430456327cSAdam Nemet } 440456327cSAdam Nemet 450456327cSAdam Nemet const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, 460456327cSAdam Nemet ValueToValueMap &PtrToStride, 470456327cSAdam Nemet Value *Ptr, Value *OrigPtr) { 480456327cSAdam Nemet 490456327cSAdam Nemet const SCEV *OrigSCEV = SE->getSCEV(Ptr); 500456327cSAdam Nemet 510456327cSAdam Nemet // If there is an entry in the map return the SCEV of the pointer with the 520456327cSAdam Nemet // symbolic stride replaced by one. 530456327cSAdam Nemet ValueToValueMap::iterator SI = PtrToStride.find(OrigPtr ? OrigPtr : Ptr); 540456327cSAdam Nemet if (SI != PtrToStride.end()) { 550456327cSAdam Nemet Value *StrideVal = SI->second; 560456327cSAdam Nemet 570456327cSAdam Nemet // Strip casts. 580456327cSAdam Nemet StrideVal = stripIntegerCast(StrideVal); 590456327cSAdam Nemet 600456327cSAdam Nemet // Replace symbolic stride by one. 610456327cSAdam Nemet Value *One = ConstantInt::get(StrideVal->getType(), 1); 620456327cSAdam Nemet ValueToValueMap RewriteMap; 630456327cSAdam Nemet RewriteMap[StrideVal] = One; 640456327cSAdam Nemet 650456327cSAdam Nemet const SCEV *ByOne = 660456327cSAdam Nemet SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true); 670456327cSAdam Nemet DEBUG(dbgs() << "LV: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne 680456327cSAdam Nemet << "\n"); 690456327cSAdam Nemet return ByOne; 700456327cSAdam Nemet } 710456327cSAdam Nemet 720456327cSAdam Nemet // Otherwise, just return the SCEV of the original pointer. 730456327cSAdam Nemet return SE->getSCEV(Ptr); 740456327cSAdam Nemet } 750456327cSAdam Nemet 76*30f16e16SAdam Nemet void LoopAccessInfo::RuntimePointerCheck::insert(ScalarEvolution *SE, Loop *Lp, 77*30f16e16SAdam Nemet Value *Ptr, bool WritePtr, 780456327cSAdam Nemet unsigned DepSetId, 790456327cSAdam Nemet unsigned ASId, 800456327cSAdam Nemet ValueToValueMap &Strides) { 810456327cSAdam Nemet // Get the stride replaced scev. 820456327cSAdam Nemet const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); 830456327cSAdam Nemet const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); 840456327cSAdam Nemet assert(AR && "Invalid addrec expression"); 850456327cSAdam Nemet const SCEV *Ex = SE->getBackedgeTakenCount(Lp); 860456327cSAdam Nemet const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); 870456327cSAdam Nemet Pointers.push_back(Ptr); 880456327cSAdam Nemet Starts.push_back(AR->getStart()); 890456327cSAdam Nemet Ends.push_back(ScEnd); 900456327cSAdam Nemet IsWritePtr.push_back(WritePtr); 910456327cSAdam Nemet DependencySetId.push_back(DepSetId); 920456327cSAdam Nemet AliasSetId.push_back(ASId); 930456327cSAdam Nemet } 940456327cSAdam Nemet 950456327cSAdam Nemet namespace { 960456327cSAdam Nemet /// \brief Analyses memory accesses in a loop. 970456327cSAdam Nemet /// 980456327cSAdam Nemet /// Checks whether run time pointer checks are needed and builds sets for data 990456327cSAdam Nemet /// dependence checking. 1000456327cSAdam Nemet class AccessAnalysis { 1010456327cSAdam Nemet public: 1020456327cSAdam Nemet /// \brief Read or write access location. 1030456327cSAdam Nemet typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; 1040456327cSAdam Nemet typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; 1050456327cSAdam Nemet 1060456327cSAdam Nemet /// \brief Set of potential dependent memory accesses. 1070456327cSAdam Nemet typedef EquivalenceClasses<MemAccessInfo> DepCandidates; 1080456327cSAdam Nemet 1090456327cSAdam Nemet AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) : 1100456327cSAdam Nemet DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {} 1110456327cSAdam Nemet 1120456327cSAdam Nemet /// \brief Register a load and whether it is only read from. 1130456327cSAdam Nemet void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) { 1140456327cSAdam Nemet Value *Ptr = const_cast<Value*>(Loc.Ptr); 1150456327cSAdam Nemet AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags); 1160456327cSAdam Nemet Accesses.insert(MemAccessInfo(Ptr, false)); 1170456327cSAdam Nemet if (IsReadOnly) 1180456327cSAdam Nemet ReadOnlyPtr.insert(Ptr); 1190456327cSAdam Nemet } 1200456327cSAdam Nemet 1210456327cSAdam Nemet /// \brief Register a store. 1220456327cSAdam Nemet void addStore(AliasAnalysis::Location &Loc) { 1230456327cSAdam Nemet Value *Ptr = const_cast<Value*>(Loc.Ptr); 1240456327cSAdam Nemet AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags); 1250456327cSAdam Nemet Accesses.insert(MemAccessInfo(Ptr, true)); 1260456327cSAdam Nemet } 1270456327cSAdam Nemet 1280456327cSAdam Nemet /// \brief Check whether we can check the pointers at runtime for 1290456327cSAdam Nemet /// non-intersection. 130*30f16e16SAdam Nemet bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, 1310456327cSAdam Nemet unsigned &NumComparisons, 1320456327cSAdam Nemet ScalarEvolution *SE, Loop *TheLoop, 1330456327cSAdam Nemet ValueToValueMap &Strides, 1340456327cSAdam Nemet bool ShouldCheckStride = false); 1350456327cSAdam Nemet 1360456327cSAdam Nemet /// \brief Goes over all memory accesses, checks whether a RT check is needed 1370456327cSAdam Nemet /// and builds sets of dependent accesses. 1380456327cSAdam Nemet void buildDependenceSets() { 1390456327cSAdam Nemet processMemAccesses(); 1400456327cSAdam Nemet } 1410456327cSAdam Nemet 1420456327cSAdam Nemet bool isRTCheckNeeded() { return IsRTCheckNeeded; } 1430456327cSAdam Nemet 1440456327cSAdam Nemet bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } 1450456327cSAdam Nemet void resetDepChecks() { CheckDeps.clear(); } 1460456327cSAdam Nemet 1470456327cSAdam Nemet MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } 1480456327cSAdam Nemet 1490456327cSAdam Nemet private: 1500456327cSAdam Nemet typedef SetVector<MemAccessInfo> PtrAccessSet; 1510456327cSAdam Nemet 1520456327cSAdam Nemet /// \brief Go over all memory access and check whether runtime pointer checks 1530456327cSAdam Nemet /// are needed /// and build sets of dependency check candidates. 1540456327cSAdam Nemet void processMemAccesses(); 1550456327cSAdam Nemet 1560456327cSAdam Nemet /// Set of all accesses. 1570456327cSAdam Nemet PtrAccessSet Accesses; 1580456327cSAdam Nemet 1590456327cSAdam Nemet /// Set of accesses that need a further dependence check. 1600456327cSAdam Nemet MemAccessInfoSet CheckDeps; 1610456327cSAdam Nemet 1620456327cSAdam Nemet /// Set of pointers that are read only. 1630456327cSAdam Nemet SmallPtrSet<Value*, 16> ReadOnlyPtr; 1640456327cSAdam Nemet 1650456327cSAdam Nemet const DataLayout *DL; 1660456327cSAdam Nemet 1670456327cSAdam Nemet /// An alias set tracker to partition the access set by underlying object and 1680456327cSAdam Nemet //intrinsic property (such as TBAA metadata). 1690456327cSAdam Nemet AliasSetTracker AST; 1700456327cSAdam Nemet 1710456327cSAdam Nemet /// Sets of potentially dependent accesses - members of one set share an 1720456327cSAdam Nemet /// underlying pointer. The set "CheckDeps" identfies which sets really need a 1730456327cSAdam Nemet /// dependence check. 1740456327cSAdam Nemet DepCandidates &DepCands; 1750456327cSAdam Nemet 1760456327cSAdam Nemet bool IsRTCheckNeeded; 1770456327cSAdam Nemet }; 1780456327cSAdam Nemet 1790456327cSAdam Nemet } // end anonymous namespace 1800456327cSAdam Nemet 1810456327cSAdam Nemet /// \brief Check whether a pointer can participate in a runtime bounds check. 1820456327cSAdam Nemet static bool hasComputableBounds(ScalarEvolution *SE, ValueToValueMap &Strides, 1830456327cSAdam Nemet Value *Ptr) { 1840456327cSAdam Nemet const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); 1850456327cSAdam Nemet const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); 1860456327cSAdam Nemet if (!AR) 1870456327cSAdam Nemet return false; 1880456327cSAdam Nemet 1890456327cSAdam Nemet return AR->isAffine(); 1900456327cSAdam Nemet } 1910456327cSAdam Nemet 1920456327cSAdam Nemet /// \brief Check the stride of the pointer and ensure that it does not wrap in 1930456327cSAdam Nemet /// the address space. 1940456327cSAdam Nemet static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, 1950456327cSAdam Nemet const Loop *Lp, ValueToValueMap &StridesMap); 1960456327cSAdam Nemet 1970456327cSAdam Nemet bool AccessAnalysis::canCheckPtrAtRT( 198*30f16e16SAdam Nemet LoopAccessInfo::RuntimePointerCheck &RtCheck, 1990456327cSAdam Nemet unsigned &NumComparisons, ScalarEvolution *SE, Loop *TheLoop, 2000456327cSAdam Nemet ValueToValueMap &StridesMap, bool ShouldCheckStride) { 2010456327cSAdam Nemet // Find pointers with computable bounds. We are going to use this information 2020456327cSAdam Nemet // to place a runtime bound check. 2030456327cSAdam Nemet bool CanDoRT = true; 2040456327cSAdam Nemet 2050456327cSAdam Nemet bool IsDepCheckNeeded = isDependencyCheckNeeded(); 2060456327cSAdam Nemet NumComparisons = 0; 2070456327cSAdam Nemet 2080456327cSAdam Nemet // We assign a consecutive id to access from different alias sets. 2090456327cSAdam Nemet // Accesses between different groups doesn't need to be checked. 2100456327cSAdam Nemet unsigned ASId = 1; 2110456327cSAdam Nemet for (auto &AS : AST) { 2120456327cSAdam Nemet unsigned NumReadPtrChecks = 0; 2130456327cSAdam Nemet unsigned NumWritePtrChecks = 0; 2140456327cSAdam Nemet 2150456327cSAdam Nemet // We assign consecutive id to access from different dependence sets. 2160456327cSAdam Nemet // Accesses within the same set don't need a runtime check. 2170456327cSAdam Nemet unsigned RunningDepId = 1; 2180456327cSAdam Nemet DenseMap<Value *, unsigned> DepSetId; 2190456327cSAdam Nemet 2200456327cSAdam Nemet for (auto A : AS) { 2210456327cSAdam Nemet Value *Ptr = A.getValue(); 2220456327cSAdam Nemet bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); 2230456327cSAdam Nemet MemAccessInfo Access(Ptr, IsWrite); 2240456327cSAdam Nemet 2250456327cSAdam Nemet if (IsWrite) 2260456327cSAdam Nemet ++NumWritePtrChecks; 2270456327cSAdam Nemet else 2280456327cSAdam Nemet ++NumReadPtrChecks; 2290456327cSAdam Nemet 2300456327cSAdam Nemet if (hasComputableBounds(SE, StridesMap, Ptr) && 2310456327cSAdam Nemet // When we run after a failing dependency check we have to make sure we 2320456327cSAdam Nemet // don't have wrapping pointers. 2330456327cSAdam Nemet (!ShouldCheckStride || 2340456327cSAdam Nemet isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) { 2350456327cSAdam Nemet // The id of the dependence set. 2360456327cSAdam Nemet unsigned DepId; 2370456327cSAdam Nemet 2380456327cSAdam Nemet if (IsDepCheckNeeded) { 2390456327cSAdam Nemet Value *Leader = DepCands.getLeaderValue(Access).getPointer(); 2400456327cSAdam Nemet unsigned &LeaderId = DepSetId[Leader]; 2410456327cSAdam Nemet if (!LeaderId) 2420456327cSAdam Nemet LeaderId = RunningDepId++; 2430456327cSAdam Nemet DepId = LeaderId; 2440456327cSAdam Nemet } else 2450456327cSAdam Nemet // Each access has its own dependence set. 2460456327cSAdam Nemet DepId = RunningDepId++; 2470456327cSAdam Nemet 2480456327cSAdam Nemet RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); 2490456327cSAdam Nemet 2500456327cSAdam Nemet DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n'); 2510456327cSAdam Nemet } else { 2520456327cSAdam Nemet CanDoRT = false; 2530456327cSAdam Nemet } 2540456327cSAdam Nemet } 2550456327cSAdam Nemet 2560456327cSAdam Nemet if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2) 2570456327cSAdam Nemet NumComparisons += 0; // Only one dependence set. 2580456327cSAdam Nemet else { 2590456327cSAdam Nemet NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks + 2600456327cSAdam Nemet NumWritePtrChecks - 1)); 2610456327cSAdam Nemet } 2620456327cSAdam Nemet 2630456327cSAdam Nemet ++ASId; 2640456327cSAdam Nemet } 2650456327cSAdam Nemet 2660456327cSAdam Nemet // If the pointers that we would use for the bounds comparison have different 2670456327cSAdam Nemet // address spaces, assume the values aren't directly comparable, so we can't 2680456327cSAdam Nemet // use them for the runtime check. We also have to assume they could 2690456327cSAdam Nemet // overlap. In the future there should be metadata for whether address spaces 2700456327cSAdam Nemet // are disjoint. 2710456327cSAdam Nemet unsigned NumPointers = RtCheck.Pointers.size(); 2720456327cSAdam Nemet for (unsigned i = 0; i < NumPointers; ++i) { 2730456327cSAdam Nemet for (unsigned j = i + 1; j < NumPointers; ++j) { 2740456327cSAdam Nemet // Only need to check pointers between two different dependency sets. 2750456327cSAdam Nemet if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) 2760456327cSAdam Nemet continue; 2770456327cSAdam Nemet // Only need to check pointers in the same alias set. 2780456327cSAdam Nemet if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j]) 2790456327cSAdam Nemet continue; 2800456327cSAdam Nemet 2810456327cSAdam Nemet Value *PtrI = RtCheck.Pointers[i]; 2820456327cSAdam Nemet Value *PtrJ = RtCheck.Pointers[j]; 2830456327cSAdam Nemet 2840456327cSAdam Nemet unsigned ASi = PtrI->getType()->getPointerAddressSpace(); 2850456327cSAdam Nemet unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); 2860456327cSAdam Nemet if (ASi != ASj) { 2870456327cSAdam Nemet DEBUG(dbgs() << "LV: Runtime check would require comparison between" 2880456327cSAdam Nemet " different address spaces\n"); 2890456327cSAdam Nemet return false; 2900456327cSAdam Nemet } 2910456327cSAdam Nemet } 2920456327cSAdam Nemet } 2930456327cSAdam Nemet 2940456327cSAdam Nemet return CanDoRT; 2950456327cSAdam Nemet } 2960456327cSAdam Nemet 2970456327cSAdam Nemet void AccessAnalysis::processMemAccesses() { 2980456327cSAdam Nemet // We process the set twice: first we process read-write pointers, last we 2990456327cSAdam Nemet // process read-only pointers. This allows us to skip dependence tests for 3000456327cSAdam Nemet // read-only pointers. 3010456327cSAdam Nemet 3020456327cSAdam Nemet DEBUG(dbgs() << "LV: Processing memory accesses...\n"); 3030456327cSAdam Nemet DEBUG(dbgs() << " AST: "; AST.dump()); 3040456327cSAdam Nemet DEBUG(dbgs() << "LV: Accesses:\n"); 3050456327cSAdam Nemet DEBUG({ 3060456327cSAdam Nemet for (auto A : Accesses) 3070456327cSAdam Nemet dbgs() << "\t" << *A.getPointer() << " (" << 3080456327cSAdam Nemet (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ? 3090456327cSAdam Nemet "read-only" : "read")) << ")\n"; 3100456327cSAdam Nemet }); 3110456327cSAdam Nemet 3120456327cSAdam Nemet // The AliasSetTracker has nicely partitioned our pointers by metadata 3130456327cSAdam Nemet // compatibility and potential for underlying-object overlap. As a result, we 3140456327cSAdam Nemet // only need to check for potential pointer dependencies within each alias 3150456327cSAdam Nemet // set. 3160456327cSAdam Nemet for (auto &AS : AST) { 3170456327cSAdam Nemet // Note that both the alias-set tracker and the alias sets themselves used 3180456327cSAdam Nemet // linked lists internally and so the iteration order here is deterministic 3190456327cSAdam Nemet // (matching the original instruction order within each set). 3200456327cSAdam Nemet 3210456327cSAdam Nemet bool SetHasWrite = false; 3220456327cSAdam Nemet 3230456327cSAdam Nemet // Map of pointers to last access encountered. 3240456327cSAdam Nemet typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap; 3250456327cSAdam Nemet UnderlyingObjToAccessMap ObjToLastAccess; 3260456327cSAdam Nemet 3270456327cSAdam Nemet // Set of access to check after all writes have been processed. 3280456327cSAdam Nemet PtrAccessSet DeferredAccesses; 3290456327cSAdam Nemet 3300456327cSAdam Nemet // Iterate over each alias set twice, once to process read/write pointers, 3310456327cSAdam Nemet // and then to process read-only pointers. 3320456327cSAdam Nemet for (int SetIteration = 0; SetIteration < 2; ++SetIteration) { 3330456327cSAdam Nemet bool UseDeferred = SetIteration > 0; 3340456327cSAdam Nemet PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses; 3350456327cSAdam Nemet 3360456327cSAdam Nemet for (auto AV : AS) { 3370456327cSAdam Nemet Value *Ptr = AV.getValue(); 3380456327cSAdam Nemet 3390456327cSAdam Nemet // For a single memory access in AliasSetTracker, Accesses may contain 3400456327cSAdam Nemet // both read and write, and they both need to be handled for CheckDeps. 3410456327cSAdam Nemet for (auto AC : S) { 3420456327cSAdam Nemet if (AC.getPointer() != Ptr) 3430456327cSAdam Nemet continue; 3440456327cSAdam Nemet 3450456327cSAdam Nemet bool IsWrite = AC.getInt(); 3460456327cSAdam Nemet 3470456327cSAdam Nemet // If we're using the deferred access set, then it contains only 3480456327cSAdam Nemet // reads. 3490456327cSAdam Nemet bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite; 3500456327cSAdam Nemet if (UseDeferred && !IsReadOnlyPtr) 3510456327cSAdam Nemet continue; 3520456327cSAdam Nemet // Otherwise, the pointer must be in the PtrAccessSet, either as a 3530456327cSAdam Nemet // read or a write. 3540456327cSAdam Nemet assert(((IsReadOnlyPtr && UseDeferred) || IsWrite || 3550456327cSAdam Nemet S.count(MemAccessInfo(Ptr, false))) && 3560456327cSAdam Nemet "Alias-set pointer not in the access set?"); 3570456327cSAdam Nemet 3580456327cSAdam Nemet MemAccessInfo Access(Ptr, IsWrite); 3590456327cSAdam Nemet DepCands.insert(Access); 3600456327cSAdam Nemet 3610456327cSAdam Nemet // Memorize read-only pointers for later processing and skip them in 3620456327cSAdam Nemet // the first round (they need to be checked after we have seen all 3630456327cSAdam Nemet // write pointers). Note: we also mark pointer that are not 3640456327cSAdam Nemet // consecutive as "read-only" pointers (so that we check 3650456327cSAdam Nemet // "a[b[i]] +="). Hence, we need the second check for "!IsWrite". 3660456327cSAdam Nemet if (!UseDeferred && IsReadOnlyPtr) { 3670456327cSAdam Nemet DeferredAccesses.insert(Access); 3680456327cSAdam Nemet continue; 3690456327cSAdam Nemet } 3700456327cSAdam Nemet 3710456327cSAdam Nemet // If this is a write - check other reads and writes for conflicts. If 3720456327cSAdam Nemet // this is a read only check other writes for conflicts (but only if 3730456327cSAdam Nemet // there is no other write to the ptr - this is an optimization to 3740456327cSAdam Nemet // catch "a[i] = a[i] + " without having to do a dependence check). 3750456327cSAdam Nemet if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { 3760456327cSAdam Nemet CheckDeps.insert(Access); 3770456327cSAdam Nemet IsRTCheckNeeded = true; 3780456327cSAdam Nemet } 3790456327cSAdam Nemet 3800456327cSAdam Nemet if (IsWrite) 3810456327cSAdam Nemet SetHasWrite = true; 3820456327cSAdam Nemet 3830456327cSAdam Nemet // Create sets of pointers connected by a shared alias set and 3840456327cSAdam Nemet // underlying object. 3850456327cSAdam Nemet typedef SmallVector<Value *, 16> ValueVector; 3860456327cSAdam Nemet ValueVector TempObjects; 3870456327cSAdam Nemet GetUnderlyingObjects(Ptr, TempObjects, DL); 3880456327cSAdam Nemet for (Value *UnderlyingObj : TempObjects) { 3890456327cSAdam Nemet UnderlyingObjToAccessMap::iterator Prev = 3900456327cSAdam Nemet ObjToLastAccess.find(UnderlyingObj); 3910456327cSAdam Nemet if (Prev != ObjToLastAccess.end()) 3920456327cSAdam Nemet DepCands.unionSets(Access, Prev->second); 3930456327cSAdam Nemet 3940456327cSAdam Nemet ObjToLastAccess[UnderlyingObj] = Access; 3950456327cSAdam Nemet } 3960456327cSAdam Nemet } 3970456327cSAdam Nemet } 3980456327cSAdam Nemet } 3990456327cSAdam Nemet } 4000456327cSAdam Nemet } 4010456327cSAdam Nemet 4020456327cSAdam Nemet namespace { 4030456327cSAdam Nemet /// \brief Checks memory dependences among accesses to the same underlying 4040456327cSAdam Nemet /// object to determine whether there vectorization is legal or not (and at 4050456327cSAdam Nemet /// which vectorization factor). 4060456327cSAdam Nemet /// 4070456327cSAdam Nemet /// This class works under the assumption that we already checked that memory 4080456327cSAdam Nemet /// locations with different underlying pointers are "must-not alias". 4090456327cSAdam Nemet /// We use the ScalarEvolution framework to symbolically evalutate access 4100456327cSAdam Nemet /// functions pairs. Since we currently don't restructure the loop we can rely 4110456327cSAdam Nemet /// on the program order of memory accesses to determine their safety. 4120456327cSAdam Nemet /// At the moment we will only deem accesses as safe for: 4130456327cSAdam Nemet /// * A negative constant distance assuming program order. 4140456327cSAdam Nemet /// 4150456327cSAdam Nemet /// Safe: tmp = a[i + 1]; OR a[i + 1] = x; 4160456327cSAdam Nemet /// a[i] = tmp; y = a[i]; 4170456327cSAdam Nemet /// 4180456327cSAdam Nemet /// The latter case is safe because later checks guarantuee that there can't 4190456327cSAdam Nemet /// be a cycle through a phi node (that is, we check that "x" and "y" is not 4200456327cSAdam Nemet /// the same variable: a header phi can only be an induction or a reduction, a 4210456327cSAdam Nemet /// reduction can't have a memory sink, an induction can't have a memory 4220456327cSAdam Nemet /// source). This is important and must not be violated (or we have to 4230456327cSAdam Nemet /// resort to checking for cycles through memory). 4240456327cSAdam Nemet /// 4250456327cSAdam Nemet /// * A positive constant distance assuming program order that is bigger 4260456327cSAdam Nemet /// than the biggest memory access. 4270456327cSAdam Nemet /// 4280456327cSAdam Nemet /// tmp = a[i] OR b[i] = x 4290456327cSAdam Nemet /// a[i+2] = tmp y = b[i+2]; 4300456327cSAdam Nemet /// 4310456327cSAdam Nemet /// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively. 4320456327cSAdam Nemet /// 4330456327cSAdam Nemet /// * Zero distances and all accesses have the same size. 4340456327cSAdam Nemet /// 4350456327cSAdam Nemet class MemoryDepChecker { 4360456327cSAdam Nemet public: 4370456327cSAdam Nemet typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; 4380456327cSAdam Nemet typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; 4390456327cSAdam Nemet 4400456327cSAdam Nemet MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L, 441*30f16e16SAdam Nemet const LoopAccessInfo::VectorizerParams &VectParams) 4420456327cSAdam Nemet : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0), 4430456327cSAdam Nemet ShouldRetryWithRuntimeCheck(false), VectParams(VectParams) {} 4440456327cSAdam Nemet 4450456327cSAdam Nemet /// \brief Register the location (instructions are given increasing numbers) 4460456327cSAdam Nemet /// of a write access. 4470456327cSAdam Nemet void addAccess(StoreInst *SI) { 4480456327cSAdam Nemet Value *Ptr = SI->getPointerOperand(); 4490456327cSAdam Nemet Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx); 4500456327cSAdam Nemet InstMap.push_back(SI); 4510456327cSAdam Nemet ++AccessIdx; 4520456327cSAdam Nemet } 4530456327cSAdam Nemet 4540456327cSAdam Nemet /// \brief Register the location (instructions are given increasing numbers) 4550456327cSAdam Nemet /// of a write access. 4560456327cSAdam Nemet void addAccess(LoadInst *LI) { 4570456327cSAdam Nemet Value *Ptr = LI->getPointerOperand(); 4580456327cSAdam Nemet Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx); 4590456327cSAdam Nemet InstMap.push_back(LI); 4600456327cSAdam Nemet ++AccessIdx; 4610456327cSAdam Nemet } 4620456327cSAdam Nemet 4630456327cSAdam Nemet /// \brief Check whether the dependencies between the accesses are safe. 4640456327cSAdam Nemet /// 4650456327cSAdam Nemet /// Only checks sets with elements in \p CheckDeps. 4660456327cSAdam Nemet bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets, 4670456327cSAdam Nemet MemAccessInfoSet &CheckDeps, ValueToValueMap &Strides); 4680456327cSAdam Nemet 4690456327cSAdam Nemet /// \brief The maximum number of bytes of a vector register we can vectorize 4700456327cSAdam Nemet /// the accesses safely with. 4710456327cSAdam Nemet unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; } 4720456327cSAdam Nemet 4730456327cSAdam Nemet /// \brief In same cases when the dependency check fails we can still 4740456327cSAdam Nemet /// vectorize the loop with a dynamic array access check. 4750456327cSAdam Nemet bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; } 4760456327cSAdam Nemet 4770456327cSAdam Nemet private: 4780456327cSAdam Nemet ScalarEvolution *SE; 4790456327cSAdam Nemet const DataLayout *DL; 4800456327cSAdam Nemet const Loop *InnermostLoop; 4810456327cSAdam Nemet 4820456327cSAdam Nemet /// \brief Maps access locations (ptr, read/write) to program order. 4830456327cSAdam Nemet DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses; 4840456327cSAdam Nemet 4850456327cSAdam Nemet /// \brief Memory access instructions in program order. 4860456327cSAdam Nemet SmallVector<Instruction *, 16> InstMap; 4870456327cSAdam Nemet 4880456327cSAdam Nemet /// \brief The program order index to be used for the next instruction. 4890456327cSAdam Nemet unsigned AccessIdx; 4900456327cSAdam Nemet 4910456327cSAdam Nemet // We can access this many bytes in parallel safely. 4920456327cSAdam Nemet unsigned MaxSafeDepDistBytes; 4930456327cSAdam Nemet 4940456327cSAdam Nemet /// \brief If we see a non-constant dependence distance we can still try to 4950456327cSAdam Nemet /// vectorize this loop with runtime checks. 4960456327cSAdam Nemet bool ShouldRetryWithRuntimeCheck; 4970456327cSAdam Nemet 4980456327cSAdam Nemet /// \brief Vectorizer parameters used by the analysis. 499*30f16e16SAdam Nemet LoopAccessInfo::VectorizerParams VectParams; 5000456327cSAdam Nemet 5010456327cSAdam Nemet /// \brief Check whether there is a plausible dependence between the two 5020456327cSAdam Nemet /// accesses. 5030456327cSAdam Nemet /// 5040456327cSAdam Nemet /// Access \p A must happen before \p B in program order. The two indices 5050456327cSAdam Nemet /// identify the index into the program order map. 5060456327cSAdam Nemet /// 5070456327cSAdam Nemet /// This function checks whether there is a plausible dependence (or the 5080456327cSAdam Nemet /// absence of such can't be proved) between the two accesses. If there is a 5090456327cSAdam Nemet /// plausible dependence but the dependence distance is bigger than one 5100456327cSAdam Nemet /// element access it records this distance in \p MaxSafeDepDistBytes (if this 5110456327cSAdam Nemet /// distance is smaller than any other distance encountered so far). 5120456327cSAdam Nemet /// Otherwise, this function returns true signaling a possible dependence. 5130456327cSAdam Nemet bool isDependent(const MemAccessInfo &A, unsigned AIdx, 5140456327cSAdam Nemet const MemAccessInfo &B, unsigned BIdx, 5150456327cSAdam Nemet ValueToValueMap &Strides); 5160456327cSAdam Nemet 5170456327cSAdam Nemet /// \brief Check whether the data dependence could prevent store-load 5180456327cSAdam Nemet /// forwarding. 5190456327cSAdam Nemet bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize); 5200456327cSAdam Nemet }; 5210456327cSAdam Nemet 5220456327cSAdam Nemet } // end anonymous namespace 5230456327cSAdam Nemet 5240456327cSAdam Nemet static bool isInBoundsGep(Value *Ptr) { 5250456327cSAdam Nemet if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) 5260456327cSAdam Nemet return GEP->isInBounds(); 5270456327cSAdam Nemet return false; 5280456327cSAdam Nemet } 5290456327cSAdam Nemet 5300456327cSAdam Nemet /// \brief Check whether the access through \p Ptr has a constant stride. 5310456327cSAdam Nemet static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, 5320456327cSAdam Nemet const Loop *Lp, ValueToValueMap &StridesMap) { 5330456327cSAdam Nemet const Type *Ty = Ptr->getType(); 5340456327cSAdam Nemet assert(Ty->isPointerTy() && "Unexpected non-ptr"); 5350456327cSAdam Nemet 5360456327cSAdam Nemet // Make sure that the pointer does not point to aggregate types. 5370456327cSAdam Nemet const PointerType *PtrTy = cast<PointerType>(Ty); 5380456327cSAdam Nemet if (PtrTy->getElementType()->isAggregateType()) { 5390456327cSAdam Nemet DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr << 5400456327cSAdam Nemet "\n"); 5410456327cSAdam Nemet return 0; 5420456327cSAdam Nemet } 5430456327cSAdam Nemet 5440456327cSAdam Nemet const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr); 5450456327cSAdam Nemet 5460456327cSAdam Nemet const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); 5470456327cSAdam Nemet if (!AR) { 5480456327cSAdam Nemet DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer " 5490456327cSAdam Nemet << *Ptr << " SCEV: " << *PtrScev << "\n"); 5500456327cSAdam Nemet return 0; 5510456327cSAdam Nemet } 5520456327cSAdam Nemet 5530456327cSAdam Nemet // The accesss function must stride over the innermost loop. 5540456327cSAdam Nemet if (Lp != AR->getLoop()) { 5550456327cSAdam Nemet DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " << 5560456327cSAdam Nemet *Ptr << " SCEV: " << *PtrScev << "\n"); 5570456327cSAdam Nemet } 5580456327cSAdam Nemet 5590456327cSAdam Nemet // The address calculation must not wrap. Otherwise, a dependence could be 5600456327cSAdam Nemet // inverted. 5610456327cSAdam Nemet // An inbounds getelementptr that is a AddRec with a unit stride 5620456327cSAdam Nemet // cannot wrap per definition. The unit stride requirement is checked later. 5630456327cSAdam Nemet // An getelementptr without an inbounds attribute and unit stride would have 5640456327cSAdam Nemet // to access the pointer value "0" which is undefined behavior in address 5650456327cSAdam Nemet // space 0, therefore we can also vectorize this case. 5660456327cSAdam Nemet bool IsInBoundsGEP = isInBoundsGep(Ptr); 5670456327cSAdam Nemet bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask); 5680456327cSAdam Nemet bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; 5690456327cSAdam Nemet if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { 5700456327cSAdam Nemet DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space " 5710456327cSAdam Nemet << *Ptr << " SCEV: " << *PtrScev << "\n"); 5720456327cSAdam Nemet return 0; 5730456327cSAdam Nemet } 5740456327cSAdam Nemet 5750456327cSAdam Nemet // Check the step is constant. 5760456327cSAdam Nemet const SCEV *Step = AR->getStepRecurrence(*SE); 5770456327cSAdam Nemet 5780456327cSAdam Nemet // Calculate the pointer stride and check if it is consecutive. 5790456327cSAdam Nemet const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); 5800456327cSAdam Nemet if (!C) { 5810456327cSAdam Nemet DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr << 5820456327cSAdam Nemet " SCEV: " << *PtrScev << "\n"); 5830456327cSAdam Nemet return 0; 5840456327cSAdam Nemet } 5850456327cSAdam Nemet 5860456327cSAdam Nemet int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType()); 5870456327cSAdam Nemet const APInt &APStepVal = C->getValue()->getValue(); 5880456327cSAdam Nemet 5890456327cSAdam Nemet // Huge step value - give up. 5900456327cSAdam Nemet if (APStepVal.getBitWidth() > 64) 5910456327cSAdam Nemet return 0; 5920456327cSAdam Nemet 5930456327cSAdam Nemet int64_t StepVal = APStepVal.getSExtValue(); 5940456327cSAdam Nemet 5950456327cSAdam Nemet // Strided access. 5960456327cSAdam Nemet int64_t Stride = StepVal / Size; 5970456327cSAdam Nemet int64_t Rem = StepVal % Size; 5980456327cSAdam Nemet if (Rem) 5990456327cSAdam Nemet return 0; 6000456327cSAdam Nemet 6010456327cSAdam Nemet // If the SCEV could wrap but we have an inbounds gep with a unit stride we 6020456327cSAdam Nemet // know we can't "wrap around the address space". In case of address space 6030456327cSAdam Nemet // zero we know that this won't happen without triggering undefined behavior. 6040456327cSAdam Nemet if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) && 6050456327cSAdam Nemet Stride != 1 && Stride != -1) 6060456327cSAdam Nemet return 0; 6070456327cSAdam Nemet 6080456327cSAdam Nemet return Stride; 6090456327cSAdam Nemet } 6100456327cSAdam Nemet 6110456327cSAdam Nemet bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, 6120456327cSAdam Nemet unsigned TypeByteSize) { 6130456327cSAdam Nemet // If loads occur at a distance that is not a multiple of a feasible vector 6140456327cSAdam Nemet // factor store-load forwarding does not take place. 6150456327cSAdam Nemet // Positive dependences might cause troubles because vectorizing them might 6160456327cSAdam Nemet // prevent store-load forwarding making vectorized code run a lot slower. 6170456327cSAdam Nemet // a[i] = a[i-3] ^ a[i-8]; 6180456327cSAdam Nemet // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and 6190456327cSAdam Nemet // hence on your typical architecture store-load forwarding does not take 6200456327cSAdam Nemet // place. Vectorizing in such cases does not make sense. 6210456327cSAdam Nemet // Store-load forwarding distance. 6220456327cSAdam Nemet const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize; 6230456327cSAdam Nemet // Maximum vector factor. 6240456327cSAdam Nemet unsigned MaxVFWithoutSLForwardIssues = VectParams.MaxVectorWidth*TypeByteSize; 6250456327cSAdam Nemet if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues) 6260456327cSAdam Nemet MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes; 6270456327cSAdam Nemet 6280456327cSAdam Nemet for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues; 6290456327cSAdam Nemet vf *= 2) { 6300456327cSAdam Nemet if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) { 6310456327cSAdam Nemet MaxVFWithoutSLForwardIssues = (vf >>=1); 6320456327cSAdam Nemet break; 6330456327cSAdam Nemet } 6340456327cSAdam Nemet } 6350456327cSAdam Nemet 6360456327cSAdam Nemet if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) { 6370456327cSAdam Nemet DEBUG(dbgs() << "LV: Distance " << Distance << 6380456327cSAdam Nemet " that could cause a store-load forwarding conflict\n"); 6390456327cSAdam Nemet return true; 6400456327cSAdam Nemet } 6410456327cSAdam Nemet 6420456327cSAdam Nemet if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes && 6430456327cSAdam Nemet MaxVFWithoutSLForwardIssues != VectParams.MaxVectorWidth*TypeByteSize) 6440456327cSAdam Nemet MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues; 6450456327cSAdam Nemet return false; 6460456327cSAdam Nemet } 6470456327cSAdam Nemet 6480456327cSAdam Nemet bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, 6490456327cSAdam Nemet const MemAccessInfo &B, unsigned BIdx, 6500456327cSAdam Nemet ValueToValueMap &Strides) { 6510456327cSAdam Nemet assert (AIdx < BIdx && "Must pass arguments in program order"); 6520456327cSAdam Nemet 6530456327cSAdam Nemet Value *APtr = A.getPointer(); 6540456327cSAdam Nemet Value *BPtr = B.getPointer(); 6550456327cSAdam Nemet bool AIsWrite = A.getInt(); 6560456327cSAdam Nemet bool BIsWrite = B.getInt(); 6570456327cSAdam Nemet 6580456327cSAdam Nemet // Two reads are independent. 6590456327cSAdam Nemet if (!AIsWrite && !BIsWrite) 6600456327cSAdam Nemet return false; 6610456327cSAdam Nemet 6620456327cSAdam Nemet // We cannot check pointers in different address spaces. 6630456327cSAdam Nemet if (APtr->getType()->getPointerAddressSpace() != 6640456327cSAdam Nemet BPtr->getType()->getPointerAddressSpace()) 6650456327cSAdam Nemet return true; 6660456327cSAdam Nemet 6670456327cSAdam Nemet const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); 6680456327cSAdam Nemet const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); 6690456327cSAdam Nemet 6700456327cSAdam Nemet int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides); 6710456327cSAdam Nemet int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides); 6720456327cSAdam Nemet 6730456327cSAdam Nemet const SCEV *Src = AScev; 6740456327cSAdam Nemet const SCEV *Sink = BScev; 6750456327cSAdam Nemet 6760456327cSAdam Nemet // If the induction step is negative we have to invert source and sink of the 6770456327cSAdam Nemet // dependence. 6780456327cSAdam Nemet if (StrideAPtr < 0) { 6790456327cSAdam Nemet //Src = BScev; 6800456327cSAdam Nemet //Sink = AScev; 6810456327cSAdam Nemet std::swap(APtr, BPtr); 6820456327cSAdam Nemet std::swap(Src, Sink); 6830456327cSAdam Nemet std::swap(AIsWrite, BIsWrite); 6840456327cSAdam Nemet std::swap(AIdx, BIdx); 6850456327cSAdam Nemet std::swap(StrideAPtr, StrideBPtr); 6860456327cSAdam Nemet } 6870456327cSAdam Nemet 6880456327cSAdam Nemet const SCEV *Dist = SE->getMinusSCEV(Sink, Src); 6890456327cSAdam Nemet 6900456327cSAdam Nemet DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink 6910456327cSAdam Nemet << "(Induction step: " << StrideAPtr << ")\n"); 6920456327cSAdam Nemet DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to " 6930456327cSAdam Nemet << *InstMap[BIdx] << ": " << *Dist << "\n"); 6940456327cSAdam Nemet 6950456327cSAdam Nemet // Need consecutive accesses. We don't want to vectorize 6960456327cSAdam Nemet // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in 6970456327cSAdam Nemet // the address space. 6980456327cSAdam Nemet if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ 6990456327cSAdam Nemet DEBUG(dbgs() << "Non-consecutive pointer access\n"); 7000456327cSAdam Nemet return true; 7010456327cSAdam Nemet } 7020456327cSAdam Nemet 7030456327cSAdam Nemet const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist); 7040456327cSAdam Nemet if (!C) { 7050456327cSAdam Nemet DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n"); 7060456327cSAdam Nemet ShouldRetryWithRuntimeCheck = true; 7070456327cSAdam Nemet return true; 7080456327cSAdam Nemet } 7090456327cSAdam Nemet 7100456327cSAdam Nemet Type *ATy = APtr->getType()->getPointerElementType(); 7110456327cSAdam Nemet Type *BTy = BPtr->getType()->getPointerElementType(); 7120456327cSAdam Nemet unsigned TypeByteSize = DL->getTypeAllocSize(ATy); 7130456327cSAdam Nemet 7140456327cSAdam Nemet // Negative distances are not plausible dependencies. 7150456327cSAdam Nemet const APInt &Val = C->getValue()->getValue(); 7160456327cSAdam Nemet if (Val.isNegative()) { 7170456327cSAdam Nemet bool IsTrueDataDependence = (AIsWrite && !BIsWrite); 7180456327cSAdam Nemet if (IsTrueDataDependence && 7190456327cSAdam Nemet (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) || 7200456327cSAdam Nemet ATy != BTy)) 7210456327cSAdam Nemet return true; 7220456327cSAdam Nemet 7230456327cSAdam Nemet DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n"); 7240456327cSAdam Nemet return false; 7250456327cSAdam Nemet } 7260456327cSAdam Nemet 7270456327cSAdam Nemet // Write to the same location with the same size. 7280456327cSAdam Nemet // Could be improved to assert type sizes are the same (i32 == float, etc). 7290456327cSAdam Nemet if (Val == 0) { 7300456327cSAdam Nemet if (ATy == BTy) 7310456327cSAdam Nemet return false; 7320456327cSAdam Nemet DEBUG(dbgs() << "LV: Zero dependence difference but different types\n"); 7330456327cSAdam Nemet return true; 7340456327cSAdam Nemet } 7350456327cSAdam Nemet 7360456327cSAdam Nemet assert(Val.isStrictlyPositive() && "Expect a positive value"); 7370456327cSAdam Nemet 7380456327cSAdam Nemet // Positive distance bigger than max vectorization factor. 7390456327cSAdam Nemet if (ATy != BTy) { 7400456327cSAdam Nemet DEBUG(dbgs() << 7410456327cSAdam Nemet "LV: ReadWrite-Write positive dependency with different types\n"); 7420456327cSAdam Nemet return false; 7430456327cSAdam Nemet } 7440456327cSAdam Nemet 7450456327cSAdam Nemet unsigned Distance = (unsigned) Val.getZExtValue(); 7460456327cSAdam Nemet 7470456327cSAdam Nemet // Bail out early if passed-in parameters make vectorization not feasible. 7480456327cSAdam Nemet unsigned ForcedFactor = (VectParams.VectorizationFactor ? 7490456327cSAdam Nemet VectParams.VectorizationFactor : 1); 7500456327cSAdam Nemet unsigned ForcedUnroll = (VectParams.VectorizationInterleave ? 7510456327cSAdam Nemet VectParams.VectorizationInterleave : 1); 7520456327cSAdam Nemet 7530456327cSAdam Nemet // The distance must be bigger than the size needed for a vectorized version 7540456327cSAdam Nemet // of the operation and the size of the vectorized operation must not be 7550456327cSAdam Nemet // bigger than the currrent maximum size. 7560456327cSAdam Nemet if (Distance < 2*TypeByteSize || 7570456327cSAdam Nemet 2*TypeByteSize > MaxSafeDepDistBytes || 7580456327cSAdam Nemet Distance < TypeByteSize * ForcedUnroll * ForcedFactor) { 7590456327cSAdam Nemet DEBUG(dbgs() << "LV: Failure because of Positive distance " 7600456327cSAdam Nemet << Val.getSExtValue() << '\n'); 7610456327cSAdam Nemet return true; 7620456327cSAdam Nemet } 7630456327cSAdam Nemet 7640456327cSAdam Nemet MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ? 7650456327cSAdam Nemet Distance : MaxSafeDepDistBytes; 7660456327cSAdam Nemet 7670456327cSAdam Nemet bool IsTrueDataDependence = (!AIsWrite && BIsWrite); 7680456327cSAdam Nemet if (IsTrueDataDependence && 7690456327cSAdam Nemet couldPreventStoreLoadForward(Distance, TypeByteSize)) 7700456327cSAdam Nemet return true; 7710456327cSAdam Nemet 7720456327cSAdam Nemet DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() << 7730456327cSAdam Nemet " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n'); 7740456327cSAdam Nemet 7750456327cSAdam Nemet return false; 7760456327cSAdam Nemet } 7770456327cSAdam Nemet 7780456327cSAdam Nemet bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets, 7790456327cSAdam Nemet MemAccessInfoSet &CheckDeps, 7800456327cSAdam Nemet ValueToValueMap &Strides) { 7810456327cSAdam Nemet 7820456327cSAdam Nemet MaxSafeDepDistBytes = -1U; 7830456327cSAdam Nemet while (!CheckDeps.empty()) { 7840456327cSAdam Nemet MemAccessInfo CurAccess = *CheckDeps.begin(); 7850456327cSAdam Nemet 7860456327cSAdam Nemet // Get the relevant memory access set. 7870456327cSAdam Nemet EquivalenceClasses<MemAccessInfo>::iterator I = 7880456327cSAdam Nemet AccessSets.findValue(AccessSets.getLeaderValue(CurAccess)); 7890456327cSAdam Nemet 7900456327cSAdam Nemet // Check accesses within this set. 7910456327cSAdam Nemet EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE; 7920456327cSAdam Nemet AI = AccessSets.member_begin(I), AE = AccessSets.member_end(); 7930456327cSAdam Nemet 7940456327cSAdam Nemet // Check every access pair. 7950456327cSAdam Nemet while (AI != AE) { 7960456327cSAdam Nemet CheckDeps.erase(*AI); 7970456327cSAdam Nemet EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI); 7980456327cSAdam Nemet while (OI != AE) { 7990456327cSAdam Nemet // Check every accessing instruction pair in program order. 8000456327cSAdam Nemet for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(), 8010456327cSAdam Nemet I1E = Accesses[*AI].end(); I1 != I1E; ++I1) 8020456327cSAdam Nemet for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(), 8030456327cSAdam Nemet I2E = Accesses[*OI].end(); I2 != I2E; ++I2) { 8040456327cSAdam Nemet if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides)) 8050456327cSAdam Nemet return false; 8060456327cSAdam Nemet if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides)) 8070456327cSAdam Nemet return false; 8080456327cSAdam Nemet } 8090456327cSAdam Nemet ++OI; 8100456327cSAdam Nemet } 8110456327cSAdam Nemet AI++; 8120456327cSAdam Nemet } 8130456327cSAdam Nemet } 8140456327cSAdam Nemet return true; 8150456327cSAdam Nemet } 8160456327cSAdam Nemet 817*30f16e16SAdam Nemet bool LoopAccessInfo::canVectorizeMemory(ValueToValueMap &Strides) { 8180456327cSAdam Nemet 8190456327cSAdam Nemet typedef SmallVector<Value*, 16> ValueVector; 8200456327cSAdam Nemet typedef SmallPtrSet<Value*, 16> ValueSet; 8210456327cSAdam Nemet 8220456327cSAdam Nemet // Holds the Load and Store *instructions*. 8230456327cSAdam Nemet ValueVector Loads; 8240456327cSAdam Nemet ValueVector Stores; 8250456327cSAdam Nemet 8260456327cSAdam Nemet // Holds all the different accesses in the loop. 8270456327cSAdam Nemet unsigned NumReads = 0; 8280456327cSAdam Nemet unsigned NumReadWrites = 0; 8290456327cSAdam Nemet 8300456327cSAdam Nemet PtrRtCheck.Pointers.clear(); 8310456327cSAdam Nemet PtrRtCheck.Need = false; 8320456327cSAdam Nemet 8330456327cSAdam Nemet const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 8340456327cSAdam Nemet MemoryDepChecker DepChecker(SE, DL, TheLoop, VectParams); 8350456327cSAdam Nemet 8360456327cSAdam Nemet // For each block. 8370456327cSAdam Nemet for (Loop::block_iterator bb = TheLoop->block_begin(), 8380456327cSAdam Nemet be = TheLoop->block_end(); bb != be; ++bb) { 8390456327cSAdam Nemet 8400456327cSAdam Nemet // Scan the BB and collect legal loads and stores. 8410456327cSAdam Nemet for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; 8420456327cSAdam Nemet ++it) { 8430456327cSAdam Nemet 8440456327cSAdam Nemet // If this is a load, save it. If this instruction can read from memory 8450456327cSAdam Nemet // but is not a load, then we quit. Notice that we don't handle function 8460456327cSAdam Nemet // calls that read or write. 8470456327cSAdam Nemet if (it->mayReadFromMemory()) { 8480456327cSAdam Nemet // Many math library functions read the rounding mode. We will only 8490456327cSAdam Nemet // vectorize a loop if it contains known function calls that don't set 8500456327cSAdam Nemet // the flag. Therefore, it is safe to ignore this read from memory. 8510456327cSAdam Nemet CallInst *Call = dyn_cast<CallInst>(it); 8520456327cSAdam Nemet if (Call && getIntrinsicIDForCall(Call, TLI)) 8530456327cSAdam Nemet continue; 8540456327cSAdam Nemet 8550456327cSAdam Nemet LoadInst *Ld = dyn_cast<LoadInst>(it); 8560456327cSAdam Nemet if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { 8570456327cSAdam Nemet emitAnalysis(VectorizationReport(Ld) 8580456327cSAdam Nemet << "read with atomic ordering or volatile read"); 8590456327cSAdam Nemet DEBUG(dbgs() << "LV: Found a non-simple load.\n"); 8600456327cSAdam Nemet return false; 8610456327cSAdam Nemet } 8620456327cSAdam Nemet NumLoads++; 8630456327cSAdam Nemet Loads.push_back(Ld); 8640456327cSAdam Nemet DepChecker.addAccess(Ld); 8650456327cSAdam Nemet continue; 8660456327cSAdam Nemet } 8670456327cSAdam Nemet 8680456327cSAdam Nemet // Save 'store' instructions. Abort if other instructions write to memory. 8690456327cSAdam Nemet if (it->mayWriteToMemory()) { 8700456327cSAdam Nemet StoreInst *St = dyn_cast<StoreInst>(it); 8710456327cSAdam Nemet if (!St) { 8720456327cSAdam Nemet emitAnalysis(VectorizationReport(it) << 8730456327cSAdam Nemet "instruction cannot be vectorized"); 8740456327cSAdam Nemet return false; 8750456327cSAdam Nemet } 8760456327cSAdam Nemet if (!St->isSimple() && !IsAnnotatedParallel) { 8770456327cSAdam Nemet emitAnalysis(VectorizationReport(St) 8780456327cSAdam Nemet << "write with atomic ordering or volatile write"); 8790456327cSAdam Nemet DEBUG(dbgs() << "LV: Found a non-simple store.\n"); 8800456327cSAdam Nemet return false; 8810456327cSAdam Nemet } 8820456327cSAdam Nemet NumStores++; 8830456327cSAdam Nemet Stores.push_back(St); 8840456327cSAdam Nemet DepChecker.addAccess(St); 8850456327cSAdam Nemet } 8860456327cSAdam Nemet } // Next instr. 8870456327cSAdam Nemet } // Next block. 8880456327cSAdam Nemet 8890456327cSAdam Nemet // Now we have two lists that hold the loads and the stores. 8900456327cSAdam Nemet // Next, we find the pointers that they use. 8910456327cSAdam Nemet 8920456327cSAdam Nemet // Check if we see any stores. If there are no stores, then we don't 8930456327cSAdam Nemet // care if the pointers are *restrict*. 8940456327cSAdam Nemet if (!Stores.size()) { 8950456327cSAdam Nemet DEBUG(dbgs() << "LV: Found a read-only loop!\n"); 8960456327cSAdam Nemet return true; 8970456327cSAdam Nemet } 8980456327cSAdam Nemet 8990456327cSAdam Nemet AccessAnalysis::DepCandidates DependentAccesses; 9000456327cSAdam Nemet AccessAnalysis Accesses(DL, AA, DependentAccesses); 9010456327cSAdam Nemet 9020456327cSAdam Nemet // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects 9030456327cSAdam Nemet // multiple times on the same object. If the ptr is accessed twice, once 9040456327cSAdam Nemet // for read and once for write, it will only appear once (on the write 9050456327cSAdam Nemet // list). This is okay, since we are going to check for conflicts between 9060456327cSAdam Nemet // writes and between reads and writes, but not between reads and reads. 9070456327cSAdam Nemet ValueSet Seen; 9080456327cSAdam Nemet 9090456327cSAdam Nemet ValueVector::iterator I, IE; 9100456327cSAdam Nemet for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) { 9110456327cSAdam Nemet StoreInst *ST = cast<StoreInst>(*I); 9120456327cSAdam Nemet Value* Ptr = ST->getPointerOperand(); 9130456327cSAdam Nemet 9140456327cSAdam Nemet if (isUniform(Ptr)) { 9150456327cSAdam Nemet emitAnalysis( 9160456327cSAdam Nemet VectorizationReport(ST) 9170456327cSAdam Nemet << "write to a loop invariant address could not be vectorized"); 9180456327cSAdam Nemet DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); 9190456327cSAdam Nemet return false; 9200456327cSAdam Nemet } 9210456327cSAdam Nemet 9220456327cSAdam Nemet // If we did *not* see this pointer before, insert it to the read-write 9230456327cSAdam Nemet // list. At this phase it is only a 'write' list. 9240456327cSAdam Nemet if (Seen.insert(Ptr).second) { 9250456327cSAdam Nemet ++NumReadWrites; 9260456327cSAdam Nemet 9270456327cSAdam Nemet AliasAnalysis::Location Loc = AA->getLocation(ST); 9280456327cSAdam Nemet // The TBAA metadata could have a control dependency on the predication 9290456327cSAdam Nemet // condition, so we cannot rely on it when determining whether or not we 9300456327cSAdam Nemet // need runtime pointer checks. 9310456327cSAdam Nemet if (blockNeedsPredication(ST->getParent())) 9320456327cSAdam Nemet Loc.AATags.TBAA = nullptr; 9330456327cSAdam Nemet 9340456327cSAdam Nemet Accesses.addStore(Loc); 9350456327cSAdam Nemet } 9360456327cSAdam Nemet } 9370456327cSAdam Nemet 9380456327cSAdam Nemet if (IsAnnotatedParallel) { 9390456327cSAdam Nemet DEBUG(dbgs() 9400456327cSAdam Nemet << "LV: A loop annotated parallel, ignore memory dependency " 9410456327cSAdam Nemet << "checks.\n"); 9420456327cSAdam Nemet return true; 9430456327cSAdam Nemet } 9440456327cSAdam Nemet 9450456327cSAdam Nemet for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { 9460456327cSAdam Nemet LoadInst *LD = cast<LoadInst>(*I); 9470456327cSAdam Nemet Value* Ptr = LD->getPointerOperand(); 9480456327cSAdam Nemet // If we did *not* see this pointer before, insert it to the 9490456327cSAdam Nemet // read list. If we *did* see it before, then it is already in 9500456327cSAdam Nemet // the read-write list. This allows us to vectorize expressions 9510456327cSAdam Nemet // such as A[i] += x; Because the address of A[i] is a read-write 9520456327cSAdam Nemet // pointer. This only works if the index of A[i] is consecutive. 9530456327cSAdam Nemet // If the address of i is unknown (for example A[B[i]]) then we may 9540456327cSAdam Nemet // read a few words, modify, and write a few words, and some of the 9550456327cSAdam Nemet // words may be written to the same address. 9560456327cSAdam Nemet bool IsReadOnlyPtr = false; 9570456327cSAdam Nemet if (Seen.insert(Ptr).second || 9580456327cSAdam Nemet !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) { 9590456327cSAdam Nemet ++NumReads; 9600456327cSAdam Nemet IsReadOnlyPtr = true; 9610456327cSAdam Nemet } 9620456327cSAdam Nemet 9630456327cSAdam Nemet AliasAnalysis::Location Loc = AA->getLocation(LD); 9640456327cSAdam Nemet // The TBAA metadata could have a control dependency on the predication 9650456327cSAdam Nemet // condition, so we cannot rely on it when determining whether or not we 9660456327cSAdam Nemet // need runtime pointer checks. 9670456327cSAdam Nemet if (blockNeedsPredication(LD->getParent())) 9680456327cSAdam Nemet Loc.AATags.TBAA = nullptr; 9690456327cSAdam Nemet 9700456327cSAdam Nemet Accesses.addLoad(Loc, IsReadOnlyPtr); 9710456327cSAdam Nemet } 9720456327cSAdam Nemet 9730456327cSAdam Nemet // If we write (or read-write) to a single destination and there are no 9740456327cSAdam Nemet // other reads in this loop then is it safe to vectorize. 9750456327cSAdam Nemet if (NumReadWrites == 1 && NumReads == 0) { 9760456327cSAdam Nemet DEBUG(dbgs() << "LV: Found a write-only loop!\n"); 9770456327cSAdam Nemet return true; 9780456327cSAdam Nemet } 9790456327cSAdam Nemet 9800456327cSAdam Nemet // Build dependence sets and check whether we need a runtime pointer bounds 9810456327cSAdam Nemet // check. 9820456327cSAdam Nemet Accesses.buildDependenceSets(); 9830456327cSAdam Nemet bool NeedRTCheck = Accesses.isRTCheckNeeded(); 9840456327cSAdam Nemet 9850456327cSAdam Nemet // Find pointers with computable bounds. We are going to use this information 9860456327cSAdam Nemet // to place a runtime bound check. 9870456327cSAdam Nemet unsigned NumComparisons = 0; 9880456327cSAdam Nemet bool CanDoRT = false; 9890456327cSAdam Nemet if (NeedRTCheck) 9900456327cSAdam Nemet CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop, 9910456327cSAdam Nemet Strides); 9920456327cSAdam Nemet 9930456327cSAdam Nemet DEBUG(dbgs() << "LV: We need to do " << NumComparisons << 9940456327cSAdam Nemet " pointer comparisons.\n"); 9950456327cSAdam Nemet 9960456327cSAdam Nemet // If we only have one set of dependences to check pointers among we don't 9970456327cSAdam Nemet // need a runtime check. 9980456327cSAdam Nemet if (NumComparisons == 0 && NeedRTCheck) 9990456327cSAdam Nemet NeedRTCheck = false; 10000456327cSAdam Nemet 10010456327cSAdam Nemet // Check that we did not collect too many pointers or found an unsizeable 10020456327cSAdam Nemet // pointer. 10030456327cSAdam Nemet if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) { 10040456327cSAdam Nemet PtrRtCheck.reset(); 10050456327cSAdam Nemet CanDoRT = false; 10060456327cSAdam Nemet } 10070456327cSAdam Nemet 10080456327cSAdam Nemet if (CanDoRT) { 10090456327cSAdam Nemet DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n"); 10100456327cSAdam Nemet } 10110456327cSAdam Nemet 10120456327cSAdam Nemet if (NeedRTCheck && !CanDoRT) { 10130456327cSAdam Nemet emitAnalysis(VectorizationReport() << "cannot identify array bounds"); 10140456327cSAdam Nemet DEBUG(dbgs() << "LV: We can't vectorize because we can't find " << 10150456327cSAdam Nemet "the array bounds.\n"); 10160456327cSAdam Nemet PtrRtCheck.reset(); 10170456327cSAdam Nemet return false; 10180456327cSAdam Nemet } 10190456327cSAdam Nemet 10200456327cSAdam Nemet PtrRtCheck.Need = NeedRTCheck; 10210456327cSAdam Nemet 10220456327cSAdam Nemet bool CanVecMem = true; 10230456327cSAdam Nemet if (Accesses.isDependencyCheckNeeded()) { 10240456327cSAdam Nemet DEBUG(dbgs() << "LV: Checking memory dependencies\n"); 10250456327cSAdam Nemet CanVecMem = DepChecker.areDepsSafe( 10260456327cSAdam Nemet DependentAccesses, Accesses.getDependenciesToCheck(), Strides); 10270456327cSAdam Nemet MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes(); 10280456327cSAdam Nemet 10290456327cSAdam Nemet if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) { 10300456327cSAdam Nemet DEBUG(dbgs() << "LV: Retrying with memory checks\n"); 10310456327cSAdam Nemet NeedRTCheck = true; 10320456327cSAdam Nemet 10330456327cSAdam Nemet // Clear the dependency checks. We assume they are not needed. 10340456327cSAdam Nemet Accesses.resetDepChecks(); 10350456327cSAdam Nemet 10360456327cSAdam Nemet PtrRtCheck.reset(); 10370456327cSAdam Nemet PtrRtCheck.Need = true; 10380456327cSAdam Nemet 10390456327cSAdam Nemet CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, 10400456327cSAdam Nemet TheLoop, Strides, true); 10410456327cSAdam Nemet // Check that we did not collect too many pointers or found an unsizeable 10420456327cSAdam Nemet // pointer. 10430456327cSAdam Nemet if (!CanDoRT || NumComparisons > VectParams.RuntimeMemoryCheckThreshold) { 10440456327cSAdam Nemet if (!CanDoRT && NumComparisons > 0) 10450456327cSAdam Nemet emitAnalysis(VectorizationReport() 10460456327cSAdam Nemet << "cannot check memory dependencies at runtime"); 10470456327cSAdam Nemet else 10480456327cSAdam Nemet emitAnalysis(VectorizationReport() 10490456327cSAdam Nemet << NumComparisons << " exceeds limit of " 10500456327cSAdam Nemet << VectParams.RuntimeMemoryCheckThreshold 10510456327cSAdam Nemet << " dependent memory operations checked at runtime"); 10520456327cSAdam Nemet DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n"); 10530456327cSAdam Nemet PtrRtCheck.reset(); 10540456327cSAdam Nemet return false; 10550456327cSAdam Nemet } 10560456327cSAdam Nemet 10570456327cSAdam Nemet CanVecMem = true; 10580456327cSAdam Nemet } 10590456327cSAdam Nemet } 10600456327cSAdam Nemet 10610456327cSAdam Nemet if (!CanVecMem) 10620456327cSAdam Nemet emitAnalysis(VectorizationReport() << 10630456327cSAdam Nemet "unsafe dependent memory operations in loop"); 10640456327cSAdam Nemet 10650456327cSAdam Nemet DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") << 10660456327cSAdam Nemet " need a runtime memory check.\n"); 10670456327cSAdam Nemet 10680456327cSAdam Nemet return CanVecMem; 10690456327cSAdam Nemet } 10700456327cSAdam Nemet 1071*30f16e16SAdam Nemet bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB) { 10720456327cSAdam Nemet assert(TheLoop->contains(BB) && "Unknown block used"); 10730456327cSAdam Nemet 10740456327cSAdam Nemet // Blocks that do not dominate the latch need predication. 10750456327cSAdam Nemet BasicBlock* Latch = TheLoop->getLoopLatch(); 10760456327cSAdam Nemet return !DT->dominates(BB, Latch); 10770456327cSAdam Nemet } 10780456327cSAdam Nemet 1079*30f16e16SAdam Nemet void LoopAccessInfo::emitAnalysis(VectorizationReport &Message) { 10800456327cSAdam Nemet VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop); 10810456327cSAdam Nemet } 10820456327cSAdam Nemet 1083*30f16e16SAdam Nemet bool LoopAccessInfo::isUniform(Value *V) { 10840456327cSAdam Nemet return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); 10850456327cSAdam Nemet } 10867206d7a5SAdam Nemet 10877206d7a5SAdam Nemet // FIXME: this function is currently a duplicate of the one in 10887206d7a5SAdam Nemet // LoopVectorize.cpp. 10897206d7a5SAdam Nemet static Instruction *getFirstInst(Instruction *FirstInst, Value *V, 10907206d7a5SAdam Nemet Instruction *Loc) { 10917206d7a5SAdam Nemet if (FirstInst) 10927206d7a5SAdam Nemet return FirstInst; 10937206d7a5SAdam Nemet if (Instruction *I = dyn_cast<Instruction>(V)) 10947206d7a5SAdam Nemet return I->getParent() == Loc->getParent() ? I : nullptr; 10957206d7a5SAdam Nemet return nullptr; 10967206d7a5SAdam Nemet } 10977206d7a5SAdam Nemet 10987206d7a5SAdam Nemet std::pair<Instruction *, Instruction *> 1099*30f16e16SAdam Nemet LoopAccessInfo::addRuntimeCheck(Instruction *Loc) { 11007206d7a5SAdam Nemet Instruction *tnullptr = nullptr; 11017206d7a5SAdam Nemet if (!PtrRtCheck.Need) 11027206d7a5SAdam Nemet return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr); 11037206d7a5SAdam Nemet 11047206d7a5SAdam Nemet unsigned NumPointers = PtrRtCheck.Pointers.size(); 11057206d7a5SAdam Nemet SmallVector<TrackingVH<Value> , 2> Starts; 11067206d7a5SAdam Nemet SmallVector<TrackingVH<Value> , 2> Ends; 11077206d7a5SAdam Nemet 11087206d7a5SAdam Nemet LLVMContext &Ctx = Loc->getContext(); 11097206d7a5SAdam Nemet SCEVExpander Exp(*SE, "induction"); 11107206d7a5SAdam Nemet Instruction *FirstInst = nullptr; 11117206d7a5SAdam Nemet 11127206d7a5SAdam Nemet for (unsigned i = 0; i < NumPointers; ++i) { 11137206d7a5SAdam Nemet Value *Ptr = PtrRtCheck.Pointers[i]; 11147206d7a5SAdam Nemet const SCEV *Sc = SE->getSCEV(Ptr); 11157206d7a5SAdam Nemet 11167206d7a5SAdam Nemet if (SE->isLoopInvariant(Sc, TheLoop)) { 11177206d7a5SAdam Nemet DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" << 11187206d7a5SAdam Nemet *Ptr <<"\n"); 11197206d7a5SAdam Nemet Starts.push_back(Ptr); 11207206d7a5SAdam Nemet Ends.push_back(Ptr); 11217206d7a5SAdam Nemet } else { 11227206d7a5SAdam Nemet DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n'); 11237206d7a5SAdam Nemet unsigned AS = Ptr->getType()->getPointerAddressSpace(); 11247206d7a5SAdam Nemet 11257206d7a5SAdam Nemet // Use this type for pointer arithmetic. 11267206d7a5SAdam Nemet Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); 11277206d7a5SAdam Nemet 11287206d7a5SAdam Nemet Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc); 11297206d7a5SAdam Nemet Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc); 11307206d7a5SAdam Nemet Starts.push_back(Start); 11317206d7a5SAdam Nemet Ends.push_back(End); 11327206d7a5SAdam Nemet } 11337206d7a5SAdam Nemet } 11347206d7a5SAdam Nemet 11357206d7a5SAdam Nemet IRBuilder<> ChkBuilder(Loc); 11367206d7a5SAdam Nemet // Our instructions might fold to a constant. 11377206d7a5SAdam Nemet Value *MemoryRuntimeCheck = nullptr; 11387206d7a5SAdam Nemet for (unsigned i = 0; i < NumPointers; ++i) { 11397206d7a5SAdam Nemet for (unsigned j = i+1; j < NumPointers; ++j) { 11407206d7a5SAdam Nemet // No need to check if two readonly pointers intersect. 11417206d7a5SAdam Nemet if (!PtrRtCheck.IsWritePtr[i] && !PtrRtCheck.IsWritePtr[j]) 11427206d7a5SAdam Nemet continue; 11437206d7a5SAdam Nemet 11447206d7a5SAdam Nemet // Only need to check pointers between two different dependency sets. 11457206d7a5SAdam Nemet if (PtrRtCheck.DependencySetId[i] == PtrRtCheck.DependencySetId[j]) 11467206d7a5SAdam Nemet continue; 11477206d7a5SAdam Nemet // Only need to check pointers in the same alias set. 11487206d7a5SAdam Nemet if (PtrRtCheck.AliasSetId[i] != PtrRtCheck.AliasSetId[j]) 11497206d7a5SAdam Nemet continue; 11507206d7a5SAdam Nemet 11517206d7a5SAdam Nemet unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); 11527206d7a5SAdam Nemet unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); 11537206d7a5SAdam Nemet 11547206d7a5SAdam Nemet assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && 11557206d7a5SAdam Nemet (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && 11567206d7a5SAdam Nemet "Trying to bounds check pointers with different address spaces"); 11577206d7a5SAdam Nemet 11587206d7a5SAdam Nemet Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); 11597206d7a5SAdam Nemet Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); 11607206d7a5SAdam Nemet 11617206d7a5SAdam Nemet Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc"); 11627206d7a5SAdam Nemet Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc"); 11637206d7a5SAdam Nemet Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc"); 11647206d7a5SAdam Nemet Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc"); 11657206d7a5SAdam Nemet 11667206d7a5SAdam Nemet Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); 11677206d7a5SAdam Nemet FirstInst = getFirstInst(FirstInst, Cmp0, Loc); 11687206d7a5SAdam Nemet Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); 11697206d7a5SAdam Nemet FirstInst = getFirstInst(FirstInst, Cmp1, Loc); 11707206d7a5SAdam Nemet Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); 11717206d7a5SAdam Nemet FirstInst = getFirstInst(FirstInst, IsConflict, Loc); 11727206d7a5SAdam Nemet if (MemoryRuntimeCheck) { 11737206d7a5SAdam Nemet IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, 11747206d7a5SAdam Nemet "conflict.rdx"); 11757206d7a5SAdam Nemet FirstInst = getFirstInst(FirstInst, IsConflict, Loc); 11767206d7a5SAdam Nemet } 11777206d7a5SAdam Nemet MemoryRuntimeCheck = IsConflict; 11787206d7a5SAdam Nemet } 11797206d7a5SAdam Nemet } 11807206d7a5SAdam Nemet 11817206d7a5SAdam Nemet // We have to do this trickery because the IRBuilder might fold the check to a 11827206d7a5SAdam Nemet // constant expression in which case there is no Instruction anchored in a 11837206d7a5SAdam Nemet // the block. 11847206d7a5SAdam Nemet Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck, 11857206d7a5SAdam Nemet ConstantInt::getTrue(Ctx)); 11867206d7a5SAdam Nemet ChkBuilder.Insert(Check, "memcheck.conflict"); 11877206d7a5SAdam Nemet FirstInst = getFirstInst(FirstInst, Check, Loc); 11887206d7a5SAdam Nemet return std::make_pair(FirstInst, Check); 11897206d7a5SAdam Nemet } 1190