//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
"llvm/Support/Debug.h" 58a3fe70d2SEugene Zelenko #include "llvm/Support/ErrorHandling.h" 59799003bfSBenjamin Kramer #include "llvm/Support/raw_ostream.h" 60a3fe70d2SEugene Zelenko #include <algorithm> 61a3fe70d2SEugene Zelenko #include <cassert> 62a3fe70d2SEugene Zelenko #include <cstdint> 63a3fe70d2SEugene Zelenko #include <iterator> 64a3fe70d2SEugene Zelenko #include <utility> 65a3fe70d2SEugene Zelenko #include <vector> 66a3fe70d2SEugene Zelenko 670456327cSAdam Nemet using namespace llvm; 680456327cSAdam Nemet 69339f42b3SAdam Nemet #define DEBUG_TYPE "loop-accesses" 700456327cSAdam Nemet 71f219c647SAdam Nemet static cl::opt<unsigned, true> 72f219c647SAdam Nemet VectorizationFactor("force-vector-width", cl::Hidden, 73f219c647SAdam Nemet cl::desc("Sets the SIMD width. Zero is autoselect."), 74f219c647SAdam Nemet cl::location(VectorizerParams::VectorizationFactor)); 751d862af7SAdam Nemet unsigned VectorizerParams::VectorizationFactor; 76f219c647SAdam Nemet 77f219c647SAdam Nemet static cl::opt<unsigned, true> 78f219c647SAdam Nemet VectorizationInterleave("force-vector-interleave", cl::Hidden, 79f219c647SAdam Nemet cl::desc("Sets the vectorization interleave count. " 80f219c647SAdam Nemet "Zero is autoselect."), 81f219c647SAdam Nemet cl::location( 82f219c647SAdam Nemet VectorizerParams::VectorizationInterleave)); 831d862af7SAdam Nemet unsigned VectorizerParams::VectorizationInterleave; 84f219c647SAdam Nemet 851d862af7SAdam Nemet static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold( 861d862af7SAdam Nemet "runtime-memory-check-threshold", cl::Hidden, 871d862af7SAdam Nemet cl::desc("When performing memory disambiguation checks at runtime do not " 881d862af7SAdam Nemet "generate more than this number of comparisons (default = 8)."), 891d862af7SAdam Nemet cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8)); 901d862af7SAdam Nemet unsigned VectorizerParams::RuntimeMemoryCheckThreshold; 91f219c647SAdam Nemet 925f8f34e4SAdrian Prantl /// The maximum iterations used to merge memory checks 931b6b50a9SSilviu Baranga static cl::opt<unsigned> MemoryCheckMergeThreshold( 941b6b50a9SSilviu Baranga "memory-check-merge-threshold", cl::Hidden, 951b6b50a9SSilviu Baranga cl::desc("Maximum number of comparisons done when trying to merge " 961b6b50a9SSilviu Baranga "runtime memory checks. (default = 100)"), 971b6b50a9SSilviu Baranga cl::init(100)); 981b6b50a9SSilviu Baranga 99f219c647SAdam Nemet /// Maximum SIMD width. 100f219c647SAdam Nemet const unsigned VectorizerParams::MaxVectorWidth = 64; 101f219c647SAdam Nemet 1025f8f34e4SAdrian Prantl /// We collect dependences up to this threshold. 103a2df750fSAdam Nemet static cl::opt<unsigned> 104a2df750fSAdam Nemet MaxDependences("max-dependences", cl::Hidden, 105a2df750fSAdam Nemet cl::desc("Maximum number of dependences collected by " 1069c926579SAdam Nemet "loop-access analysis (default = 100)"), 1079c926579SAdam Nemet cl::init(100)); 1089c926579SAdam Nemet 109a9f09c62SAdam Nemet /// This enables versioning on the strides of symbolically striding memory 110a9f09c62SAdam Nemet /// accesses in code like the following. 111a9f09c62SAdam Nemet /// for (i = 0; i < N; ++i) 112a9f09c62SAdam Nemet /// A[i * Stride1] += B[i * Stride2] ... 113a9f09c62SAdam Nemet /// 114a9f09c62SAdam Nemet /// Will be roughly translated to 115a9f09c62SAdam Nemet /// if (Stride1 == 1 && Stride2 == 1) { 116a9f09c62SAdam Nemet /// for (i = 0; i < N; i+=4) 117a9f09c62SAdam Nemet /// A[i:i+3] += ... 118a9f09c62SAdam Nemet /// } else 119a9f09c62SAdam Nemet /// ... 
const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
  if (SI == PtrToStride.end())
    // For a non-symbolic stride, just return the original expression.
    return OrigSCEV;

  Value *StrideVal = stripIntegerCast(SI->second);

  ScalarEvolution *SE = PSE.getSE();
  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
  const auto *CT =
      static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
  auto *Expr = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                    << " by: " << *Expr << "\n");
  return Expr;
}

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, RuntimePointerChecking &RtCheck)
    : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
      AddressSpace(RtCheck.Pointers[Index]
                       .PointerValue->getType()
                       ->getPointerAddressSpace()) {
  Members.push_back(Index);
}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, Type *AccessTy,
                                    bool WritePtr, unsigned DepSetId,
                                    unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp)) {
    ScStart = ScEnd = Sc;
  } else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
  }
  // Add the size of the pointed element to ScEnd.
  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  Type *IdxTy = DL.getIndexType(Ptr->getType());
  const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}

SmallVector<RuntimePointerCheck, 4>
RuntimePointerChecking::generateChecks() const {
  SmallVector<RuntimePointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ))
        Checks.push_back(std::make_pair(&CGI, &CGJ));
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}

bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
                                         RuntimePointerChecking &RtCheck) {
  return addPointer(
      Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
      RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
      *RtCheck.SE);
}
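
/// Try to add the pointer bounded by [Start, End) to this group. For
/// illustration (hypothetical bounds): a group with Low = %a and
/// High = %a + 40 can absorb a pointer with Start = %a + 16 and
/// End = %a + 56 because both distances fold to constants, widening High to
/// %a + 56; a pointer at a non-constant offset from the group bounds is
/// rejected.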
bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
                                         const SCEV *End, unsigned AS,
                                         ScalarEvolution &SE) {
  assert(AddressSpace == AS &&
         "all pointers in a checking group must be in the same address space");

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, &SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      auto PointerI = PositionMap.find(MI->getPointer());
      assert(PointerI != PositionMap.end() &&
             "pointer in equivalence class not found in PositionMap");
      unsigned Pointer = PointerI->second;
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (RuntimeCheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable.  If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer, *this)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}
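
/// Decide whether pointers \p I and \p J need a runtime intersection check:
/// a check is required only if at least one of the two is written, they
/// belong to different dependency sets, and they are in the same alias set.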
bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}

  /// Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(MemoryLocation &Loc, Type *AccessTy) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no-alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access, Type *AccessTy,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride, bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       Value *&UncomputablePtr, bool ShouldCheckWrap = false);

  /// Goes over all memory accesses, checks whether an RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used.  Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

  /// Go over all memory accesses and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Map of all accesses. Values are the types used to access memory pointed
  /// to by the pointer.
  PtrAccessMap Accesses;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks.  Perform the analysis to determine the necessary checks.
  ///
  /// Note that, this is different from isDependencyCheckNeeded.  When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
  /// cleared while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded = false;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
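/// For example, a loop-invariant pointer or an affine AddRec such as
/// {%base,+,4}<%loop> has computable bounds, while a non-affine recurrence
/// (e.g. a quadratic {%base,+,4,+,8}<%loop>) does not.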
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                const ValueToValueMap &Strides, Value *Ptr,
                                Loop *L, bool Assume) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);

  // The bounds for a loop-invariant pointer are trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}

/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
                     Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}
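
/// Walk from \p StartPtr through any non-header PHIs inside
/// \p InnermostLoop and invoke \p AddPointer on every pointer reached. For
/// illustration (hypothetical IR): for "%p = phi [%a, %then], [%b, %else]"
/// in the loop body, both %a and %b are reported as separate accesses,
/// because SCEV cannot analyze the non-header PHI %p itself.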
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
                          function_ref<void(Value *)> AddPointer) {
  SmallPtrSet<Value *, 8> Visited;
  SmallVector<Value *> WorkList;
  WorkList.push_back(StartPtr);

  while (!WorkList.empty()) {
    Value *Ptr = WorkList.pop_back_val();
    if (!Visited.insert(Ptr).second)
      continue;
    auto *PN = dyn_cast<PHINode>(Ptr);
    // SCEV does not look through non-header PHIs inside the loop. Such phis
    // can be analyzed by adding separate accesses for each incoming pointer
    // value.
    if (PN && InnermostLoop.contains(PN->getParent()) &&
        PN->getParent() != InnermostLoop.getHeader()) {
      for (const Use &Inc : PN->incoming_values())
        WorkList.push_back(Inc);
    } else
      AddPointer(Ptr);
  }
}

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access, Type *AccessTy,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
    return false;

  // When we run after a failing dependency check we have to make sure
  // we don't have wrapping pointers.
  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
    auto *Expr = PSE.getSCEV(Ptr);
    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
      return false;
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  }

  // The id of the dependence set.
  unsigned DepId;

  if (isDependencyCheckNeeded()) {
    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
    unsigned &LeaderId = DepSetId[Leader];
    if (!LeaderId)
      LeaderId = RunningDepId++;
    DepId = LeaderId;
  } else
    // Each access has its own dependence set.
    DepId = RunningDepId++;

  bool IsWrite = Access.getInt();
  RtCheck.insert(TheLoop, Ptr, AccessTy, IsWrite, DepId, ASId, StridesMap, PSE);
  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     Value *&UncomputablePtr,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool MayNeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign consecutive IDs to accesses from different alias sets.
  // Accesses in different alias sets never need to be checked against each
  // other.
  unsigned ASId = 0;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;
    ++ASId;

    // We assign consecutive IDs to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<MemAccessInfo, 4> Retries;

    // First, count how many write and read accesses are in the alias set. Also
    // collect MemAccessInfos for later.
    SmallVector<MemAccessInfo, 4> AccessInfos;
    for (const auto &A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;
      AccessInfos.emplace_back(Ptr, IsWrite);
    }

    // We do not need runtime checks for this alias set, if there are no writes
    // or a single write and no reads.
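    // For example, an alias set whose pointers are only loaded from, or one
    // with a single stored-to pointer and no reads, cannot have a conflict
    // within the set, so no pointer in it needs a bound check.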
    if (NumWritePtrChecks == 0 ||
        (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
      assert((AS.size() <= 1 ||
              all_of(AS,
                     [this](auto AC) {
                       MemAccessInfo AccessWrite(AC.getValue(), true);
                       return DepCands.findValue(AccessWrite) == DepCands.end();
                     })) &&
             "Can only skip updating CanDoRT below, if all entries in AS "
             "are reads or there is at most 1 entry");
      continue;
    }

    for (auto &Access : AccessInfos) {
      for (auto &AccessTy : Accesses[Access]) {
        if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                  DepSetId, TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, false)) {
          LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
                            << *Access.getPointer() << '\n');
          Retries.push_back(Access);
          CanDoAliasSetRT = false;
        }
      }
    }

    // Note that this function computes CanDoRT and MayNeedRTCheck
    // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
    // we have a pointer for which we couldn't find the bounds but we don't
    // actually need to emit any checks so it does not matter.
    //
    // We need runtime checks for this alias set, if there are at least 2
    // dependence sets (in which case RunningDepId > 2) or if we need to re-try
    // any bound checks (because in that case the number of dependence sets is
    // incomplete).
    bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoSetRt flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (auto Access : Retries) {
        for (auto &AccessTy : Accesses[Access]) {
          if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                    DepSetId, TheLoop, RunningDepId, ASId,
                                    ShouldCheckWrap, /*Assume=*/true)) {
            CanDoAliasSetRT = false;
            UncomputablePtr = Access.getPointer();
            break;
          }
        }
      }
    }

    CanDoRT &= CanDoAliasSetRT;
    MayNeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        LLVM_DEBUG(
            dbgs() << "LAA: Runtime check would require comparison between"
                      " different address spaces\n");
        return false;
      }
    }
  }

  if (MayNeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                    << " pointer comparisons.\n");

  // If we can do run-time checks, but there are no checks, no runtime checks
  // are needed. This can happen when all pointers point to the same underlying
  // object for example.
  RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;

  bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.first.getPointer() << " ("
             << (A.first.getInt()
                     ? "write"
                     : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
                                                                : "read"))
             << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (const auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessMap DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;

      for (const auto &AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (const auto &AC : S) {
          if (AC.first.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.first.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +=").  Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            // We only use the pointer keys, the types vector values don't
            // matter.
            DeferredAccesses.insert({Access, {}});
            continue;
          }

          // If this is a write - check other reads and writes for conflicts.  If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;
            // A null pointer never aliases anything; don't join sets for
            // pointers that have "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (Value *Index : GEP->indices())
    if (!isa<ConstantInt>(Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;
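  // A typical pattern the code below is meant to catch (hypothetical IR, not
  // from the original comments):
  //   %mul = mul nsw i32 %iv, 4
  //   %gep = getelementptr inbounds i8, i8* %p, i32 %mul
  // Here %mul is the single non-constant index: an NSW operation with a
  // constant second operand whose first operand is an NSW AddRec, so the
  // no-wrap property can be transferred to the GEP.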
  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume the other operand is constant so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = PSE.getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

/// Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
                           Value *Ptr, const Loop *Lp,
                           const ValueToValueMap &StridesMap, bool Assume,
                           bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  if (isa<ScalableVectorType>(AccessTy)) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
                      << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                      << *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }
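  // Illustration of the stride computed further down (hypothetical values,
  // assuming the wrap checks below are satisfied): for i32 accesses whose
  // pointer SCEV is {%A,+,8}<%loop>, the step is 8 bytes and the element size
  // is 4, so this function returns a stride of 2; a step of 4 bytes would
  // give the unit stride 1, and a negative step a negative stride.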
  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and with unit stride would
  // have to access the pointer value "0", which is undefined behavior in
  // address space 0, therefore we can also vectorize this case.
  unsigned AddrSpace = Ty->getPointerAddressSpace();
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = !ShouldCheckWrap ||
    PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
    isNoWrapAddRec(Ptr, AR, PSE, Lp);
  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
      NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
    if (Assume) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
    } else {
      LLVM_DEBUG(
          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check that the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
                      << " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
  int64_t Size = AllocSize.getFixedSize();
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
                                              AddrSpace))) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                        << "inbounds or in address space 0 may wrap:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}

Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
                                    Value *PtrB, const DataLayout &DL,
                                    ScalarEvolution &SE, bool StrictCheck,
                                    bool CheckType) {
  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
  assert(cast<PointerType>(PtrA->getType())
             ->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
  assert(cast<PointerType>(PtrB->getType())
             ->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return 0;

  // Make sure that the element types are the same if required.
  if (CheckType && ElemTyA != ElemTyB)
    return None;

  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
  unsigned ASB = PtrB->getType()->getPointerAddressSpace();

  // Check that the address spaces match.
  if (ASA != ASB)
    return None;
  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);

  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
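  // Worked example (hypothetical values, not from the original comments):
  // with i32 elements, if PtrA and PtrB are inbounds GEPs at element offsets
  // 1 and 3 of the same base %p, both strip to %p with byte offsets 4 and 12,
  // so Val = 12 - 4 = 8 below and the returned element distance is 8 / 4 = 2.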
  int Val;
  if (PtrA1 == PtrB1) {
    // Retrieve the address space again as pointer stripping now tracks through
    // `addrspacecast`.
    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
    // Check that the address spaces match and that the pointers are valid.
    if (ASA != ASB)
      return None;

    IdxWidth = DL.getIndexSizeInBits(ASA);
    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
    OffsetB = OffsetB.sextOrTrunc(IdxWidth);

    OffsetB -= OffsetA;
    Val = OffsetB.getSExtValue();
  } else {
    // Otherwise compute the distance with SCEV between the base pointers.
    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
    const auto *Diff =
        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
    if (!Diff)
      return None;
    Val = Diff->getAPInt().getSExtValue();
  }
  int Size = DL.getTypeStoreSize(ElemTyA);
  int Dist = Val / Size;

  // Ensure that the calculated distance matches the type-based one after
  // removing all the bitcasts from the provided pointers.
  if (!StrictCheck || Dist * Size == Val)
    return Dist;
  return None;
}

bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
                           const DataLayout &DL, ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {
  assert(llvm::all_of(
             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
         "Expected list of pointer operands.");
  // Walk over the pointers and map each of them to an offset relative to the
  // first pointer in the array.
  Value *Ptr0 = VL[0];

  using DistOrdPair = std::pair<int64_t, int>;
  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
    return L.first < R.first;
  };
  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
  Offsets.emplace(0, 0);
  int Cnt = 1;
  bool IsConsecutive = true;
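  // Worked example (hypothetical values): for VL = {p+2, p, p+1} with i32
  // elements, the element offsets relative to VL[0] are {0, -2, -1}. The
  // sorted order is p, p+1, p+2, which differs from the original order, so
  // SortedIndices becomes {1, 2, 0} and the function returns true.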
  for (auto *Ptr : VL.drop_front()) {
    Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
                                         /*StrictCheck=*/true);
    if (!Diff)
      return false;

    // Bail out if a pointer with the same offset has already been seen.
    int64_t Offset = *Diff;
    auto Res = Offsets.emplace(Offset, Cnt);
    if (!Res.second)
      return false;

    // The order is consecutive if every inserted element is the last one.
    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
    ++Cnt;
  }
  SortedIndices.clear();
  if (!IsConsecutive) {
    // Fill SortedIndices only if the order is non-consecutive.
    SortedIndices.resize(VL.size());
    Cnt = 0;
    for (const std::pair<int64_t, int> &Pair : Offsets) {
      SortedIndices[Cnt] = Pair.second;
      ++Cnt;
    }
  }
  return true;
}

/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getLoadStorePointerOperand(A);
  Value *PtrB = getLoadStorePointerOperand(B);
  if (!PtrA || !PtrB)
    return false;
  Type *ElemTyA = getLoadStoreType(A);
  Type *ElemTyB = getLoadStoreType(B);
  Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
                                       /*StrictCheck=*/true, CheckType);
  return Diff && *Diff == 1;
}

void MemoryDepChecker::addAccess(StoreInst *SI) {
  visitPointers(SI->getPointerOperand(), *InnermostLoop,
                [this, SI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
                  InstMap.push_back(SI);
                  ++AccessIdx;
                });
}

void MemoryDepChecker::addAccess(LoadInst *LI) {
  visitPointers(LI->getPointerOperand(), *InnermostLoop,
                [this, LI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
                  InstMap.push_back(LI);
                  ++AccessIdx;
                });
}

MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return VectorizationSafetyStatus::Safe;

  case Unknown:
    return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return VectorizationSafetyStatus::Unsafe;
  }
  llvm_unreachable("unexpected DepType!");
}
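// Illustration: a loop whose dependences are all NoDep, Forward or
// BackwardVectorizable is Safe; a single Unknown dependence degrades the
// status to PossiblySafeWithRtChecks, where runtime pointer checks may still
// make vectorization legal; any of the remaining kinds makes the loop Unsafe.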
bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown;
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor, store-load forwarding does not take place.
  // Positive dependences might cause trouble because vectorizing them might
  // prevent store-load forwarding, making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  // hence on your typical architecture store-load forwarding does not take
  // place. Vectorizing in such cases does not make sense.

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iterations between the store and the load is
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (VF >> 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    LLVM_DEBUG(
        dbgs() << "LAA: Distance " << Distance
               << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}
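// Worked example for couldPreventStoreLoadForward above (hypothetical
// values): Distance = 12 and TypeByteSize = 4, so
// NumItersForStoreLoadThroughMemory = 32. At VF = 8, 12 % 8 != 0 and
// 12 / 8 = 1 < 32, so the conflict-free maximum drops to 4 bytes, which is
// below 2 * TypeByteSize, and the function reports a possible
// store-to-load-forwarding conflict.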
void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
  if (Status < S)
    Status = S;
}

/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose backedge-taken count is \p BackedgeTakenCount, check if it
/// is possible to prove statically that the dependence distance is larger
/// than the range that the accesses will travel through the execution of
/// the loop. If so, return true; false otherwise. This is useful for
/// example in loops such as the following (PR31098):
///     for (i = 0; i < D; ++i) {
///                = out[i];
///       out[i+D] =
///     }
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &BackedgeTakenCount,
                                     const SCEV &Dist, uint64_t Stride,
                                     uint64_t TypeByteSize) {
  // If we can prove that
  //      (**) |Dist| > BackedgeTakenCount * Step
  // where Step is the absolute stride of the memory accesses in bytes,
  // then there is no dependence.
  //
  // Rationale:
  // We basically want to check if the absolute distance (|Dist/Step|)
  // is >= the loop iteration count (or > BackedgeTakenCount).
  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
  // Section 4.2.1). Note that for vectorization it is sufficient to prove
  // that the dependence distance is >= VF; this is checked elsewhere.
  // But in some cases we can prune unknown dependence distances early, even
  // before selecting the VF and without a runtime test, by comparing the
  // distance against the loop iteration count. Since the vectorized code
  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
  // also guarantees that distance >= VF.
  const uint64_t ByteStride = Stride * TypeByteSize;
  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);

  const SCEV *CastedDist = &Dist;
  const SCEV *CastedProduct = Product;
  uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
  uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());

  // The dependence distance can be positive/negative, so we sign extend Dist;
  // the multiplication of the absolute stride in bytes and the
  // backedge-taken count is non-negative, so we zero extend Product.
  if (DistTypeSizeBits > ProductTypeSizeBits)
    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
  else
    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());

  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= Dist)
  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  return false;
}

/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is the type size
/// in bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");
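  // Worked example (hypothetical values): Distance = 8 bytes, Stride = 4,
  // TypeByteSize = 4. The distance is a multiple of the type size,
  // ScaledDist = 2, and 2 % 4 != 0, so the two access streams fall into
  // disjoint strided slots and are independent (compare the first diagram
  // below).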
  // Skip if the distance is not a multiple of the type size in bytes.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not a multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();
  Type *ATy = getLoadStoreType(InstMap[AIdx]);
  Type *BTy = getLoadStoreType(InstMap[BIdx]);

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  int64_t StrideAPtr =
      getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
  int64_t StrideBPtr =
      getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);

  // If the induction step is negative, we have to invert source and sink of
  // the dependence.
  if (StrideAPtr < 0) {
    std::swap(APtr, BPtr);
    std::swap(ATy, BTy);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);

  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
                    << "(Induction step: " << StrideAPtr << ")\n");
  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
                    << *InstMap[BIdx] << ": " << *Dist << "\n");

  // We need accesses with a constant stride. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
  // in the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
    LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return Dependence::Unknown;
  }

  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
  bool HasSameSize =
      DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
  uint64_t Stride = std::abs(StrideAPtr);
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
        isSafeDependenceDistance(DL, *(PSE.getSE()),
                                 *(PSE.getBackedgeTakenCount()), *Dist, Stride,
                                 TypeByteSize))
      return Dependence::NoDep;

    LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    FoundNonConstantDistanceDependence = true;
    return Dependence::Unknown;
  }

  const APInt &Val = C->getAPInt();
  int64_t Distance = Val.getSExtValue();

  // Attempt to prove that the strided accesses are independent.
  if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
      areStridedAccessesIndependent(std::abs(Distance), Stride,
                                    TypeByteSize)) {
    LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
    return Dependence::NoDep;
  }

  // Negative distances are not plausible dependencies.
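  // Illustration (hypothetical): in
  //   for (i = 0; i < N; ++i) { A[i+4] = ...; ... = A[i]; }
  // the earlier access is the store, so Src is &A[i+4], Sink is &A[i], and
  // the distance is negative (-16 bytes for i32). That is a forward
  // dependence, which is safe unless the distance defeats store-to-load
  // forwarding; the check below looks for exactly that.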
  if (Val.isNegative()) {
    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    if (IsTrueDataDependence && EnableForwardingConflictDetection &&
        (couldPreventStoreLoadForward(Val.abs().getZExtValue(),
                                      TypeByteSize) ||
         !HasSameSize)) {
      LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
      return Dependence::ForwardButPreventsForwarding;
    }

    LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
    return Dependence::Forward;
  }

  // Write to the same location with the same size.
  if (Val == 0) {
    if (HasSameSize)
      return Dependence::Forward;
    LLVM_DEBUG(
        dbgs() << "LAA: Zero dependence difference but different type sizes\n");
    return Dependence::Unknown;
  }

  assert(Val.isStrictlyPositive() && "Expect a positive value");

  if (!HasSameSize) {
    LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
                         "different type sizes\n");
    return Dependence::Unknown;
  }

  // Bail out early if passed-in parameters make vectorization not feasible.
  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                           VectorizerParams::VectorizationFactor : 1);
  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
                           VectorizerParams::VectorizationInterleave : 1);
  // The minimum number of iterations for a vectorized/unrolled version.
  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);

  // It's not vectorizable if the distance is smaller than the minimum distance
  // needed for a vectorized/unrolled version. Vectorizing one iteration in
  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
  // TypeByteSize (no need to add the last gap distance).
  //
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      foo(int *A) {
  //        int *B = (int *)((char *)A + 14);
  //        for (i = 0 ; i < 1024 ; i += 2)
  //          B[i] = A[i] + 1;
  //      }
  //
  // Two accesses in memory (stride is 2):
  //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
  //     |      | B[0] |      | B[2] |      | B[4] |
  //
  // The distance needed to vectorize all iterations except the last one is
  // 4 * 2 * (MinNumIter - 1); the distance needed for the last iteration is 4.
  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
  //
  // If MinNumIter is 2, the loop is vectorizable as the minimum distance
  // needed is 12, which is less than the actual distance (14).
  //
  // If MinNumIter is 4 (say a user forces the vectorization factor to be 4),
  // the minimum distance needed is 28, which is greater than the distance.
  // It is not safe to vectorize in that case.
  uint64_t MinDistanceNeeded =
      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
    LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
                      << Distance << '\n');
    return Dependence::Backward;
  }

  // Unsafe if the minimum distance needed is greater than the max safe
  // distance.
  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
    LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
                      << MinDistanceNeeded << " size in bytes");
    return Dependence::Backward;
  }

  // Positive distance bigger than max vectorization factor.
  // FIXME: Should use the max factor instead of the max distance in bytes,
  // which could not handle different types.
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      void foo (int *A, char *B) {
  //        for (unsigned i = 0; i < 1024; i++) {
  //          A[i+2] = A[i] + 1;
  //          B[i+2] = B[i] + 1;
  //        }
  //      }
  //
  // This case is currently unsafe according to the max safe distance. If we
  // analyze the accesses on array B first, the max safe dependence distance
  // ends up being 2 bytes. When we then analyze the accesses on array A, the
  // minimum distance needed is 8 bytes, which exceeds that 2-byte limit, so
  // vectorization is forbidden. But actually both A and B could safely be
  // vectorized with a factor of 2.
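  // Worked example of the clamping below (hypothetical values): for i32
  // accesses (TypeByteSize = 4) with Stride = 1, Distance = 16 bytes and
  // MinNumIter = 2, MinDistanceNeeded = 4 * 1 * 1 + 4 = 8 <= 16, so the
  // dependence is backward but vectorizable; MaxSafeDepDistBytes is capped
  // at 16, i.e. a maximum VF of 4 i32 lanes (128 bits).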
  MaxSafeDepDistBytes =
      std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);

  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
      couldPreventStoreLoadForward(Distance, TypeByteSize))
    return Dependence::BackwardVectorizableButPreventsForwarding;

  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
                    << " with max VF = " << MaxVF << '\n');
  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
  MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
  return Dependence::BackwardVectorizable;
}

bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                   MemAccessInfoList &CheckDeps,
                                   const ValueToValueMap &Strides) {
  MaxSafeDepDistBytes = -1;
  SmallPtrSet<MemAccessInfo, 8> Visited;
  for (MemAccessInfo CurAccess : CheckDeps) {
    if (Visited.count(CurAccess))
      continue;

    // Get the relevant memory access set.
    EquivalenceClasses<MemAccessInfo>::iterator I =
        AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));

    // Check accesses within this set.
    EquivalenceClasses<MemAccessInfo>::member_iterator AI =
        AccessSets.member_begin(I);
    EquivalenceClasses<MemAccessInfo>::member_iterator AE =
        AccessSets.member_end();

    // Check every access pair.
    while (AI != AE) {
      Visited.insert(*AI);
      bool AIIsWrite = AI->getInt();
      // Check loads only against the next equivalence class, but stores also
      // against other stores in the same equivalence class - to the same
      // address.
      EquivalenceClasses<MemAccessInfo>::member_iterator OI =
          (AIIsWrite ? AI : std::next(AI));
      while (OI != AE) {
        // Check every accessing instruction pair in program order.
        for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
             I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
          // Scan all accesses of the other equivalence class, but only the
          // next accesses of the same equivalence class.
          for (std::vector<unsigned>::iterator
                   I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
                   I2E = (OI == AI ? I1E : Accesses[*OI].end());
               I2 != I2E; ++I2) {
            auto A = std::make_pair(&*AI, *I1);
            auto B = std::make_pair(&*OI, *I2);

            assert(*I1 != *I2);
            if (*I1 > *I2)
              std::swap(A, B);

            Dependence::DepType Type =
                isDependent(*A.first, A.second, *B.first, B.second, Strides);
            mergeInStatus(Dependence::isSafeForVectorization(Type));

            // Gather dependences unless we accumulated MaxDependences
            // dependences. In that case return as soon as we find the first
            // unsafe dependence. This puts a limit on this quadratic
            // algorithm.
            if (RecordDependences) {
              if (Type != Dependence::NoDep)
                Dependences.push_back(Dependence(A.second, B.second, Type));

              if (Dependences.size() >= MaxDependences) {
                RecordDependences = false;
                Dependences.clear();
                LLVM_DEBUG(dbgs()
                           << "Too many dependences, stopped recording\n");
              }
            }
            if (!RecordDependences && !isSafeForVectorization())
              return false;
          }
        ++OI;
      }
      AI++;
    }
  }

  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
  return isSafeForVectorization();
}

SmallVector<Instruction *, 4>
MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
  MemAccessInfo Access(Ptr, isWrite);
  auto &IndexVector = Accesses.find(Access)->second;

  SmallVector<Instruction *, 4> Insts;
  transform(IndexVector, std::back_inserter(Insts),
            [&](unsigned Idx) { return this->InstMap[Idx]; });
  return Insts;
}

const char *MemoryDepChecker::Dependence::DepName[] = {
    "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
    "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};

void MemoryDepChecker::Dependence::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<Instruction *> &Instrs) const {
  OS.indent(Depth) << DepName[Type] << ":\n";
  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
}
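// For illustration, a backward dependence between two (hypothetical)
// instructions is printed roughly as:
//   Backward:
//       %l = load i32, i32* %gep.a ->
//       store i32 %v, i32* %gep.b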
bool LoopAccessInfo::canAnalyzeLoop() {
  // We need to have a loop header.
  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
                    << TheLoop->getHeader()->getParent()->getName() << ": "
                    << TheLoop->getHeader()->getName() << '\n');

  // We can only analyze innermost loops.
  if (!TheLoop->isInnermost()) {
    LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
    recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
    return false;
  }

  // We must have a single backedge.
  if (TheLoop->getNumBackEdges() != 1) {
    LLVM_DEBUG(
        dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    recordAnalysis("CFGNotUnderstood")
        << "loop control flow is not understood by analyzer";
    return false;
  }

  // ScalarEvolution needs to be able to find the exit count.
  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
  if (isa<SCEVCouldNotCompute>(ExitCount)) {
    recordAnalysis("CantComputeNumberOfIterations")
        << "could not determine number of loop iterations";
    LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
    return false;
  }

  return true;
}

void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
                                 const TargetLibraryInfo *TLI,
                                 DominatorTree *DT) {
  // Holds the Load and Store instructions.
  SmallVector<LoadInst *, 16> Loads;
  SmallVector<StoreInst *, 16> Stores;

  // Holds all the different accesses in the loop.
  unsigned NumReads = 0;
  unsigned NumReadWrites = 0;

  bool HasComplexMemInst = false;

  // A runtime check is only legal to insert if there are no convergent calls.
  HasConvergentOp = false;

  PtrRtChecking->Pointers.clear();
  PtrRtChecking->Need = false;

  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();

  const bool EnableMemAccessVersioningOfLoop =
      EnableMemAccessVersioning &&
      !TheLoop->getHeader()->getParent()->hasOptSize();

  // For each block.
  for (BasicBlock *BB : TheLoop->blocks()) {
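    // Background note: convergent calls must not be given new control
    // dependences, so a loop containing one cannot be versioned with runtime
    // memory checks; the scan below therefore records HasConvergentOp next to
    // the loads and stores it collects.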
    // Scan the BB and collect legal loads and stores. Also detect any
    // convergent instructions.
    for (Instruction &I : *BB) {
      if (auto *Call = dyn_cast<CallBase>(&I)) {
        if (Call->isConvergent())
          HasConvergentOp = true;
      }

      // If we have found both a non-vectorizable memory instruction and a
      // convergent operation in this loop, there is no reason to continue
      // the search.
      if (HasComplexMemInst && HasConvergentOp) {
        CanVecMem = false;
        return;
      }

      // Avoid hitting recordAnalysis multiple times.
      if (HasComplexMemInst)
        continue;

      // If this is a load, save it. If this instruction can read from memory
      // but is not a load, then we quit. Notice that we don't handle function
      // calls that read or write.
      if (I.mayReadFromMemory()) {
        // Many math library functions read the rounding mode. We will only
        // vectorize a loop if it contains known function calls that don't set
        // the flag. Therefore, it is safe to ignore this read from memory.
        auto *Call = dyn_cast<CallInst>(&I);
        if (Call && getVectorIntrinsicIDForCall(Call, TLI))
          continue;

        // If the function has an explicit vectorized counterpart, we can
        // safely assume that it can be vectorized.
        if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
            !VFDatabase::getMappings(*Call).empty())
          continue;

        auto *Ld = dyn_cast<LoadInst>(&I);
        if (!Ld) {
          recordAnalysis("CantVectorizeInstruction", Ld)
              << "instruction cannot be vectorized";
          HasComplexMemInst = true;
          continue;
        }
        if (!Ld->isSimple() && !IsAnnotatedParallel) {
          recordAnalysis("NonSimpleLoad", Ld)
              << "read with atomic ordering or volatile read";
          LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
          HasComplexMemInst = true;
          continue;
        }
        NumLoads++;
        Loads.push_back(Ld);
        DepChecker->addAccess(Ld);
        if (EnableMemAccessVersioningOfLoop)
          collectStridedAccess(Ld);
        continue;
      }

      // Save 'store' instructions. Abort if other instructions write to
      // memory.
      if (I.mayWriteToMemory()) {
        auto *St = dyn_cast<StoreInst>(&I);
        if (!St) {
          recordAnalysis("CantVectorizeInstruction", St)
              << "instruction cannot be vectorized";
          HasComplexMemInst = true;
          continue;
        }
        if (!St->isSimple() && !IsAnnotatedParallel) {
          recordAnalysis("NonSimpleStore", St)
              << "write with atomic ordering or volatile write";
          LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
          HasComplexMemInst = true;
          continue;
        }
        NumStores++;
        Stores.push_back(St);
        DepChecker->addAccess(St);
        if (EnableMemAccessVersioningOfLoop)
          collectStridedAccess(St);
      }
    } // Next instr.
  } // Next block.

  if (HasComplexMemInst) {
    CanVecMem = false;
    return;
  }

  // Now we have two lists that hold the loads and the stores.
  // Next, we find the pointers that they use.

  // Check if we see any stores. If there are no stores, then we don't
  // care if the pointers are *restrict*.
  if (!Stores.size()) {
    LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
    CanVecMem = true;
    return;
  }

  MemoryDepChecker::DepCandidates DependentAccesses;
  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);

  // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
  // multiple times on the same object. If the ptr is accessed twice, once
  // for read and once for write, it will only appear once (on the write
  // list). This is okay, since we are going to check for conflicts between
  // writes and between reads and writes, but not between reads and reads.
  SmallSet<std::pair<Value *, Type *>, 16> Seen;
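  // Illustration: in a loop containing A[i] += x, the store scan below adds
  // A[i]'s (pointer, type) pair to Seen first, so the subsequent load scan
  // recognizes the load of A[i] as already covered by the read-write list.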
  SmallPtrSet<Value *, 16> UniformStores;

  for (StoreInst *ST : Stores) {
    Value *Ptr = ST->getPointerOperand();

    if (isUniform(Ptr)) {
      // Record store instructions to loop-invariant addresses.
      StoresToInvariantAddresses.push_back(ST);
      HasDependenceInvolvingLoopInvariantAddress |=
          !UniformStores.insert(Ptr).second;
    }

    // If we did *not* see this pointer before, insert it into the read-write
    // list. At this phase it is only a 'write' list.
    Type *AccessTy = getLoadStoreType(ST);
    if (Seen.insert({Ptr, AccessTy}).second) {
      ++NumReadWrites;

      MemoryLocation Loc = MemoryLocation::get(ST);
      // The TBAA metadata could have a control dependency on the predication
      // condition, so we cannot rely on it when determining whether or not we
      // need runtime pointer checks.
      if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
        Loc.AATags.TBAA = nullptr;

      visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
                    [&Accesses, AccessTy, Loc](Value *Ptr) {
                      MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
                      Accesses.addStore(NewLoc, AccessTy);
                    });
    }
  }

  if (IsAnnotatedParallel) {
    LLVM_DEBUG(
        dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
               << "checks.\n");
    CanVecMem = true;
    return;
  }

  for (LoadInst *LD : Loads) {
    Value *Ptr = LD->getPointerOperand();
    // If we did *not* see this pointer before, insert it into the
    // read list. If we *did* see it before, then it is already in
    // the read-write list. This allows us to vectorize expressions
    // such as A[i] += x, because the address of A[i] is a read-write
    // pointer. This only works if the index of A[i] is consecutive.
    // If the index is unknown (for example A[B[i]]), then we may
    // read a few words, modify, and write a few words, and some of the
    // words may be written to the same address.
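    // As a concrete (made-up) instance: with A[B[i]] += 1 and B = {0, 0, 2, 2},
    // two vector lanes would read, modify, and write the same element of A,
    // so a vectorized read-modify-write sequence could lose one of the
    // updates.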
    bool IsReadOnlyPtr = false;
    Type *AccessTy = getLoadStoreType(LD);
    if (Seen.insert({Ptr, AccessTy}).second ||
        !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
      ++NumReads;
      IsReadOnlyPtr = true;
    }

    // See if there is an unsafe dependency between a load from a uniform
    // address and a store to the same uniform address.
    if (UniformStores.count(Ptr)) {
      LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
                           "load and uniform store to the same address!\n");
      HasDependenceInvolvingLoopInvariantAddress = true;
    }

    MemoryLocation Loc = MemoryLocation::get(LD);
    // The TBAA metadata could have a control dependency on the predication
    // condition, so we cannot rely on it when determining whether or not we
    // need runtime pointer checks.
    if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
      Loc.AATags.TBAA = nullptr;

    visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
                  [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
                    MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
                    Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
                  });
  }

  // If we write (or read-write) to a single destination and there are no
  // other reads in this loop, then it is safe to vectorize.
  if (NumReadWrites == 1 && NumReads == 0) {
    LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
    CanVecMem = true;
    return;
  }

  // Build dependence sets and check whether we need a runtime pointer bounds
  // check.
  Accesses.buildDependenceSets();

  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
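  // Roughly (a sketch, not the exact emitted IR), for two pointers A and B the
  // versioned guard takes the form
  //   if (A + N*sizeof(*A) <= B || B + N*sizeof(*B) <= A)
  //     ... vectorized loop ...
  //   else
  //     ... scalar fallback ...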
  Value *UncomputablePtr = nullptr;
  bool CanDoRTIfNeeded =
      Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
                               SymbolicStrides, UncomputablePtr, false);
  if (!CanDoRTIfNeeded) {
    auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
    recordAnalysis("CantIdentifyArrayBounds", I)
        << "cannot identify array bounds";
    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
                      << "the array bounds.\n");
    CanVecMem = false;
    return;
  }

  LLVM_DEBUG(
      dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");

  CanVecMem = true;
  if (Accesses.isDependencyCheckNeeded()) {
    LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
    CanVecMem = DepChecker->areDepsSafe(
        DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
    MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();

    if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
      LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");

      // Clear the dependency checks. We assume they are not needed.
      Accesses.resetDepChecks(*DepChecker);

      PtrRtChecking->reset();
      PtrRtChecking->Need = true;

      auto *SE = PSE->getSE();
      UncomputablePtr = nullptr;
      CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
          *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);

      // Check that we found the bounds for the pointer.
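      // A bound can be uncomputable when the pointer is not an affine SCEV
      // expression in the loop, e.g. (hypothetically) a pointer reloaded from
      // memory on every iteration such as P = Q[i]; UncomputablePtr then names
      // the offending access.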
      if (!CanDoRTIfNeeded) {
        auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
        recordAnalysis("CantCheckMemDepsAtRunTime", I)
            << "cannot check memory dependencies at runtime";
        LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
        CanVecMem = false;
        return;
      }

      CanVecMem = true;
    }
  }

  if (HasConvergentOp) {
    recordAnalysis("CantInsertRuntimeCheckWithConvergent")
        << "cannot add control dependency to convergent operation";
    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
                         "would be needed with a convergent operation\n");
    CanVecMem = false;
    return;
  }

  if (CanVecMem)
    LLVM_DEBUG(
        dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
               << (PtrRtChecking->Need ? "" : " don't")
               << " need runtime memory checks.\n");
  else
    emitUnsafeDependenceRemark();
}

void LoopAccessInfo::emitUnsafeDependenceRemark() {
  auto Deps = getDepChecker().getDependences();
  if (!Deps)
    return;
  auto Found = std::find_if(
      Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) {
        return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
               MemoryDepChecker::VectorizationSafetyStatus::Safe;
      });
  if (Found == Deps->end())
    return;
  MemoryDepChecker::Dependence Dep = *Found;

  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");

  // Emit a remark for the first unsafe dependence.
  OptimizationRemarkAnalysis &R =
      recordAnalysis("UnsafeDep", Dep.getDestination(*this))
      << "unsafe dependent memory operations in loop. Use "
         "#pragma loop distribute(enable) to allow loop distribution "
         "to attempt to isolate the offending operations into a separate "
         "loop";

  switch (Dep.Type) {
  case MemoryDepChecker::Dependence::NoDep:
  case MemoryDepChecker::Dependence::Forward:
  case MemoryDepChecker::Dependence::BackwardVectorizable:
    llvm_unreachable("Unexpected dependence");
  case MemoryDepChecker::Dependence::Backward:
    R << "\nBackward loop carried data dependence.";
    break;
  case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
    R << "\nForward loop carried data dependence that prevents "
         "store-to-load forwarding.";
    break;
  case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
    R << "\nBackward loop carried data dependence that prevents "
         "store-to-load forwarding.";
    break;
  case MemoryDepChecker::Dependence::Unknown:
    R << "\nUnknown data dependence.";
    break;
  }

  if (Instruction *I = Dep.getSource(*this)) {
    DebugLoc SourceLoc = I->getDebugLoc();
    if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
      SourceLoc = DD->getDebugLoc();
    if (SourceLoc)
      R << " Memory location is the same as accessed at "
        << ore::NV("Location", SourceLoc);
  }
}

bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
                                           DominatorTree *DT) {
  assert(TheLoop->contains(BB) && "Unknown block used");

  // Blocks that do not dominate the latch need predication.
  BasicBlock *Latch = TheLoop->getLoopLatch();
  return !DT->dominates(BB, Latch);
}

OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
                                                           Instruction *I) {
  assert(!Report && "Multiple reports generated");

  Value *CodeRegion = TheLoop->getHeader();
  DebugLoc DL = TheLoop->getStartLoc();

  if (I) {
    CodeRegion = I->getParent();
    // If there is no debug location attached to the instruction, fall back
    // to using the loop's.
    if (I->getDebugLoc())
      DL = I->getDebugLoc();
  }

  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,
                                                        DL, CodeRegion);
  return *Report;
}

bool LoopAccessInfo::isUniform(Value *V) const {
  auto *SE = PSE->getSE();
  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
  // never considered uniform.
  // TODO: Is this really what we want? Even without FP SCEV, we may want some
  // trivially loop-invariant FP values to be considered uniform.
  if (!SE->isSCEVable(V->getType()))
    return false;
  return SE->isLoopInvariant(SE->getSCEV(V), TheLoop);
}

void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
  Value *Ptr = getLoadStorePointerOperand(MemAccess);
  if (!Ptr)
    return;

  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
  if (!Stride)
    return;

  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
                       "versioning:");
  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");

  // Avoid adding the "Stride == 1" predicate when we know that
  // Stride >= Trip-Count. Such a predicate would effectively optimize a
  // single- or zero-iteration loop, as Trip-Count <= Stride == 1.
  //
  // TODO: We are currently not making a very informed decision on when it is
  // beneficial to apply stride versioning. It might make more sense that the
  // users of this analysis (such as the vectorizer) will trigger it, based on
  // their specific cost considerations; for example, in cases where stride
  // versioning does not help resolving memory accesses/dependences, the
  // vectorizer should evaluate the cost of the runtime test, and the benefit
  // of various possible stride specializations, considering the alternatives
  // of using gather/scatters (if available).

  const SCEV *StrideExpr = PSE->getSCEV(Stride);
  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();

  // Match the types so we can compare the stride and the BETakenCount.
  // The Stride can be positive/negative, so we sign extend Stride;
  // the backedge-taken count is non-negative, so we zero extend BETakenCount.
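  // For instance (types illustrative only): with an i32 Stride and an i64
  // backedge-taken count, the code below ends up checking
  //   sext(Stride to i64) - BETakenCount > 0
  // so that both operands are compared at the wider type.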
  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
  uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
  uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
  const SCEV *CastedStride = StrideExpr;
  const SCEV *CastedBECount = BETakenCount;
  ScalarEvolution *SE = PSE->getSE();
  if (BETypeSizeBits >= StrideTypeSizeBits)
    CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
  else
    CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
  // Since TripCount == BackEdgeTakenCount + 1, checking
  // "Stride >= TripCount" is equivalent to checking
  // "Stride - BETakenCount > 0".
  if (SE->isKnownPositive(StrideMinusBETaken)) {
    LLVM_DEBUG(
        dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
                  "Stride==1 predicate will imply that the loop executes "
                  "at most once.\n");
    return;
  }
  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");

  SymbolicStrides[Ptr] = Stride;
  StrideSet.insert(Stride);
}

LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const TargetLibraryInfo *TLI, AAResults *AA,
                               DominatorTree *DT, LoopInfo *LI)
    : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
      PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
      DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
  if (canAnalyzeLoop())
    analyzeLoop(AA, LI, TLI, DT);
}

void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  if (CanVecMem) {
    OS.indent(Depth) << "Memory dependences are safe";
    if (MaxSafeDepDistBytes != -1ULL)
      OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
         << " bytes";
    if (PtrRtChecking->Need)
      OS << " with run-time checks";
    OS << "\n";
  }

  if (HasConvergentOp)
    OS.indent(Depth) << "Has convergent operation in loop\n";

  if (Report)
    OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";

  if (auto *Dependences = DepChecker->getDependences()) {
    OS.indent(Depth) << "Dependences:\n";
    for (const auto &Dep : *Dependences) {
      Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
      OS << "\n";
    }
  } else
    OS.indent(Depth) << "Too many dependences, not recorded\n";

  // List the pairs of accesses that need run-time checks to prove
  // independence.
  PtrRtChecking->print(OS, Depth);
  OS << "\n";

  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
                   << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
                   << "found in loop.\n";

  OS.indent(Depth) << "SCEV assumptions:\n";
  PSE->getPredicate().print(OS, Depth);

  OS << "\n";

  OS.indent(Depth) << "Expressions re-written:\n";
  PSE->print(OS, Depth);
}

LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
  initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
}

const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
  auto &LAI = LoopAccessInfoMap[L];

  if (!LAI)
    LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);

  return *LAI;
}

void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);

  for (Loop *TopLevelLoop : *LI)
    for (Loop *L : depth_first(TopLevelLoop)) {
      OS.indent(2) << L->getHeader()->getName() << ":\n";
      auto &LAI = LAA.getInfo(L);
      LAI.print(OS, 4);
    }
}

bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

  return false;
}

void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<LoopInfoWrapperPass>();

  AU.setPreservesAll();
}

char LoopAccessLegacyAnalysis::ID = 0;
static const char laa_name[] = "Loop Access Analysis";
#define LAA_NAME "loop-accesses"

INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)

AnalysisKey LoopAccessAnalysis::Key;

LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
                                       LoopStandardAnalysisResults &AR) {
  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
}

namespace llvm {

Pass *createLAAPass() {
  return new LoopAccessLegacyAnalysis();
}

} // end namespace llvm