19d9cb274SAdam Nemet //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===// 29d9cb274SAdam Nemet // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69d9cb274SAdam Nemet // 79d9cb274SAdam Nemet //===----------------------------------------------------------------------===// 89d9cb274SAdam Nemet // 99d9cb274SAdam Nemet // This file implements a Loop Data Prefetching Pass. 109d9cb274SAdam Nemet // 119d9cb274SAdam Nemet //===----------------------------------------------------------------------===// 129d9cb274SAdam Nemet 131eca6bc6STeresa Johnson #include "llvm/Transforms/Scalar/LoopDataPrefetch.h" 1405da2fe5SReid Kleckner #include "llvm/InitializePasses.h" 151eca6bc6STeresa Johnson 169d9cb274SAdam Nemet #include "llvm/ADT/DepthFirstIterator.h" 179d9cb274SAdam Nemet #include "llvm/ADT/Statistic.h" 18aec2fa35SDaniel Jasper #include "llvm/Analysis/AssumptionCache.h" 199d9cb274SAdam Nemet #include "llvm/Analysis/CodeMetrics.h" 209d9cb274SAdam Nemet #include "llvm/Analysis/LoopInfo.h" 210965da20SAdam Nemet #include "llvm/Analysis/OptimizationRemarkEmitter.h" 229d9cb274SAdam Nemet #include "llvm/Analysis/ScalarEvolution.h" 239d9cb274SAdam Nemet #include "llvm/Analysis/ScalarEvolutionExpressions.h" 249d9cb274SAdam Nemet #include "llvm/Analysis/TargetTransformInfo.h" 259d9cb274SAdam Nemet #include "llvm/IR/CFG.h" 269d9cb274SAdam Nemet #include "llvm/IR/Dominators.h" 279d9cb274SAdam Nemet #include "llvm/IR/Function.h" 289d9cb274SAdam Nemet #include "llvm/IR/Module.h" 299d9cb274SAdam Nemet #include "llvm/Support/CommandLine.h" 309d9cb274SAdam Nemet #include "llvm/Support/Debug.h" 31885f1de4SAdam Nemet #include "llvm/Transforms/Scalar.h" 32*99c43363SAndrew Wei #include "llvm/Transforms/Utils.h" 339d9cb274SAdam Nemet #include "llvm/Transforms/Utils/BasicBlockUtils.h" 34bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 359d9cb274SAdam Nemet #include "llvm/Transforms/Utils/ValueMapper.h" 3671acce68SMindong Chen 3771acce68SMindong Chen #define DEBUG_TYPE "loop-data-prefetch" 3871acce68SMindong Chen 399d9cb274SAdam Nemet using namespace llvm; 409d9cb274SAdam Nemet 419d9cb274SAdam Nemet // By default, we limit this to creating 16 PHIs (which is a little over half 429d9cb274SAdam Nemet // of the allocatable register set). 439d9cb274SAdam Nemet static cl::opt<bool> 449d9cb274SAdam Nemet PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), 459d9cb274SAdam Nemet cl::desc("Prefetch write addresses")); 469d9cb274SAdam Nemet 471428d41fSAdam Nemet static cl::opt<unsigned> 481428d41fSAdam Nemet PrefetchDistance("prefetch-distance", 491428d41fSAdam Nemet cl::desc("Number of instructions to prefetch ahead"), 501428d41fSAdam Nemet cl::Hidden); 511428d41fSAdam Nemet 521428d41fSAdam Nemet static cl::opt<unsigned> 531428d41fSAdam Nemet MinPrefetchStride("min-prefetch-stride", 541428d41fSAdam Nemet cl::desc("Min stride to add prefetches"), cl::Hidden); 551428d41fSAdam Nemet 561428d41fSAdam Nemet static cl::opt<unsigned> MaxPrefetchIterationsAhead( 571428d41fSAdam Nemet "max-prefetch-iters-ahead", 581428d41fSAdam Nemet cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden); 591428d41fSAdam Nemet 6034785ecfSAdam Nemet STATISTIC(NumPrefetches, "Number of prefetches inserted"); 6134785ecfSAdam Nemet 629d9cb274SAdam Nemet namespace { 639d9cb274SAdam Nemet 641eca6bc6STeresa Johnson /// Loop prefetch implementation class. 651eca6bc6STeresa Johnson class LoopDataPrefetch { 669d9cb274SAdam Nemet public: 6736d4421fSJonas Paulsson LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, 6836d4421fSJonas Paulsson ScalarEvolution *SE, const TargetTransformInfo *TTI, 691eca6bc6STeresa Johnson OptimizationRemarkEmitter *ORE) 7036d4421fSJonas Paulsson : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {} 719d9cb274SAdam Nemet 721eca6bc6STeresa Johnson bool run(); 7385fba393SAdam Nemet 7485fba393SAdam Nemet private: 759d9cb274SAdam Nemet bool runOnLoop(Loop *L); 769d9cb274SAdam Nemet 775f8f34e4SAdrian Prantl /// Check if the stride of the accesses is large enough to 786d8beecaSAdam Nemet /// warrant a prefetch. 7936d4421fSJonas Paulsson bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride); 806d8beecaSAdam Nemet 8136d4421fSJonas Paulsson unsigned getMinPrefetchStride(unsigned NumMemAccesses, 8236d4421fSJonas Paulsson unsigned NumStridedMemAccesses, 8336d4421fSJonas Paulsson unsigned NumPrefetches, 8436d4421fSJonas Paulsson bool HasCall) { 851428d41fSAdam Nemet if (MinPrefetchStride.getNumOccurrences() > 0) 861428d41fSAdam Nemet return MinPrefetchStride; 8736d4421fSJonas Paulsson return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, 8836d4421fSJonas Paulsson NumPrefetches, HasCall); 891428d41fSAdam Nemet } 901428d41fSAdam Nemet 911428d41fSAdam Nemet unsigned getPrefetchDistance() { 921428d41fSAdam Nemet if (PrefetchDistance.getNumOccurrences() > 0) 931428d41fSAdam Nemet return PrefetchDistance; 941428d41fSAdam Nemet return TTI->getPrefetchDistance(); 951428d41fSAdam Nemet } 961428d41fSAdam Nemet 971428d41fSAdam Nemet unsigned getMaxPrefetchIterationsAhead() { 981428d41fSAdam Nemet if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0) 991428d41fSAdam Nemet return MaxPrefetchIterationsAhead; 1001428d41fSAdam Nemet return TTI->getMaxPrefetchIterationsAhead(); 1011428d41fSAdam Nemet } 1021428d41fSAdam Nemet 10336d4421fSJonas Paulsson bool doPrefetchWrites() { 10436d4421fSJonas Paulsson if (PrefetchWrites.getNumOccurrences() > 0) 10536d4421fSJonas Paulsson return PrefetchWrites; 10636d4421fSJonas Paulsson return TTI->enableWritePrefetching(); 10736d4421fSJonas Paulsson } 10836d4421fSJonas Paulsson 109aec2fa35SDaniel Jasper AssumptionCache *AC; 11036d4421fSJonas Paulsson DominatorTree *DT; 1119d9cb274SAdam Nemet LoopInfo *LI; 1129d9cb274SAdam Nemet ScalarEvolution *SE; 1139d9cb274SAdam Nemet const TargetTransformInfo *TTI; 1149e6e63fbSAdam Nemet OptimizationRemarkEmitter *ORE; 1159d9cb274SAdam Nemet }; 1161eca6bc6STeresa Johnson 1171eca6bc6STeresa Johnson /// Legacy class for inserting loop data prefetches. 1181eca6bc6STeresa Johnson class LoopDataPrefetchLegacyPass : public FunctionPass { 1191eca6bc6STeresa Johnson public: 1201eca6bc6STeresa Johnson static char ID; // Pass ID, replacement for typeid 1211eca6bc6STeresa Johnson LoopDataPrefetchLegacyPass() : FunctionPass(ID) { 1221eca6bc6STeresa Johnson initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry()); 1231eca6bc6STeresa Johnson } 1241eca6bc6STeresa Johnson 1251eca6bc6STeresa Johnson void getAnalysisUsage(AnalysisUsage &AU) const override { 126aec2fa35SDaniel Jasper AU.addRequired<AssumptionCacheTracker>(); 12736d4421fSJonas Paulsson AU.addRequired<DominatorTreeWrapperPass>(); 1281eca6bc6STeresa Johnson AU.addPreserved<DominatorTreeWrapperPass>(); 1291eca6bc6STeresa Johnson AU.addRequired<LoopInfoWrapperPass>(); 1301eca6bc6STeresa Johnson AU.addPreserved<LoopInfoWrapperPass>(); 131*99c43363SAndrew Wei AU.addRequiredID(LoopSimplifyID); 132*99c43363SAndrew Wei AU.addPreservedID(LoopSimplifyID); 1331eca6bc6STeresa Johnson AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1341eca6bc6STeresa Johnson AU.addRequired<ScalarEvolutionWrapperPass>(); 13540549ad1SGeoff Berry AU.addPreserved<ScalarEvolutionWrapperPass>(); 1361eca6bc6STeresa Johnson AU.addRequired<TargetTransformInfoWrapperPass>(); 1371eca6bc6STeresa Johnson } 1381eca6bc6STeresa Johnson 1391eca6bc6STeresa Johnson bool runOnFunction(Function &F) override; 1401eca6bc6STeresa Johnson }; 1419d9cb274SAdam Nemet } 1429d9cb274SAdam Nemet 1431eca6bc6STeresa Johnson char LoopDataPrefetchLegacyPass::ID = 0; 1441eca6bc6STeresa Johnson INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", 1459d9cb274SAdam Nemet "Loop Data Prefetch", false, false) 146aec2fa35SDaniel Jasper INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 1479d9cb274SAdam Nemet INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 1489d9cb274SAdam Nemet INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 149*99c43363SAndrew Wei INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1509e6e63fbSAdam Nemet INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 1519d9cb274SAdam Nemet INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1521eca6bc6STeresa Johnson INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch", 1539d9cb274SAdam Nemet "Loop Data Prefetch", false, false) 1549d9cb274SAdam Nemet 1551eca6bc6STeresa Johnson FunctionPass *llvm::createLoopDataPrefetchPass() { 1561eca6bc6STeresa Johnson return new LoopDataPrefetchLegacyPass(); 1571eca6bc6STeresa Johnson } 1589d9cb274SAdam Nemet 15936d4421fSJonas Paulsson bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR, 16036d4421fSJonas Paulsson unsigned TargetMinStride) { 1616d8beecaSAdam Nemet // No need to check if any stride goes. 1626d8beecaSAdam Nemet if (TargetMinStride <= 1) 1636d8beecaSAdam Nemet return true; 1646d8beecaSAdam Nemet 1656d8beecaSAdam Nemet const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 1666d8beecaSAdam Nemet // If MinStride is set, don't prefetch unless we can ensure that stride is 1676d8beecaSAdam Nemet // larger. 1686d8beecaSAdam Nemet if (!ConstStride) 1696d8beecaSAdam Nemet return false; 1706d8beecaSAdam Nemet 1716d8beecaSAdam Nemet unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue()); 1726d8beecaSAdam Nemet return TargetMinStride <= AbsStride; 1736d8beecaSAdam Nemet } 1746d8beecaSAdam Nemet 1751eca6bc6STeresa Johnson PreservedAnalyses LoopDataPrefetchPass::run(Function &F, 1761eca6bc6STeresa Johnson FunctionAnalysisManager &AM) { 17736d4421fSJonas Paulsson DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1781eca6bc6STeresa Johnson LoopInfo *LI = &AM.getResult<LoopAnalysis>(F); 1791eca6bc6STeresa Johnson ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); 180aec2fa35SDaniel Jasper AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F); 1811eca6bc6STeresa Johnson OptimizationRemarkEmitter *ORE = 1821eca6bc6STeresa Johnson &AM.getResult<OptimizationRemarkEmitterAnalysis>(F); 1831eca6bc6STeresa Johnson const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F); 1841eca6bc6STeresa Johnson 18536d4421fSJonas Paulsson LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE); 1861eca6bc6STeresa Johnson bool Changed = LDP.run(); 1871eca6bc6STeresa Johnson 1881eca6bc6STeresa Johnson if (Changed) { 1891eca6bc6STeresa Johnson PreservedAnalyses PA; 1901eca6bc6STeresa Johnson PA.preserve<DominatorTreeAnalysis>(); 1911eca6bc6STeresa Johnson PA.preserve<LoopAnalysis>(); 1921eca6bc6STeresa Johnson return PA; 1931eca6bc6STeresa Johnson } 1941eca6bc6STeresa Johnson 1951eca6bc6STeresa Johnson return PreservedAnalyses::all(); 1961eca6bc6STeresa Johnson } 1971eca6bc6STeresa Johnson 1981eca6bc6STeresa Johnson bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) { 19950271f78SAndrew Kaylor if (skipFunction(F)) 20050271f78SAndrew Kaylor return false; 20150271f78SAndrew Kaylor 20236d4421fSJonas Paulsson DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2031eca6bc6STeresa Johnson LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2041eca6bc6STeresa Johnson ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 205aec2fa35SDaniel Jasper AssumptionCache *AC = 206aec2fa35SDaniel Jasper &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 2071eca6bc6STeresa Johnson OptimizationRemarkEmitter *ORE = 2081eca6bc6STeresa Johnson &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 2091eca6bc6STeresa Johnson const TargetTransformInfo *TTI = 2101eca6bc6STeresa Johnson &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 2119d9cb274SAdam Nemet 21236d4421fSJonas Paulsson LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE); 2131eca6bc6STeresa Johnson return LDP.run(); 2141eca6bc6STeresa Johnson } 2151eca6bc6STeresa Johnson 2161eca6bc6STeresa Johnson bool LoopDataPrefetch::run() { 217bb3680bdSAdam Nemet // If PrefetchDistance is not set, don't run the pass. This gives an 218bb3680bdSAdam Nemet // opportunity for targets to run this pass for selected subtargets only 219bb3680bdSAdam Nemet // (whose TTI sets PrefetchDistance). 2201428d41fSAdam Nemet if (getPrefetchDistance() == 0) 221bb3680bdSAdam Nemet return false; 2229d9cb274SAdam Nemet assert(TTI->getCacheLineSize() && "Cache line size is not set for target"); 2239d9cb274SAdam Nemet 2249d9cb274SAdam Nemet bool MadeChange = false; 2259d9cb274SAdam Nemet 226135f735aSBenjamin Kramer for (Loop *I : *LI) 227135f735aSBenjamin Kramer for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) 2289d9cb274SAdam Nemet MadeChange |= runOnLoop(*L); 2299d9cb274SAdam Nemet 2309d9cb274SAdam Nemet return MadeChange; 2319d9cb274SAdam Nemet } 2329d9cb274SAdam Nemet 23336d4421fSJonas Paulsson /// A record for a potential prefetch made during the initial scan of the 23436d4421fSJonas Paulsson /// loop. This is used to let a single prefetch target multiple memory accesses. 23536d4421fSJonas Paulsson struct Prefetch { 23636d4421fSJonas Paulsson /// The address formula for this prefetch as returned by ScalarEvolution. 23736d4421fSJonas Paulsson const SCEVAddRecExpr *LSCEVAddRec; 23836d4421fSJonas Paulsson /// The point of insertion for the prefetch instruction. 23936d4421fSJonas Paulsson Instruction *InsertPt; 24036d4421fSJonas Paulsson /// True if targeting a write memory access. 24136d4421fSJonas Paulsson bool Writes; 24236d4421fSJonas Paulsson /// The (first seen) prefetched instruction. 24336d4421fSJonas Paulsson Instruction *MemI; 24436d4421fSJonas Paulsson 245d5ea89f8SBenjamin Kramer /// Constructor to create a new Prefetch for \p I. 24636d4421fSJonas Paulsson Prefetch(const SCEVAddRecExpr *L, Instruction *I) 24736d4421fSJonas Paulsson : LSCEVAddRec(L), InsertPt(nullptr), Writes(false), MemI(nullptr) { 24836d4421fSJonas Paulsson addInstruction(I); 24936d4421fSJonas Paulsson }; 25036d4421fSJonas Paulsson 25136d4421fSJonas Paulsson /// Add the instruction \param I to this prefetch. If it's not the first 25236d4421fSJonas Paulsson /// one, 'InsertPt' and 'Writes' will be updated as required. 25336d4421fSJonas Paulsson /// \param PtrDiff the known constant address difference to the first added 25436d4421fSJonas Paulsson /// instruction. 25536d4421fSJonas Paulsson void addInstruction(Instruction *I, DominatorTree *DT = nullptr, 25636d4421fSJonas Paulsson int64_t PtrDiff = 0) { 25736d4421fSJonas Paulsson if (!InsertPt) { 25836d4421fSJonas Paulsson MemI = I; 25936d4421fSJonas Paulsson InsertPt = I; 26036d4421fSJonas Paulsson Writes = isa<StoreInst>(I); 26136d4421fSJonas Paulsson } else { 26236d4421fSJonas Paulsson BasicBlock *PrefBB = InsertPt->getParent(); 26336d4421fSJonas Paulsson BasicBlock *InsBB = I->getParent(); 26436d4421fSJonas Paulsson if (PrefBB != InsBB) { 26536d4421fSJonas Paulsson BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB); 26636d4421fSJonas Paulsson if (DomBB != PrefBB) 26736d4421fSJonas Paulsson InsertPt = DomBB->getTerminator(); 26836d4421fSJonas Paulsson } 26936d4421fSJonas Paulsson 27036d4421fSJonas Paulsson if (isa<StoreInst>(I) && PtrDiff == 0) 27136d4421fSJonas Paulsson Writes = true; 27236d4421fSJonas Paulsson } 27336d4421fSJonas Paulsson } 27436d4421fSJonas Paulsson }; 27536d4421fSJonas Paulsson 2769d9cb274SAdam Nemet bool LoopDataPrefetch::runOnLoop(Loop *L) { 2779d9cb274SAdam Nemet bool MadeChange = false; 2789d9cb274SAdam Nemet 2799d9cb274SAdam Nemet // Only prefetch in the inner-most loop 28089c1e35fSStefanos Baziotis if (!L->isInnermost()) 2819d9cb274SAdam Nemet return MadeChange; 2829d9cb274SAdam Nemet 2839d9cb274SAdam Nemet SmallPtrSet<const Value *, 32> EphValues; 284aec2fa35SDaniel Jasper CodeMetrics::collectEphemeralValues(L, AC, EphValues); 2859d9cb274SAdam Nemet 2869d9cb274SAdam Nemet // Calculate the number of iterations ahead to prefetch 2879d9cb274SAdam Nemet CodeMetrics Metrics; 28836d4421fSJonas Paulsson bool HasCall = false; 289c6cebf72SBalaram Makam for (const auto BB : L->blocks()) { 2909d9cb274SAdam Nemet // If the loop already has prefetches, then assume that the user knows 2912cf5e89eSNico Weber // what they are doing and don't add any more. 29236d4421fSJonas Paulsson for (auto &I : *BB) { 29336d4421fSJonas Paulsson if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) { 294b8960b5dSMircea Trofin if (const Function *F = cast<CallBase>(I).getCalledFunction()) { 2959d9cb274SAdam Nemet if (F->getIntrinsicID() == Intrinsic::prefetch) 2969d9cb274SAdam Nemet return MadeChange; 29736d4421fSJonas Paulsson if (TTI->isLoweredToCall(F)) 29836d4421fSJonas Paulsson HasCall = true; 29936d4421fSJonas Paulsson } else { // indirect call. 30036d4421fSJonas Paulsson HasCall = true; 30136d4421fSJonas Paulsson } 30236d4421fSJonas Paulsson } 30336d4421fSJonas Paulsson } 304c6cebf72SBalaram Makam Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 3059d9cb274SAdam Nemet } 3069d9cb274SAdam Nemet unsigned LoopSize = Metrics.NumInsts; 3079d9cb274SAdam Nemet if (!LoopSize) 3089d9cb274SAdam Nemet LoopSize = 1; 3099d9cb274SAdam Nemet 3101428d41fSAdam Nemet unsigned ItersAhead = getPrefetchDistance() / LoopSize; 3119d9cb274SAdam Nemet if (!ItersAhead) 3129d9cb274SAdam Nemet ItersAhead = 1; 3139d9cb274SAdam Nemet 3141428d41fSAdam Nemet if (ItersAhead > getMaxPrefetchIterationsAhead()) 315709e3046SAdam Nemet return MadeChange; 316709e3046SAdam Nemet 31736d4421fSJonas Paulsson unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L); 31836d4421fSJonas Paulsson if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1) 31936d4421fSJonas Paulsson return MadeChange; 32034785ecfSAdam Nemet 32136d4421fSJonas Paulsson unsigned NumMemAccesses = 0; 32236d4421fSJonas Paulsson unsigned NumStridedMemAccesses = 0; 32336d4421fSJonas Paulsson SmallVector<Prefetch, 16> Prefetches; 32436d4421fSJonas Paulsson for (const auto BB : L->blocks()) 325c6cebf72SBalaram Makam for (auto &I : *BB) { 3269d9cb274SAdam Nemet Value *PtrValue; 3279d9cb274SAdam Nemet Instruction *MemI; 3289d9cb274SAdam Nemet 329c6cebf72SBalaram Makam if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) { 3309d9cb274SAdam Nemet MemI = LMemI; 3319d9cb274SAdam Nemet PtrValue = LMemI->getPointerOperand(); 332c6cebf72SBalaram Makam } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) { 33336d4421fSJonas Paulsson if (!doPrefetchWrites()) continue; 3349d9cb274SAdam Nemet MemI = SMemI; 3359d9cb274SAdam Nemet PtrValue = SMemI->getPointerOperand(); 3369d9cb274SAdam Nemet } else continue; 3379d9cb274SAdam Nemet 3389d9cb274SAdam Nemet unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 3399d9cb274SAdam Nemet if (PtrAddrSpace) 3409d9cb274SAdam Nemet continue; 34136d4421fSJonas Paulsson NumMemAccesses++; 3429d9cb274SAdam Nemet if (L->isLoopInvariant(PtrValue)) 3439d9cb274SAdam Nemet continue; 3449d9cb274SAdam Nemet 3459d9cb274SAdam Nemet const SCEV *LSCEV = SE->getSCEV(PtrValue); 3469d9cb274SAdam Nemet const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); 3479d9cb274SAdam Nemet if (!LSCEVAddRec) 3489d9cb274SAdam Nemet continue; 34936d4421fSJonas Paulsson NumStridedMemAccesses++; 3509d9cb274SAdam Nemet 35136d4421fSJonas Paulsson // We don't want to double prefetch individual cache lines. If this 35236d4421fSJonas Paulsson // access is known to be within one cache line of some other one that 35336d4421fSJonas Paulsson // has already been prefetched, then don't prefetch this one as well. 3549d9cb274SAdam Nemet bool DupPref = false; 35536d4421fSJonas Paulsson for (auto &Pref : Prefetches) { 35636d4421fSJonas Paulsson const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec); 3579d9cb274SAdam Nemet if (const SCEVConstant *ConstPtrDiff = 3589d9cb274SAdam Nemet dyn_cast<SCEVConstant>(PtrDiff)) { 3599d9cb274SAdam Nemet int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); 3609d9cb274SAdam Nemet if (PD < (int64_t) TTI->getCacheLineSize()) { 36136d4421fSJonas Paulsson Pref.addInstruction(MemI, DT, PD); 3629d9cb274SAdam Nemet DupPref = true; 3639d9cb274SAdam Nemet break; 3649d9cb274SAdam Nemet } 3659d9cb274SAdam Nemet } 3669d9cb274SAdam Nemet } 36736d4421fSJonas Paulsson if (!DupPref) 36836d4421fSJonas Paulsson Prefetches.push_back(Prefetch(LSCEVAddRec, MemI)); 36936d4421fSJonas Paulsson } 37036d4421fSJonas Paulsson 37136d4421fSJonas Paulsson unsigned TargetMinStride = 37236d4421fSJonas Paulsson getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, 37336d4421fSJonas Paulsson Prefetches.size(), HasCall); 37436d4421fSJonas Paulsson 37536d4421fSJonas Paulsson LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead 37636d4421fSJonas Paulsson << " iterations ahead (loop size: " << LoopSize << ") in " 37736d4421fSJonas Paulsson << L->getHeader()->getParent()->getName() << ": " << *L); 37836d4421fSJonas Paulsson LLVM_DEBUG(dbgs() << "Loop has: " 37936d4421fSJonas Paulsson << NumMemAccesses << " memory accesses, " 38036d4421fSJonas Paulsson << NumStridedMemAccesses << " strided memory accesses, " 38136d4421fSJonas Paulsson << Prefetches.size() << " potential prefetch(es), " 38236d4421fSJonas Paulsson << "a minimum stride of " << TargetMinStride << ", " 38336d4421fSJonas Paulsson << (HasCall ? "calls" : "no calls") << ".\n"); 38436d4421fSJonas Paulsson 38536d4421fSJonas Paulsson for (auto &P : Prefetches) { 38636d4421fSJonas Paulsson // Check if the stride of the accesses is large enough to warrant a 38736d4421fSJonas Paulsson // prefetch. 38836d4421fSJonas Paulsson if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride)) 3899d9cb274SAdam Nemet continue; 3909d9cb274SAdam Nemet 39136d4421fSJonas Paulsson const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr( 39236d4421fSJonas Paulsson SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead), 39336d4421fSJonas Paulsson P.LSCEVAddRec->getStepRecurrence(*SE))); 3949d9cb274SAdam Nemet if (!isSafeToExpand(NextLSCEV, *SE)) 3959d9cb274SAdam Nemet continue; 3969d9cb274SAdam Nemet 39736d4421fSJonas Paulsson BasicBlock *BB = P.InsertPt->getParent(); 39836d4421fSJonas Paulsson Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/); 39936d4421fSJonas Paulsson SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr"); 40036d4421fSJonas Paulsson Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt); 4019d9cb274SAdam Nemet 40236d4421fSJonas Paulsson IRBuilder<> Builder(P.InsertPt); 403c6cebf72SBalaram Makam Module *M = BB->getParent()->getParent(); 404c6cebf72SBalaram Makam Type *I32 = Type::getInt32Ty(BB->getContext()); 405dbc0a5dfSJF Bastien Function *PrefetchFunc = Intrinsic::getDeclaration( 406dbc0a5dfSJF Bastien M, Intrinsic::prefetch, PrefPtrValue->getType()); 4079d9cb274SAdam Nemet Builder.CreateCall( 4089d9cb274SAdam Nemet PrefetchFunc, 4099d9cb274SAdam Nemet {PrefPtrValue, 41036d4421fSJonas Paulsson ConstantInt::get(I32, P.Writes), 4119d9cb274SAdam Nemet ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)}); 41234785ecfSAdam Nemet ++NumPrefetches; 41336d4421fSJonas Paulsson LLVM_DEBUG(dbgs() << " Access: " 41436d4421fSJonas Paulsson << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1) 41536d4421fSJonas Paulsson << ", SCEV: " << *P.LSCEVAddRec << "\n"); 4169590658fSVivek Pandya ORE->emit([&]() { 41736d4421fSJonas Paulsson return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI) 4189590658fSVivek Pandya << "prefetched memory access"; 4199590658fSVivek Pandya }); 4209d9cb274SAdam Nemet 4219d9cb274SAdam Nemet MadeChange = true; 4229d9cb274SAdam Nemet } 4239d9cb274SAdam Nemet 4249d9cb274SAdam Nemet return MadeChange; 4259d9cb274SAdam Nemet } 426