19d9cb274SAdam Nemet //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
29d9cb274SAdam Nemet //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69d9cb274SAdam Nemet //
79d9cb274SAdam Nemet //===----------------------------------------------------------------------===//
89d9cb274SAdam Nemet //
99d9cb274SAdam Nemet // This file implements a Loop Data Prefetching Pass.
109d9cb274SAdam Nemet //
119d9cb274SAdam Nemet //===----------------------------------------------------------------------===//
129d9cb274SAdam Nemet 
131eca6bc6STeresa Johnson #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
1405da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
151eca6bc6STeresa Johnson 
169d9cb274SAdam Nemet #include "llvm/ADT/DepthFirstIterator.h"
179d9cb274SAdam Nemet #include "llvm/ADT/Statistic.h"
18aec2fa35SDaniel Jasper #include "llvm/Analysis/AssumptionCache.h"
199d9cb274SAdam Nemet #include "llvm/Analysis/CodeMetrics.h"
209d9cb274SAdam Nemet #include "llvm/Analysis/LoopInfo.h"
210965da20SAdam Nemet #include "llvm/Analysis/OptimizationRemarkEmitter.h"
229d9cb274SAdam Nemet #include "llvm/Analysis/ScalarEvolution.h"
239d9cb274SAdam Nemet #include "llvm/Analysis/ScalarEvolutionExpressions.h"
249d9cb274SAdam Nemet #include "llvm/Analysis/TargetTransformInfo.h"
259d9cb274SAdam Nemet #include "llvm/IR/CFG.h"
269d9cb274SAdam Nemet #include "llvm/IR/Dominators.h"
279d9cb274SAdam Nemet #include "llvm/IR/Function.h"
289d9cb274SAdam Nemet #include "llvm/IR/Module.h"
299d9cb274SAdam Nemet #include "llvm/Support/CommandLine.h"
309d9cb274SAdam Nemet #include "llvm/Support/Debug.h"
31885f1de4SAdam Nemet #include "llvm/Transforms/Scalar.h"
32*99c43363SAndrew Wei #include "llvm/Transforms/Utils.h"
339d9cb274SAdam Nemet #include "llvm/Transforms/Utils/BasicBlockUtils.h"
34bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
359d9cb274SAdam Nemet #include "llvm/Transforms/Utils/ValueMapper.h"
3671acce68SMindong Chen 
3771acce68SMindong Chen #define DEBUG_TYPE "loop-data-prefetch"
3871acce68SMindong Chen 
399d9cb274SAdam Nemet using namespace llvm;
409d9cb274SAdam Nemet 
419d9cb274SAdam Nemet // By default, we limit this to creating 16 PHIs (which is a little over half
429d9cb274SAdam Nemet // of the allocatable register set).
439d9cb274SAdam Nemet static cl::opt<bool>
449d9cb274SAdam Nemet PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
459d9cb274SAdam Nemet                cl::desc("Prefetch write addresses"));
469d9cb274SAdam Nemet 
471428d41fSAdam Nemet static cl::opt<unsigned>
481428d41fSAdam Nemet     PrefetchDistance("prefetch-distance",
491428d41fSAdam Nemet                      cl::desc("Number of instructions to prefetch ahead"),
501428d41fSAdam Nemet                      cl::Hidden);
511428d41fSAdam Nemet 
521428d41fSAdam Nemet static cl::opt<unsigned>
531428d41fSAdam Nemet     MinPrefetchStride("min-prefetch-stride",
541428d41fSAdam Nemet                       cl::desc("Min stride to add prefetches"), cl::Hidden);
551428d41fSAdam Nemet 
561428d41fSAdam Nemet static cl::opt<unsigned> MaxPrefetchIterationsAhead(
571428d41fSAdam Nemet     "max-prefetch-iters-ahead",
581428d41fSAdam Nemet     cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
591428d41fSAdam Nemet 
6034785ecfSAdam Nemet STATISTIC(NumPrefetches, "Number of prefetches inserted");
6134785ecfSAdam Nemet 
629d9cb274SAdam Nemet namespace {
639d9cb274SAdam Nemet 
641eca6bc6STeresa Johnson /// Loop prefetch implementation class.
651eca6bc6STeresa Johnson class LoopDataPrefetch {
669d9cb274SAdam Nemet public:
6736d4421fSJonas Paulsson   LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
6836d4421fSJonas Paulsson                    ScalarEvolution *SE, const TargetTransformInfo *TTI,
691eca6bc6STeresa Johnson                    OptimizationRemarkEmitter *ORE)
7036d4421fSJonas Paulsson       : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
719d9cb274SAdam Nemet 
721eca6bc6STeresa Johnson   bool run();
7385fba393SAdam Nemet 
7485fba393SAdam Nemet private:
759d9cb274SAdam Nemet   bool runOnLoop(Loop *L);
769d9cb274SAdam Nemet 
775f8f34e4SAdrian Prantl   /// Check if the stride of the accesses is large enough to
786d8beecaSAdam Nemet   /// warrant a prefetch.
7936d4421fSJonas Paulsson   bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
806d8beecaSAdam Nemet 
8136d4421fSJonas Paulsson   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
8236d4421fSJonas Paulsson                                 unsigned NumStridedMemAccesses,
8336d4421fSJonas Paulsson                                 unsigned NumPrefetches,
8436d4421fSJonas Paulsson                                 bool HasCall) {
851428d41fSAdam Nemet     if (MinPrefetchStride.getNumOccurrences() > 0)
861428d41fSAdam Nemet       return MinPrefetchStride;
8736d4421fSJonas Paulsson     return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
8836d4421fSJonas Paulsson                                      NumPrefetches, HasCall);
891428d41fSAdam Nemet   }
901428d41fSAdam Nemet 
911428d41fSAdam Nemet   unsigned getPrefetchDistance() {
921428d41fSAdam Nemet     if (PrefetchDistance.getNumOccurrences() > 0)
931428d41fSAdam Nemet       return PrefetchDistance;
941428d41fSAdam Nemet     return TTI->getPrefetchDistance();
951428d41fSAdam Nemet   }
961428d41fSAdam Nemet 
971428d41fSAdam Nemet   unsigned getMaxPrefetchIterationsAhead() {
981428d41fSAdam Nemet     if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
991428d41fSAdam Nemet       return MaxPrefetchIterationsAhead;
1001428d41fSAdam Nemet     return TTI->getMaxPrefetchIterationsAhead();
1011428d41fSAdam Nemet   }
1021428d41fSAdam Nemet 
10336d4421fSJonas Paulsson   bool doPrefetchWrites() {
10436d4421fSJonas Paulsson     if (PrefetchWrites.getNumOccurrences() > 0)
10536d4421fSJonas Paulsson       return PrefetchWrites;
10636d4421fSJonas Paulsson     return TTI->enableWritePrefetching();
10736d4421fSJonas Paulsson   }
10836d4421fSJonas Paulsson 
109aec2fa35SDaniel Jasper   AssumptionCache *AC;
11036d4421fSJonas Paulsson   DominatorTree *DT;
1119d9cb274SAdam Nemet   LoopInfo *LI;
1129d9cb274SAdam Nemet   ScalarEvolution *SE;
1139d9cb274SAdam Nemet   const TargetTransformInfo *TTI;
1149e6e63fbSAdam Nemet   OptimizationRemarkEmitter *ORE;
1159d9cb274SAdam Nemet };
1161eca6bc6STeresa Johnson 
1171eca6bc6STeresa Johnson /// Legacy class for inserting loop data prefetches.
1181eca6bc6STeresa Johnson class LoopDataPrefetchLegacyPass : public FunctionPass {
1191eca6bc6STeresa Johnson public:
1201eca6bc6STeresa Johnson   static char ID; // Pass ID, replacement for typeid
1211eca6bc6STeresa Johnson   LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
1221eca6bc6STeresa Johnson     initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());
1231eca6bc6STeresa Johnson   }
1241eca6bc6STeresa Johnson 
1251eca6bc6STeresa Johnson   void getAnalysisUsage(AnalysisUsage &AU) const override {
126aec2fa35SDaniel Jasper     AU.addRequired<AssumptionCacheTracker>();
12736d4421fSJonas Paulsson     AU.addRequired<DominatorTreeWrapperPass>();
1281eca6bc6STeresa Johnson     AU.addPreserved<DominatorTreeWrapperPass>();
1291eca6bc6STeresa Johnson     AU.addRequired<LoopInfoWrapperPass>();
1301eca6bc6STeresa Johnson     AU.addPreserved<LoopInfoWrapperPass>();
131*99c43363SAndrew Wei     AU.addRequiredID(LoopSimplifyID);
132*99c43363SAndrew Wei     AU.addPreservedID(LoopSimplifyID);
1331eca6bc6STeresa Johnson     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1341eca6bc6STeresa Johnson     AU.addRequired<ScalarEvolutionWrapperPass>();
13540549ad1SGeoff Berry     AU.addPreserved<ScalarEvolutionWrapperPass>();
1361eca6bc6STeresa Johnson     AU.addRequired<TargetTransformInfoWrapperPass>();
1371eca6bc6STeresa Johnson   }
1381eca6bc6STeresa Johnson 
1391eca6bc6STeresa Johnson   bool runOnFunction(Function &F) override;
1401eca6bc6STeresa Johnson   };
1419d9cb274SAdam Nemet }
1429d9cb274SAdam Nemet 
1431eca6bc6STeresa Johnson char LoopDataPrefetchLegacyPass::ID = 0;
1441eca6bc6STeresa Johnson INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
1459d9cb274SAdam Nemet                       "Loop Data Prefetch", false, false)
146aec2fa35SDaniel Jasper INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1479d9cb274SAdam Nemet INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1489d9cb274SAdam Nemet INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
149*99c43363SAndrew Wei INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
1509e6e63fbSAdam Nemet INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
1519d9cb274SAdam Nemet INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
1521eca6bc6STeresa Johnson INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
1539d9cb274SAdam Nemet                     "Loop Data Prefetch", false, false)
1549d9cb274SAdam Nemet 
1551eca6bc6STeresa Johnson FunctionPass *llvm::createLoopDataPrefetchPass() {
1561eca6bc6STeresa Johnson   return new LoopDataPrefetchLegacyPass();
1571eca6bc6STeresa Johnson }
1589d9cb274SAdam Nemet 
15936d4421fSJonas Paulsson bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
16036d4421fSJonas Paulsson                                            unsigned TargetMinStride) {
1616d8beecaSAdam Nemet   // No need to check if any stride goes.
1626d8beecaSAdam Nemet   if (TargetMinStride <= 1)
1636d8beecaSAdam Nemet     return true;
1646d8beecaSAdam Nemet 
1656d8beecaSAdam Nemet   const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1666d8beecaSAdam Nemet   // If MinStride is set, don't prefetch unless we can ensure that stride is
1676d8beecaSAdam Nemet   // larger.
1686d8beecaSAdam Nemet   if (!ConstStride)
1696d8beecaSAdam Nemet     return false;
1706d8beecaSAdam Nemet 
1716d8beecaSAdam Nemet   unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
1726d8beecaSAdam Nemet   return TargetMinStride <= AbsStride;
1736d8beecaSAdam Nemet }
1746d8beecaSAdam Nemet 
1751eca6bc6STeresa Johnson PreservedAnalyses LoopDataPrefetchPass::run(Function &F,
1761eca6bc6STeresa Johnson                                             FunctionAnalysisManager &AM) {
17736d4421fSJonas Paulsson   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1781eca6bc6STeresa Johnson   LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
1791eca6bc6STeresa Johnson   ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
180aec2fa35SDaniel Jasper   AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
1811eca6bc6STeresa Johnson   OptimizationRemarkEmitter *ORE =
1821eca6bc6STeresa Johnson       &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1831eca6bc6STeresa Johnson   const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
1841eca6bc6STeresa Johnson 
18536d4421fSJonas Paulsson   LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
1861eca6bc6STeresa Johnson   bool Changed = LDP.run();
1871eca6bc6STeresa Johnson 
1881eca6bc6STeresa Johnson   if (Changed) {
1891eca6bc6STeresa Johnson     PreservedAnalyses PA;
1901eca6bc6STeresa Johnson     PA.preserve<DominatorTreeAnalysis>();
1911eca6bc6STeresa Johnson     PA.preserve<LoopAnalysis>();
1921eca6bc6STeresa Johnson     return PA;
1931eca6bc6STeresa Johnson   }
1941eca6bc6STeresa Johnson 
1951eca6bc6STeresa Johnson   return PreservedAnalyses::all();
1961eca6bc6STeresa Johnson }
1971eca6bc6STeresa Johnson 
1981eca6bc6STeresa Johnson bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
19950271f78SAndrew Kaylor   if (skipFunction(F))
20050271f78SAndrew Kaylor     return false;
20150271f78SAndrew Kaylor 
20236d4421fSJonas Paulsson   DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2031eca6bc6STeresa Johnson   LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2041eca6bc6STeresa Johnson   ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
205aec2fa35SDaniel Jasper   AssumptionCache *AC =
206aec2fa35SDaniel Jasper       &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2071eca6bc6STeresa Johnson   OptimizationRemarkEmitter *ORE =
2081eca6bc6STeresa Johnson       &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
2091eca6bc6STeresa Johnson   const TargetTransformInfo *TTI =
2101eca6bc6STeresa Johnson       &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2119d9cb274SAdam Nemet 
21236d4421fSJonas Paulsson   LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
2131eca6bc6STeresa Johnson   return LDP.run();
2141eca6bc6STeresa Johnson }
2151eca6bc6STeresa Johnson 
2161eca6bc6STeresa Johnson bool LoopDataPrefetch::run() {
217bb3680bdSAdam Nemet   // If PrefetchDistance is not set, don't run the pass.  This gives an
218bb3680bdSAdam Nemet   // opportunity for targets to run this pass for selected subtargets only
219bb3680bdSAdam Nemet   // (whose TTI sets PrefetchDistance).
2201428d41fSAdam Nemet   if (getPrefetchDistance() == 0)
221bb3680bdSAdam Nemet     return false;
2229d9cb274SAdam Nemet   assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
2239d9cb274SAdam Nemet 
2249d9cb274SAdam Nemet   bool MadeChange = false;
2259d9cb274SAdam Nemet 
226135f735aSBenjamin Kramer   for (Loop *I : *LI)
227135f735aSBenjamin Kramer     for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
2289d9cb274SAdam Nemet       MadeChange |= runOnLoop(*L);
2299d9cb274SAdam Nemet 
2309d9cb274SAdam Nemet   return MadeChange;
2319d9cb274SAdam Nemet }
2329d9cb274SAdam Nemet 
23336d4421fSJonas Paulsson /// A record for a potential prefetch made during the initial scan of the
23436d4421fSJonas Paulsson /// loop. This is used to let a single prefetch target multiple memory accesses.
23536d4421fSJonas Paulsson struct Prefetch {
23636d4421fSJonas Paulsson   /// The address formula for this prefetch as returned by ScalarEvolution.
23736d4421fSJonas Paulsson   const SCEVAddRecExpr *LSCEVAddRec;
23836d4421fSJonas Paulsson   /// The point of insertion for the prefetch instruction.
23936d4421fSJonas Paulsson   Instruction *InsertPt;
24036d4421fSJonas Paulsson   /// True if targeting a write memory access.
24136d4421fSJonas Paulsson   bool Writes;
24236d4421fSJonas Paulsson   /// The (first seen) prefetched instruction.
24336d4421fSJonas Paulsson   Instruction *MemI;
24436d4421fSJonas Paulsson 
245d5ea89f8SBenjamin Kramer   /// Constructor to create a new Prefetch for \p I.
24636d4421fSJonas Paulsson   Prefetch(const SCEVAddRecExpr *L, Instruction *I)
24736d4421fSJonas Paulsson       : LSCEVAddRec(L), InsertPt(nullptr), Writes(false), MemI(nullptr) {
24836d4421fSJonas Paulsson     addInstruction(I);
24936d4421fSJonas Paulsson   };
25036d4421fSJonas Paulsson 
25136d4421fSJonas Paulsson   /// Add the instruction \param I to this prefetch. If it's not the first
25236d4421fSJonas Paulsson   /// one, 'InsertPt' and 'Writes' will be updated as required.
25336d4421fSJonas Paulsson   /// \param PtrDiff the known constant address difference to the first added
25436d4421fSJonas Paulsson   /// instruction.
25536d4421fSJonas Paulsson   void addInstruction(Instruction *I, DominatorTree *DT = nullptr,
25636d4421fSJonas Paulsson                       int64_t PtrDiff = 0) {
25736d4421fSJonas Paulsson     if (!InsertPt) {
25836d4421fSJonas Paulsson       MemI = I;
25936d4421fSJonas Paulsson       InsertPt = I;
26036d4421fSJonas Paulsson       Writes = isa<StoreInst>(I);
26136d4421fSJonas Paulsson     } else {
26236d4421fSJonas Paulsson       BasicBlock *PrefBB = InsertPt->getParent();
26336d4421fSJonas Paulsson       BasicBlock *InsBB = I->getParent();
26436d4421fSJonas Paulsson       if (PrefBB != InsBB) {
26536d4421fSJonas Paulsson         BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
26636d4421fSJonas Paulsson         if (DomBB != PrefBB)
26736d4421fSJonas Paulsson           InsertPt = DomBB->getTerminator();
26836d4421fSJonas Paulsson       }
26936d4421fSJonas Paulsson 
27036d4421fSJonas Paulsson       if (isa<StoreInst>(I) && PtrDiff == 0)
27136d4421fSJonas Paulsson         Writes = true;
27236d4421fSJonas Paulsson     }
27336d4421fSJonas Paulsson   }
27436d4421fSJonas Paulsson };
27536d4421fSJonas Paulsson 
2769d9cb274SAdam Nemet bool LoopDataPrefetch::runOnLoop(Loop *L) {
2779d9cb274SAdam Nemet   bool MadeChange = false;
2789d9cb274SAdam Nemet 
2799d9cb274SAdam Nemet   // Only prefetch in the inner-most loop
28089c1e35fSStefanos Baziotis   if (!L->isInnermost())
2819d9cb274SAdam Nemet     return MadeChange;
2829d9cb274SAdam Nemet 
2839d9cb274SAdam Nemet   SmallPtrSet<const Value *, 32> EphValues;
284aec2fa35SDaniel Jasper   CodeMetrics::collectEphemeralValues(L, AC, EphValues);
2859d9cb274SAdam Nemet 
2869d9cb274SAdam Nemet   // Calculate the number of iterations ahead to prefetch
2879d9cb274SAdam Nemet   CodeMetrics Metrics;
28836d4421fSJonas Paulsson   bool HasCall = false;
289c6cebf72SBalaram Makam   for (const auto BB : L->blocks()) {
2909d9cb274SAdam Nemet     // If the loop already has prefetches, then assume that the user knows
2912cf5e89eSNico Weber     // what they are doing and don't add any more.
29236d4421fSJonas Paulsson     for (auto &I : *BB) {
29336d4421fSJonas Paulsson       if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
294b8960b5dSMircea Trofin         if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
2959d9cb274SAdam Nemet           if (F->getIntrinsicID() == Intrinsic::prefetch)
2969d9cb274SAdam Nemet             return MadeChange;
29736d4421fSJonas Paulsson           if (TTI->isLoweredToCall(F))
29836d4421fSJonas Paulsson             HasCall = true;
29936d4421fSJonas Paulsson         } else { // indirect call.
30036d4421fSJonas Paulsson           HasCall = true;
30136d4421fSJonas Paulsson         }
30236d4421fSJonas Paulsson       }
30336d4421fSJonas Paulsson     }
304c6cebf72SBalaram Makam     Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
3059d9cb274SAdam Nemet   }
3069d9cb274SAdam Nemet   unsigned LoopSize = Metrics.NumInsts;
3079d9cb274SAdam Nemet   if (!LoopSize)
3089d9cb274SAdam Nemet     LoopSize = 1;
3099d9cb274SAdam Nemet 
3101428d41fSAdam Nemet   unsigned ItersAhead = getPrefetchDistance() / LoopSize;
3119d9cb274SAdam Nemet   if (!ItersAhead)
3129d9cb274SAdam Nemet     ItersAhead = 1;
3139d9cb274SAdam Nemet 
3141428d41fSAdam Nemet   if (ItersAhead > getMaxPrefetchIterationsAhead())
315709e3046SAdam Nemet     return MadeChange;
316709e3046SAdam Nemet 
31736d4421fSJonas Paulsson   unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
31836d4421fSJonas Paulsson   if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
31936d4421fSJonas Paulsson     return MadeChange;
32034785ecfSAdam Nemet 
32136d4421fSJonas Paulsson   unsigned NumMemAccesses = 0;
32236d4421fSJonas Paulsson   unsigned NumStridedMemAccesses = 0;
32336d4421fSJonas Paulsson   SmallVector<Prefetch, 16> Prefetches;
32436d4421fSJonas Paulsson   for (const auto BB : L->blocks())
325c6cebf72SBalaram Makam     for (auto &I : *BB) {
3269d9cb274SAdam Nemet       Value *PtrValue;
3279d9cb274SAdam Nemet       Instruction *MemI;
3289d9cb274SAdam Nemet 
329c6cebf72SBalaram Makam       if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
3309d9cb274SAdam Nemet         MemI = LMemI;
3319d9cb274SAdam Nemet         PtrValue = LMemI->getPointerOperand();
332c6cebf72SBalaram Makam       } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
33336d4421fSJonas Paulsson         if (!doPrefetchWrites()) continue;
3349d9cb274SAdam Nemet         MemI = SMemI;
3359d9cb274SAdam Nemet         PtrValue = SMemI->getPointerOperand();
3369d9cb274SAdam Nemet       } else continue;
3379d9cb274SAdam Nemet 
3389d9cb274SAdam Nemet       unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
3399d9cb274SAdam Nemet       if (PtrAddrSpace)
3409d9cb274SAdam Nemet         continue;
34136d4421fSJonas Paulsson       NumMemAccesses++;
3429d9cb274SAdam Nemet       if (L->isLoopInvariant(PtrValue))
3439d9cb274SAdam Nemet         continue;
3449d9cb274SAdam Nemet 
3459d9cb274SAdam Nemet       const SCEV *LSCEV = SE->getSCEV(PtrValue);
3469d9cb274SAdam Nemet       const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
3479d9cb274SAdam Nemet       if (!LSCEVAddRec)
3489d9cb274SAdam Nemet         continue;
34936d4421fSJonas Paulsson       NumStridedMemAccesses++;
3509d9cb274SAdam Nemet 
35136d4421fSJonas Paulsson       // We don't want to double prefetch individual cache lines. If this
35236d4421fSJonas Paulsson       // access is known to be within one cache line of some other one that
35336d4421fSJonas Paulsson       // has already been prefetched, then don't prefetch this one as well.
3549d9cb274SAdam Nemet       bool DupPref = false;
35536d4421fSJonas Paulsson       for (auto &Pref : Prefetches) {
35636d4421fSJonas Paulsson         const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
3579d9cb274SAdam Nemet         if (const SCEVConstant *ConstPtrDiff =
3589d9cb274SAdam Nemet             dyn_cast<SCEVConstant>(PtrDiff)) {
3599d9cb274SAdam Nemet           int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
3609d9cb274SAdam Nemet           if (PD < (int64_t) TTI->getCacheLineSize()) {
36136d4421fSJonas Paulsson             Pref.addInstruction(MemI, DT, PD);
3629d9cb274SAdam Nemet             DupPref = true;
3639d9cb274SAdam Nemet             break;
3649d9cb274SAdam Nemet           }
3659d9cb274SAdam Nemet         }
3669d9cb274SAdam Nemet       }
36736d4421fSJonas Paulsson       if (!DupPref)
36836d4421fSJonas Paulsson         Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
36936d4421fSJonas Paulsson     }
37036d4421fSJonas Paulsson 
37136d4421fSJonas Paulsson   unsigned TargetMinStride =
37236d4421fSJonas Paulsson     getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
37336d4421fSJonas Paulsson                          Prefetches.size(), HasCall);
37436d4421fSJonas Paulsson 
37536d4421fSJonas Paulsson   LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
37636d4421fSJonas Paulsson              << " iterations ahead (loop size: " << LoopSize << ") in "
37736d4421fSJonas Paulsson              << L->getHeader()->getParent()->getName() << ": " << *L);
37836d4421fSJonas Paulsson   LLVM_DEBUG(dbgs() << "Loop has: "
37936d4421fSJonas Paulsson              << NumMemAccesses << " memory accesses, "
38036d4421fSJonas Paulsson              << NumStridedMemAccesses << " strided memory accesses, "
38136d4421fSJonas Paulsson              << Prefetches.size() << " potential prefetch(es), "
38236d4421fSJonas Paulsson              << "a minimum stride of " << TargetMinStride << ", "
38336d4421fSJonas Paulsson              << (HasCall ? "calls" : "no calls") << ".\n");
38436d4421fSJonas Paulsson 
38536d4421fSJonas Paulsson   for (auto &P : Prefetches) {
38636d4421fSJonas Paulsson     // Check if the stride of the accesses is large enough to warrant a
38736d4421fSJonas Paulsson     // prefetch.
38836d4421fSJonas Paulsson     if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
3899d9cb274SAdam Nemet       continue;
3909d9cb274SAdam Nemet 
39136d4421fSJonas Paulsson     const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
39236d4421fSJonas Paulsson       SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
39336d4421fSJonas Paulsson       P.LSCEVAddRec->getStepRecurrence(*SE)));
3949d9cb274SAdam Nemet     if (!isSafeToExpand(NextLSCEV, *SE))
3959d9cb274SAdam Nemet       continue;
3969d9cb274SAdam Nemet 
39736d4421fSJonas Paulsson     BasicBlock *BB = P.InsertPt->getParent();
39836d4421fSJonas Paulsson     Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
39936d4421fSJonas Paulsson     SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
40036d4421fSJonas Paulsson     Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
4019d9cb274SAdam Nemet 
40236d4421fSJonas Paulsson     IRBuilder<> Builder(P.InsertPt);
403c6cebf72SBalaram Makam     Module *M = BB->getParent()->getParent();
404c6cebf72SBalaram Makam     Type *I32 = Type::getInt32Ty(BB->getContext());
405dbc0a5dfSJF Bastien     Function *PrefetchFunc = Intrinsic::getDeclaration(
406dbc0a5dfSJF Bastien         M, Intrinsic::prefetch, PrefPtrValue->getType());
4079d9cb274SAdam Nemet     Builder.CreateCall(
4089d9cb274SAdam Nemet         PrefetchFunc,
4099d9cb274SAdam Nemet         {PrefPtrValue,
41036d4421fSJonas Paulsson          ConstantInt::get(I32, P.Writes),
4119d9cb274SAdam Nemet          ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
41234785ecfSAdam Nemet     ++NumPrefetches;
41336d4421fSJonas Paulsson     LLVM_DEBUG(dbgs() << "  Access: "
41436d4421fSJonas Paulsson                << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
41536d4421fSJonas Paulsson                << ", SCEV: " << *P.LSCEVAddRec << "\n");
4169590658fSVivek Pandya     ORE->emit([&]() {
41736d4421fSJonas Paulsson         return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
4189590658fSVivek Pandya           << "prefetched memory access";
4199590658fSVivek Pandya       });
4209d9cb274SAdam Nemet 
4219d9cb274SAdam Nemet     MadeChange = true;
4229d9cb274SAdam Nemet   }
4239d9cb274SAdam Nemet 
4249d9cb274SAdam Nemet   return MadeChange;
4259d9cb274SAdam Nemet }
426