10b57cec5SDimitry Andric //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements a Loop Data Prefetching Pass.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
14480093f4SDimitry Andric #include "llvm/InitializePasses.h"
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
170b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
180b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
190b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
230b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
240b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
250b57cec5SDimitry Andric #include "llvm/IR/CFG.h"
260b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
270b57cec5SDimitry Andric #include "llvm/IR/Function.h"
280b57cec5SDimitry Andric #include "llvm/IR/Module.h"
290b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
300b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
310b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
320b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
335ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
340b57cec5SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
35*5f7ddb14SDimitry Andric 
36*5f7ddb14SDimitry Andric #define DEBUG_TYPE "loop-data-prefetch"
37*5f7ddb14SDimitry Andric 
380b57cec5SDimitry Andric using namespace llvm;
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric // By default, we limit this to creating 16 PHIs (which is a little over half
410b57cec5SDimitry Andric // of the allocatable register set).
420b57cec5SDimitry Andric static cl::opt<bool>
430b57cec5SDimitry Andric PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
440b57cec5SDimitry Andric                cl::desc("Prefetch write addresses"));
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric static cl::opt<unsigned>
470b57cec5SDimitry Andric     PrefetchDistance("prefetch-distance",
480b57cec5SDimitry Andric                      cl::desc("Number of instructions to prefetch ahead"),
490b57cec5SDimitry Andric                      cl::Hidden);
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric static cl::opt<unsigned>
520b57cec5SDimitry Andric     MinPrefetchStride("min-prefetch-stride",
530b57cec5SDimitry Andric                       cl::desc("Min stride to add prefetches"), cl::Hidden);
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric static cl::opt<unsigned> MaxPrefetchIterationsAhead(
560b57cec5SDimitry Andric     "max-prefetch-iters-ahead",
570b57cec5SDimitry Andric     cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric STATISTIC(NumPrefetches, "Number of prefetches inserted");
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric namespace {
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric /// Loop prefetch implementation class.
640b57cec5SDimitry Andric class LoopDataPrefetch {
650b57cec5SDimitry Andric public:
LoopDataPrefetch(AssumptionCache * AC,DominatorTree * DT,LoopInfo * LI,ScalarEvolution * SE,const TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE)665ffd83dbSDimitry Andric   LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
675ffd83dbSDimitry Andric                    ScalarEvolution *SE, const TargetTransformInfo *TTI,
680b57cec5SDimitry Andric                    OptimizationRemarkEmitter *ORE)
695ffd83dbSDimitry Andric       : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric   bool run();
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric private:
740b57cec5SDimitry Andric   bool runOnLoop(Loop *L);
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric   /// Check if the stride of the accesses is large enough to
770b57cec5SDimitry Andric   /// warrant a prefetch.
785ffd83dbSDimitry Andric   bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
790b57cec5SDimitry Andric 
getMinPrefetchStride(unsigned NumMemAccesses,unsigned NumStridedMemAccesses,unsigned NumPrefetches,bool HasCall)805ffd83dbSDimitry Andric   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
815ffd83dbSDimitry Andric                                 unsigned NumStridedMemAccesses,
825ffd83dbSDimitry Andric                                 unsigned NumPrefetches,
835ffd83dbSDimitry Andric                                 bool HasCall) {
840b57cec5SDimitry Andric     if (MinPrefetchStride.getNumOccurrences() > 0)
850b57cec5SDimitry Andric       return MinPrefetchStride;
865ffd83dbSDimitry Andric     return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
875ffd83dbSDimitry Andric                                      NumPrefetches, HasCall);
880b57cec5SDimitry Andric   }
890b57cec5SDimitry Andric 
getPrefetchDistance()900b57cec5SDimitry Andric   unsigned getPrefetchDistance() {
910b57cec5SDimitry Andric     if (PrefetchDistance.getNumOccurrences() > 0)
920b57cec5SDimitry Andric       return PrefetchDistance;
930b57cec5SDimitry Andric     return TTI->getPrefetchDistance();
940b57cec5SDimitry Andric   }
950b57cec5SDimitry Andric 
getMaxPrefetchIterationsAhead()960b57cec5SDimitry Andric   unsigned getMaxPrefetchIterationsAhead() {
970b57cec5SDimitry Andric     if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
980b57cec5SDimitry Andric       return MaxPrefetchIterationsAhead;
990b57cec5SDimitry Andric     return TTI->getMaxPrefetchIterationsAhead();
1000b57cec5SDimitry Andric   }
1010b57cec5SDimitry Andric 
doPrefetchWrites()1025ffd83dbSDimitry Andric   bool doPrefetchWrites() {
1035ffd83dbSDimitry Andric     if (PrefetchWrites.getNumOccurrences() > 0)
1045ffd83dbSDimitry Andric       return PrefetchWrites;
1055ffd83dbSDimitry Andric     return TTI->enableWritePrefetching();
1065ffd83dbSDimitry Andric   }
1075ffd83dbSDimitry Andric 
1080b57cec5SDimitry Andric   AssumptionCache *AC;
1095ffd83dbSDimitry Andric   DominatorTree *DT;
1100b57cec5SDimitry Andric   LoopInfo *LI;
1110b57cec5SDimitry Andric   ScalarEvolution *SE;
1120b57cec5SDimitry Andric   const TargetTransformInfo *TTI;
1130b57cec5SDimitry Andric   OptimizationRemarkEmitter *ORE;
1140b57cec5SDimitry Andric };
1150b57cec5SDimitry Andric 
1160b57cec5SDimitry Andric /// Legacy class for inserting loop data prefetches.
1170b57cec5SDimitry Andric class LoopDataPrefetchLegacyPass : public FunctionPass {
1180b57cec5SDimitry Andric public:
1190b57cec5SDimitry Andric   static char ID; // Pass ID, replacement for typeid
LoopDataPrefetchLegacyPass()1200b57cec5SDimitry Andric   LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
1210b57cec5SDimitry Andric     initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());
1220b57cec5SDimitry Andric   }
1230b57cec5SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const1240b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
1250b57cec5SDimitry Andric     AU.addRequired<AssumptionCacheTracker>();
1265ffd83dbSDimitry Andric     AU.addRequired<DominatorTreeWrapperPass>();
1270b57cec5SDimitry Andric     AU.addPreserved<DominatorTreeWrapperPass>();
1280b57cec5SDimitry Andric     AU.addRequired<LoopInfoWrapperPass>();
1290b57cec5SDimitry Andric     AU.addPreserved<LoopInfoWrapperPass>();
1300b57cec5SDimitry Andric     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1310b57cec5SDimitry Andric     AU.addRequired<ScalarEvolutionWrapperPass>();
1320b57cec5SDimitry Andric     AU.addPreserved<ScalarEvolutionWrapperPass>();
1330b57cec5SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
1340b57cec5SDimitry Andric   }
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric   bool runOnFunction(Function &F) override;
1370b57cec5SDimitry Andric   };
1380b57cec5SDimitry Andric }
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric char LoopDataPrefetchLegacyPass::ID = 0;
1410b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
1420b57cec5SDimitry Andric                       "Loop Data Prefetch", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)1430b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1440b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1450b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1460b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
1470b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
1480b57cec5SDimitry Andric INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
1490b57cec5SDimitry Andric                     "Loop Data Prefetch", false, false)
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric FunctionPass *llvm::createLoopDataPrefetchPass() {
1520b57cec5SDimitry Andric   return new LoopDataPrefetchLegacyPass();
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric 
isStrideLargeEnough(const SCEVAddRecExpr * AR,unsigned TargetMinStride)1555ffd83dbSDimitry Andric bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
1565ffd83dbSDimitry Andric                                            unsigned TargetMinStride) {
1570b57cec5SDimitry Andric   // No need to check if any stride goes.
1580b57cec5SDimitry Andric   if (TargetMinStride <= 1)
1590b57cec5SDimitry Andric     return true;
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1620b57cec5SDimitry Andric   // If MinStride is set, don't prefetch unless we can ensure that stride is
1630b57cec5SDimitry Andric   // larger.
1640b57cec5SDimitry Andric   if (!ConstStride)
1650b57cec5SDimitry Andric     return false;
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric   unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
1680b57cec5SDimitry Andric   return TargetMinStride <= AbsStride;
1690b57cec5SDimitry Andric }
1700b57cec5SDimitry Andric 
run(Function & F,FunctionAnalysisManager & AM)1710b57cec5SDimitry Andric PreservedAnalyses LoopDataPrefetchPass::run(Function &F,
1720b57cec5SDimitry Andric                                             FunctionAnalysisManager &AM) {
1735ffd83dbSDimitry Andric   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1740b57cec5SDimitry Andric   LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
1750b57cec5SDimitry Andric   ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
1760b57cec5SDimitry Andric   AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
1770b57cec5SDimitry Andric   OptimizationRemarkEmitter *ORE =
1780b57cec5SDimitry Andric       &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1790b57cec5SDimitry Andric   const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
1800b57cec5SDimitry Andric 
1815ffd83dbSDimitry Andric   LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
1820b57cec5SDimitry Andric   bool Changed = LDP.run();
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric   if (Changed) {
1850b57cec5SDimitry Andric     PreservedAnalyses PA;
1860b57cec5SDimitry Andric     PA.preserve<DominatorTreeAnalysis>();
1870b57cec5SDimitry Andric     PA.preserve<LoopAnalysis>();
1880b57cec5SDimitry Andric     return PA;
1890b57cec5SDimitry Andric   }
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric   return PreservedAnalyses::all();
1920b57cec5SDimitry Andric }
1930b57cec5SDimitry Andric 
runOnFunction(Function & F)1940b57cec5SDimitry Andric bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
1950b57cec5SDimitry Andric   if (skipFunction(F))
1960b57cec5SDimitry Andric     return false;
1970b57cec5SDimitry Andric 
1985ffd83dbSDimitry Andric   DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1990b57cec5SDimitry Andric   LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2000b57cec5SDimitry Andric   ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2010b57cec5SDimitry Andric   AssumptionCache *AC =
2020b57cec5SDimitry Andric       &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2030b57cec5SDimitry Andric   OptimizationRemarkEmitter *ORE =
2040b57cec5SDimitry Andric       &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
2050b57cec5SDimitry Andric   const TargetTransformInfo *TTI =
2060b57cec5SDimitry Andric       &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2070b57cec5SDimitry Andric 
2085ffd83dbSDimitry Andric   LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
2090b57cec5SDimitry Andric   return LDP.run();
2100b57cec5SDimitry Andric }
2110b57cec5SDimitry Andric 
run()2120b57cec5SDimitry Andric bool LoopDataPrefetch::run() {
2130b57cec5SDimitry Andric   // If PrefetchDistance is not set, don't run the pass.  This gives an
2140b57cec5SDimitry Andric   // opportunity for targets to run this pass for selected subtargets only
2150b57cec5SDimitry Andric   // (whose TTI sets PrefetchDistance).
2160b57cec5SDimitry Andric   if (getPrefetchDistance() == 0)
2170b57cec5SDimitry Andric     return false;
2180b57cec5SDimitry Andric   assert(TTI->getCacheLineSize() && "Cache line size is not set for target");
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric   bool MadeChange = false;
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric   for (Loop *I : *LI)
2230b57cec5SDimitry Andric     for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
2240b57cec5SDimitry Andric       MadeChange |= runOnLoop(*L);
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric   return MadeChange;
2270b57cec5SDimitry Andric }
2280b57cec5SDimitry Andric 
2295ffd83dbSDimitry Andric /// A record for a potential prefetch made during the initial scan of the
2305ffd83dbSDimitry Andric /// loop. This is used to let a single prefetch target multiple memory accesses.
2315ffd83dbSDimitry Andric struct Prefetch {
2325ffd83dbSDimitry Andric   /// The address formula for this prefetch as returned by ScalarEvolution.
2335ffd83dbSDimitry Andric   const SCEVAddRecExpr *LSCEVAddRec;
2345ffd83dbSDimitry Andric   /// The point of insertion for the prefetch instruction.
2355ffd83dbSDimitry Andric   Instruction *InsertPt;
2365ffd83dbSDimitry Andric   /// True if targeting a write memory access.
2375ffd83dbSDimitry Andric   bool Writes;
2385ffd83dbSDimitry Andric   /// The (first seen) prefetched instruction.
2395ffd83dbSDimitry Andric   Instruction *MemI;
2405ffd83dbSDimitry Andric 
2415ffd83dbSDimitry Andric   /// Constructor to create a new Prefetch for \p I.
PrefetchPrefetch2425ffd83dbSDimitry Andric   Prefetch(const SCEVAddRecExpr *L, Instruction *I)
2435ffd83dbSDimitry Andric       : LSCEVAddRec(L), InsertPt(nullptr), Writes(false), MemI(nullptr) {
2445ffd83dbSDimitry Andric     addInstruction(I);
2455ffd83dbSDimitry Andric   };
2465ffd83dbSDimitry Andric 
2475ffd83dbSDimitry Andric   /// Add the instruction \param I to this prefetch. If it's not the first
2485ffd83dbSDimitry Andric   /// one, 'InsertPt' and 'Writes' will be updated as required.
2495ffd83dbSDimitry Andric   /// \param PtrDiff the known constant address difference to the first added
2505ffd83dbSDimitry Andric   /// instruction.
addInstructionPrefetch2515ffd83dbSDimitry Andric   void addInstruction(Instruction *I, DominatorTree *DT = nullptr,
2525ffd83dbSDimitry Andric                       int64_t PtrDiff = 0) {
2535ffd83dbSDimitry Andric     if (!InsertPt) {
2545ffd83dbSDimitry Andric       MemI = I;
2555ffd83dbSDimitry Andric       InsertPt = I;
2565ffd83dbSDimitry Andric       Writes = isa<StoreInst>(I);
2575ffd83dbSDimitry Andric     } else {
2585ffd83dbSDimitry Andric       BasicBlock *PrefBB = InsertPt->getParent();
2595ffd83dbSDimitry Andric       BasicBlock *InsBB = I->getParent();
2605ffd83dbSDimitry Andric       if (PrefBB != InsBB) {
2615ffd83dbSDimitry Andric         BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
2625ffd83dbSDimitry Andric         if (DomBB != PrefBB)
2635ffd83dbSDimitry Andric           InsertPt = DomBB->getTerminator();
2645ffd83dbSDimitry Andric       }
2655ffd83dbSDimitry Andric 
2665ffd83dbSDimitry Andric       if (isa<StoreInst>(I) && PtrDiff == 0)
2675ffd83dbSDimitry Andric         Writes = true;
2685ffd83dbSDimitry Andric     }
2695ffd83dbSDimitry Andric   }
2705ffd83dbSDimitry Andric };
2715ffd83dbSDimitry Andric 
runOnLoop(Loop * L)2720b57cec5SDimitry Andric bool LoopDataPrefetch::runOnLoop(Loop *L) {
2730b57cec5SDimitry Andric   bool MadeChange = false;
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric   // Only prefetch in the inner-most loop
276af732203SDimitry Andric   if (!L->isInnermost())
2770b57cec5SDimitry Andric     return MadeChange;
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric   SmallPtrSet<const Value *, 32> EphValues;
2800b57cec5SDimitry Andric   CodeMetrics::collectEphemeralValues(L, AC, EphValues);
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric   // Calculate the number of iterations ahead to prefetch
2830b57cec5SDimitry Andric   CodeMetrics Metrics;
2845ffd83dbSDimitry Andric   bool HasCall = false;
2850b57cec5SDimitry Andric   for (const auto BB : L->blocks()) {
2860b57cec5SDimitry Andric     // If the loop already has prefetches, then assume that the user knows
2870b57cec5SDimitry Andric     // what they are doing and don't add any more.
2885ffd83dbSDimitry Andric     for (auto &I : *BB) {
2895ffd83dbSDimitry Andric       if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
2905ffd83dbSDimitry Andric         if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
2910b57cec5SDimitry Andric           if (F->getIntrinsicID() == Intrinsic::prefetch)
2920b57cec5SDimitry Andric             return MadeChange;
2935ffd83dbSDimitry Andric           if (TTI->isLoweredToCall(F))
2945ffd83dbSDimitry Andric             HasCall = true;
2955ffd83dbSDimitry Andric         } else { // indirect call.
2965ffd83dbSDimitry Andric           HasCall = true;
2975ffd83dbSDimitry Andric         }
2985ffd83dbSDimitry Andric       }
2995ffd83dbSDimitry Andric     }
3000b57cec5SDimitry Andric     Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
3010b57cec5SDimitry Andric   }
3020b57cec5SDimitry Andric   unsigned LoopSize = Metrics.NumInsts;
3030b57cec5SDimitry Andric   if (!LoopSize)
3040b57cec5SDimitry Andric     LoopSize = 1;
3050b57cec5SDimitry Andric 
3060b57cec5SDimitry Andric   unsigned ItersAhead = getPrefetchDistance() / LoopSize;
3070b57cec5SDimitry Andric   if (!ItersAhead)
3080b57cec5SDimitry Andric     ItersAhead = 1;
3090b57cec5SDimitry Andric 
3100b57cec5SDimitry Andric   if (ItersAhead > getMaxPrefetchIterationsAhead())
3110b57cec5SDimitry Andric     return MadeChange;
3120b57cec5SDimitry Andric 
3135ffd83dbSDimitry Andric   unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
3145ffd83dbSDimitry Andric   if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
3155ffd83dbSDimitry Andric     return MadeChange;
3160b57cec5SDimitry Andric 
3175ffd83dbSDimitry Andric   unsigned NumMemAccesses = 0;
3185ffd83dbSDimitry Andric   unsigned NumStridedMemAccesses = 0;
3195ffd83dbSDimitry Andric   SmallVector<Prefetch, 16> Prefetches;
3205ffd83dbSDimitry Andric   for (const auto BB : L->blocks())
3210b57cec5SDimitry Andric     for (auto &I : *BB) {
3220b57cec5SDimitry Andric       Value *PtrValue;
3230b57cec5SDimitry Andric       Instruction *MemI;
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric       if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
3260b57cec5SDimitry Andric         MemI = LMemI;
3270b57cec5SDimitry Andric         PtrValue = LMemI->getPointerOperand();
3280b57cec5SDimitry Andric       } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
3295ffd83dbSDimitry Andric         if (!doPrefetchWrites()) continue;
3300b57cec5SDimitry Andric         MemI = SMemI;
3310b57cec5SDimitry Andric         PtrValue = SMemI->getPointerOperand();
3320b57cec5SDimitry Andric       } else continue;
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric       unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
3350b57cec5SDimitry Andric       if (PtrAddrSpace)
3360b57cec5SDimitry Andric         continue;
3375ffd83dbSDimitry Andric       NumMemAccesses++;
3380b57cec5SDimitry Andric       if (L->isLoopInvariant(PtrValue))
3390b57cec5SDimitry Andric         continue;
3400b57cec5SDimitry Andric 
3410b57cec5SDimitry Andric       const SCEV *LSCEV = SE->getSCEV(PtrValue);
3420b57cec5SDimitry Andric       const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
3430b57cec5SDimitry Andric       if (!LSCEVAddRec)
3440b57cec5SDimitry Andric         continue;
3455ffd83dbSDimitry Andric       NumStridedMemAccesses++;
3460b57cec5SDimitry Andric 
3475ffd83dbSDimitry Andric       // We don't want to double prefetch individual cache lines. If this
3485ffd83dbSDimitry Andric       // access is known to be within one cache line of some other one that
3495ffd83dbSDimitry Andric       // has already been prefetched, then don't prefetch this one as well.
3500b57cec5SDimitry Andric       bool DupPref = false;
3515ffd83dbSDimitry Andric       for (auto &Pref : Prefetches) {
3525ffd83dbSDimitry Andric         const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
3530b57cec5SDimitry Andric         if (const SCEVConstant *ConstPtrDiff =
3540b57cec5SDimitry Andric             dyn_cast<SCEVConstant>(PtrDiff)) {
3550b57cec5SDimitry Andric           int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
3560b57cec5SDimitry Andric           if (PD < (int64_t) TTI->getCacheLineSize()) {
3575ffd83dbSDimitry Andric             Pref.addInstruction(MemI, DT, PD);
3580b57cec5SDimitry Andric             DupPref = true;
3590b57cec5SDimitry Andric             break;
3600b57cec5SDimitry Andric           }
3610b57cec5SDimitry Andric         }
3620b57cec5SDimitry Andric       }
3635ffd83dbSDimitry Andric       if (!DupPref)
3645ffd83dbSDimitry Andric         Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
3655ffd83dbSDimitry Andric     }
3665ffd83dbSDimitry Andric 
3675ffd83dbSDimitry Andric   unsigned TargetMinStride =
3685ffd83dbSDimitry Andric     getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
3695ffd83dbSDimitry Andric                          Prefetches.size(), HasCall);
3705ffd83dbSDimitry Andric 
3715ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
3725ffd83dbSDimitry Andric              << " iterations ahead (loop size: " << LoopSize << ") in "
3735ffd83dbSDimitry Andric              << L->getHeader()->getParent()->getName() << ": " << *L);
3745ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << "Loop has: "
3755ffd83dbSDimitry Andric              << NumMemAccesses << " memory accesses, "
3765ffd83dbSDimitry Andric              << NumStridedMemAccesses << " strided memory accesses, "
3775ffd83dbSDimitry Andric              << Prefetches.size() << " potential prefetch(es), "
3785ffd83dbSDimitry Andric              << "a minimum stride of " << TargetMinStride << ", "
3795ffd83dbSDimitry Andric              << (HasCall ? "calls" : "no calls") << ".\n");
3805ffd83dbSDimitry Andric 
3815ffd83dbSDimitry Andric   for (auto &P : Prefetches) {
3825ffd83dbSDimitry Andric     // Check if the stride of the accesses is large enough to warrant a
3835ffd83dbSDimitry Andric     // prefetch.
3845ffd83dbSDimitry Andric     if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
3850b57cec5SDimitry Andric       continue;
3860b57cec5SDimitry Andric 
3875ffd83dbSDimitry Andric     const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
3885ffd83dbSDimitry Andric       SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
3895ffd83dbSDimitry Andric       P.LSCEVAddRec->getStepRecurrence(*SE)));
3900b57cec5SDimitry Andric     if (!isSafeToExpand(NextLSCEV, *SE))
3910b57cec5SDimitry Andric       continue;
3920b57cec5SDimitry Andric 
3935ffd83dbSDimitry Andric     BasicBlock *BB = P.InsertPt->getParent();
3945ffd83dbSDimitry Andric     Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
3955ffd83dbSDimitry Andric     SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
3965ffd83dbSDimitry Andric     Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
3970b57cec5SDimitry Andric 
3985ffd83dbSDimitry Andric     IRBuilder<> Builder(P.InsertPt);
3990b57cec5SDimitry Andric     Module *M = BB->getParent()->getParent();
4000b57cec5SDimitry Andric     Type *I32 = Type::getInt32Ty(BB->getContext());
4018bcb0991SDimitry Andric     Function *PrefetchFunc = Intrinsic::getDeclaration(
4028bcb0991SDimitry Andric         M, Intrinsic::prefetch, PrefPtrValue->getType());
4030b57cec5SDimitry Andric     Builder.CreateCall(
4040b57cec5SDimitry Andric         PrefetchFunc,
4050b57cec5SDimitry Andric         {PrefPtrValue,
4065ffd83dbSDimitry Andric          ConstantInt::get(I32, P.Writes),
4070b57cec5SDimitry Andric          ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
4080b57cec5SDimitry Andric     ++NumPrefetches;
4095ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "  Access: "
4105ffd83dbSDimitry Andric                << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
4115ffd83dbSDimitry Andric                << ", SCEV: " << *P.LSCEVAddRec << "\n");
4120b57cec5SDimitry Andric     ORE->emit([&]() {
4135ffd83dbSDimitry Andric         return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
4140b57cec5SDimitry Andric           << "prefetched memory access";
4150b57cec5SDimitry Andric       });
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric     MadeChange = true;
4180b57cec5SDimitry Andric   }
4190b57cec5SDimitry Andric 
4200b57cec5SDimitry Andric   return MadeChange;
4210b57cec5SDimitry Andric }
422