19d9cb274SAdam Nemet //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
29d9cb274SAdam Nemet //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69d9cb274SAdam Nemet //
79d9cb274SAdam Nemet //===----------------------------------------------------------------------===//
89d9cb274SAdam Nemet //
99d9cb274SAdam Nemet // This file implements a Loop Data Prefetching Pass.
109d9cb274SAdam Nemet //
119d9cb274SAdam Nemet //===----------------------------------------------------------------------===//
129d9cb274SAdam Nemet
131eca6bc6STeresa Johnson #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
1405da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
151eca6bc6STeresa Johnson
169d9cb274SAdam Nemet #include "llvm/ADT/DepthFirstIterator.h"
179d9cb274SAdam Nemet #include "llvm/ADT/Statistic.h"
18aec2fa35SDaniel Jasper #include "llvm/Analysis/AssumptionCache.h"
199d9cb274SAdam Nemet #include "llvm/Analysis/CodeMetrics.h"
209d9cb274SAdam Nemet #include "llvm/Analysis/LoopInfo.h"
210965da20SAdam Nemet #include "llvm/Analysis/OptimizationRemarkEmitter.h"
229d9cb274SAdam Nemet #include "llvm/Analysis/ScalarEvolution.h"
239d9cb274SAdam Nemet #include "llvm/Analysis/ScalarEvolutionExpressions.h"
249d9cb274SAdam Nemet #include "llvm/Analysis/TargetTransformInfo.h"
259d9cb274SAdam Nemet #include "llvm/IR/Dominators.h"
269d9cb274SAdam Nemet #include "llvm/IR/Function.h"
279d9cb274SAdam Nemet #include "llvm/IR/Module.h"
289d9cb274SAdam Nemet #include "llvm/Support/CommandLine.h"
299d9cb274SAdam Nemet #include "llvm/Support/Debug.h"
30885f1de4SAdam Nemet #include "llvm/Transforms/Scalar.h"
3199c43363SAndrew Wei #include "llvm/Transforms/Utils.h"
32bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
3371acce68SMindong Chen
3471acce68SMindong Chen #define DEBUG_TYPE "loop-data-prefetch"
3571acce68SMindong Chen
369d9cb274SAdam Nemet using namespace llvm;
379d9cb274SAdam Nemet
389d9cb274SAdam Nemet // By default, we limit this to creating 16 PHIs (which is a little over half
399d9cb274SAdam Nemet // of the allocatable register set).
409d9cb274SAdam Nemet static cl::opt<bool>
419d9cb274SAdam Nemet PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
429d9cb274SAdam Nemet cl::desc("Prefetch write addresses"));
439d9cb274SAdam Nemet
441428d41fSAdam Nemet static cl::opt<unsigned>
451428d41fSAdam Nemet PrefetchDistance("prefetch-distance",
461428d41fSAdam Nemet cl::desc("Number of instructions to prefetch ahead"),
471428d41fSAdam Nemet cl::Hidden);
481428d41fSAdam Nemet
491428d41fSAdam Nemet static cl::opt<unsigned>
501428d41fSAdam Nemet MinPrefetchStride("min-prefetch-stride",
511428d41fSAdam Nemet cl::desc("Min stride to add prefetches"), cl::Hidden);
521428d41fSAdam Nemet
531428d41fSAdam Nemet static cl::opt<unsigned> MaxPrefetchIterationsAhead(
541428d41fSAdam Nemet "max-prefetch-iters-ahead",
551428d41fSAdam Nemet cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
561428d41fSAdam Nemet
5734785ecfSAdam Nemet STATISTIC(NumPrefetches, "Number of prefetches inserted");
5834785ecfSAdam Nemet
599d9cb274SAdam Nemet namespace {
609d9cb274SAdam Nemet
611eca6bc6STeresa Johnson /// Loop prefetch implementation class.
621eca6bc6STeresa Johnson class LoopDataPrefetch {
639d9cb274SAdam Nemet public:
LoopDataPrefetch(AssumptionCache * AC,DominatorTree * DT,LoopInfo * LI,ScalarEvolution * SE,const TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE)6436d4421fSJonas Paulsson LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
6536d4421fSJonas Paulsson ScalarEvolution *SE, const TargetTransformInfo *TTI,
661eca6bc6STeresa Johnson OptimizationRemarkEmitter *ORE)
6736d4421fSJonas Paulsson : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
689d9cb274SAdam Nemet
691eca6bc6STeresa Johnson bool run();
7085fba393SAdam Nemet
7185fba393SAdam Nemet private:
729d9cb274SAdam Nemet bool runOnLoop(Loop *L);
739d9cb274SAdam Nemet
745f8f34e4SAdrian Prantl /// Check if the stride of the accesses is large enough to
756d8beecaSAdam Nemet /// warrant a prefetch.
7636d4421fSJonas Paulsson bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
776d8beecaSAdam Nemet
getMinPrefetchStride(unsigned NumMemAccesses,unsigned NumStridedMemAccesses,unsigned NumPrefetches,bool HasCall)7836d4421fSJonas Paulsson unsigned getMinPrefetchStride(unsigned NumMemAccesses,
7936d4421fSJonas Paulsson unsigned NumStridedMemAccesses,
8036d4421fSJonas Paulsson unsigned NumPrefetches,
8136d4421fSJonas Paulsson bool HasCall) {
821428d41fSAdam Nemet if (MinPrefetchStride.getNumOccurrences() > 0)
831428d41fSAdam Nemet return MinPrefetchStride;
8436d4421fSJonas Paulsson return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
8536d4421fSJonas Paulsson NumPrefetches, HasCall);
861428d41fSAdam Nemet }
871428d41fSAdam Nemet
getPrefetchDistance()881428d41fSAdam Nemet unsigned getPrefetchDistance() {
891428d41fSAdam Nemet if (PrefetchDistance.getNumOccurrences() > 0)
901428d41fSAdam Nemet return PrefetchDistance;
911428d41fSAdam Nemet return TTI->getPrefetchDistance();
921428d41fSAdam Nemet }
931428d41fSAdam Nemet
getMaxPrefetchIterationsAhead()941428d41fSAdam Nemet unsigned getMaxPrefetchIterationsAhead() {
951428d41fSAdam Nemet if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
961428d41fSAdam Nemet return MaxPrefetchIterationsAhead;
971428d41fSAdam Nemet return TTI->getMaxPrefetchIterationsAhead();
981428d41fSAdam Nemet }
991428d41fSAdam Nemet
doPrefetchWrites()10036d4421fSJonas Paulsson bool doPrefetchWrites() {
10136d4421fSJonas Paulsson if (PrefetchWrites.getNumOccurrences() > 0)
10236d4421fSJonas Paulsson return PrefetchWrites;
10336d4421fSJonas Paulsson return TTI->enableWritePrefetching();
10436d4421fSJonas Paulsson }
10536d4421fSJonas Paulsson
106aec2fa35SDaniel Jasper AssumptionCache *AC;
10736d4421fSJonas Paulsson DominatorTree *DT;
1089d9cb274SAdam Nemet LoopInfo *LI;
1099d9cb274SAdam Nemet ScalarEvolution *SE;
1109d9cb274SAdam Nemet const TargetTransformInfo *TTI;
1119e6e63fbSAdam Nemet OptimizationRemarkEmitter *ORE;
1129d9cb274SAdam Nemet };
1131eca6bc6STeresa Johnson
1141eca6bc6STeresa Johnson /// Legacy class for inserting loop data prefetches.
1151eca6bc6STeresa Johnson class LoopDataPrefetchLegacyPass : public FunctionPass {
1161eca6bc6STeresa Johnson public:
1171eca6bc6STeresa Johnson static char ID; // Pass ID, replacement for typeid
LoopDataPrefetchLegacyPass()1181eca6bc6STeresa Johnson LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
1191eca6bc6STeresa Johnson initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());
1201eca6bc6STeresa Johnson }
1211eca6bc6STeresa Johnson
getAnalysisUsage(AnalysisUsage & AU) const1221eca6bc6STeresa Johnson void getAnalysisUsage(AnalysisUsage &AU) const override {
123aec2fa35SDaniel Jasper AU.addRequired<AssumptionCacheTracker>();
12436d4421fSJonas Paulsson AU.addRequired<DominatorTreeWrapperPass>();
1251eca6bc6STeresa Johnson AU.addPreserved<DominatorTreeWrapperPass>();
1261eca6bc6STeresa Johnson AU.addRequired<LoopInfoWrapperPass>();
1271eca6bc6STeresa Johnson AU.addPreserved<LoopInfoWrapperPass>();
12899c43363SAndrew Wei AU.addRequiredID(LoopSimplifyID);
12999c43363SAndrew Wei AU.addPreservedID(LoopSimplifyID);
1301eca6bc6STeresa Johnson AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1311eca6bc6STeresa Johnson AU.addRequired<ScalarEvolutionWrapperPass>();
13240549ad1SGeoff Berry AU.addPreserved<ScalarEvolutionWrapperPass>();
1331eca6bc6STeresa Johnson AU.addRequired<TargetTransformInfoWrapperPass>();
1341eca6bc6STeresa Johnson }
1351eca6bc6STeresa Johnson
1361eca6bc6STeresa Johnson bool runOnFunction(Function &F) override;
1371eca6bc6STeresa Johnson };
1389d9cb274SAdam Nemet }
1399d9cb274SAdam Nemet
1401eca6bc6STeresa Johnson char LoopDataPrefetchLegacyPass::ID = 0;
1411eca6bc6STeresa Johnson INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
1429d9cb274SAdam Nemet "Loop Data Prefetch", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)143aec2fa35SDaniel Jasper INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1449d9cb274SAdam Nemet INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1459d9cb274SAdam Nemet INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
14699c43363SAndrew Wei INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
1479e6e63fbSAdam Nemet INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
1489d9cb274SAdam Nemet INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
1491eca6bc6STeresa Johnson INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
1509d9cb274SAdam Nemet "Loop Data Prefetch", false, false)
1519d9cb274SAdam Nemet
1521eca6bc6STeresa Johnson FunctionPass *llvm::createLoopDataPrefetchPass() {
1531eca6bc6STeresa Johnson return new LoopDataPrefetchLegacyPass();
1541eca6bc6STeresa Johnson }
1559d9cb274SAdam Nemet
isStrideLargeEnough(const SCEVAddRecExpr * AR,unsigned TargetMinStride)15636d4421fSJonas Paulsson bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
15736d4421fSJonas Paulsson unsigned TargetMinStride) {
1586d8beecaSAdam Nemet // No need to check if any stride goes.
1596d8beecaSAdam Nemet if (TargetMinStride <= 1)
1606d8beecaSAdam Nemet return true;
1616d8beecaSAdam Nemet
1626d8beecaSAdam Nemet const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1636d8beecaSAdam Nemet // If MinStride is set, don't prefetch unless we can ensure that stride is
1646d8beecaSAdam Nemet // larger.
1656d8beecaSAdam Nemet if (!ConstStride)
1666d8beecaSAdam Nemet return false;
1676d8beecaSAdam Nemet
1686d8beecaSAdam Nemet unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
1696d8beecaSAdam Nemet return TargetMinStride <= AbsStride;
1706d8beecaSAdam Nemet }
1716d8beecaSAdam Nemet
run(Function & F,FunctionAnalysisManager & AM)1721eca6bc6STeresa Johnson PreservedAnalyses LoopDataPrefetchPass::run(Function &F,
1731eca6bc6STeresa Johnson FunctionAnalysisManager &AM) {
17436d4421fSJonas Paulsson DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1751eca6bc6STeresa Johnson LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
1761eca6bc6STeresa Johnson ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
177aec2fa35SDaniel Jasper AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
1781eca6bc6STeresa Johnson OptimizationRemarkEmitter *ORE =
1791eca6bc6STeresa Johnson &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1801eca6bc6STeresa Johnson const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
1811eca6bc6STeresa Johnson
18236d4421fSJonas Paulsson LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
1831eca6bc6STeresa Johnson bool Changed = LDP.run();
1841eca6bc6STeresa Johnson
1851eca6bc6STeresa Johnson if (Changed) {
1861eca6bc6STeresa Johnson PreservedAnalyses PA;
1871eca6bc6STeresa Johnson PA.preserve<DominatorTreeAnalysis>();
1881eca6bc6STeresa Johnson PA.preserve<LoopAnalysis>();
1891eca6bc6STeresa Johnson return PA;
1901eca6bc6STeresa Johnson }
1911eca6bc6STeresa Johnson
1921eca6bc6STeresa Johnson return PreservedAnalyses::all();
1931eca6bc6STeresa Johnson }
1941eca6bc6STeresa Johnson
runOnFunction(Function & F)1951eca6bc6STeresa Johnson bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
19650271f78SAndrew Kaylor if (skipFunction(F))
19750271f78SAndrew Kaylor return false;
19850271f78SAndrew Kaylor
19936d4421fSJonas Paulsson DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2001eca6bc6STeresa Johnson LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2011eca6bc6STeresa Johnson ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
202aec2fa35SDaniel Jasper AssumptionCache *AC =
203aec2fa35SDaniel Jasper &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2041eca6bc6STeresa Johnson OptimizationRemarkEmitter *ORE =
2051eca6bc6STeresa Johnson &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
2061eca6bc6STeresa Johnson const TargetTransformInfo *TTI =
2071eca6bc6STeresa Johnson &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2089d9cb274SAdam Nemet
20936d4421fSJonas Paulsson LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
2101eca6bc6STeresa Johnson return LDP.run();
2111eca6bc6STeresa Johnson }
2121eca6bc6STeresa Johnson
run()2131eca6bc6STeresa Johnson bool LoopDataPrefetch::run() {
214bb3680bdSAdam Nemet // If PrefetchDistance is not set, don't run the pass. This gives an
215bb3680bdSAdam Nemet // opportunity for targets to run this pass for selected subtargets only
216*d485c1b7Szhongyunde // (whose TTI sets PrefetchDistance and CacheLineSize).
217*d485c1b7Szhongyunde if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) {
218*d485c1b7Szhongyunde LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize "
219*d485c1b7Szhongyunde "for loop data prefetch.\n");
220bb3680bdSAdam Nemet return false;
221*d485c1b7Szhongyunde }
2229d9cb274SAdam Nemet
2239d9cb274SAdam Nemet bool MadeChange = false;
2249d9cb274SAdam Nemet
225135f735aSBenjamin Kramer for (Loop *I : *LI)
2269db0e216SKazu Hirata for (Loop *L : depth_first(I))
2279db0e216SKazu Hirata MadeChange |= runOnLoop(L);
2289d9cb274SAdam Nemet
2299d9cb274SAdam Nemet return MadeChange;
2309d9cb274SAdam Nemet }
2319d9cb274SAdam Nemet
23236d4421fSJonas Paulsson /// A record for a potential prefetch made during the initial scan of the
23336d4421fSJonas Paulsson /// loop. This is used to let a single prefetch target multiple memory accesses.
23436d4421fSJonas Paulsson struct Prefetch {
23536d4421fSJonas Paulsson /// The address formula for this prefetch as returned by ScalarEvolution.
23636d4421fSJonas Paulsson const SCEVAddRecExpr *LSCEVAddRec;
23736d4421fSJonas Paulsson /// The point of insertion for the prefetch instruction.
238fd3e8044SKazu Hirata Instruction *InsertPt = nullptr;
23936d4421fSJonas Paulsson /// True if targeting a write memory access.
240fd3e8044SKazu Hirata bool Writes = false;
24136d4421fSJonas Paulsson /// The (first seen) prefetched instruction.
242fd3e8044SKazu Hirata Instruction *MemI = nullptr;
24336d4421fSJonas Paulsson
244d5ea89f8SBenjamin Kramer /// Constructor to create a new Prefetch for \p I.
PrefetchPrefetch245fd3e8044SKazu Hirata Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
24636d4421fSJonas Paulsson addInstruction(I);
24736d4421fSJonas Paulsson };
24836d4421fSJonas Paulsson
24936d4421fSJonas Paulsson /// Add the instruction \param I to this prefetch. If it's not the first
25036d4421fSJonas Paulsson /// one, 'InsertPt' and 'Writes' will be updated as required.
25136d4421fSJonas Paulsson /// \param PtrDiff the known constant address difference to the first added
25236d4421fSJonas Paulsson /// instruction.
addInstructionPrefetch25336d4421fSJonas Paulsson void addInstruction(Instruction *I, DominatorTree *DT = nullptr,
25436d4421fSJonas Paulsson int64_t PtrDiff = 0) {
25536d4421fSJonas Paulsson if (!InsertPt) {
25636d4421fSJonas Paulsson MemI = I;
25736d4421fSJonas Paulsson InsertPt = I;
25836d4421fSJonas Paulsson Writes = isa<StoreInst>(I);
25936d4421fSJonas Paulsson } else {
26036d4421fSJonas Paulsson BasicBlock *PrefBB = InsertPt->getParent();
26136d4421fSJonas Paulsson BasicBlock *InsBB = I->getParent();
26236d4421fSJonas Paulsson if (PrefBB != InsBB) {
26336d4421fSJonas Paulsson BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
26436d4421fSJonas Paulsson if (DomBB != PrefBB)
26536d4421fSJonas Paulsson InsertPt = DomBB->getTerminator();
26636d4421fSJonas Paulsson }
26736d4421fSJonas Paulsson
26836d4421fSJonas Paulsson if (isa<StoreInst>(I) && PtrDiff == 0)
26936d4421fSJonas Paulsson Writes = true;
27036d4421fSJonas Paulsson }
27136d4421fSJonas Paulsson }
27236d4421fSJonas Paulsson };
27336d4421fSJonas Paulsson
runOnLoop(Loop * L)2749d9cb274SAdam Nemet bool LoopDataPrefetch::runOnLoop(Loop *L) {
2759d9cb274SAdam Nemet bool MadeChange = false;
2769d9cb274SAdam Nemet
2779d9cb274SAdam Nemet // Only prefetch in the inner-most loop
27889c1e35fSStefanos Baziotis if (!L->isInnermost())
2799d9cb274SAdam Nemet return MadeChange;
2809d9cb274SAdam Nemet
2819d9cb274SAdam Nemet SmallPtrSet<const Value *, 32> EphValues;
282aec2fa35SDaniel Jasper CodeMetrics::collectEphemeralValues(L, AC, EphValues);
2839d9cb274SAdam Nemet
2849d9cb274SAdam Nemet // Calculate the number of iterations ahead to prefetch
2859d9cb274SAdam Nemet CodeMetrics Metrics;
28636d4421fSJonas Paulsson bool HasCall = false;
287c6cebf72SBalaram Makam for (const auto BB : L->blocks()) {
2889d9cb274SAdam Nemet // If the loop already has prefetches, then assume that the user knows
2892cf5e89eSNico Weber // what they are doing and don't add any more.
29036d4421fSJonas Paulsson for (auto &I : *BB) {
29136d4421fSJonas Paulsson if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
292b8960b5dSMircea Trofin if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
2939d9cb274SAdam Nemet if (F->getIntrinsicID() == Intrinsic::prefetch)
2949d9cb274SAdam Nemet return MadeChange;
29536d4421fSJonas Paulsson if (TTI->isLoweredToCall(F))
29636d4421fSJonas Paulsson HasCall = true;
29736d4421fSJonas Paulsson } else { // indirect call.
29836d4421fSJonas Paulsson HasCall = true;
29936d4421fSJonas Paulsson }
30036d4421fSJonas Paulsson }
30136d4421fSJonas Paulsson }
302c6cebf72SBalaram Makam Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
3039d9cb274SAdam Nemet }
304f85c5079SPhilip Reames
305f85c5079SPhilip Reames if (!Metrics.NumInsts.isValid())
306f85c5079SPhilip Reames return MadeChange;
307f85c5079SPhilip Reames
308f85c5079SPhilip Reames unsigned LoopSize = *Metrics.NumInsts.getValue();
3099d9cb274SAdam Nemet if (!LoopSize)
3109d9cb274SAdam Nemet LoopSize = 1;
3119d9cb274SAdam Nemet
3121428d41fSAdam Nemet unsigned ItersAhead = getPrefetchDistance() / LoopSize;
3139d9cb274SAdam Nemet if (!ItersAhead)
3149d9cb274SAdam Nemet ItersAhead = 1;
3159d9cb274SAdam Nemet
3161428d41fSAdam Nemet if (ItersAhead > getMaxPrefetchIterationsAhead())
317709e3046SAdam Nemet return MadeChange;
318709e3046SAdam Nemet
31936d4421fSJonas Paulsson unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
32036d4421fSJonas Paulsson if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
32136d4421fSJonas Paulsson return MadeChange;
32234785ecfSAdam Nemet
32336d4421fSJonas Paulsson unsigned NumMemAccesses = 0;
32436d4421fSJonas Paulsson unsigned NumStridedMemAccesses = 0;
32536d4421fSJonas Paulsson SmallVector<Prefetch, 16> Prefetches;
32636d4421fSJonas Paulsson for (const auto BB : L->blocks())
327c6cebf72SBalaram Makam for (auto &I : *BB) {
3289d9cb274SAdam Nemet Value *PtrValue;
3299d9cb274SAdam Nemet Instruction *MemI;
3309d9cb274SAdam Nemet
331c6cebf72SBalaram Makam if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
3329d9cb274SAdam Nemet MemI = LMemI;
3339d9cb274SAdam Nemet PtrValue = LMemI->getPointerOperand();
334c6cebf72SBalaram Makam } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
33536d4421fSJonas Paulsson if (!doPrefetchWrites()) continue;
3369d9cb274SAdam Nemet MemI = SMemI;
3379d9cb274SAdam Nemet PtrValue = SMemI->getPointerOperand();
3389d9cb274SAdam Nemet } else continue;
3399d9cb274SAdam Nemet
3409d9cb274SAdam Nemet unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
3419d9cb274SAdam Nemet if (PtrAddrSpace)
3429d9cb274SAdam Nemet continue;
34336d4421fSJonas Paulsson NumMemAccesses++;
3449d9cb274SAdam Nemet if (L->isLoopInvariant(PtrValue))
3459d9cb274SAdam Nemet continue;
3469d9cb274SAdam Nemet
3479d9cb274SAdam Nemet const SCEV *LSCEV = SE->getSCEV(PtrValue);
3489d9cb274SAdam Nemet const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
3499d9cb274SAdam Nemet if (!LSCEVAddRec)
3509d9cb274SAdam Nemet continue;
35136d4421fSJonas Paulsson NumStridedMemAccesses++;
3529d9cb274SAdam Nemet
35336d4421fSJonas Paulsson // We don't want to double prefetch individual cache lines. If this
35436d4421fSJonas Paulsson // access is known to be within one cache line of some other one that
35536d4421fSJonas Paulsson // has already been prefetched, then don't prefetch this one as well.
3569d9cb274SAdam Nemet bool DupPref = false;
35736d4421fSJonas Paulsson for (auto &Pref : Prefetches) {
35836d4421fSJonas Paulsson const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
3599d9cb274SAdam Nemet if (const SCEVConstant *ConstPtrDiff =
3609d9cb274SAdam Nemet dyn_cast<SCEVConstant>(PtrDiff)) {
3619d9cb274SAdam Nemet int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
3629d9cb274SAdam Nemet if (PD < (int64_t) TTI->getCacheLineSize()) {
36336d4421fSJonas Paulsson Pref.addInstruction(MemI, DT, PD);
3649d9cb274SAdam Nemet DupPref = true;
3659d9cb274SAdam Nemet break;
3669d9cb274SAdam Nemet }
3679d9cb274SAdam Nemet }
3689d9cb274SAdam Nemet }
36936d4421fSJonas Paulsson if (!DupPref)
37036d4421fSJonas Paulsson Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
37136d4421fSJonas Paulsson }
37236d4421fSJonas Paulsson
37336d4421fSJonas Paulsson unsigned TargetMinStride =
37436d4421fSJonas Paulsson getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
37536d4421fSJonas Paulsson Prefetches.size(), HasCall);
37636d4421fSJonas Paulsson
37736d4421fSJonas Paulsson LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
37836d4421fSJonas Paulsson << " iterations ahead (loop size: " << LoopSize << ") in "
37936d4421fSJonas Paulsson << L->getHeader()->getParent()->getName() << ": " << *L);
38036d4421fSJonas Paulsson LLVM_DEBUG(dbgs() << "Loop has: "
38136d4421fSJonas Paulsson << NumMemAccesses << " memory accesses, "
38236d4421fSJonas Paulsson << NumStridedMemAccesses << " strided memory accesses, "
38336d4421fSJonas Paulsson << Prefetches.size() << " potential prefetch(es), "
38436d4421fSJonas Paulsson << "a minimum stride of " << TargetMinStride << ", "
38536d4421fSJonas Paulsson << (HasCall ? "calls" : "no calls") << ".\n");
38636d4421fSJonas Paulsson
38736d4421fSJonas Paulsson for (auto &P : Prefetches) {
38836d4421fSJonas Paulsson // Check if the stride of the accesses is large enough to warrant a
38936d4421fSJonas Paulsson // prefetch.
39036d4421fSJonas Paulsson if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
3919d9cb274SAdam Nemet continue;
3929d9cb274SAdam Nemet
393dcf4b733SNikita Popov BasicBlock *BB = P.InsertPt->getParent();
394dcf4b733SNikita Popov SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
39536d4421fSJonas Paulsson const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
39636d4421fSJonas Paulsson SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
39736d4421fSJonas Paulsson P.LSCEVAddRec->getStepRecurrence(*SE)));
398dcf4b733SNikita Popov if (!SCEVE.isSafeToExpand(NextLSCEV))
3999d9cb274SAdam Nemet continue;
4009d9cb274SAdam Nemet
40136d4421fSJonas Paulsson Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
40236d4421fSJonas Paulsson Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
4039d9cb274SAdam Nemet
40436d4421fSJonas Paulsson IRBuilder<> Builder(P.InsertPt);
405c6cebf72SBalaram Makam Module *M = BB->getParent()->getParent();
406c6cebf72SBalaram Makam Type *I32 = Type::getInt32Ty(BB->getContext());
407dbc0a5dfSJF Bastien Function *PrefetchFunc = Intrinsic::getDeclaration(
408dbc0a5dfSJF Bastien M, Intrinsic::prefetch, PrefPtrValue->getType());
4099d9cb274SAdam Nemet Builder.CreateCall(
4109d9cb274SAdam Nemet PrefetchFunc,
4119d9cb274SAdam Nemet {PrefPtrValue,
41236d4421fSJonas Paulsson ConstantInt::get(I32, P.Writes),
4139d9cb274SAdam Nemet ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
41434785ecfSAdam Nemet ++NumPrefetches;
41536d4421fSJonas Paulsson LLVM_DEBUG(dbgs() << " Access: "
41636d4421fSJonas Paulsson << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
41736d4421fSJonas Paulsson << ", SCEV: " << *P.LSCEVAddRec << "\n");
4189590658fSVivek Pandya ORE->emit([&]() {
41936d4421fSJonas Paulsson return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
4209590658fSVivek Pandya << "prefetched memory access";
4219590658fSVivek Pandya });
4229d9cb274SAdam Nemet
4239d9cb274SAdam Nemet MadeChange = true;
4249d9cb274SAdam Nemet }
4259d9cb274SAdam Nemet
4269d9cb274SAdam Nemet return MadeChange;
4279d9cb274SAdam Nemet }
428