198189755Sczhengsz //===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===// 21260ea74SJinsong Ji // 31260ea74SJinsong Ji // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 41260ea74SJinsong Ji // See https://llvm.org/LICENSE.txt for license information. 51260ea74SJinsong Ji // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 61260ea74SJinsong Ji // 71260ea74SJinsong Ji //===----------------------------------------------------------------------===// 81260ea74SJinsong Ji // 91260ea74SJinsong Ji // This file implements a pass to prepare loops for ppc preferred addressing 101260ea74SJinsong Ji // modes, leveraging different instruction form. (eg: DS/DQ form, D/DS form with 111260ea74SJinsong Ji // update) 121260ea74SJinsong Ji // Additional PHIs are created for loop induction variables used by load/store 131260ea74SJinsong Ji // instructions so that preferred addressing modes can be used. 141260ea74SJinsong Ji // 151260ea74SJinsong Ji // 1: DS/DQ form preparation, prepare the load/store instructions so that they 161260ea74SJinsong Ji // can satisfy the DS/DQ form displacement requirements. 171260ea74SJinsong Ji // Generically, this means transforming loops like this: 181260ea74SJinsong Ji // for (int i = 0; i < n; ++i) { 191260ea74SJinsong Ji // unsigned long x1 = *(unsigned long *)(p + i + 5); 201260ea74SJinsong Ji // unsigned long x2 = *(unsigned long *)(p + i + 9); 211260ea74SJinsong Ji // } 221260ea74SJinsong Ji // 231260ea74SJinsong Ji // to look like this: 241260ea74SJinsong Ji // 251260ea74SJinsong Ji // unsigned NewP = p + 5; 261260ea74SJinsong Ji // for (int i = 0; i < n; ++i) { 271260ea74SJinsong Ji // unsigned long x1 = *(unsigned long *)(i + NewP); 281260ea74SJinsong Ji // unsigned long x2 = *(unsigned long *)(i + NewP + 4); 291260ea74SJinsong Ji // } 301260ea74SJinsong Ji // 311260ea74SJinsong Ji // 2: D/DS form with update preparation, prepare the load/store instructions so 321260ea74SJinsong Ji // that we can use update form to do pre-increment. 331260ea74SJinsong Ji // Generically, this means transforming loops like this: 341260ea74SJinsong Ji // for (int i = 0; i < n; ++i) 351260ea74SJinsong Ji // array[i] = c; 361260ea74SJinsong Ji // 371260ea74SJinsong Ji // to look like this: 381260ea74SJinsong Ji // 391260ea74SJinsong Ji // T *p = array[-1]; 401260ea74SJinsong Ji // for (int i = 0; i < n; ++i) 411260ea74SJinsong Ji // *++p = c; 421260ea74SJinsong Ji //===----------------------------------------------------------------------===// 431260ea74SJinsong Ji 4498189755Sczhengsz #define DEBUG_TYPE "ppc-loop-instr-form-prep" 451260ea74SJinsong Ji 461260ea74SJinsong Ji #include "PPC.h" 471260ea74SJinsong Ji #include "PPCSubtarget.h" 481260ea74SJinsong Ji #include "PPCTargetMachine.h" 491260ea74SJinsong Ji #include "llvm/ADT/DepthFirstIterator.h" 501260ea74SJinsong Ji #include "llvm/ADT/SmallPtrSet.h" 511260ea74SJinsong Ji #include "llvm/ADT/SmallSet.h" 521260ea74SJinsong Ji #include "llvm/ADT/SmallVector.h" 531260ea74SJinsong Ji #include "llvm/ADT/Statistic.h" 541260ea74SJinsong Ji #include "llvm/Analysis/LoopInfo.h" 551260ea74SJinsong Ji #include "llvm/Analysis/ScalarEvolution.h" 561260ea74SJinsong Ji #include "llvm/Analysis/ScalarEvolutionExpressions.h" 571260ea74SJinsong Ji #include "llvm/IR/BasicBlock.h" 581260ea74SJinsong Ji #include "llvm/IR/CFG.h" 591260ea74SJinsong Ji #include "llvm/IR/Dominators.h" 601260ea74SJinsong Ji #include "llvm/IR/Instruction.h" 611260ea74SJinsong Ji #include "llvm/IR/Instructions.h" 621260ea74SJinsong Ji #include "llvm/IR/IntrinsicInst.h" 631260ea74SJinsong Ji #include "llvm/IR/Module.h" 641260ea74SJinsong Ji #include "llvm/IR/Type.h" 651260ea74SJinsong Ji #include "llvm/IR/Value.h" 661260ea74SJinsong Ji #include "llvm/InitializePasses.h" 671260ea74SJinsong Ji #include "llvm/Pass.h" 681260ea74SJinsong Ji #include "llvm/Support/Casting.h" 691260ea74SJinsong Ji #include "llvm/Support/CommandLine.h" 701260ea74SJinsong Ji #include "llvm/Support/Debug.h" 711260ea74SJinsong Ji #include "llvm/Transforms/Scalar.h" 721260ea74SJinsong Ji #include "llvm/Transforms/Utils.h" 731260ea74SJinsong Ji #include "llvm/Transforms/Utils/BasicBlockUtils.h" 741260ea74SJinsong Ji #include "llvm/Transforms/Utils/Local.h" 751260ea74SJinsong Ji #include "llvm/Transforms/Utils/LoopUtils.h" 76*bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 771260ea74SJinsong Ji #include <cassert> 781260ea74SJinsong Ji #include <iterator> 791260ea74SJinsong Ji #include <utility> 801260ea74SJinsong Ji 811260ea74SJinsong Ji using namespace llvm; 821260ea74SJinsong Ji 831260ea74SJinsong Ji // By default, we limit this to creating 16 common bases out of loops per 841260ea74SJinsong Ji // function. 16 is a little over half of the allocatable register set. 851260ea74SJinsong Ji static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars", 861260ea74SJinsong Ji cl::Hidden, cl::init(16), 871260ea74SJinsong Ji cl::desc("Potential common base number threshold per function for PPC loop " 881260ea74SJinsong Ji "prep")); 891260ea74SJinsong Ji 901260ea74SJinsong Ji static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update", 911260ea74SJinsong Ji cl::init(true), cl::Hidden, 921260ea74SJinsong Ji cl::desc("prefer update form when ds form is also a update form")); 931260ea74SJinsong Ji 941260ea74SJinsong Ji // Sum of following 3 per loop thresholds for all loops can not be larger 951260ea74SJinsong Ji // than MaxVarsPrep. 961260ea74SJinsong Ji // By default, we limit this to creating 9 PHIs for one loop. 971260ea74SJinsong Ji // 9 and 3 for each kind prep are exterimental values on Power9. 981260ea74SJinsong Ji static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars", 991260ea74SJinsong Ji cl::Hidden, cl::init(3), 1001260ea74SJinsong Ji cl::desc("Potential PHI threshold per loop for PPC loop prep of update " 1011260ea74SJinsong Ji "form")); 1021260ea74SJinsong Ji 1031260ea74SJinsong Ji static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars", 1041260ea74SJinsong Ji cl::Hidden, cl::init(3), 1051260ea74SJinsong Ji cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form")); 1061260ea74SJinsong Ji 1071260ea74SJinsong Ji static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars", 1081260ea74SJinsong Ji cl::Hidden, cl::init(3), 1091260ea74SJinsong Ji cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form")); 1101260ea74SJinsong Ji 1111260ea74SJinsong Ji 1121260ea74SJinsong Ji // If would not be profitable if the common base has only one load/store, ISEL 1131260ea74SJinsong Ji // should already be able to choose best load/store form based on offset for 1141260ea74SJinsong Ji // single load/store. Set minimal profitable value default to 2 and make it as 1151260ea74SJinsong Ji // an option. 1161260ea74SJinsong Ji static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold", 1171260ea74SJinsong Ji cl::Hidden, cl::init(2), 1181260ea74SJinsong Ji cl::desc("Minimal common base load/store instructions triggering DS/DQ form " 1191260ea74SJinsong Ji "preparation")); 1201260ea74SJinsong Ji 1211260ea74SJinsong Ji STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form"); 1221260ea74SJinsong Ji STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form"); 1231260ea74SJinsong Ji STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form"); 1241260ea74SJinsong Ji STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten"); 1251260ea74SJinsong Ji STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten"); 1261260ea74SJinsong Ji STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten"); 1271260ea74SJinsong Ji 1281260ea74SJinsong Ji namespace { 1291260ea74SJinsong Ji struct BucketElement { 1301260ea74SJinsong Ji BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} 1311260ea74SJinsong Ji BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} 1321260ea74SJinsong Ji 1331260ea74SJinsong Ji const SCEVConstant *Offset; 1341260ea74SJinsong Ji Instruction *Instr; 1351260ea74SJinsong Ji }; 1361260ea74SJinsong Ji 1371260ea74SJinsong Ji struct Bucket { 1381260ea74SJinsong Ji Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), 1391260ea74SJinsong Ji Elements(1, BucketElement(I)) {} 1401260ea74SJinsong Ji 1411260ea74SJinsong Ji const SCEV *BaseSCEV; 1421260ea74SJinsong Ji SmallVector<BucketElement, 16> Elements; 1431260ea74SJinsong Ji }; 1441260ea74SJinsong Ji 1451260ea74SJinsong Ji // "UpdateForm" is not a real PPC instruction form, it stands for dform 1461260ea74SJinsong Ji // load/store with update like ldu/stdu, or Prefetch intrinsic. 1471260ea74SJinsong Ji // For DS form instructions, their displacements must be multiple of 4. 1481260ea74SJinsong Ji // For DQ form instructions, their displacements must be multiple of 16. 1491260ea74SJinsong Ji enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 }; 1501260ea74SJinsong Ji 15198189755Sczhengsz class PPCLoopInstrFormPrep : public FunctionPass { 1521260ea74SJinsong Ji public: 1531260ea74SJinsong Ji static char ID; // Pass ID, replacement for typeid 1541260ea74SJinsong Ji 15598189755Sczhengsz PPCLoopInstrFormPrep() : FunctionPass(ID) { 15698189755Sczhengsz initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); 1571260ea74SJinsong Ji } 1581260ea74SJinsong Ji 15998189755Sczhengsz PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { 16098189755Sczhengsz initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); 1611260ea74SJinsong Ji } 1621260ea74SJinsong Ji 1631260ea74SJinsong Ji void getAnalysisUsage(AnalysisUsage &AU) const override { 1641260ea74SJinsong Ji AU.addPreserved<DominatorTreeWrapperPass>(); 1651260ea74SJinsong Ji AU.addRequired<LoopInfoWrapperPass>(); 1661260ea74SJinsong Ji AU.addPreserved<LoopInfoWrapperPass>(); 1671260ea74SJinsong Ji AU.addRequired<ScalarEvolutionWrapperPass>(); 1681260ea74SJinsong Ji } 1691260ea74SJinsong Ji 1701260ea74SJinsong Ji bool runOnFunction(Function &F) override; 1711260ea74SJinsong Ji 1721260ea74SJinsong Ji private: 1731260ea74SJinsong Ji PPCTargetMachine *TM = nullptr; 1741260ea74SJinsong Ji const PPCSubtarget *ST; 1751260ea74SJinsong Ji DominatorTree *DT; 1761260ea74SJinsong Ji LoopInfo *LI; 1771260ea74SJinsong Ji ScalarEvolution *SE; 1781260ea74SJinsong Ji bool PreserveLCSSA; 1791260ea74SJinsong Ji 1801260ea74SJinsong Ji /// Successful preparation number for Update/DS/DQ form in all inner most 1811260ea74SJinsong Ji /// loops. One successful preparation will put one common base out of loop, 1821260ea74SJinsong Ji /// this may leads to register presure like LICM does. 1831260ea74SJinsong Ji /// Make sure total preparation number can be controlled by option. 1841260ea74SJinsong Ji unsigned SuccPrepCount; 1851260ea74SJinsong Ji 1861260ea74SJinsong Ji bool runOnLoop(Loop *L); 1871260ea74SJinsong Ji 1881260ea74SJinsong Ji /// Check if required PHI node is already exist in Loop \p L. 1891260ea74SJinsong Ji bool alreadyPrepared(Loop *L, Instruction* MemI, 1901260ea74SJinsong Ji const SCEV *BasePtrStartSCEV, 1911260ea74SJinsong Ji const SCEVConstant *BasePtrIncSCEV, 1921260ea74SJinsong Ji InstrForm Form); 1931260ea74SJinsong Ji 1941260ea74SJinsong Ji /// Collect condition matched(\p isValidCandidate() returns true) 1951260ea74SJinsong Ji /// candidates in Loop \p L. 1961260ea74SJinsong Ji SmallVector<Bucket, 16> 1971260ea74SJinsong Ji collectCandidates(Loop *L, 1981260ea74SJinsong Ji std::function<bool(const Instruction *, const Value *)> 1991260ea74SJinsong Ji isValidCandidate, 2001260ea74SJinsong Ji unsigned MaxCandidateNum); 2011260ea74SJinsong Ji 2021260ea74SJinsong Ji /// Add a candidate to candidates \p Buckets. 2031260ea74SJinsong Ji void addOneCandidate(Instruction *MemI, const SCEV *LSCEV, 2041260ea74SJinsong Ji SmallVector<Bucket, 16> &Buckets, 2051260ea74SJinsong Ji unsigned MaxCandidateNum); 2061260ea74SJinsong Ji 2071260ea74SJinsong Ji /// Prepare all candidates in \p Buckets for update form. 2081260ea74SJinsong Ji bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets); 2091260ea74SJinsong Ji 2101260ea74SJinsong Ji /// Prepare all candidates in \p Buckets for displacement form, now for 2111260ea74SJinsong Ji /// ds/dq. 2121260ea74SJinsong Ji bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, 2131260ea74SJinsong Ji InstrForm Form); 2141260ea74SJinsong Ji 2151260ea74SJinsong Ji /// Prepare for one chain \p BucketChain, find the best base element and 2161260ea74SJinsong Ji /// update all other elements in \p BucketChain accordingly. 2171260ea74SJinsong Ji /// \p Form is used to find the best base element. 2181260ea74SJinsong Ji /// If success, best base element must be stored as the first element of 2191260ea74SJinsong Ji /// \p BucketChain. 2201260ea74SJinsong Ji /// Return false if no base element found, otherwise return true. 2211260ea74SJinsong Ji bool prepareBaseForDispFormChain(Bucket &BucketChain, 2221260ea74SJinsong Ji InstrForm Form); 2231260ea74SJinsong Ji 2241260ea74SJinsong Ji /// Prepare for one chain \p BucketChain, find the best base element and 2251260ea74SJinsong Ji /// update all other elements in \p BucketChain accordingly. 2261260ea74SJinsong Ji /// If success, best base element must be stored as the first element of 2271260ea74SJinsong Ji /// \p BucketChain. 2281260ea74SJinsong Ji /// Return false if no base element found, otherwise return true. 2291260ea74SJinsong Ji bool prepareBaseForUpdateFormChain(Bucket &BucketChain); 2301260ea74SJinsong Ji 2311260ea74SJinsong Ji /// Rewrite load/store instructions in \p BucketChain according to 2321260ea74SJinsong Ji /// preparation. 2331260ea74SJinsong Ji bool rewriteLoadStores(Loop *L, Bucket &BucketChain, 2341260ea74SJinsong Ji SmallSet<BasicBlock *, 16> &BBChanged, 2351260ea74SJinsong Ji InstrForm Form); 2361260ea74SJinsong Ji }; 2371260ea74SJinsong Ji 2381260ea74SJinsong Ji } // end anonymous namespace 2391260ea74SJinsong Ji 24098189755Sczhengsz char PPCLoopInstrFormPrep::ID = 0; 24198189755Sczhengsz static const char *name = "Prepare loop for ppc preferred instruction forms"; 24298189755Sczhengsz INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) 2431260ea74SJinsong Ji INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 2441260ea74SJinsong Ji INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 24598189755Sczhengsz INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) 2461260ea74SJinsong Ji 2471260ea74SJinsong Ji static const std::string PHINodeNameSuffix = ".phi"; 2481260ea74SJinsong Ji static const std::string CastNodeNameSuffix = ".cast"; 2491260ea74SJinsong Ji static const std::string GEPNodeIncNameSuffix = ".inc"; 2501260ea74SJinsong Ji static const std::string GEPNodeOffNameSuffix = ".off"; 2511260ea74SJinsong Ji 25298189755Sczhengsz FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) { 25398189755Sczhengsz return new PPCLoopInstrFormPrep(TM); 2541260ea74SJinsong Ji } 2551260ea74SJinsong Ji 2561260ea74SJinsong Ji static bool IsPtrInBounds(Value *BasePtr) { 2571260ea74SJinsong Ji Value *StrippedBasePtr = BasePtr; 2581260ea74SJinsong Ji while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr)) 2591260ea74SJinsong Ji StrippedBasePtr = BC->getOperand(0); 2601260ea74SJinsong Ji if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr)) 2611260ea74SJinsong Ji return GEP->isInBounds(); 2621260ea74SJinsong Ji 2631260ea74SJinsong Ji return false; 2641260ea74SJinsong Ji } 2651260ea74SJinsong Ji 2661260ea74SJinsong Ji static std::string getInstrName(const Value *I, const std::string Suffix) { 2671260ea74SJinsong Ji assert(I && "Invalid paramater!"); 2681260ea74SJinsong Ji if (I->hasName()) 2691260ea74SJinsong Ji return (I->getName() + Suffix).str(); 2701260ea74SJinsong Ji else 2711260ea74SJinsong Ji return ""; 2721260ea74SJinsong Ji } 2731260ea74SJinsong Ji 2741260ea74SJinsong Ji static Value *GetPointerOperand(Value *MemI) { 2751260ea74SJinsong Ji if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) { 2761260ea74SJinsong Ji return LMemI->getPointerOperand(); 2771260ea74SJinsong Ji } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) { 2781260ea74SJinsong Ji return SMemI->getPointerOperand(); 2791260ea74SJinsong Ji } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) { 2801260ea74SJinsong Ji if (IMemI->getIntrinsicID() == Intrinsic::prefetch) 2811260ea74SJinsong Ji return IMemI->getArgOperand(0); 2821260ea74SJinsong Ji } 2831260ea74SJinsong Ji 2841260ea74SJinsong Ji return nullptr; 2851260ea74SJinsong Ji } 2861260ea74SJinsong Ji 28798189755Sczhengsz bool PPCLoopInstrFormPrep::runOnFunction(Function &F) { 2881260ea74SJinsong Ji if (skipFunction(F)) 2891260ea74SJinsong Ji return false; 2901260ea74SJinsong Ji 2911260ea74SJinsong Ji LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2921260ea74SJinsong Ji SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2931260ea74SJinsong Ji auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 2941260ea74SJinsong Ji DT = DTWP ? &DTWP->getDomTree() : nullptr; 2951260ea74SJinsong Ji PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 2961260ea74SJinsong Ji ST = TM ? TM->getSubtargetImpl(F) : nullptr; 2971260ea74SJinsong Ji SuccPrepCount = 0; 2981260ea74SJinsong Ji 2991260ea74SJinsong Ji bool MadeChange = false; 3001260ea74SJinsong Ji 3011260ea74SJinsong Ji for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) 3021260ea74SJinsong Ji for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) 3031260ea74SJinsong Ji MadeChange |= runOnLoop(*L); 3041260ea74SJinsong Ji 3051260ea74SJinsong Ji return MadeChange; 3061260ea74SJinsong Ji } 3071260ea74SJinsong Ji 30898189755Sczhengsz void PPCLoopInstrFormPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, 3091260ea74SJinsong Ji SmallVector<Bucket, 16> &Buckets, 3101260ea74SJinsong Ji unsigned MaxCandidateNum) { 3111260ea74SJinsong Ji assert((MemI && GetPointerOperand(MemI)) && 3121260ea74SJinsong Ji "Candidate should be a memory instruction."); 3131260ea74SJinsong Ji assert(LSCEV && "Invalid SCEV for Ptr value."); 3141260ea74SJinsong Ji bool FoundBucket = false; 3151260ea74SJinsong Ji for (auto &B : Buckets) { 3161260ea74SJinsong Ji const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); 3171260ea74SJinsong Ji if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) { 3181260ea74SJinsong Ji B.Elements.push_back(BucketElement(CDiff, MemI)); 3191260ea74SJinsong Ji FoundBucket = true; 3201260ea74SJinsong Ji break; 3211260ea74SJinsong Ji } 3221260ea74SJinsong Ji } 3231260ea74SJinsong Ji 3241260ea74SJinsong Ji if (!FoundBucket) { 3251260ea74SJinsong Ji if (Buckets.size() == MaxCandidateNum) 3261260ea74SJinsong Ji return; 3271260ea74SJinsong Ji Buckets.push_back(Bucket(LSCEV, MemI)); 3281260ea74SJinsong Ji } 3291260ea74SJinsong Ji } 3301260ea74SJinsong Ji 33198189755Sczhengsz SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates( 3321260ea74SJinsong Ji Loop *L, 3331260ea74SJinsong Ji std::function<bool(const Instruction *, const Value *)> isValidCandidate, 3341260ea74SJinsong Ji unsigned MaxCandidateNum) { 3351260ea74SJinsong Ji SmallVector<Bucket, 16> Buckets; 3361260ea74SJinsong Ji for (const auto &BB : L->blocks()) 3371260ea74SJinsong Ji for (auto &J : *BB) { 3381260ea74SJinsong Ji Value *PtrValue; 3391260ea74SJinsong Ji Instruction *MemI; 3401260ea74SJinsong Ji 3411260ea74SJinsong Ji if (LoadInst *LMemI = dyn_cast<LoadInst>(&J)) { 3421260ea74SJinsong Ji MemI = LMemI; 3431260ea74SJinsong Ji PtrValue = LMemI->getPointerOperand(); 3441260ea74SJinsong Ji } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&J)) { 3451260ea74SJinsong Ji MemI = SMemI; 3461260ea74SJinsong Ji PtrValue = SMemI->getPointerOperand(); 3471260ea74SJinsong Ji } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) { 3481260ea74SJinsong Ji if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { 3491260ea74SJinsong Ji MemI = IMemI; 3501260ea74SJinsong Ji PtrValue = IMemI->getArgOperand(0); 3511260ea74SJinsong Ji } else continue; 3521260ea74SJinsong Ji } else continue; 3531260ea74SJinsong Ji 3541260ea74SJinsong Ji unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 3551260ea74SJinsong Ji if (PtrAddrSpace) 3561260ea74SJinsong Ji continue; 3571260ea74SJinsong Ji 3581260ea74SJinsong Ji if (L->isLoopInvariant(PtrValue)) 3591260ea74SJinsong Ji continue; 3601260ea74SJinsong Ji 3611260ea74SJinsong Ji const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); 3621260ea74SJinsong Ji const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV); 3631260ea74SJinsong Ji if (!LARSCEV || LARSCEV->getLoop() != L) 3641260ea74SJinsong Ji continue; 3651260ea74SJinsong Ji 3661260ea74SJinsong Ji if (isValidCandidate(&J, PtrValue)) 3671260ea74SJinsong Ji addOneCandidate(MemI, LSCEV, Buckets, MaxCandidateNum); 3681260ea74SJinsong Ji } 3691260ea74SJinsong Ji return Buckets; 3701260ea74SJinsong Ji } 3711260ea74SJinsong Ji 37298189755Sczhengsz bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain, 3731260ea74SJinsong Ji InstrForm Form) { 3741260ea74SJinsong Ji // RemainderOffsetInfo details: 3751260ea74SJinsong Ji // key: value of (Offset urem DispConstraint). For DSForm, it can 3761260ea74SJinsong Ji // be [0, 4). 3771260ea74SJinsong Ji // first of pair: the index of first BucketElement whose remainder is equal 3781260ea74SJinsong Ji // to key. For key 0, this value must be 0. 3791260ea74SJinsong Ji // second of pair: number of load/stores with the same remainder. 3801260ea74SJinsong Ji DenseMap<unsigned, std::pair<unsigned, unsigned>> RemainderOffsetInfo; 3811260ea74SJinsong Ji 3821260ea74SJinsong Ji for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) { 3831260ea74SJinsong Ji if (!BucketChain.Elements[j].Offset) 3841260ea74SJinsong Ji RemainderOffsetInfo[0] = std::make_pair(0, 1); 3851260ea74SJinsong Ji else { 3861260ea74SJinsong Ji unsigned Remainder = 3871260ea74SJinsong Ji BucketChain.Elements[j].Offset->getAPInt().urem(Form); 3881260ea74SJinsong Ji if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end()) 3891260ea74SJinsong Ji RemainderOffsetInfo[Remainder] = std::make_pair(j, 1); 3901260ea74SJinsong Ji else 3911260ea74SJinsong Ji RemainderOffsetInfo[Remainder].second++; 3921260ea74SJinsong Ji } 3931260ea74SJinsong Ji } 3941260ea74SJinsong Ji // Currently we choose the most profitable base as the one which has the max 3951260ea74SJinsong Ji // number of load/store with same remainder. 3961260ea74SJinsong Ji // FIXME: adjust the base selection strategy according to load/store offset 3971260ea74SJinsong Ji // distribution. 3981260ea74SJinsong Ji // For example, if we have one candidate chain for DS form preparation, which 3991260ea74SJinsong Ji // contains following load/stores with different remainders: 4001260ea74SJinsong Ji // 1: 10 load/store whose remainder is 1; 4011260ea74SJinsong Ji // 2: 9 load/store whose remainder is 2; 4021260ea74SJinsong Ji // 3: 1 for remainder 3 and 0 for remainder 0; 4031260ea74SJinsong Ji // Now we will choose the first load/store whose remainder is 1 as base and 4041260ea74SJinsong Ji // adjust all other load/stores according to new base, so we will get 10 DS 4051260ea74SJinsong Ji // form and 10 X form. 4061260ea74SJinsong Ji // But we should be more clever, for this case we could use two bases, one for 4071260ea74SJinsong Ji // remainder 1 and the other for remainder 2, thus we could get 19 DS form and 1 4081260ea74SJinsong Ji // X form. 4091260ea74SJinsong Ji unsigned MaxCountRemainder = 0; 4101260ea74SJinsong Ji for (unsigned j = 0; j < (unsigned)Form; j++) 4111260ea74SJinsong Ji if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) && 4121260ea74SJinsong Ji RemainderOffsetInfo[j].second > 4131260ea74SJinsong Ji RemainderOffsetInfo[MaxCountRemainder].second) 4141260ea74SJinsong Ji MaxCountRemainder = j; 4151260ea74SJinsong Ji 4161260ea74SJinsong Ji // Abort when there are too few insts with common base. 4171260ea74SJinsong Ji if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold) 4181260ea74SJinsong Ji return false; 4191260ea74SJinsong Ji 4201260ea74SJinsong Ji // If the first value is most profitable, no needed to adjust BucketChain 4211260ea74SJinsong Ji // elements as they are substracted the first value when collecting. 4221260ea74SJinsong Ji if (MaxCountRemainder == 0) 4231260ea74SJinsong Ji return true; 4241260ea74SJinsong Ji 4251260ea74SJinsong Ji // Adjust load/store to the new chosen base. 4261260ea74SJinsong Ji const SCEV *Offset = 4271260ea74SJinsong Ji BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset; 4281260ea74SJinsong Ji BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset); 4291260ea74SJinsong Ji for (auto &E : BucketChain.Elements) { 4301260ea74SJinsong Ji if (E.Offset) 4311260ea74SJinsong Ji E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset)); 4321260ea74SJinsong Ji else 4331260ea74SJinsong Ji E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset)); 4341260ea74SJinsong Ji } 4351260ea74SJinsong Ji 4361260ea74SJinsong Ji std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first], 4371260ea74SJinsong Ji BucketChain.Elements[0]); 4381260ea74SJinsong Ji return true; 4391260ea74SJinsong Ji } 4401260ea74SJinsong Ji 4411260ea74SJinsong Ji // FIXME: implement a more clever base choosing policy. 4421260ea74SJinsong Ji // Currently we always choose an exist load/store offset. This maybe lead to 4431260ea74SJinsong Ji // suboptimal code sequences. For example, for one DS chain with offsets 4441260ea74SJinsong Ji // {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp 4451260ea74SJinsong Ji // for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a 4461260ea74SJinsong Ji // multipler of 4, it cannot be represented by sint16. 44798189755Sczhengsz bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { 4481260ea74SJinsong Ji // We have a choice now of which instruction's memory operand we use as the 4491260ea74SJinsong Ji // base for the generated PHI. Always picking the first instruction in each 4501260ea74SJinsong Ji // bucket does not work well, specifically because that instruction might 4511260ea74SJinsong Ji // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, 4521260ea74SJinsong Ji // the choice is somewhat arbitrary, because the backend will happily 4531260ea74SJinsong Ji // generate direct offsets from both the pre-incremented and 4541260ea74SJinsong Ji // post-incremented pointer values. Thus, we'll pick the first non-prefetch 4551260ea74SJinsong Ji // instruction in each bucket, and adjust the recurrence and other offsets 4561260ea74SJinsong Ji // accordingly. 4571260ea74SJinsong Ji for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) { 4581260ea74SJinsong Ji if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr)) 4591260ea74SJinsong Ji if (II->getIntrinsicID() == Intrinsic::prefetch) 4601260ea74SJinsong Ji continue; 4611260ea74SJinsong Ji 4621260ea74SJinsong Ji // If we'd otherwise pick the first element anyway, there's nothing to do. 4631260ea74SJinsong Ji if (j == 0) 4641260ea74SJinsong Ji break; 4651260ea74SJinsong Ji 4661260ea74SJinsong Ji // If our chosen element has no offset from the base pointer, there's 4671260ea74SJinsong Ji // nothing to do. 4681260ea74SJinsong Ji if (!BucketChain.Elements[j].Offset || 4691260ea74SJinsong Ji BucketChain.Elements[j].Offset->isZero()) 4701260ea74SJinsong Ji break; 4711260ea74SJinsong Ji 4721260ea74SJinsong Ji const SCEV *Offset = BucketChain.Elements[j].Offset; 4731260ea74SJinsong Ji BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset); 4741260ea74SJinsong Ji for (auto &E : BucketChain.Elements) { 4751260ea74SJinsong Ji if (E.Offset) 4761260ea74SJinsong Ji E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset)); 4771260ea74SJinsong Ji else 4781260ea74SJinsong Ji E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset)); 4791260ea74SJinsong Ji } 4801260ea74SJinsong Ji 4811260ea74SJinsong Ji std::swap(BucketChain.Elements[j], BucketChain.Elements[0]); 4821260ea74SJinsong Ji break; 4831260ea74SJinsong Ji } 4841260ea74SJinsong Ji return true; 4851260ea74SJinsong Ji } 4861260ea74SJinsong Ji 48798189755Sczhengsz bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, 4881260ea74SJinsong Ji SmallSet<BasicBlock *, 16> &BBChanged, 4891260ea74SJinsong Ji InstrForm Form) { 4901260ea74SJinsong Ji bool MadeChange = false; 4911260ea74SJinsong Ji const SCEVAddRecExpr *BasePtrSCEV = 4921260ea74SJinsong Ji cast<SCEVAddRecExpr>(BucketChain.BaseSCEV); 4931260ea74SJinsong Ji if (!BasePtrSCEV->isAffine()) 4941260ea74SJinsong Ji return MadeChange; 4951260ea74SJinsong Ji 4961260ea74SJinsong Ji LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); 4971260ea74SJinsong Ji 4981260ea74SJinsong Ji assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?"); 4991260ea74SJinsong Ji 5001260ea74SJinsong Ji // The instruction corresponding to the Bucket's BaseSCEV must be the first 5011260ea74SJinsong Ji // in the vector of elements. 5021260ea74SJinsong Ji Instruction *MemI = BucketChain.Elements.begin()->Instr; 5031260ea74SJinsong Ji Value *BasePtr = GetPointerOperand(MemI); 5041260ea74SJinsong Ji assert(BasePtr && "No pointer operand"); 5051260ea74SJinsong Ji 5061260ea74SJinsong Ji Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); 5071260ea74SJinsong Ji Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), 5081260ea74SJinsong Ji BasePtr->getType()->getPointerAddressSpace()); 5091260ea74SJinsong Ji 5101260ea74SJinsong Ji if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L)) 5111260ea74SJinsong Ji return MadeChange; 5121260ea74SJinsong Ji 5131260ea74SJinsong Ji const SCEVConstant *BasePtrIncSCEV = 5141260ea74SJinsong Ji dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)); 5151260ea74SJinsong Ji if (!BasePtrIncSCEV) 5161260ea74SJinsong Ji return MadeChange; 5171260ea74SJinsong Ji 5181260ea74SJinsong Ji // For some DS form load/store instructions, it can also be an update form, 5191260ea74SJinsong Ji // if the stride is a multipler of 4. Use update form if prefer it. 5201260ea74SJinsong Ji bool CanPreInc = (Form == UpdateForm || 5211260ea74SJinsong Ji ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) && 5221260ea74SJinsong Ji PreferUpdateForm)); 5231260ea74SJinsong Ji const SCEV *BasePtrStartSCEV = nullptr; 5241260ea74SJinsong Ji if (CanPreInc) 5251260ea74SJinsong Ji BasePtrStartSCEV = 5261260ea74SJinsong Ji SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV); 5271260ea74SJinsong Ji else 5281260ea74SJinsong Ji BasePtrStartSCEV = BasePtrSCEV->getStart(); 5291260ea74SJinsong Ji 5301260ea74SJinsong Ji if (!isSafeToExpand(BasePtrStartSCEV, *SE)) 5311260ea74SJinsong Ji return MadeChange; 5321260ea74SJinsong Ji 5331260ea74SJinsong Ji if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) 5341260ea74SJinsong Ji return MadeChange; 5351260ea74SJinsong Ji 5361260ea74SJinsong Ji LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); 5371260ea74SJinsong Ji 5381260ea74SJinsong Ji BasicBlock *Header = L->getHeader(); 5391260ea74SJinsong Ji unsigned HeaderLoopPredCount = pred_size(Header); 5401260ea74SJinsong Ji BasicBlock *LoopPredecessor = L->getLoopPredecessor(); 5411260ea74SJinsong Ji 5421260ea74SJinsong Ji PHINode *NewPHI = 5431260ea74SJinsong Ji PHINode::Create(I8PtrTy, HeaderLoopPredCount, 5441260ea74SJinsong Ji getInstrName(MemI, PHINodeNameSuffix), 5451260ea74SJinsong Ji Header->getFirstNonPHI()); 5461260ea74SJinsong Ji 5471260ea74SJinsong Ji SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); 5481260ea74SJinsong Ji Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, 5491260ea74SJinsong Ji LoopPredecessor->getTerminator()); 5501260ea74SJinsong Ji 5511260ea74SJinsong Ji // Note that LoopPredecessor might occur in the predecessor list multiple 5521260ea74SJinsong Ji // times, and we need to add it the right number of times. 5531b344e79SMark de Wever for (auto PI : predecessors(Header)) { 5541260ea74SJinsong Ji if (PI != LoopPredecessor) 5551260ea74SJinsong Ji continue; 5561260ea74SJinsong Ji 5571260ea74SJinsong Ji NewPHI->addIncoming(BasePtrStart, LoopPredecessor); 5581260ea74SJinsong Ji } 5591260ea74SJinsong Ji 5601260ea74SJinsong Ji Instruction *PtrInc = nullptr; 5611260ea74SJinsong Ji Instruction *NewBasePtr = nullptr; 5621260ea74SJinsong Ji if (CanPreInc) { 5631260ea74SJinsong Ji Instruction *InsPoint = &*Header->getFirstInsertionPt(); 5641260ea74SJinsong Ji PtrInc = GetElementPtrInst::Create( 5651260ea74SJinsong Ji I8Ty, NewPHI, BasePtrIncSCEV->getValue(), 5661260ea74SJinsong Ji getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint); 5671260ea74SJinsong Ji cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr)); 5681b344e79SMark de Wever for (auto PI : predecessors(Header)) { 5691260ea74SJinsong Ji if (PI == LoopPredecessor) 5701260ea74SJinsong Ji continue; 5711260ea74SJinsong Ji 5721260ea74SJinsong Ji NewPHI->addIncoming(PtrInc, PI); 5731260ea74SJinsong Ji } 5741260ea74SJinsong Ji if (PtrInc->getType() != BasePtr->getType()) 5751260ea74SJinsong Ji NewBasePtr = new BitCastInst( 5761260ea74SJinsong Ji PtrInc, BasePtr->getType(), 5771260ea74SJinsong Ji getInstrName(PtrInc, CastNodeNameSuffix), InsPoint); 5781260ea74SJinsong Ji else 5791260ea74SJinsong Ji NewBasePtr = PtrInc; 5801260ea74SJinsong Ji } else { 5811260ea74SJinsong Ji // Note that LoopPredecessor might occur in the predecessor list multiple 5821260ea74SJinsong Ji // times, and we need to make sure no more incoming value for them in PHI. 5831b344e79SMark de Wever for (auto PI : predecessors(Header)) { 5841260ea74SJinsong Ji if (PI == LoopPredecessor) 5851260ea74SJinsong Ji continue; 5861260ea74SJinsong Ji 5871260ea74SJinsong Ji // For the latch predecessor, we need to insert a GEP just before the 5881260ea74SJinsong Ji // terminator to increase the address. 5891260ea74SJinsong Ji BasicBlock *BB = PI; 5901260ea74SJinsong Ji Instruction *InsPoint = BB->getTerminator(); 5911260ea74SJinsong Ji PtrInc = GetElementPtrInst::Create( 5921260ea74SJinsong Ji I8Ty, NewPHI, BasePtrIncSCEV->getValue(), 5931260ea74SJinsong Ji getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint); 5941260ea74SJinsong Ji 5951260ea74SJinsong Ji cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr)); 5961260ea74SJinsong Ji 5971260ea74SJinsong Ji NewPHI->addIncoming(PtrInc, PI); 5981260ea74SJinsong Ji } 5991260ea74SJinsong Ji PtrInc = NewPHI; 6001260ea74SJinsong Ji if (NewPHI->getType() != BasePtr->getType()) 6011260ea74SJinsong Ji NewBasePtr = 6021260ea74SJinsong Ji new BitCastInst(NewPHI, BasePtr->getType(), 6031260ea74SJinsong Ji getInstrName(NewPHI, CastNodeNameSuffix), 6041260ea74SJinsong Ji &*Header->getFirstInsertionPt()); 6051260ea74SJinsong Ji else 6061260ea74SJinsong Ji NewBasePtr = NewPHI; 6071260ea74SJinsong Ji } 6081260ea74SJinsong Ji 6091260ea74SJinsong Ji if (Instruction *IDel = dyn_cast<Instruction>(BasePtr)) 6101260ea74SJinsong Ji BBChanged.insert(IDel->getParent()); 6111260ea74SJinsong Ji BasePtr->replaceAllUsesWith(NewBasePtr); 6121260ea74SJinsong Ji RecursivelyDeleteTriviallyDeadInstructions(BasePtr); 6131260ea74SJinsong Ji 6141260ea74SJinsong Ji // Keep track of the replacement pointer values we've inserted so that we 6151260ea74SJinsong Ji // don't generate more pointer values than necessary. 6161260ea74SJinsong Ji SmallPtrSet<Value *, 16> NewPtrs; 6171260ea74SJinsong Ji NewPtrs.insert(NewBasePtr); 6181260ea74SJinsong Ji 6191260ea74SJinsong Ji for (auto I = std::next(BucketChain.Elements.begin()), 6201260ea74SJinsong Ji IE = BucketChain.Elements.end(); I != IE; ++I) { 6211260ea74SJinsong Ji Value *Ptr = GetPointerOperand(I->Instr); 6221260ea74SJinsong Ji assert(Ptr && "No pointer operand"); 6231260ea74SJinsong Ji if (NewPtrs.count(Ptr)) 6241260ea74SJinsong Ji continue; 6251260ea74SJinsong Ji 6261260ea74SJinsong Ji Instruction *RealNewPtr; 6271260ea74SJinsong Ji if (!I->Offset || I->Offset->getValue()->isZero()) { 6281260ea74SJinsong Ji RealNewPtr = NewBasePtr; 6291260ea74SJinsong Ji } else { 6301260ea74SJinsong Ji Instruction *PtrIP = dyn_cast<Instruction>(Ptr); 6311260ea74SJinsong Ji if (PtrIP && isa<Instruction>(NewBasePtr) && 6321260ea74SJinsong Ji cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent()) 6331260ea74SJinsong Ji PtrIP = nullptr; 6341260ea74SJinsong Ji else if (PtrIP && isa<PHINode>(PtrIP)) 6351260ea74SJinsong Ji PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); 6361260ea74SJinsong Ji else if (!PtrIP) 6371260ea74SJinsong Ji PtrIP = I->Instr; 6381260ea74SJinsong Ji 6391260ea74SJinsong Ji GetElementPtrInst *NewPtr = GetElementPtrInst::Create( 6401260ea74SJinsong Ji I8Ty, PtrInc, I->Offset->getValue(), 6411260ea74SJinsong Ji getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP); 6421260ea74SJinsong Ji if (!PtrIP) 6431260ea74SJinsong Ji NewPtr->insertAfter(cast<Instruction>(PtrInc)); 6441260ea74SJinsong Ji NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); 6451260ea74SJinsong Ji RealNewPtr = NewPtr; 6461260ea74SJinsong Ji } 6471260ea74SJinsong Ji 6481260ea74SJinsong Ji if (Instruction *IDel = dyn_cast<Instruction>(Ptr)) 6491260ea74SJinsong Ji BBChanged.insert(IDel->getParent()); 6501260ea74SJinsong Ji 6511260ea74SJinsong Ji Instruction *ReplNewPtr; 6521260ea74SJinsong Ji if (Ptr->getType() != RealNewPtr->getType()) { 6531260ea74SJinsong Ji ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), 6541260ea74SJinsong Ji getInstrName(Ptr, CastNodeNameSuffix)); 6551260ea74SJinsong Ji ReplNewPtr->insertAfter(RealNewPtr); 6561260ea74SJinsong Ji } else 6571260ea74SJinsong Ji ReplNewPtr = RealNewPtr; 6581260ea74SJinsong Ji 6591260ea74SJinsong Ji Ptr->replaceAllUsesWith(ReplNewPtr); 6601260ea74SJinsong Ji RecursivelyDeleteTriviallyDeadInstructions(Ptr); 6611260ea74SJinsong Ji 6621260ea74SJinsong Ji NewPtrs.insert(RealNewPtr); 6631260ea74SJinsong Ji } 6641260ea74SJinsong Ji 6651260ea74SJinsong Ji MadeChange = true; 6661260ea74SJinsong Ji 6671260ea74SJinsong Ji SuccPrepCount++; 6681260ea74SJinsong Ji 6691260ea74SJinsong Ji if (Form == DSForm && !CanPreInc) 6701260ea74SJinsong Ji DSFormChainRewritten++; 6711260ea74SJinsong Ji else if (Form == DQForm) 6721260ea74SJinsong Ji DQFormChainRewritten++; 6731260ea74SJinsong Ji else if (Form == UpdateForm || (Form == DSForm && CanPreInc)) 6741260ea74SJinsong Ji UpdFormChainRewritten++; 6751260ea74SJinsong Ji 6761260ea74SJinsong Ji return MadeChange; 6771260ea74SJinsong Ji } 6781260ea74SJinsong Ji 67998189755Sczhengsz bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L, 6801260ea74SJinsong Ji SmallVector<Bucket, 16> &Buckets) { 6811260ea74SJinsong Ji bool MadeChange = false; 6821260ea74SJinsong Ji if (Buckets.empty()) 6831260ea74SJinsong Ji return MadeChange; 6841260ea74SJinsong Ji SmallSet<BasicBlock *, 16> BBChanged; 6851260ea74SJinsong Ji for (auto &Bucket : Buckets) 6861260ea74SJinsong Ji // The base address of each bucket is transformed into a phi and the others 6871260ea74SJinsong Ji // are rewritten based on new base. 6881260ea74SJinsong Ji if (prepareBaseForUpdateFormChain(Bucket)) 6891260ea74SJinsong Ji MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm); 6901260ea74SJinsong Ji 6911260ea74SJinsong Ji if (MadeChange) 6921260ea74SJinsong Ji for (auto &BB : L->blocks()) 6931260ea74SJinsong Ji if (BBChanged.count(BB)) 6941260ea74SJinsong Ji DeleteDeadPHIs(BB); 6951260ea74SJinsong Ji return MadeChange; 6961260ea74SJinsong Ji } 6971260ea74SJinsong Ji 69898189755Sczhengsz bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, 6991260ea74SJinsong Ji InstrForm Form) { 7001260ea74SJinsong Ji bool MadeChange = false; 7011260ea74SJinsong Ji 7021260ea74SJinsong Ji if (Buckets.empty()) 7031260ea74SJinsong Ji return MadeChange; 7041260ea74SJinsong Ji 7051260ea74SJinsong Ji SmallSet<BasicBlock *, 16> BBChanged; 7061260ea74SJinsong Ji for (auto &Bucket : Buckets) { 7071260ea74SJinsong Ji if (Bucket.Elements.size() < DispFormPrepMinThreshold) 7081260ea74SJinsong Ji continue; 7091260ea74SJinsong Ji if (prepareBaseForDispFormChain(Bucket, Form)) 7101260ea74SJinsong Ji MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, Form); 7111260ea74SJinsong Ji } 7121260ea74SJinsong Ji 7131260ea74SJinsong Ji if (MadeChange) 7141260ea74SJinsong Ji for (auto &BB : L->blocks()) 7151260ea74SJinsong Ji if (BBChanged.count(BB)) 7161260ea74SJinsong Ji DeleteDeadPHIs(BB); 7171260ea74SJinsong Ji return MadeChange; 7181260ea74SJinsong Ji } 7191260ea74SJinsong Ji 7201260ea74SJinsong Ji // In order to prepare for the preferred instruction form, a PHI is added. 7211260ea74SJinsong Ji // This function will check to see if that PHI already exists and will return 7221260ea74SJinsong Ji // true if it found an existing PHI with the matched start and increment as the 7231260ea74SJinsong Ji // one we wanted to create. 72498189755Sczhengsz bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI, 7251260ea74SJinsong Ji const SCEV *BasePtrStartSCEV, 7261260ea74SJinsong Ji const SCEVConstant *BasePtrIncSCEV, 7271260ea74SJinsong Ji InstrForm Form) { 7281260ea74SJinsong Ji BasicBlock *BB = MemI->getParent(); 7291260ea74SJinsong Ji if (!BB) 7301260ea74SJinsong Ji return false; 7311260ea74SJinsong Ji 7321260ea74SJinsong Ji BasicBlock *PredBB = L->getLoopPredecessor(); 7331260ea74SJinsong Ji BasicBlock *LatchBB = L->getLoopLatch(); 7341260ea74SJinsong Ji 7351260ea74SJinsong Ji if (!PredBB || !LatchBB) 7361260ea74SJinsong Ji return false; 7371260ea74SJinsong Ji 7381260ea74SJinsong Ji // Run through the PHIs and see if we have some that looks like a preparation 7391260ea74SJinsong Ji iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis(); 7401260ea74SJinsong Ji for (auto & CurrentPHI : PHIIter) { 7411260ea74SJinsong Ji PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI); 7421260ea74SJinsong Ji if (!CurrentPHINode) 7431260ea74SJinsong Ji continue; 7441260ea74SJinsong Ji 7451260ea74SJinsong Ji if (!SE->isSCEVable(CurrentPHINode->getType())) 7461260ea74SJinsong Ji continue; 7471260ea74SJinsong Ji 7481260ea74SJinsong Ji const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L); 7491260ea74SJinsong Ji 7501260ea74SJinsong Ji const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV); 7511260ea74SJinsong Ji if (!PHIBasePtrSCEV) 7521260ea74SJinsong Ji continue; 7531260ea74SJinsong Ji 7541260ea74SJinsong Ji const SCEVConstant *PHIBasePtrIncSCEV = 7551260ea74SJinsong Ji dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE)); 7561260ea74SJinsong Ji if (!PHIBasePtrIncSCEV) 7571260ea74SJinsong Ji continue; 7581260ea74SJinsong Ji 7591260ea74SJinsong Ji if (CurrentPHINode->getNumIncomingValues() == 2) { 7601260ea74SJinsong Ji if ((CurrentPHINode->getIncomingBlock(0) == LatchBB && 7611260ea74SJinsong Ji CurrentPHINode->getIncomingBlock(1) == PredBB) || 7621260ea74SJinsong Ji (CurrentPHINode->getIncomingBlock(1) == LatchBB && 7631260ea74SJinsong Ji CurrentPHINode->getIncomingBlock(0) == PredBB)) { 7641260ea74SJinsong Ji if (PHIBasePtrIncSCEV == BasePtrIncSCEV) { 7651260ea74SJinsong Ji // The existing PHI (CurrentPHINode) has the same start and increment 7661260ea74SJinsong Ji // as the PHI that we wanted to create. 7671260ea74SJinsong Ji if (Form == UpdateForm && 7681260ea74SJinsong Ji PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) { 7691260ea74SJinsong Ji ++PHINodeAlreadyExistsUpdate; 7701260ea74SJinsong Ji return true; 7711260ea74SJinsong Ji } 7721260ea74SJinsong Ji if (Form == DSForm || Form == DQForm) { 7731260ea74SJinsong Ji const SCEVConstant *Diff = dyn_cast<SCEVConstant>( 7741260ea74SJinsong Ji SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV)); 7751260ea74SJinsong Ji if (Diff && !Diff->getAPInt().urem(Form)) { 7761260ea74SJinsong Ji if (Form == DSForm) 7771260ea74SJinsong Ji ++PHINodeAlreadyExistsDS; 7781260ea74SJinsong Ji else 7791260ea74SJinsong Ji ++PHINodeAlreadyExistsDQ; 7801260ea74SJinsong Ji return true; 7811260ea74SJinsong Ji } 7821260ea74SJinsong Ji } 7831260ea74SJinsong Ji } 7841260ea74SJinsong Ji } 7851260ea74SJinsong Ji } 7861260ea74SJinsong Ji } 7871260ea74SJinsong Ji return false; 7881260ea74SJinsong Ji } 7891260ea74SJinsong Ji 79098189755Sczhengsz bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { 7911260ea74SJinsong Ji bool MadeChange = false; 7921260ea74SJinsong Ji 7931260ea74SJinsong Ji // Only prep. the inner-most loop 7941260ea74SJinsong Ji if (!L->empty()) 7951260ea74SJinsong Ji return MadeChange; 7961260ea74SJinsong Ji 7971260ea74SJinsong Ji // Return if already done enough preparation. 7981260ea74SJinsong Ji if (SuccPrepCount >= MaxVarsPrep) 7991260ea74SJinsong Ji return MadeChange; 8001260ea74SJinsong Ji 8011260ea74SJinsong Ji LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); 8021260ea74SJinsong Ji 8031260ea74SJinsong Ji BasicBlock *LoopPredecessor = L->getLoopPredecessor(); 8041260ea74SJinsong Ji // If there is no loop predecessor, or the loop predecessor's terminator 8051260ea74SJinsong Ji // returns a value (which might contribute to determining the loop's 8061260ea74SJinsong Ji // iteration space), insert a new preheader for the loop. 8071260ea74SJinsong Ji if (!LoopPredecessor || 8081260ea74SJinsong Ji !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { 8091260ea74SJinsong Ji LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA); 8101260ea74SJinsong Ji if (LoopPredecessor) 8111260ea74SJinsong Ji MadeChange = true; 8121260ea74SJinsong Ji } 8131260ea74SJinsong Ji if (!LoopPredecessor) { 8141260ea74SJinsong Ji LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n"); 8151260ea74SJinsong Ji return MadeChange; 8161260ea74SJinsong Ji } 8171260ea74SJinsong Ji // Check if a load/store has update form. This lambda is used by function 8181260ea74SJinsong Ji // collectCandidates which can collect candidates for types defined by lambda. 8191260ea74SJinsong Ji auto isUpdateFormCandidate = [&] (const Instruction *I, 8201260ea74SJinsong Ji const Value *PtrValue) { 8211260ea74SJinsong Ji assert((PtrValue && I) && "Invalid parameter!"); 8221260ea74SJinsong Ji // There are no update forms for Altivec vector load/stores. 8231260ea74SJinsong Ji if (ST && ST->hasAltivec() && 8241260ea74SJinsong Ji PtrValue->getType()->getPointerElementType()->isVectorTy()) 8251260ea74SJinsong Ji return false; 8261260ea74SJinsong Ji // See getPreIndexedAddressParts, the displacement for LDU/STDU has to 8271260ea74SJinsong Ji // be 4's multiple (DS-form). For i64 loads/stores when the displacement 8281260ea74SJinsong Ji // fits in a 16-bit signed field but isn't a multiple of 4, it will be 8291260ea74SJinsong Ji // useless and possible to break some original well-form addressing mode 8301260ea74SJinsong Ji // to make this pre-inc prep for it. 8311260ea74SJinsong Ji if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) { 8321260ea74SJinsong Ji const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L); 8331260ea74SJinsong Ji const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV); 8341260ea74SJinsong Ji if (!LARSCEV || LARSCEV->getLoop() != L) 8351260ea74SJinsong Ji return false; 8361260ea74SJinsong Ji if (const SCEVConstant *StepConst = 8371260ea74SJinsong Ji dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) { 8381260ea74SJinsong Ji const APInt &ConstInt = StepConst->getValue()->getValue(); 8391260ea74SJinsong Ji if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) 8401260ea74SJinsong Ji return false; 8411260ea74SJinsong Ji } 8421260ea74SJinsong Ji } 8431260ea74SJinsong Ji return true; 8441260ea74SJinsong Ji }; 8451260ea74SJinsong Ji 8461260ea74SJinsong Ji // Check if a load/store has DS form. 8471260ea74SJinsong Ji auto isDSFormCandidate = [] (const Instruction *I, const Value *PtrValue) { 8481260ea74SJinsong Ji assert((PtrValue && I) && "Invalid parameter!"); 849f5440ec4Sczhengsz if (isa<IntrinsicInst>(I)) 850f5440ec4Sczhengsz return false; 851f5440ec4Sczhengsz Type *PointerElementType = PtrValue->getType()->getPointerElementType(); 852f5440ec4Sczhengsz return (PointerElementType->isIntegerTy(64)) || 853f5440ec4Sczhengsz (PointerElementType->isFloatTy()) || 854f5440ec4Sczhengsz (PointerElementType->isDoubleTy()) || 855f5440ec4Sczhengsz (PointerElementType->isIntegerTy(32) && 856f5440ec4Sczhengsz llvm::any_of(I->users(), 857f5440ec4Sczhengsz [](const User *U) { return isa<SExtInst>(U); })); 8581260ea74SJinsong Ji }; 8591260ea74SJinsong Ji 8601260ea74SJinsong Ji // Check if a load/store has DQ form. 8611260ea74SJinsong Ji auto isDQFormCandidate = [&] (const Instruction *I, const Value *PtrValue) { 8621260ea74SJinsong Ji assert((PtrValue && I) && "Invalid parameter!"); 8631260ea74SJinsong Ji return !isa<IntrinsicInst>(I) && ST && ST->hasP9Vector() && 8641260ea74SJinsong Ji (PtrValue->getType()->getPointerElementType()->isVectorTy()); 8651260ea74SJinsong Ji }; 8661260ea74SJinsong Ji 8671260ea74SJinsong Ji // intrinsic for update form. 8681260ea74SJinsong Ji SmallVector<Bucket, 16> UpdateFormBuckets = 8691260ea74SJinsong Ji collectCandidates(L, isUpdateFormCandidate, MaxVarsUpdateForm); 8701260ea74SJinsong Ji 8711260ea74SJinsong Ji // Prepare for update form. 8721260ea74SJinsong Ji if (!UpdateFormBuckets.empty()) 8731260ea74SJinsong Ji MadeChange |= updateFormPrep(L, UpdateFormBuckets); 8741260ea74SJinsong Ji 8751260ea74SJinsong Ji // Collect buckets of comparable addresses used by loads and stores for DS 8761260ea74SJinsong Ji // form. 8771260ea74SJinsong Ji SmallVector<Bucket, 16> DSFormBuckets = 8781260ea74SJinsong Ji collectCandidates(L, isDSFormCandidate, MaxVarsDSForm); 8791260ea74SJinsong Ji 8801260ea74SJinsong Ji // Prepare for DS form. 8811260ea74SJinsong Ji if (!DSFormBuckets.empty()) 8821260ea74SJinsong Ji MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm); 8831260ea74SJinsong Ji 8841260ea74SJinsong Ji // Collect buckets of comparable addresses used by loads and stores for DQ 8851260ea74SJinsong Ji // form. 8861260ea74SJinsong Ji SmallVector<Bucket, 16> DQFormBuckets = 8871260ea74SJinsong Ji collectCandidates(L, isDQFormCandidate, MaxVarsDQForm); 8881260ea74SJinsong Ji 8891260ea74SJinsong Ji // Prepare for DQ form. 8901260ea74SJinsong Ji if (!DQFormBuckets.empty()) 8911260ea74SJinsong Ji MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm); 8921260ea74SJinsong Ji 8931260ea74SJinsong Ji return MadeChange; 8941260ea74SJinsong Ji } 895