1c5ef502eSSam Parker //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===// 2c5ef502eSSam Parker // 3c5ef502eSSam Parker // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4c5ef502eSSam Parker // See https://llvm.org/LICENSE.txt for license information. 5c5ef502eSSam Parker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6c5ef502eSSam Parker // 7c5ef502eSSam Parker //===----------------------------------------------------------------------===// 8c5ef502eSSam Parker /// \file 9c5ef502eSSam Parker /// Insert hardware loop intrinsics into loops which are deemed profitable by 10c5ef502eSSam Parker /// the target, by querying TargetTransformInfo. A hardware loop comprises of 11c5ef502eSSam Parker /// two intrinsics: one, outside the loop, to set the loop iteration count and 12c5ef502eSSam Parker /// another, in the exit block, to decrement the counter. The decremented value 13c5ef502eSSam Parker /// can either be carried through the loop via a phi or handled in some opaque 14c5ef502eSSam Parker /// way by the target. 15c5ef502eSSam Parker /// 16c5ef502eSSam Parker //===----------------------------------------------------------------------===// 17c5ef502eSSam Parker 18c5ef502eSSam Parker #include "llvm/ADT/Statistic.h" 19c5ef502eSSam Parker #include "llvm/Analysis/AssumptionCache.h" 20c5ef502eSSam Parker #include "llvm/Analysis/LoopInfo.h" 2192164cf2SSjoerd Meijer #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22c5ef502eSSam Parker #include "llvm/Analysis/ScalarEvolution.h" 23c5ef502eSSam Parker #include "llvm/Analysis/TargetTransformInfo.h" 24c5ef502eSSam Parker #include "llvm/CodeGen/Passes.h" 25c5ef502eSSam Parker #include "llvm/CodeGen/TargetPassConfig.h" 26c5ef502eSSam Parker #include "llvm/IR/BasicBlock.h" 2705da2fe5SReid Kleckner #include "llvm/IR/Constants.h" 28c5ef502eSSam Parker #include "llvm/IR/DataLayout.h" 29c5ef502eSSam Parker #include "llvm/IR/Dominators.h" 30c5ef502eSSam Parker #include "llvm/IR/IRBuilder.h" 31c5ef502eSSam Parker #include "llvm/IR/Instructions.h" 32c5ef502eSSam Parker #include "llvm/IR/IntrinsicInst.h" 33c5ef502eSSam Parker #include "llvm/IR/Value.h" 3405da2fe5SReid Kleckner #include "llvm/InitializePasses.h" 3505da2fe5SReid Kleckner #include "llvm/Pass.h" 3605da2fe5SReid Kleckner #include "llvm/PassRegistry.h" 374c1a1d3cSReid Kleckner #include "llvm/Support/CommandLine.h" 38c5ef502eSSam Parker #include "llvm/Support/Debug.h" 39c5ef502eSSam Parker #include "llvm/Transforms/Scalar.h" 4006fef0b3SJinsong Ji #include "llvm/Transforms/Utils.h" 41c5ef502eSSam Parker #include "llvm/Transforms/Utils/BasicBlockUtils.h" 42c5ef502eSSam Parker #include "llvm/Transforms/Utils/Local.h" 4306fef0b3SJinsong Ji #include "llvm/Transforms/Utils/LoopUtils.h" 44*bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 45c5ef502eSSam Parker 46c5ef502eSSam Parker #define DEBUG_TYPE "hardware-loops" 47c5ef502eSSam Parker 48c5ef502eSSam Parker #define HW_LOOPS_NAME "Hardware Loop Insertion" 49c5ef502eSSam Parker 50c5ef502eSSam Parker using namespace llvm; 51c5ef502eSSam Parker 52c5ef502eSSam Parker static cl::opt<bool> 53c5ef502eSSam Parker ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), 54c5ef502eSSam Parker cl::desc("Force hardware loops intrinsics to be inserted")); 55c5ef502eSSam Parker 56c5ef502eSSam Parker static cl::opt<bool> 57c5ef502eSSam Parker ForceHardwareLoopPHI( 58c5ef502eSSam Parker "force-hardware-loop-phi", cl::Hidden, cl::init(false), 59c5ef502eSSam Parker cl::desc("Force hardware loop counter to be updated through a phi")); 60c5ef502eSSam Parker 61c5ef502eSSam Parker static cl::opt<bool> 62c5ef502eSSam Parker ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), 63c5ef502eSSam Parker cl::desc("Force allowance of nested hardware loops")); 64c5ef502eSSam Parker 65c5ef502eSSam Parker static cl::opt<unsigned> 66c5ef502eSSam Parker LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), 67c5ef502eSSam Parker cl::desc("Set the loop decrement value")); 68c5ef502eSSam Parker 69c5ef502eSSam Parker static cl::opt<unsigned> 70c5ef502eSSam Parker CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), 71c5ef502eSSam Parker cl::desc("Set the loop counter bitwidth")); 72c5ef502eSSam Parker 739a92be1bSSam Parker static cl::opt<bool> 749a92be1bSSam Parker ForceGuardLoopEntry( 759a92be1bSSam Parker "force-hardware-loop-guard", cl::Hidden, cl::init(false), 769a92be1bSSam Parker cl::desc("Force generation of loop guard intrinsic")); 779a92be1bSSam Parker 78c5ef502eSSam Parker STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); 79c5ef502eSSam Parker 8092164cf2SSjoerd Meijer #ifndef NDEBUG 8192164cf2SSjoerd Meijer static void debugHWLoopFailure(const StringRef DebugMsg, 8292164cf2SSjoerd Meijer Instruction *I) { 8392164cf2SSjoerd Meijer dbgs() << "HWLoops: " << DebugMsg; 8492164cf2SSjoerd Meijer if (I) 8592164cf2SSjoerd Meijer dbgs() << ' ' << *I; 8692164cf2SSjoerd Meijer else 8792164cf2SSjoerd Meijer dbgs() << '.'; 8892164cf2SSjoerd Meijer dbgs() << '\n'; 8992164cf2SSjoerd Meijer } 9092164cf2SSjoerd Meijer #endif 9192164cf2SSjoerd Meijer 9292164cf2SSjoerd Meijer static OptimizationRemarkAnalysis 9392164cf2SSjoerd Meijer createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) { 9492164cf2SSjoerd Meijer Value *CodeRegion = L->getHeader(); 9592164cf2SSjoerd Meijer DebugLoc DL = L->getStartLoc(); 9692164cf2SSjoerd Meijer 9792164cf2SSjoerd Meijer if (I) { 9892164cf2SSjoerd Meijer CodeRegion = I->getParent(); 9992164cf2SSjoerd Meijer // If there is no debug location attached to the instruction, revert back to 10092164cf2SSjoerd Meijer // using the loop's. 10192164cf2SSjoerd Meijer if (I->getDebugLoc()) 10292164cf2SSjoerd Meijer DL = I->getDebugLoc(); 10392164cf2SSjoerd Meijer } 10492164cf2SSjoerd Meijer 10592164cf2SSjoerd Meijer OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion); 10692164cf2SSjoerd Meijer R << "hardware-loop not created: "; 10792164cf2SSjoerd Meijer return R; 10892164cf2SSjoerd Meijer } 10992164cf2SSjoerd Meijer 110c5ef502eSSam Parker namespace { 111c5ef502eSSam Parker 11292164cf2SSjoerd Meijer void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag, 11392164cf2SSjoerd Meijer OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) { 11492164cf2SSjoerd Meijer LLVM_DEBUG(debugHWLoopFailure(Msg, I)); 11592164cf2SSjoerd Meijer ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg); 11692164cf2SSjoerd Meijer } 11792164cf2SSjoerd Meijer 118c5ef502eSSam Parker using TTI = TargetTransformInfo; 119c5ef502eSSam Parker 120c5ef502eSSam Parker class HardwareLoops : public FunctionPass { 121c5ef502eSSam Parker public: 122c5ef502eSSam Parker static char ID; 123c5ef502eSSam Parker 124c5ef502eSSam Parker HardwareLoops() : FunctionPass(ID) { 125c5ef502eSSam Parker initializeHardwareLoopsPass(*PassRegistry::getPassRegistry()); 126c5ef502eSSam Parker } 127c5ef502eSSam Parker 128c5ef502eSSam Parker bool runOnFunction(Function &F) override; 129c5ef502eSSam Parker 130c5ef502eSSam Parker void getAnalysisUsage(AnalysisUsage &AU) const override { 131c5ef502eSSam Parker AU.addRequired<LoopInfoWrapperPass>(); 132c5ef502eSSam Parker AU.addPreserved<LoopInfoWrapperPass>(); 133c5ef502eSSam Parker AU.addRequired<DominatorTreeWrapperPass>(); 134c5ef502eSSam Parker AU.addPreserved<DominatorTreeWrapperPass>(); 135c5ef502eSSam Parker AU.addRequired<ScalarEvolutionWrapperPass>(); 136c5ef502eSSam Parker AU.addRequired<AssumptionCacheTracker>(); 137c5ef502eSSam Parker AU.addRequired<TargetTransformInfoWrapperPass>(); 13892164cf2SSjoerd Meijer AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 139c5ef502eSSam Parker } 140c5ef502eSSam Parker 141c5ef502eSSam Parker // Try to convert the given Loop into a hardware loop. 142c5ef502eSSam Parker bool TryConvertLoop(Loop *L); 143c5ef502eSSam Parker 144c5ef502eSSam Parker // Given that the target believes the loop to be profitable, try to 145c5ef502eSSam Parker // convert it. 146c5b918deSChen Zheng bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo); 147c5ef502eSSam Parker 148c5ef502eSSam Parker private: 149c5ef502eSSam Parker ScalarEvolution *SE = nullptr; 150c5ef502eSSam Parker LoopInfo *LI = nullptr; 151c5ef502eSSam Parker const DataLayout *DL = nullptr; 15292164cf2SSjoerd Meijer OptimizationRemarkEmitter *ORE = nullptr; 153c5ef502eSSam Parker const TargetTransformInfo *TTI = nullptr; 154c5ef502eSSam Parker DominatorTree *DT = nullptr; 15506fef0b3SJinsong Ji bool PreserveLCSSA = false; 156c5ef502eSSam Parker AssumptionCache *AC = nullptr; 157c5ef502eSSam Parker TargetLibraryInfo *LibInfo = nullptr; 158c5ef502eSSam Parker Module *M = nullptr; 159c5ef502eSSam Parker bool MadeChange = false; 160c5ef502eSSam Parker }; 161c5ef502eSSam Parker 162c5ef502eSSam Parker class HardwareLoop { 163c5ef502eSSam Parker // Expand the trip count scev into a value that we can use. 1649a92be1bSSam Parker Value *InitLoopCount(); 165c5ef502eSSam Parker 166c5ef502eSSam Parker // Insert the set_loop_iteration intrinsic. 1679a92be1bSSam Parker void InsertIterationSetup(Value *LoopCountInit); 168c5ef502eSSam Parker 169c5ef502eSSam Parker // Insert the loop_decrement intrinsic. 170c5ef502eSSam Parker void InsertLoopDec(); 171c5ef502eSSam Parker 172c5ef502eSSam Parker // Insert the loop_decrement_reg intrinsic. 173c5ef502eSSam Parker Instruction *InsertLoopRegDec(Value *EltsRem); 174c5ef502eSSam Parker 175c5ef502eSSam Parker // If the target requires the counter value to be updated in the loop, 176c5ef502eSSam Parker // insert a phi to hold the value. The intended purpose is for use by 177c5ef502eSSam Parker // loop_decrement_reg. 178c5ef502eSSam Parker PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem); 179c5ef502eSSam Parker 180c5ef502eSSam Parker // Create a new cmp, that checks the returned value of loop_decrement*, 181c5ef502eSSam Parker // and update the exit branch to use it. 182c5ef502eSSam Parker void UpdateBranch(Value *EltsRem); 183c5ef502eSSam Parker 184c5ef502eSSam Parker public: 185c5b918deSChen Zheng HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, 18692164cf2SSjoerd Meijer const DataLayout &DL, 18792164cf2SSjoerd Meijer OptimizationRemarkEmitter *ORE) : 18892164cf2SSjoerd Meijer SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), 189c5ef502eSSam Parker ExitCount(Info.ExitCount), 190c5ef502eSSam Parker CountType(Info.CountType), 191c5ef502eSSam Parker ExitBranch(Info.ExitBranch), 192c5ef502eSSam Parker LoopDecrement(Info.LoopDecrement), 1939a92be1bSSam Parker UsePHICounter(Info.CounterInReg), 1949a92be1bSSam Parker UseLoopGuard(Info.PerformEntryTest) { } 195c5ef502eSSam Parker 196c5ef502eSSam Parker void Create(); 197c5ef502eSSam Parker 198c5ef502eSSam Parker private: 199c5ef502eSSam Parker ScalarEvolution &SE; 200c5ef502eSSam Parker const DataLayout &DL; 20192164cf2SSjoerd Meijer OptimizationRemarkEmitter *ORE = nullptr; 202c5ef502eSSam Parker Loop *L = nullptr; 203c5ef502eSSam Parker Module *M = nullptr; 204c5ef502eSSam Parker const SCEV *ExitCount = nullptr; 205c5ef502eSSam Parker Type *CountType = nullptr; 206c5ef502eSSam Parker BranchInst *ExitBranch = nullptr; 207c5ef502eSSam Parker Value *LoopDecrement = nullptr; 208c5ef502eSSam Parker bool UsePHICounter = false; 2099a92be1bSSam Parker bool UseLoopGuard = false; 2109a92be1bSSam Parker BasicBlock *BeginBB = nullptr; 211c5ef502eSSam Parker }; 212c5ef502eSSam Parker } 213c5ef502eSSam Parker 214c5ef502eSSam Parker char HardwareLoops::ID = 0; 215c5ef502eSSam Parker 216c5ef502eSSam Parker bool HardwareLoops::runOnFunction(Function &F) { 217c5ef502eSSam Parker if (skipFunction(F)) 218c5ef502eSSam Parker return false; 219c5ef502eSSam Parker 220c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n"); 221c5ef502eSSam Parker 222c5ef502eSSam Parker LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 223c5ef502eSSam Parker SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 224c5ef502eSSam Parker DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 225c5ef502eSSam Parker TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 226c5ef502eSSam Parker DL = &F.getParent()->getDataLayout(); 22792164cf2SSjoerd Meijer ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 228c5ef502eSSam Parker auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2299c27b59cSTeresa Johnson LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr; 23006fef0b3SJinsong Ji PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 231c5ef502eSSam Parker AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 232c5ef502eSSam Parker M = F.getParent(); 233c5ef502eSSam Parker 234c5ef502eSSam Parker for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { 235c5ef502eSSam Parker Loop *L = *I; 236c5ef502eSSam Parker if (!L->getParentLoop()) 237c5ef502eSSam Parker TryConvertLoop(L); 238c5ef502eSSam Parker } 239c5ef502eSSam Parker 240c5ef502eSSam Parker return MadeChange; 241c5ef502eSSam Parker } 242c5ef502eSSam Parker 243c5ef502eSSam Parker // Return true if the search should stop, which will be when an inner loop is 244c5ef502eSSam Parker // converted and the parent loop doesn't support containing a hardware loop. 245c5ef502eSSam Parker bool HardwareLoops::TryConvertLoop(Loop *L) { 246c5ef502eSSam Parker // Process nested loops first. 24792164cf2SSjoerd Meijer for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { 24892164cf2SSjoerd Meijer if (TryConvertLoop(*I)) { 24992164cf2SSjoerd Meijer reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested", 25092164cf2SSjoerd Meijer ORE, L); 251c5ef502eSSam Parker return true; // Stop search. 25292164cf2SSjoerd Meijer } 25392164cf2SSjoerd Meijer } 254c5ef502eSSam Parker 255c5b918deSChen Zheng HardwareLoopInfo HWLoopInfo(L); 25692164cf2SSjoerd Meijer if (!HWLoopInfo.canAnalyze(*LI)) { 25792164cf2SSjoerd Meijer reportHWLoopFailure("cannot analyze loop, irreducible control flow", 25892164cf2SSjoerd Meijer "HWLoopCannotAnalyze", ORE, L); 259aa999528SChen Zheng return false; 26092164cf2SSjoerd Meijer } 261aa999528SChen Zheng 26292164cf2SSjoerd Meijer if (!ForceHardwareLoops && 26392164cf2SSjoerd Meijer !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) { 26492164cf2SSjoerd Meijer reportHWLoopFailure("it's not profitable to create a hardware-loop", 26592164cf2SSjoerd Meijer "HWLoopNotProfitable", ORE, L); 26692164cf2SSjoerd Meijer return false; 26792164cf2SSjoerd Meijer } 268c5ef502eSSam Parker 269c5ef502eSSam Parker // Allow overriding of the counter width and loop decrement value. 270c5ef502eSSam Parker if (CounterBitWidth.getNumOccurrences()) 271c5ef502eSSam Parker HWLoopInfo.CountType = 272c5ef502eSSam Parker IntegerType::get(M->getContext(), CounterBitWidth); 273c5ef502eSSam Parker 274c5ef502eSSam Parker if (LoopDecrement.getNumOccurrences()) 275c5ef502eSSam Parker HWLoopInfo.LoopDecrement = 276c5ef502eSSam Parker ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); 277c5ef502eSSam Parker 278c5ef502eSSam Parker MadeChange |= TryConvertLoop(HWLoopInfo); 279c5ef502eSSam Parker return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); 280c5ef502eSSam Parker } 281c5ef502eSSam Parker 282c5b918deSChen Zheng bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { 283c5ef502eSSam Parker 28406fef0b3SJinsong Ji Loop *L = HWLoopInfo.L; 28506fef0b3SJinsong Ji LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); 286c5ef502eSSam Parker 287c5b918deSChen Zheng if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, 28892164cf2SSjoerd Meijer ForceHardwareLoopPHI)) { 28992164cf2SSjoerd Meijer // TODO: there can be many reasons a loop is not considered a 29092164cf2SSjoerd Meijer // candidate, so we should let isHardwareLoopCandidate fill in the 29192164cf2SSjoerd Meijer // reason and then report a better message here. 29292164cf2SSjoerd Meijer reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L); 293c5ef502eSSam Parker return false; 29492164cf2SSjoerd Meijer } 295c5ef502eSSam Parker 296c5b918deSChen Zheng assert( 297c5b918deSChen Zheng (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && 298c5b918deSChen Zheng "Hardware Loop must have set exit info."); 299c5b918deSChen Zheng 30006fef0b3SJinsong Ji BasicBlock *Preheader = L->getLoopPreheader(); 30106fef0b3SJinsong Ji 30206fef0b3SJinsong Ji // If we don't have a preheader, then insert one. 30306fef0b3SJinsong Ji if (!Preheader) 30406fef0b3SJinsong Ji Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA); 30506fef0b3SJinsong Ji if (!Preheader) 30606fef0b3SJinsong Ji return false; 30706fef0b3SJinsong Ji 30892164cf2SSjoerd Meijer HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE); 309c5ef502eSSam Parker HWLoop.Create(); 310c5ef502eSSam Parker ++NumHWLoops; 311c5ef502eSSam Parker return true; 312c5ef502eSSam Parker } 313c5ef502eSSam Parker 314c5ef502eSSam Parker void HardwareLoop::Create() { 315c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); 3169a92be1bSSam Parker 3179a92be1bSSam Parker Value *LoopCountInit = InitLoopCount(); 31892164cf2SSjoerd Meijer if (!LoopCountInit) { 31992164cf2SSjoerd Meijer reportHWLoopFailure("could not safely create a loop count expression", 32092164cf2SSjoerd Meijer "HWLoopNotSafe", ORE, L); 32106fef0b3SJinsong Ji return; 32292164cf2SSjoerd Meijer } 323c5ef502eSSam Parker 3249a92be1bSSam Parker InsertIterationSetup(LoopCountInit); 325c5ef502eSSam Parker 326c5ef502eSSam Parker if (UsePHICounter || ForceHardwareLoopPHI) { 327c5ef502eSSam Parker Instruction *LoopDec = InsertLoopRegDec(LoopCountInit); 328c5ef502eSSam Parker Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec); 329c5ef502eSSam Parker LoopDec->setOperand(0, EltsRem); 330c5ef502eSSam Parker UpdateBranch(LoopDec); 331c5ef502eSSam Parker } else 332c5ef502eSSam Parker InsertLoopDec(); 333c5ef502eSSam Parker 334c5ef502eSSam Parker // Run through the basic blocks of the loop and see if any of them have dead 335c5ef502eSSam Parker // PHIs that can be removed. 336c5ef502eSSam Parker for (auto I : L->blocks()) 337c5ef502eSSam Parker DeleteDeadPHIs(I); 338c5ef502eSSam Parker } 339c5ef502eSSam Parker 3409a92be1bSSam Parker static bool CanGenerateTest(Loop *L, Value *Count) { 3419a92be1bSSam Parker BasicBlock *Preheader = L->getLoopPreheader(); 3429a92be1bSSam Parker if (!Preheader->getSinglePredecessor()) 3439a92be1bSSam Parker return false; 3449a92be1bSSam Parker 3459a92be1bSSam Parker BasicBlock *Pred = Preheader->getSinglePredecessor(); 3469a92be1bSSam Parker if (!isa<BranchInst>(Pred->getTerminator())) 3479a92be1bSSam Parker return false; 3489a92be1bSSam Parker 3499a92be1bSSam Parker auto *BI = cast<BranchInst>(Pred->getTerminator()); 3509a92be1bSSam Parker if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition())) 3519a92be1bSSam Parker return false; 3529a92be1bSSam Parker 3539a92be1bSSam Parker // Check that the icmp is checking for equality of Count and zero and that 3549a92be1bSSam Parker // a non-zero value results in entering the loop. 3559a92be1bSSam Parker auto ICmp = cast<ICmpInst>(BI->getCondition()); 35698722691SSam Parker LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n"); 3579a92be1bSSam Parker if (!ICmp->isEquality()) 3589a92be1bSSam Parker return false; 3599a92be1bSSam Parker 3609a92be1bSSam Parker auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) { 3619a92be1bSSam Parker if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx))) 3629a92be1bSSam Parker return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count; 3639a92be1bSSam Parker return false; 3649a92be1bSSam Parker }; 3659a92be1bSSam Parker 3669a92be1bSSam Parker if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1)) 3679a92be1bSSam Parker return false; 3689a92be1bSSam Parker 3699a92be1bSSam Parker unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1; 3709a92be1bSSam Parker if (BI->getSuccessor(SuccIdx) != Preheader) 3719a92be1bSSam Parker return false; 3729a92be1bSSam Parker 3739a92be1bSSam Parker return true; 3749a92be1bSSam Parker } 3759a92be1bSSam Parker 3769a92be1bSSam Parker Value *HardwareLoop::InitLoopCount() { 3779a92be1bSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n"); 3789a92be1bSSam Parker // Can we replace a conditional branch with an intrinsic that sets the 3799a92be1bSSam Parker // loop counter and tests that is not zero? 3809a92be1bSSam Parker 381c5ef502eSSam Parker SCEVExpander SCEVE(SE, DL, "loopcnt"); 38206fef0b3SJinsong Ji if (!ExitCount->getType()->isPointerTy() && 38306fef0b3SJinsong Ji ExitCount->getType() != CountType) 38406fef0b3SJinsong Ji ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); 38506fef0b3SJinsong Ji 38606fef0b3SJinsong Ji ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); 387c5ef502eSSam Parker 3889a92be1bSSam Parker // If we're trying to use the 'test and set' form of the intrinsic, we need 3899a92be1bSSam Parker // to replace a conditional branch that is controlling entry to the loop. It 3909a92be1bSSam Parker // is likely (guaranteed?) that the preheader has an unconditional branch to 3919a92be1bSSam Parker // the loop header, so also check if it has a single predecessor. 3929a92be1bSSam Parker if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, 39306fef0b3SJinsong Ji SE.getZero(ExitCount->getType()))) { 39406fef0b3SJinsong Ji LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); 3959a92be1bSSam Parker UseLoopGuard |= ForceGuardLoopEntry; 39606fef0b3SJinsong Ji } else 3979a92be1bSSam Parker UseLoopGuard = false; 3989a92be1bSSam Parker 3999a92be1bSSam Parker BasicBlock *BB = L->getLoopPreheader(); 4009a92be1bSSam Parker if (UseLoopGuard && BB->getSinglePredecessor() && 40106fef0b3SJinsong Ji cast<BranchInst>(BB->getTerminator())->isUnconditional()) 4029a92be1bSSam Parker BB = BB->getSinglePredecessor(); 40306fef0b3SJinsong Ji 40406fef0b3SJinsong Ji if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { 40506fef0b3SJinsong Ji LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " 40606fef0b3SJinsong Ji << *ExitCount << "\n"); 40706fef0b3SJinsong Ji return nullptr; 408c5ef502eSSam Parker } 409c5ef502eSSam Parker 410c5ef502eSSam Parker Value *Count = SCEVE.expandCodeFor(ExitCount, CountType, 411c5ef502eSSam Parker BB->getTerminator()); 4129a92be1bSSam Parker 4139a92be1bSSam Parker // FIXME: We've expanded Count where we hope to insert the counter setting 4149a92be1bSSam Parker // intrinsic. But, in the case of the 'test and set' form, we may fallback to 4159a92be1bSSam Parker // the just 'set' form and in which case the insertion block is most likely 4169a92be1bSSam Parker // different. It means there will be instruction(s) in a block that possibly 4179a92be1bSSam Parker // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue, 4189a92be1bSSam Parker // but it's doesn't appear to work in all cases. 4199a92be1bSSam Parker 4209a92be1bSSam Parker UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count); 4219a92be1bSSam Parker BeginBB = UseLoopGuard ? BB : L->getLoopPreheader(); 4229a92be1bSSam Parker LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n" 4239a92be1bSSam Parker << " - Expanded Count in " << BB->getName() << "\n" 4249a92be1bSSam Parker << " - Will insert set counter intrinsic into: " 4259a92be1bSSam Parker << BeginBB->getName() << "\n"); 426c5ef502eSSam Parker return Count; 427c5ef502eSSam Parker } 428c5ef502eSSam Parker 4299a92be1bSSam Parker void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { 4309a92be1bSSam Parker IRBuilder<> Builder(BeginBB->getTerminator()); 431c5ef502eSSam Parker Type *Ty = LoopCountInit->getType(); 4329a92be1bSSam Parker Intrinsic::ID ID = UseLoopGuard ? 4339a92be1bSSam Parker Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations; 4349a92be1bSSam Parker Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty); 4359a92be1bSSam Parker Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit); 4369a92be1bSSam Parker 4379a92be1bSSam Parker // Use the return value of the intrinsic to control the entry of the loop. 4389a92be1bSSam Parker if (UseLoopGuard) { 4399a92be1bSSam Parker assert((isa<BranchInst>(BeginBB->getTerminator()) && 4409a92be1bSSam Parker cast<BranchInst>(BeginBB->getTerminator())->isConditional()) && 4419a92be1bSSam Parker "Expected conditional branch"); 4429a92be1bSSam Parker auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator()); 4439a92be1bSSam Parker LoopGuard->setCondition(SetCount); 4449a92be1bSSam Parker if (LoopGuard->getSuccessor(0) != L->getLoopPreheader()) 4459a92be1bSSam Parker LoopGuard->swapSuccessors(); 4469a92be1bSSam Parker } 4479a92be1bSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " 4489a92be1bSSam Parker << *SetCount << "\n"); 449c5ef502eSSam Parker } 450c5ef502eSSam Parker 451c5ef502eSSam Parker void HardwareLoop::InsertLoopDec() { 452c5ef502eSSam Parker IRBuilder<> CondBuilder(ExitBranch); 453c5ef502eSSam Parker 454c5ef502eSSam Parker Function *DecFunc = 455c5ef502eSSam Parker Intrinsic::getDeclaration(M, Intrinsic::loop_decrement, 456c5ef502eSSam Parker LoopDecrement->getType()); 457c5ef502eSSam Parker Value *Ops[] = { LoopDecrement }; 458c5ef502eSSam Parker Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops); 459c5ef502eSSam Parker Value *OldCond = ExitBranch->getCondition(); 460c5ef502eSSam Parker ExitBranch->setCondition(NewCond); 461c5ef502eSSam Parker 462c5ef502eSSam Parker // The false branch must exit the loop. 463c5ef502eSSam Parker if (!L->contains(ExitBranch->getSuccessor(0))) 464c5ef502eSSam Parker ExitBranch->swapSuccessors(); 465c5ef502eSSam Parker 466c5ef502eSSam Parker // The old condition may be dead now, and may have even created a dead PHI 467c5ef502eSSam Parker // (the original induction variable). 468c5ef502eSSam Parker RecursivelyDeleteTriviallyDeadInstructions(OldCond); 469c5ef502eSSam Parker 470c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n"); 471c5ef502eSSam Parker } 472c5ef502eSSam Parker 473c5ef502eSSam Parker Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) { 474c5ef502eSSam Parker IRBuilder<> CondBuilder(ExitBranch); 475c5ef502eSSam Parker 476c5ef502eSSam Parker Function *DecFunc = 477c5ef502eSSam Parker Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg, 478c5ef502eSSam Parker { EltsRem->getType(), EltsRem->getType(), 479c5ef502eSSam Parker LoopDecrement->getType() 480c5ef502eSSam Parker }); 481c5ef502eSSam Parker Value *Ops[] = { EltsRem, LoopDecrement }; 482c5ef502eSSam Parker Value *Call = CondBuilder.CreateCall(DecFunc, Ops); 483c5ef502eSSam Parker 484c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n"); 485c5ef502eSSam Parker return cast<Instruction>(Call); 486c5ef502eSSam Parker } 487c5ef502eSSam Parker 488c5ef502eSSam Parker PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) { 489c5ef502eSSam Parker BasicBlock *Preheader = L->getLoopPreheader(); 490c5ef502eSSam Parker BasicBlock *Header = L->getHeader(); 491c5ef502eSSam Parker BasicBlock *Latch = ExitBranch->getParent(); 492c5ef502eSSam Parker IRBuilder<> Builder(Header->getFirstNonPHI()); 493c5ef502eSSam Parker PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2); 494c5ef502eSSam Parker Index->addIncoming(NumElts, Preheader); 495c5ef502eSSam Parker Index->addIncoming(EltsRem, Latch); 496c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n"); 497c5ef502eSSam Parker return Index; 498c5ef502eSSam Parker } 499c5ef502eSSam Parker 500c5ef502eSSam Parker void HardwareLoop::UpdateBranch(Value *EltsRem) { 501c5ef502eSSam Parker IRBuilder<> CondBuilder(ExitBranch); 502c5ef502eSSam Parker Value *NewCond = 503c5ef502eSSam Parker CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0)); 504c5ef502eSSam Parker Value *OldCond = ExitBranch->getCondition(); 505c5ef502eSSam Parker ExitBranch->setCondition(NewCond); 506c5ef502eSSam Parker 507c5ef502eSSam Parker // The false branch must exit the loop. 508c5ef502eSSam Parker if (!L->contains(ExitBranch->getSuccessor(0))) 509c5ef502eSSam Parker ExitBranch->swapSuccessors(); 510c5ef502eSSam Parker 511c5ef502eSSam Parker // The old condition may be dead now, and may have even created a dead PHI 512c5ef502eSSam Parker // (the original induction variable). 513c5ef502eSSam Parker RecursivelyDeleteTriviallyDeadInstructions(OldCond); 514c5ef502eSSam Parker } 515c5ef502eSSam Parker 516c5ef502eSSam Parker INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) 517c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 518c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 519c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 52092164cf2SSjoerd Meijer INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 521c5ef502eSSam Parker INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) 522c5ef502eSSam Parker 523c5ef502eSSam Parker FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); } 524