1c5ef502eSSam Parker //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2c5ef502eSSam Parker //
3c5ef502eSSam Parker // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c5ef502eSSam Parker // See https://llvm.org/LICENSE.txt for license information.
5c5ef502eSSam Parker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6c5ef502eSSam Parker //
7c5ef502eSSam Parker //===----------------------------------------------------------------------===//
8c5ef502eSSam Parker /// \file
9c5ef502eSSam Parker /// Insert hardware loop intrinsics into loops which are deemed profitable by
10c5ef502eSSam Parker /// the target, by querying TargetTransformInfo. A hardware loop comprises of
11c5ef502eSSam Parker /// two intrinsics: one, outside the loop, to set the loop iteration count and
12c5ef502eSSam Parker /// another, in the exit block, to decrement the counter. The decremented value
13c5ef502eSSam Parker /// can either be carried through the loop via a phi or handled in some opaque
14c5ef502eSSam Parker /// way by the target.
15c5ef502eSSam Parker ///
16c5ef502eSSam Parker //===----------------------------------------------------------------------===//
17c5ef502eSSam Parker 
18c5ef502eSSam Parker #include "llvm/ADT/Statistic.h"
19c5ef502eSSam Parker #include "llvm/Analysis/AssumptionCache.h"
20c5ef502eSSam Parker #include "llvm/Analysis/LoopInfo.h"
2192164cf2SSjoerd Meijer #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22c5ef502eSSam Parker #include "llvm/Analysis/ScalarEvolution.h"
23a53dddb3SSimon Pilgrim #include "llvm/Analysis/TargetLibraryInfo.h"
24c5ef502eSSam Parker #include "llvm/Analysis/TargetTransformInfo.h"
25c5ef502eSSam Parker #include "llvm/CodeGen/Passes.h"
26c5ef502eSSam Parker #include "llvm/IR/BasicBlock.h"
2705da2fe5SReid Kleckner #include "llvm/IR/Constants.h"
28c5ef502eSSam Parker #include "llvm/IR/Dominators.h"
29c5ef502eSSam Parker #include "llvm/IR/IRBuilder.h"
30c5ef502eSSam Parker #include "llvm/IR/Instructions.h"
31c5ef502eSSam Parker #include "llvm/IR/IntrinsicInst.h"
32c5ef502eSSam Parker #include "llvm/IR/Value.h"
3305da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
3405da2fe5SReid Kleckner #include "llvm/Pass.h"
3505da2fe5SReid Kleckner #include "llvm/PassRegistry.h"
364c1a1d3cSReid Kleckner #include "llvm/Support/CommandLine.h"
37c5ef502eSSam Parker #include "llvm/Support/Debug.h"
3806fef0b3SJinsong Ji #include "llvm/Transforms/Utils.h"
39c5ef502eSSam Parker #include "llvm/Transforms/Utils/BasicBlockUtils.h"
40c5ef502eSSam Parker #include "llvm/Transforms/Utils/Local.h"
4106fef0b3SJinsong Ji #include "llvm/Transforms/Utils/LoopUtils.h"
42bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
43c5ef502eSSam Parker 
44c5ef502eSSam Parker #define DEBUG_TYPE "hardware-loops"
45c5ef502eSSam Parker 
46c5ef502eSSam Parker #define HW_LOOPS_NAME "Hardware Loop Insertion"
47c5ef502eSSam Parker 
48c5ef502eSSam Parker using namespace llvm;
49c5ef502eSSam Parker 
50c5ef502eSSam Parker static cl::opt<bool>
51c5ef502eSSam Parker ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
52c5ef502eSSam Parker                    cl::desc("Force hardware loops intrinsics to be inserted"));
53c5ef502eSSam Parker 
54c5ef502eSSam Parker static cl::opt<bool>
55c5ef502eSSam Parker ForceHardwareLoopPHI(
56c5ef502eSSam Parker   "force-hardware-loop-phi", cl::Hidden, cl::init(false),
57c5ef502eSSam Parker   cl::desc("Force hardware loop counter to be updated through a phi"));
58c5ef502eSSam Parker 
59c5ef502eSSam Parker static cl::opt<bool>
60c5ef502eSSam Parker ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
61c5ef502eSSam Parker                 cl::desc("Force allowance of nested hardware loops"));
62c5ef502eSSam Parker 
63c5ef502eSSam Parker static cl::opt<unsigned>
64c5ef502eSSam Parker LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
65c5ef502eSSam Parker             cl::desc("Set the loop decrement value"));
66c5ef502eSSam Parker 
67c5ef502eSSam Parker static cl::opt<unsigned>
68c5ef502eSSam Parker CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
69c5ef502eSSam Parker                 cl::desc("Set the loop counter bitwidth"));
70c5ef502eSSam Parker 
719a92be1bSSam Parker static cl::opt<bool>
729a92be1bSSam Parker ForceGuardLoopEntry(
739a92be1bSSam Parker   "force-hardware-loop-guard", cl::Hidden, cl::init(false),
749a92be1bSSam Parker   cl::desc("Force generation of loop guard intrinsic"));
759a92be1bSSam Parker 
76c5ef502eSSam Parker STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
77c5ef502eSSam Parker 
7892164cf2SSjoerd Meijer #ifndef NDEBUG
debugHWLoopFailure(const StringRef DebugMsg,Instruction * I)7992164cf2SSjoerd Meijer static void debugHWLoopFailure(const StringRef DebugMsg,
8092164cf2SSjoerd Meijer     Instruction *I) {
8192164cf2SSjoerd Meijer   dbgs() << "HWLoops: " << DebugMsg;
8292164cf2SSjoerd Meijer   if (I)
8392164cf2SSjoerd Meijer     dbgs() << ' ' << *I;
8492164cf2SSjoerd Meijer   else
8592164cf2SSjoerd Meijer     dbgs() << '.';
8692164cf2SSjoerd Meijer   dbgs() << '\n';
8792164cf2SSjoerd Meijer }
8892164cf2SSjoerd Meijer #endif
8992164cf2SSjoerd Meijer 
9092164cf2SSjoerd Meijer static OptimizationRemarkAnalysis
createHWLoopAnalysis(StringRef RemarkName,Loop * L,Instruction * I)9192164cf2SSjoerd Meijer createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
9292164cf2SSjoerd Meijer   Value *CodeRegion = L->getHeader();
9392164cf2SSjoerd Meijer   DebugLoc DL = L->getStartLoc();
9492164cf2SSjoerd Meijer 
9592164cf2SSjoerd Meijer   if (I) {
9692164cf2SSjoerd Meijer     CodeRegion = I->getParent();
9792164cf2SSjoerd Meijer     // If there is no debug location attached to the instruction, revert back to
9892164cf2SSjoerd Meijer     // using the loop's.
9992164cf2SSjoerd Meijer     if (I->getDebugLoc())
10092164cf2SSjoerd Meijer       DL = I->getDebugLoc();
10192164cf2SSjoerd Meijer   }
10292164cf2SSjoerd Meijer 
10392164cf2SSjoerd Meijer   OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
10492164cf2SSjoerd Meijer   R << "hardware-loop not created: ";
10592164cf2SSjoerd Meijer   return R;
10692164cf2SSjoerd Meijer }
10792164cf2SSjoerd Meijer 
108c5ef502eSSam Parker namespace {
109c5ef502eSSam Parker 
reportHWLoopFailure(const StringRef Msg,const StringRef ORETag,OptimizationRemarkEmitter * ORE,Loop * TheLoop,Instruction * I=nullptr)11092164cf2SSjoerd Meijer   void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
11192164cf2SSjoerd Meijer       OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
11292164cf2SSjoerd Meijer     LLVM_DEBUG(debugHWLoopFailure(Msg, I));
11392164cf2SSjoerd Meijer     ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
11492164cf2SSjoerd Meijer   }
11592164cf2SSjoerd Meijer 
116c5ef502eSSam Parker   using TTI = TargetTransformInfo;
117c5ef502eSSam Parker 
118c5ef502eSSam Parker   class HardwareLoops : public FunctionPass {
119c5ef502eSSam Parker   public:
120c5ef502eSSam Parker     static char ID;
121c5ef502eSSam Parker 
HardwareLoops()122c5ef502eSSam Parker     HardwareLoops() : FunctionPass(ID) {
123c5ef502eSSam Parker       initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
124c5ef502eSSam Parker     }
125c5ef502eSSam Parker 
126c5ef502eSSam Parker     bool runOnFunction(Function &F) override;
127c5ef502eSSam Parker 
getAnalysisUsage(AnalysisUsage & AU) const128c5ef502eSSam Parker     void getAnalysisUsage(AnalysisUsage &AU) const override {
129c5ef502eSSam Parker       AU.addRequired<LoopInfoWrapperPass>();
130c5ef502eSSam Parker       AU.addPreserved<LoopInfoWrapperPass>();
131c5ef502eSSam Parker       AU.addRequired<DominatorTreeWrapperPass>();
132c5ef502eSSam Parker       AU.addPreserved<DominatorTreeWrapperPass>();
133c5ef502eSSam Parker       AU.addRequired<ScalarEvolutionWrapperPass>();
134c5ef502eSSam Parker       AU.addRequired<AssumptionCacheTracker>();
135c5ef502eSSam Parker       AU.addRequired<TargetTransformInfoWrapperPass>();
13692164cf2SSjoerd Meijer       AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
137c5ef502eSSam Parker     }
138c5ef502eSSam Parker 
139c5ef502eSSam Parker     // Try to convert the given Loop into a hardware loop.
140c5ef502eSSam Parker     bool TryConvertLoop(Loop *L);
141c5ef502eSSam Parker 
142c5ef502eSSam Parker     // Given that the target believes the loop to be profitable, try to
143c5ef502eSSam Parker     // convert it.
144c5b918deSChen Zheng     bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
145c5ef502eSSam Parker 
146c5ef502eSSam Parker   private:
147c5ef502eSSam Parker     ScalarEvolution *SE = nullptr;
148c5ef502eSSam Parker     LoopInfo *LI = nullptr;
149c5ef502eSSam Parker     const DataLayout *DL = nullptr;
15092164cf2SSjoerd Meijer     OptimizationRemarkEmitter *ORE = nullptr;
151c5ef502eSSam Parker     const TargetTransformInfo *TTI = nullptr;
152c5ef502eSSam Parker     DominatorTree *DT = nullptr;
15306fef0b3SJinsong Ji     bool PreserveLCSSA = false;
154c5ef502eSSam Parker     AssumptionCache *AC = nullptr;
155c5ef502eSSam Parker     TargetLibraryInfo *LibInfo = nullptr;
156c5ef502eSSam Parker     Module *M = nullptr;
157c5ef502eSSam Parker     bool MadeChange = false;
158c5ef502eSSam Parker   };
159c5ef502eSSam Parker 
160c5ef502eSSam Parker   class HardwareLoop {
161c5ef502eSSam Parker     // Expand the trip count scev into a value that we can use.
1629a92be1bSSam Parker     Value *InitLoopCount();
163c5ef502eSSam Parker 
164c5ef502eSSam Parker     // Insert the set_loop_iteration intrinsic.
165b2ac9681SDavid Green     Value *InsertIterationSetup(Value *LoopCountInit);
166c5ef502eSSam Parker 
167c5ef502eSSam Parker     // Insert the loop_decrement intrinsic.
168c5ef502eSSam Parker     void InsertLoopDec();
169c5ef502eSSam Parker 
170c5ef502eSSam Parker     // Insert the loop_decrement_reg intrinsic.
171c5ef502eSSam Parker     Instruction *InsertLoopRegDec(Value *EltsRem);
172c5ef502eSSam Parker 
173c5ef502eSSam Parker     // If the target requires the counter value to be updated in the loop,
174c5ef502eSSam Parker     // insert a phi to hold the value. The intended purpose is for use by
175c5ef502eSSam Parker     // loop_decrement_reg.
176c5ef502eSSam Parker     PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
177c5ef502eSSam Parker 
178c5ef502eSSam Parker     // Create a new cmp, that checks the returned value of loop_decrement*,
179c5ef502eSSam Parker     // and update the exit branch to use it.
180c5ef502eSSam Parker     void UpdateBranch(Value *EltsRem);
181c5ef502eSSam Parker 
182c5ef502eSSam Parker   public:
HardwareLoop(HardwareLoopInfo & Info,ScalarEvolution & SE,const DataLayout & DL,OptimizationRemarkEmitter * ORE)183c5b918deSChen Zheng     HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
18492164cf2SSjoerd Meijer                  const DataLayout &DL,
18592164cf2SSjoerd Meijer                  OptimizationRemarkEmitter *ORE) :
18692164cf2SSjoerd Meijer       SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
18734badc40SChen Zheng       ExitCount(Info.ExitCount),
188c5ef502eSSam Parker       CountType(Info.CountType),
189c5ef502eSSam Parker       ExitBranch(Info.ExitBranch),
190c5ef502eSSam Parker       LoopDecrement(Info.LoopDecrement),
1919a92be1bSSam Parker       UsePHICounter(Info.CounterInReg),
1929a92be1bSSam Parker       UseLoopGuard(Info.PerformEntryTest) { }
193c5ef502eSSam Parker 
194c5ef502eSSam Parker     void Create();
195c5ef502eSSam Parker 
196c5ef502eSSam Parker   private:
197c5ef502eSSam Parker     ScalarEvolution &SE;
198c5ef502eSSam Parker     const DataLayout &DL;
19992164cf2SSjoerd Meijer     OptimizationRemarkEmitter *ORE = nullptr;
200c5ef502eSSam Parker     Loop *L                 = nullptr;
201c5ef502eSSam Parker     Module *M               = nullptr;
20234badc40SChen Zheng     const SCEV *ExitCount   = nullptr;
203c5ef502eSSam Parker     Type *CountType         = nullptr;
204c5ef502eSSam Parker     BranchInst *ExitBranch  = nullptr;
205c5ef502eSSam Parker     Value *LoopDecrement    = nullptr;
206c5ef502eSSam Parker     bool UsePHICounter      = false;
2079a92be1bSSam Parker     bool UseLoopGuard       = false;
2089a92be1bSSam Parker     BasicBlock *BeginBB     = nullptr;
209c5ef502eSSam Parker   };
210c5ef502eSSam Parker }
211c5ef502eSSam Parker 
212c5ef502eSSam Parker char HardwareLoops::ID = 0;
213c5ef502eSSam Parker 
runOnFunction(Function & F)214c5ef502eSSam Parker bool HardwareLoops::runOnFunction(Function &F) {
215c5ef502eSSam Parker   if (skipFunction(F))
216c5ef502eSSam Parker     return false;
217c5ef502eSSam Parker 
218c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
219c5ef502eSSam Parker 
220c5ef502eSSam Parker   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
221c5ef502eSSam Parker   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
222c5ef502eSSam Parker   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
223c5ef502eSSam Parker   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
224c5ef502eSSam Parker   DL = &F.getParent()->getDataLayout();
22592164cf2SSjoerd Meijer   ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
226c5ef502eSSam Parker   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2279c27b59cSTeresa Johnson   LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
22806fef0b3SJinsong Ji   PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
229c5ef502eSSam Parker   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
230c5ef502eSSam Parker   M = F.getParent();
231c5ef502eSSam Parker 
232905cf88dSKazu Hirata   for (Loop *L : *LI)
23389c1e35fSStefanos Baziotis     if (L->isOutermost())
234c5ef502eSSam Parker       TryConvertLoop(L);
235c5ef502eSSam Parker 
236c5ef502eSSam Parker   return MadeChange;
237c5ef502eSSam Parker }
238c5ef502eSSam Parker 
239c5ef502eSSam Parker // Return true if the search should stop, which will be when an inner loop is
240c5ef502eSSam Parker // converted and the parent loop doesn't support containing a hardware loop.
TryConvertLoop(Loop * L)241c5ef502eSSam Parker bool HardwareLoops::TryConvertLoop(Loop *L) {
242c5ef502eSSam Parker   // Process nested loops first.
2439e03547cSDavid Green   bool AnyChanged = false;
2449e03547cSDavid Green   for (Loop *SL : *L)
2459e03547cSDavid Green     AnyChanged |= TryConvertLoop(SL);
2469e03547cSDavid Green   if (AnyChanged) {
24792164cf2SSjoerd Meijer     reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
24892164cf2SSjoerd Meijer                         ORE, L);
249c5ef502eSSam Parker     return true; // Stop search.
25092164cf2SSjoerd Meijer   }
2519e03547cSDavid Green 
2529e03547cSDavid Green   LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
253c5ef502eSSam Parker 
254c5b918deSChen Zheng   HardwareLoopInfo HWLoopInfo(L);
25592164cf2SSjoerd Meijer   if (!HWLoopInfo.canAnalyze(*LI)) {
25692164cf2SSjoerd Meijer     reportHWLoopFailure("cannot analyze loop, irreducible control flow",
25792164cf2SSjoerd Meijer                         "HWLoopCannotAnalyze", ORE, L);
258aa999528SChen Zheng     return false;
25992164cf2SSjoerd Meijer   }
260aa999528SChen Zheng 
26192164cf2SSjoerd Meijer   if (!ForceHardwareLoops &&
26292164cf2SSjoerd Meijer       !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
26392164cf2SSjoerd Meijer     reportHWLoopFailure("it's not profitable to create a hardware-loop",
26492164cf2SSjoerd Meijer                         "HWLoopNotProfitable", ORE, L);
26592164cf2SSjoerd Meijer     return false;
26692164cf2SSjoerd Meijer   }
267c5ef502eSSam Parker 
268c5ef502eSSam Parker   // Allow overriding of the counter width and loop decrement value.
269c5ef502eSSam Parker   if (CounterBitWidth.getNumOccurrences())
270c5ef502eSSam Parker     HWLoopInfo.CountType =
271c5ef502eSSam Parker       IntegerType::get(M->getContext(), CounterBitWidth);
272c5ef502eSSam Parker 
273c5ef502eSSam Parker   if (LoopDecrement.getNumOccurrences())
274c5ef502eSSam Parker     HWLoopInfo.LoopDecrement =
275c5ef502eSSam Parker       ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
276c5ef502eSSam Parker 
277c5ef502eSSam Parker   MadeChange |= TryConvertLoop(HWLoopInfo);
278c5ef502eSSam Parker   return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
279c5ef502eSSam Parker }
280c5ef502eSSam Parker 
TryConvertLoop(HardwareLoopInfo & HWLoopInfo)281c5b918deSChen Zheng bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
282c5ef502eSSam Parker 
28306fef0b3SJinsong Ji   Loop *L = HWLoopInfo.L;
28406fef0b3SJinsong Ji   LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
285c5ef502eSSam Parker 
286c5b918deSChen Zheng   if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
28792164cf2SSjoerd Meijer                                           ForceHardwareLoopPHI)) {
28892164cf2SSjoerd Meijer     // TODO: there can be many reasons a loop is not considered a
28992164cf2SSjoerd Meijer     // candidate, so we should let isHardwareLoopCandidate fill in the
29092164cf2SSjoerd Meijer     // reason and then report a better message here.
29192164cf2SSjoerd Meijer     reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
292c5ef502eSSam Parker     return false;
29392164cf2SSjoerd Meijer   }
294c5ef502eSSam Parker 
295c5b918deSChen Zheng   assert(
29634badc40SChen Zheng       (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
297c5b918deSChen Zheng       "Hardware Loop must have set exit info.");
298c5b918deSChen Zheng 
29906fef0b3SJinsong Ji   BasicBlock *Preheader = L->getLoopPreheader();
30006fef0b3SJinsong Ji 
30106fef0b3SJinsong Ji   // If we don't have a preheader, then insert one.
30206fef0b3SJinsong Ji   if (!Preheader)
30306fef0b3SJinsong Ji     Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
30406fef0b3SJinsong Ji   if (!Preheader)
30506fef0b3SJinsong Ji     return false;
30606fef0b3SJinsong Ji 
30792164cf2SSjoerd Meijer   HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
308c5ef502eSSam Parker   HWLoop.Create();
309c5ef502eSSam Parker   ++NumHWLoops;
310c5ef502eSSam Parker   return true;
311c5ef502eSSam Parker }
312c5ef502eSSam Parker 
Create()313c5ef502eSSam Parker void HardwareLoop::Create() {
314c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
3159a92be1bSSam Parker 
3169a92be1bSSam Parker   Value *LoopCountInit = InitLoopCount();
31792164cf2SSjoerd Meijer   if (!LoopCountInit) {
31892164cf2SSjoerd Meijer     reportHWLoopFailure("could not safely create a loop count expression",
31992164cf2SSjoerd Meijer                         "HWLoopNotSafe", ORE, L);
32006fef0b3SJinsong Ji     return;
32192164cf2SSjoerd Meijer   }
322c5ef502eSSam Parker 
323b2ac9681SDavid Green   Value *Setup = InsertIterationSetup(LoopCountInit);
324c5ef502eSSam Parker 
325c5ef502eSSam Parker   if (UsePHICounter || ForceHardwareLoopPHI) {
326c5ef502eSSam Parker     Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
327b2ac9681SDavid Green     Value *EltsRem = InsertPHICounter(Setup, LoopDec);
328c5ef502eSSam Parker     LoopDec->setOperand(0, EltsRem);
329c5ef502eSSam Parker     UpdateBranch(LoopDec);
330c5ef502eSSam Parker   } else
331c5ef502eSSam Parker     InsertLoopDec();
332c5ef502eSSam Parker 
333c5ef502eSSam Parker   // Run through the basic blocks of the loop and see if any of them have dead
334c5ef502eSSam Parker   // PHIs that can be removed.
335*9e6d1f4bSKazu Hirata   for (auto *I : L->blocks())
336c5ef502eSSam Parker     DeleteDeadPHIs(I);
337c5ef502eSSam Parker }
338c5ef502eSSam Parker 
CanGenerateTest(Loop * L,Value * Count)3399a92be1bSSam Parker static bool CanGenerateTest(Loop *L, Value *Count) {
3409a92be1bSSam Parker   BasicBlock *Preheader = L->getLoopPreheader();
3419a92be1bSSam Parker   if (!Preheader->getSinglePredecessor())
3429a92be1bSSam Parker     return false;
3439a92be1bSSam Parker 
3449a92be1bSSam Parker   BasicBlock *Pred = Preheader->getSinglePredecessor();
3459a92be1bSSam Parker   if (!isa<BranchInst>(Pred->getTerminator()))
3469a92be1bSSam Parker     return false;
3479a92be1bSSam Parker 
3489a92be1bSSam Parker   auto *BI = cast<BranchInst>(Pred->getTerminator());
3499a92be1bSSam Parker   if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
3509a92be1bSSam Parker     return false;
3519a92be1bSSam Parker 
3529a92be1bSSam Parker   // Check that the icmp is checking for equality of Count and zero and that
3539a92be1bSSam Parker   // a non-zero value results in entering the loop.
3549a92be1bSSam Parker   auto ICmp = cast<ICmpInst>(BI->getCondition());
35598722691SSam Parker   LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
3569a92be1bSSam Parker   if (!ICmp->isEquality())
3579a92be1bSSam Parker     return false;
3589a92be1bSSam Parker 
3599a92be1bSSam Parker   auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
3609a92be1bSSam Parker     if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
3619a92be1bSSam Parker       return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
3629a92be1bSSam Parker     return false;
3639a92be1bSSam Parker   };
3649a92be1bSSam Parker 
365c98a8a09SSam Parker   // Check if Count is a zext.
366c98a8a09SSam Parker   Value *CountBefZext =
367c98a8a09SSam Parker       isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
368c98a8a09SSam Parker 
369c98a8a09SSam Parker   if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
370c98a8a09SSam Parker       !IsCompareZero(ICmp, CountBefZext, 0) &&
371c98a8a09SSam Parker       !IsCompareZero(ICmp, CountBefZext, 1))
3729a92be1bSSam Parker     return false;
3739a92be1bSSam Parker 
3749a92be1bSSam Parker   unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
3759a92be1bSSam Parker   if (BI->getSuccessor(SuccIdx) != Preheader)
3769a92be1bSSam Parker     return false;
3779a92be1bSSam Parker 
3789a92be1bSSam Parker   return true;
3799a92be1bSSam Parker }
3809a92be1bSSam Parker 
InitLoopCount()3819a92be1bSSam Parker Value *HardwareLoop::InitLoopCount() {
3829a92be1bSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
3839a92be1bSSam Parker   // Can we replace a conditional branch with an intrinsic that sets the
3849a92be1bSSam Parker   // loop counter and tests that is not zero?
3859a92be1bSSam Parker 
386c5ef502eSSam Parker   SCEVExpander SCEVE(SE, DL, "loopcnt");
38734badc40SChen Zheng   if (!ExitCount->getType()->isPointerTy() &&
38834badc40SChen Zheng       ExitCount->getType() != CountType)
38934badc40SChen Zheng     ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
39034badc40SChen Zheng 
39134badc40SChen Zheng   ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
392c5ef502eSSam Parker 
3939a92be1bSSam Parker   // If we're trying to use the 'test and set' form of the intrinsic, we need
3949a92be1bSSam Parker   // to replace a conditional branch that is controlling entry to the loop. It
3959a92be1bSSam Parker   // is likely (guaranteed?) that the preheader has an unconditional branch to
3969a92be1bSSam Parker   // the loop header, so also check if it has a single predecessor.
39734badc40SChen Zheng   if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
39834badc40SChen Zheng                                   SE.getZero(ExitCount->getType()))) {
39906fef0b3SJinsong Ji     LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
4009a92be1bSSam Parker     UseLoopGuard |= ForceGuardLoopEntry;
40106fef0b3SJinsong Ji   } else
4029a92be1bSSam Parker     UseLoopGuard = false;
4039a92be1bSSam Parker 
4049a92be1bSSam Parker   BasicBlock *BB = L->getLoopPreheader();
4059a92be1bSSam Parker   if (UseLoopGuard && BB->getSinglePredecessor() &&
4066c348e40SSam Tebbs       cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
4076c348e40SSam Tebbs     BasicBlock *Predecessor = BB->getSinglePredecessor();
4086c348e40SSam Tebbs     // If it's not safe to create a while loop then don't force it and create a
4096c348e40SSam Tebbs     // do-while loop instead
410dcf4b733SNikita Popov     if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
4116c348e40SSam Tebbs         UseLoopGuard = false;
4126c348e40SSam Tebbs     else
4136c348e40SSam Tebbs         BB = Predecessor;
4146c348e40SSam Tebbs   }
41506fef0b3SJinsong Ji 
416dcf4b733SNikita Popov   if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
41734badc40SChen Zheng     LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
41834badc40SChen Zheng                << *ExitCount << "\n");
41906fef0b3SJinsong Ji     return nullptr;
420c5ef502eSSam Parker   }
421c5ef502eSSam Parker 
42234badc40SChen Zheng   Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
423c5ef502eSSam Parker                                      BB->getTerminator());
4249a92be1bSSam Parker 
4259a92be1bSSam Parker   // FIXME: We've expanded Count where we hope to insert the counter setting
4269a92be1bSSam Parker   // intrinsic. But, in the case of the 'test and set' form, we may fallback to
4279a92be1bSSam Parker   // the just 'set' form and in which case the insertion block is most likely
4289a92be1bSSam Parker   // different. It means there will be instruction(s) in a block that possibly
4299a92be1bSSam Parker   // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
4309a92be1bSSam Parker   // but it's doesn't appear to work in all cases.
4319a92be1bSSam Parker 
4329a92be1bSSam Parker   UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
4339a92be1bSSam Parker   BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
4349a92be1bSSam Parker   LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
4359a92be1bSSam Parker                     << " - Expanded Count in " << BB->getName() << "\n"
4369a92be1bSSam Parker                     << " - Will insert set counter intrinsic into: "
4379a92be1bSSam Parker                     << BeginBB->getName() << "\n");
438c5ef502eSSam Parker   return Count;
439c5ef502eSSam Parker }
440c5ef502eSSam Parker 
InsertIterationSetup(Value * LoopCountInit)441b2ac9681SDavid Green Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
4429a92be1bSSam Parker   IRBuilder<> Builder(BeginBB->getTerminator());
443c5ef502eSSam Parker   Type *Ty = LoopCountInit->getType();
444b2ac9681SDavid Green   bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
445fad70c30SDavid Green   Intrinsic::ID ID = UseLoopGuard
446fad70c30SDavid Green                          ? (UsePhi ? Intrinsic::test_start_loop_iterations
447fad70c30SDavid Green                                    : Intrinsic::test_set_loop_iterations)
448b2ac9681SDavid Green                          : (UsePhi ? Intrinsic::start_loop_iterations
449b2ac9681SDavid Green                                    : Intrinsic::set_loop_iterations);
4509a92be1bSSam Parker   Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
451fad70c30SDavid Green   Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
4529a92be1bSSam Parker 
4539a92be1bSSam Parker   // Use the return value of the intrinsic to control the entry of the loop.
4549a92be1bSSam Parker   if (UseLoopGuard) {
4559a92be1bSSam Parker     assert((isa<BranchInst>(BeginBB->getTerminator()) &&
4569a92be1bSSam Parker             cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
4579a92be1bSSam Parker            "Expected conditional branch");
458fad70c30SDavid Green 
459fad70c30SDavid Green     Value *SetCount =
460fad70c30SDavid Green         UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
4619a92be1bSSam Parker     auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
4629a92be1bSSam Parker     LoopGuard->setCondition(SetCount);
4639a92be1bSSam Parker     if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
4649a92be1bSSam Parker       LoopGuard->swapSuccessors();
4659a92be1bSSam Parker   }
466fad70c30SDavid Green   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
467fad70c30SDavid Green                     << "\n");
468fad70c30SDavid Green   if (UsePhi && UseLoopGuard)
469fad70c30SDavid Green     LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
470fad70c30SDavid Green   return !UsePhi ? LoopCountInit : LoopSetup;
471c5ef502eSSam Parker }
472c5ef502eSSam Parker 
InsertLoopDec()473c5ef502eSSam Parker void HardwareLoop::InsertLoopDec() {
474c5ef502eSSam Parker   IRBuilder<> CondBuilder(ExitBranch);
475c5ef502eSSam Parker 
476c5ef502eSSam Parker   Function *DecFunc =
477c5ef502eSSam Parker     Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
478c5ef502eSSam Parker                               LoopDecrement->getType());
479c5ef502eSSam Parker   Value *Ops[] = { LoopDecrement };
480c5ef502eSSam Parker   Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
481c5ef502eSSam Parker   Value *OldCond = ExitBranch->getCondition();
482c5ef502eSSam Parker   ExitBranch->setCondition(NewCond);
483c5ef502eSSam Parker 
484c5ef502eSSam Parker   // The false branch must exit the loop.
485c5ef502eSSam Parker   if (!L->contains(ExitBranch->getSuccessor(0)))
486c5ef502eSSam Parker     ExitBranch->swapSuccessors();
487c5ef502eSSam Parker 
488c5ef502eSSam Parker   // The old condition may be dead now, and may have even created a dead PHI
489c5ef502eSSam Parker   // (the original induction variable).
490c5ef502eSSam Parker   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
491c5ef502eSSam Parker 
492c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
493c5ef502eSSam Parker }
494c5ef502eSSam Parker 
InsertLoopRegDec(Value * EltsRem)495c5ef502eSSam Parker Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
496c5ef502eSSam Parker   IRBuilder<> CondBuilder(ExitBranch);
497c5ef502eSSam Parker 
498c5ef502eSSam Parker   Function *DecFunc =
499c5ef502eSSam Parker       Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
500b0614509SSjoerd Meijer                                 { EltsRem->getType() });
501c5ef502eSSam Parker   Value *Ops[] = { EltsRem, LoopDecrement };
502c5ef502eSSam Parker   Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
503c5ef502eSSam Parker 
504c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
505c5ef502eSSam Parker   return cast<Instruction>(Call);
506c5ef502eSSam Parker }
507c5ef502eSSam Parker 
InsertPHICounter(Value * NumElts,Value * EltsRem)508c5ef502eSSam Parker PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
509c5ef502eSSam Parker   BasicBlock *Preheader = L->getLoopPreheader();
510c5ef502eSSam Parker   BasicBlock *Header = L->getHeader();
511c5ef502eSSam Parker   BasicBlock *Latch = ExitBranch->getParent();
512c5ef502eSSam Parker   IRBuilder<> Builder(Header->getFirstNonPHI());
513c5ef502eSSam Parker   PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
514c5ef502eSSam Parker   Index->addIncoming(NumElts, Preheader);
515c5ef502eSSam Parker   Index->addIncoming(EltsRem, Latch);
516c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
517c5ef502eSSam Parker   return Index;
518c5ef502eSSam Parker }
519c5ef502eSSam Parker 
UpdateBranch(Value * EltsRem)520c5ef502eSSam Parker void HardwareLoop::UpdateBranch(Value *EltsRem) {
521c5ef502eSSam Parker   IRBuilder<> CondBuilder(ExitBranch);
522c5ef502eSSam Parker   Value *NewCond =
523c5ef502eSSam Parker     CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
524c5ef502eSSam Parker   Value *OldCond = ExitBranch->getCondition();
525c5ef502eSSam Parker   ExitBranch->setCondition(NewCond);
526c5ef502eSSam Parker 
527c5ef502eSSam Parker   // The false branch must exit the loop.
528c5ef502eSSam Parker   if (!L->contains(ExitBranch->getSuccessor(0)))
529c5ef502eSSam Parker     ExitBranch->swapSuccessors();
530c5ef502eSSam Parker 
531c5ef502eSSam Parker   // The old condition may be dead now, and may have even created a dead PHI
532c5ef502eSSam Parker   // (the original induction variable).
533c5ef502eSSam Parker   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
534c5ef502eSSam Parker }
535c5ef502eSSam Parker 
INITIALIZE_PASS_BEGIN(HardwareLoops,DEBUG_TYPE,HW_LOOPS_NAME,false,false)536c5ef502eSSam Parker INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
537c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
538c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
539c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
54092164cf2SSjoerd Meijer INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
541c5ef502eSSam Parker INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
542c5ef502eSSam Parker 
543c5ef502eSSam Parker FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
544