1c5ef502eSSam Parker //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2c5ef502eSSam Parker //
3c5ef502eSSam Parker // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c5ef502eSSam Parker // See https://llvm.org/LICENSE.txt for license information.
5c5ef502eSSam Parker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6c5ef502eSSam Parker //
7c5ef502eSSam Parker //===----------------------------------------------------------------------===//
8c5ef502eSSam Parker /// \file
9c5ef502eSSam Parker /// Insert hardware loop intrinsics into loops which are deemed profitable by
10c5ef502eSSam Parker /// the target, by querying TargetTransformInfo. A hardware loop comprises of
11c5ef502eSSam Parker /// two intrinsics: one, outside the loop, to set the loop iteration count and
12c5ef502eSSam Parker /// another, in the exit block, to decrement the counter. The decremented value
13c5ef502eSSam Parker /// can either be carried through the loop via a phi or handled in some opaque
14c5ef502eSSam Parker /// way by the target.
15c5ef502eSSam Parker ///
16c5ef502eSSam Parker //===----------------------------------------------------------------------===//
17c5ef502eSSam Parker 
18c5ef502eSSam Parker #include "llvm/ADT/Statistic.h"
19c5ef502eSSam Parker #include "llvm/Analysis/AssumptionCache.h"
20c5ef502eSSam Parker #include "llvm/Analysis/LoopInfo.h"
2192164cf2SSjoerd Meijer #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22c5ef502eSSam Parker #include "llvm/Analysis/ScalarEvolution.h"
23c5ef502eSSam Parker #include "llvm/Analysis/TargetTransformInfo.h"
24c5ef502eSSam Parker #include "llvm/CodeGen/Passes.h"
25c5ef502eSSam Parker #include "llvm/CodeGen/TargetPassConfig.h"
26c5ef502eSSam Parker #include "llvm/IR/BasicBlock.h"
2705da2fe5SReid Kleckner #include "llvm/IR/Constants.h"
28c5ef502eSSam Parker #include "llvm/IR/DataLayout.h"
29c5ef502eSSam Parker #include "llvm/IR/Dominators.h"
30c5ef502eSSam Parker #include "llvm/IR/IRBuilder.h"
31c5ef502eSSam Parker #include "llvm/IR/Instructions.h"
32c5ef502eSSam Parker #include "llvm/IR/IntrinsicInst.h"
33c5ef502eSSam Parker #include "llvm/IR/Value.h"
3405da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
3505da2fe5SReid Kleckner #include "llvm/Pass.h"
3605da2fe5SReid Kleckner #include "llvm/PassRegistry.h"
374c1a1d3cSReid Kleckner #include "llvm/Support/CommandLine.h"
38c5ef502eSSam Parker #include "llvm/Support/Debug.h"
39c5ef502eSSam Parker #include "llvm/Transforms/Scalar.h"
4006fef0b3SJinsong Ji #include "llvm/Transforms/Utils.h"
41c5ef502eSSam Parker #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42c5ef502eSSam Parker #include "llvm/Transforms/Utils/Local.h"
4306fef0b3SJinsong Ji #include "llvm/Transforms/Utils/LoopUtils.h"
44*bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
45c5ef502eSSam Parker 
46c5ef502eSSam Parker #define DEBUG_TYPE "hardware-loops"
47c5ef502eSSam Parker 
48c5ef502eSSam Parker #define HW_LOOPS_NAME "Hardware Loop Insertion"
49c5ef502eSSam Parker 
50c5ef502eSSam Parker using namespace llvm;
51c5ef502eSSam Parker 
52c5ef502eSSam Parker static cl::opt<bool>
53c5ef502eSSam Parker ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
54c5ef502eSSam Parker                    cl::desc("Force hardware loops intrinsics to be inserted"));
55c5ef502eSSam Parker 
56c5ef502eSSam Parker static cl::opt<bool>
57c5ef502eSSam Parker ForceHardwareLoopPHI(
58c5ef502eSSam Parker   "force-hardware-loop-phi", cl::Hidden, cl::init(false),
59c5ef502eSSam Parker   cl::desc("Force hardware loop counter to be updated through a phi"));
60c5ef502eSSam Parker 
61c5ef502eSSam Parker static cl::opt<bool>
62c5ef502eSSam Parker ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
63c5ef502eSSam Parker                 cl::desc("Force allowance of nested hardware loops"));
64c5ef502eSSam Parker 
65c5ef502eSSam Parker static cl::opt<unsigned>
66c5ef502eSSam Parker LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
67c5ef502eSSam Parker             cl::desc("Set the loop decrement value"));
68c5ef502eSSam Parker 
69c5ef502eSSam Parker static cl::opt<unsigned>
70c5ef502eSSam Parker CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
71c5ef502eSSam Parker                 cl::desc("Set the loop counter bitwidth"));
72c5ef502eSSam Parker 
739a92be1bSSam Parker static cl::opt<bool>
749a92be1bSSam Parker ForceGuardLoopEntry(
759a92be1bSSam Parker   "force-hardware-loop-guard", cl::Hidden, cl::init(false),
769a92be1bSSam Parker   cl::desc("Force generation of loop guard intrinsic"));
779a92be1bSSam Parker 
78c5ef502eSSam Parker STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
79c5ef502eSSam Parker 
8092164cf2SSjoerd Meijer #ifndef NDEBUG
8192164cf2SSjoerd Meijer static void debugHWLoopFailure(const StringRef DebugMsg,
8292164cf2SSjoerd Meijer     Instruction *I) {
8392164cf2SSjoerd Meijer   dbgs() << "HWLoops: " << DebugMsg;
8492164cf2SSjoerd Meijer   if (I)
8592164cf2SSjoerd Meijer     dbgs() << ' ' << *I;
8692164cf2SSjoerd Meijer   else
8792164cf2SSjoerd Meijer     dbgs() << '.';
8892164cf2SSjoerd Meijer   dbgs() << '\n';
8992164cf2SSjoerd Meijer }
9092164cf2SSjoerd Meijer #endif
9192164cf2SSjoerd Meijer 
9292164cf2SSjoerd Meijer static OptimizationRemarkAnalysis
9392164cf2SSjoerd Meijer createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
9492164cf2SSjoerd Meijer   Value *CodeRegion = L->getHeader();
9592164cf2SSjoerd Meijer   DebugLoc DL = L->getStartLoc();
9692164cf2SSjoerd Meijer 
9792164cf2SSjoerd Meijer   if (I) {
9892164cf2SSjoerd Meijer     CodeRegion = I->getParent();
9992164cf2SSjoerd Meijer     // If there is no debug location attached to the instruction, revert back to
10092164cf2SSjoerd Meijer     // using the loop's.
10192164cf2SSjoerd Meijer     if (I->getDebugLoc())
10292164cf2SSjoerd Meijer       DL = I->getDebugLoc();
10392164cf2SSjoerd Meijer   }
10492164cf2SSjoerd Meijer 
10592164cf2SSjoerd Meijer   OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
10692164cf2SSjoerd Meijer   R << "hardware-loop not created: ";
10792164cf2SSjoerd Meijer   return R;
10892164cf2SSjoerd Meijer }
10992164cf2SSjoerd Meijer 
110c5ef502eSSam Parker namespace {
111c5ef502eSSam Parker 
11292164cf2SSjoerd Meijer   void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
11392164cf2SSjoerd Meijer       OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
11492164cf2SSjoerd Meijer     LLVM_DEBUG(debugHWLoopFailure(Msg, I));
11592164cf2SSjoerd Meijer     ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
11692164cf2SSjoerd Meijer   }
11792164cf2SSjoerd Meijer 
118c5ef502eSSam Parker   using TTI = TargetTransformInfo;
119c5ef502eSSam Parker 
120c5ef502eSSam Parker   class HardwareLoops : public FunctionPass {
121c5ef502eSSam Parker   public:
122c5ef502eSSam Parker     static char ID;
123c5ef502eSSam Parker 
124c5ef502eSSam Parker     HardwareLoops() : FunctionPass(ID) {
125c5ef502eSSam Parker       initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
126c5ef502eSSam Parker     }
127c5ef502eSSam Parker 
128c5ef502eSSam Parker     bool runOnFunction(Function &F) override;
129c5ef502eSSam Parker 
130c5ef502eSSam Parker     void getAnalysisUsage(AnalysisUsage &AU) const override {
131c5ef502eSSam Parker       AU.addRequired<LoopInfoWrapperPass>();
132c5ef502eSSam Parker       AU.addPreserved<LoopInfoWrapperPass>();
133c5ef502eSSam Parker       AU.addRequired<DominatorTreeWrapperPass>();
134c5ef502eSSam Parker       AU.addPreserved<DominatorTreeWrapperPass>();
135c5ef502eSSam Parker       AU.addRequired<ScalarEvolutionWrapperPass>();
136c5ef502eSSam Parker       AU.addRequired<AssumptionCacheTracker>();
137c5ef502eSSam Parker       AU.addRequired<TargetTransformInfoWrapperPass>();
13892164cf2SSjoerd Meijer       AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
139c5ef502eSSam Parker     }
140c5ef502eSSam Parker 
141c5ef502eSSam Parker     // Try to convert the given Loop into a hardware loop.
142c5ef502eSSam Parker     bool TryConvertLoop(Loop *L);
143c5ef502eSSam Parker 
144c5ef502eSSam Parker     // Given that the target believes the loop to be profitable, try to
145c5ef502eSSam Parker     // convert it.
146c5b918deSChen Zheng     bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
147c5ef502eSSam Parker 
148c5ef502eSSam Parker   private:
149c5ef502eSSam Parker     ScalarEvolution *SE = nullptr;
150c5ef502eSSam Parker     LoopInfo *LI = nullptr;
151c5ef502eSSam Parker     const DataLayout *DL = nullptr;
15292164cf2SSjoerd Meijer     OptimizationRemarkEmitter *ORE = nullptr;
153c5ef502eSSam Parker     const TargetTransformInfo *TTI = nullptr;
154c5ef502eSSam Parker     DominatorTree *DT = nullptr;
15506fef0b3SJinsong Ji     bool PreserveLCSSA = false;
156c5ef502eSSam Parker     AssumptionCache *AC = nullptr;
157c5ef502eSSam Parker     TargetLibraryInfo *LibInfo = nullptr;
158c5ef502eSSam Parker     Module *M = nullptr;
159c5ef502eSSam Parker     bool MadeChange = false;
160c5ef502eSSam Parker   };
161c5ef502eSSam Parker 
162c5ef502eSSam Parker   class HardwareLoop {
163c5ef502eSSam Parker     // Expand the trip count scev into a value that we can use.
1649a92be1bSSam Parker     Value *InitLoopCount();
165c5ef502eSSam Parker 
166c5ef502eSSam Parker     // Insert the set_loop_iteration intrinsic.
1679a92be1bSSam Parker     void InsertIterationSetup(Value *LoopCountInit);
168c5ef502eSSam Parker 
169c5ef502eSSam Parker     // Insert the loop_decrement intrinsic.
170c5ef502eSSam Parker     void InsertLoopDec();
171c5ef502eSSam Parker 
172c5ef502eSSam Parker     // Insert the loop_decrement_reg intrinsic.
173c5ef502eSSam Parker     Instruction *InsertLoopRegDec(Value *EltsRem);
174c5ef502eSSam Parker 
175c5ef502eSSam Parker     // If the target requires the counter value to be updated in the loop,
176c5ef502eSSam Parker     // insert a phi to hold the value. The intended purpose is for use by
177c5ef502eSSam Parker     // loop_decrement_reg.
178c5ef502eSSam Parker     PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
179c5ef502eSSam Parker 
180c5ef502eSSam Parker     // Create a new cmp, that checks the returned value of loop_decrement*,
181c5ef502eSSam Parker     // and update the exit branch to use it.
182c5ef502eSSam Parker     void UpdateBranch(Value *EltsRem);
183c5ef502eSSam Parker 
184c5ef502eSSam Parker   public:
185c5b918deSChen Zheng     HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
18692164cf2SSjoerd Meijer                  const DataLayout &DL,
18792164cf2SSjoerd Meijer                  OptimizationRemarkEmitter *ORE) :
18892164cf2SSjoerd Meijer       SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
189c5ef502eSSam Parker       ExitCount(Info.ExitCount),
190c5ef502eSSam Parker       CountType(Info.CountType),
191c5ef502eSSam Parker       ExitBranch(Info.ExitBranch),
192c5ef502eSSam Parker       LoopDecrement(Info.LoopDecrement),
1939a92be1bSSam Parker       UsePHICounter(Info.CounterInReg),
1949a92be1bSSam Parker       UseLoopGuard(Info.PerformEntryTest) { }
195c5ef502eSSam Parker 
196c5ef502eSSam Parker     void Create();
197c5ef502eSSam Parker 
198c5ef502eSSam Parker   private:
199c5ef502eSSam Parker     ScalarEvolution &SE;
200c5ef502eSSam Parker     const DataLayout &DL;
20192164cf2SSjoerd Meijer     OptimizationRemarkEmitter *ORE = nullptr;
202c5ef502eSSam Parker     Loop *L                 = nullptr;
203c5ef502eSSam Parker     Module *M               = nullptr;
204c5ef502eSSam Parker     const SCEV *ExitCount   = nullptr;
205c5ef502eSSam Parker     Type *CountType         = nullptr;
206c5ef502eSSam Parker     BranchInst *ExitBranch  = nullptr;
207c5ef502eSSam Parker     Value *LoopDecrement    = nullptr;
208c5ef502eSSam Parker     bool UsePHICounter      = false;
2099a92be1bSSam Parker     bool UseLoopGuard       = false;
2109a92be1bSSam Parker     BasicBlock *BeginBB     = nullptr;
211c5ef502eSSam Parker   };
212c5ef502eSSam Parker }
213c5ef502eSSam Parker 
214c5ef502eSSam Parker char HardwareLoops::ID = 0;
215c5ef502eSSam Parker 
216c5ef502eSSam Parker bool HardwareLoops::runOnFunction(Function &F) {
217c5ef502eSSam Parker   if (skipFunction(F))
218c5ef502eSSam Parker     return false;
219c5ef502eSSam Parker 
220c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
221c5ef502eSSam Parker 
222c5ef502eSSam Parker   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
223c5ef502eSSam Parker   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
224c5ef502eSSam Parker   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
225c5ef502eSSam Parker   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
226c5ef502eSSam Parker   DL = &F.getParent()->getDataLayout();
22792164cf2SSjoerd Meijer   ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
228c5ef502eSSam Parker   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2299c27b59cSTeresa Johnson   LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
23006fef0b3SJinsong Ji   PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
231c5ef502eSSam Parker   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
232c5ef502eSSam Parker   M = F.getParent();
233c5ef502eSSam Parker 
234c5ef502eSSam Parker   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
235c5ef502eSSam Parker     Loop *L = *I;
236c5ef502eSSam Parker     if (!L->getParentLoop())
237c5ef502eSSam Parker       TryConvertLoop(L);
238c5ef502eSSam Parker   }
239c5ef502eSSam Parker 
240c5ef502eSSam Parker   return MadeChange;
241c5ef502eSSam Parker }
242c5ef502eSSam Parker 
243c5ef502eSSam Parker // Return true if the search should stop, which will be when an inner loop is
244c5ef502eSSam Parker // converted and the parent loop doesn't support containing a hardware loop.
245c5ef502eSSam Parker bool HardwareLoops::TryConvertLoop(Loop *L) {
246c5ef502eSSam Parker   // Process nested loops first.
24792164cf2SSjoerd Meijer   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
24892164cf2SSjoerd Meijer     if (TryConvertLoop(*I)) {
24992164cf2SSjoerd Meijer       reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
25092164cf2SSjoerd Meijer                           ORE, L);
251c5ef502eSSam Parker       return true; // Stop search.
25292164cf2SSjoerd Meijer     }
25392164cf2SSjoerd Meijer   }
254c5ef502eSSam Parker 
255c5b918deSChen Zheng   HardwareLoopInfo HWLoopInfo(L);
25692164cf2SSjoerd Meijer   if (!HWLoopInfo.canAnalyze(*LI)) {
25792164cf2SSjoerd Meijer     reportHWLoopFailure("cannot analyze loop, irreducible control flow",
25892164cf2SSjoerd Meijer                         "HWLoopCannotAnalyze", ORE, L);
259aa999528SChen Zheng     return false;
26092164cf2SSjoerd Meijer   }
261aa999528SChen Zheng 
26292164cf2SSjoerd Meijer   if (!ForceHardwareLoops &&
26392164cf2SSjoerd Meijer       !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
26492164cf2SSjoerd Meijer     reportHWLoopFailure("it's not profitable to create a hardware-loop",
26592164cf2SSjoerd Meijer                         "HWLoopNotProfitable", ORE, L);
26692164cf2SSjoerd Meijer     return false;
26792164cf2SSjoerd Meijer   }
268c5ef502eSSam Parker 
269c5ef502eSSam Parker   // Allow overriding of the counter width and loop decrement value.
270c5ef502eSSam Parker   if (CounterBitWidth.getNumOccurrences())
271c5ef502eSSam Parker     HWLoopInfo.CountType =
272c5ef502eSSam Parker       IntegerType::get(M->getContext(), CounterBitWidth);
273c5ef502eSSam Parker 
274c5ef502eSSam Parker   if (LoopDecrement.getNumOccurrences())
275c5ef502eSSam Parker     HWLoopInfo.LoopDecrement =
276c5ef502eSSam Parker       ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
277c5ef502eSSam Parker 
278c5ef502eSSam Parker   MadeChange |= TryConvertLoop(HWLoopInfo);
279c5ef502eSSam Parker   return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
280c5ef502eSSam Parker }
281c5ef502eSSam Parker 
282c5b918deSChen Zheng bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
283c5ef502eSSam Parker 
28406fef0b3SJinsong Ji   Loop *L = HWLoopInfo.L;
28506fef0b3SJinsong Ji   LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
286c5ef502eSSam Parker 
287c5b918deSChen Zheng   if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
28892164cf2SSjoerd Meijer                                           ForceHardwareLoopPHI)) {
28992164cf2SSjoerd Meijer     // TODO: there can be many reasons a loop is not considered a
29092164cf2SSjoerd Meijer     // candidate, so we should let isHardwareLoopCandidate fill in the
29192164cf2SSjoerd Meijer     // reason and then report a better message here.
29292164cf2SSjoerd Meijer     reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
293c5ef502eSSam Parker     return false;
29492164cf2SSjoerd Meijer   }
295c5ef502eSSam Parker 
296c5b918deSChen Zheng   assert(
297c5b918deSChen Zheng       (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
298c5b918deSChen Zheng       "Hardware Loop must have set exit info.");
299c5b918deSChen Zheng 
30006fef0b3SJinsong Ji   BasicBlock *Preheader = L->getLoopPreheader();
30106fef0b3SJinsong Ji 
30206fef0b3SJinsong Ji   // If we don't have a preheader, then insert one.
30306fef0b3SJinsong Ji   if (!Preheader)
30406fef0b3SJinsong Ji     Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
30506fef0b3SJinsong Ji   if (!Preheader)
30606fef0b3SJinsong Ji     return false;
30706fef0b3SJinsong Ji 
30892164cf2SSjoerd Meijer   HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
309c5ef502eSSam Parker   HWLoop.Create();
310c5ef502eSSam Parker   ++NumHWLoops;
311c5ef502eSSam Parker   return true;
312c5ef502eSSam Parker }
313c5ef502eSSam Parker 
314c5ef502eSSam Parker void HardwareLoop::Create() {
315c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
3169a92be1bSSam Parker 
3179a92be1bSSam Parker   Value *LoopCountInit = InitLoopCount();
31892164cf2SSjoerd Meijer   if (!LoopCountInit) {
31992164cf2SSjoerd Meijer     reportHWLoopFailure("could not safely create a loop count expression",
32092164cf2SSjoerd Meijer                         "HWLoopNotSafe", ORE, L);
32106fef0b3SJinsong Ji     return;
32292164cf2SSjoerd Meijer   }
323c5ef502eSSam Parker 
3249a92be1bSSam Parker   InsertIterationSetup(LoopCountInit);
325c5ef502eSSam Parker 
326c5ef502eSSam Parker   if (UsePHICounter || ForceHardwareLoopPHI) {
327c5ef502eSSam Parker     Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
328c5ef502eSSam Parker     Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
329c5ef502eSSam Parker     LoopDec->setOperand(0, EltsRem);
330c5ef502eSSam Parker     UpdateBranch(LoopDec);
331c5ef502eSSam Parker   } else
332c5ef502eSSam Parker     InsertLoopDec();
333c5ef502eSSam Parker 
334c5ef502eSSam Parker   // Run through the basic blocks of the loop and see if any of them have dead
335c5ef502eSSam Parker   // PHIs that can be removed.
336c5ef502eSSam Parker   for (auto I : L->blocks())
337c5ef502eSSam Parker     DeleteDeadPHIs(I);
338c5ef502eSSam Parker }
339c5ef502eSSam Parker 
3409a92be1bSSam Parker static bool CanGenerateTest(Loop *L, Value *Count) {
3419a92be1bSSam Parker   BasicBlock *Preheader = L->getLoopPreheader();
3429a92be1bSSam Parker   if (!Preheader->getSinglePredecessor())
3439a92be1bSSam Parker     return false;
3449a92be1bSSam Parker 
3459a92be1bSSam Parker   BasicBlock *Pred = Preheader->getSinglePredecessor();
3469a92be1bSSam Parker   if (!isa<BranchInst>(Pred->getTerminator()))
3479a92be1bSSam Parker     return false;
3489a92be1bSSam Parker 
3499a92be1bSSam Parker   auto *BI = cast<BranchInst>(Pred->getTerminator());
3509a92be1bSSam Parker   if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
3519a92be1bSSam Parker     return false;
3529a92be1bSSam Parker 
3539a92be1bSSam Parker   // Check that the icmp is checking for equality of Count and zero and that
3549a92be1bSSam Parker   // a non-zero value results in entering the loop.
3559a92be1bSSam Parker   auto ICmp = cast<ICmpInst>(BI->getCondition());
35698722691SSam Parker   LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
3579a92be1bSSam Parker   if (!ICmp->isEquality())
3589a92be1bSSam Parker     return false;
3599a92be1bSSam Parker 
3609a92be1bSSam Parker   auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
3619a92be1bSSam Parker     if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
3629a92be1bSSam Parker       return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
3639a92be1bSSam Parker     return false;
3649a92be1bSSam Parker   };
3659a92be1bSSam Parker 
3669a92be1bSSam Parker   if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
3679a92be1bSSam Parker     return false;
3689a92be1bSSam Parker 
3699a92be1bSSam Parker   unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
3709a92be1bSSam Parker   if (BI->getSuccessor(SuccIdx) != Preheader)
3719a92be1bSSam Parker     return false;
3729a92be1bSSam Parker 
3739a92be1bSSam Parker   return true;
3749a92be1bSSam Parker }
3759a92be1bSSam Parker 
3769a92be1bSSam Parker Value *HardwareLoop::InitLoopCount() {
3779a92be1bSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
3789a92be1bSSam Parker   // Can we replace a conditional branch with an intrinsic that sets the
3799a92be1bSSam Parker   // loop counter and tests that is not zero?
3809a92be1bSSam Parker 
381c5ef502eSSam Parker   SCEVExpander SCEVE(SE, DL, "loopcnt");
38206fef0b3SJinsong Ji   if (!ExitCount->getType()->isPointerTy() &&
38306fef0b3SJinsong Ji       ExitCount->getType() != CountType)
38406fef0b3SJinsong Ji     ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
38506fef0b3SJinsong Ji 
38606fef0b3SJinsong Ji   ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
387c5ef502eSSam Parker 
3889a92be1bSSam Parker   // If we're trying to use the 'test and set' form of the intrinsic, we need
3899a92be1bSSam Parker   // to replace a conditional branch that is controlling entry to the loop. It
3909a92be1bSSam Parker   // is likely (guaranteed?) that the preheader has an unconditional branch to
3919a92be1bSSam Parker   // the loop header, so also check if it has a single predecessor.
3929a92be1bSSam Parker   if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
39306fef0b3SJinsong Ji                                   SE.getZero(ExitCount->getType()))) {
39406fef0b3SJinsong Ji     LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
3959a92be1bSSam Parker     UseLoopGuard |= ForceGuardLoopEntry;
39606fef0b3SJinsong Ji   } else
3979a92be1bSSam Parker     UseLoopGuard = false;
3989a92be1bSSam Parker 
3999a92be1bSSam Parker   BasicBlock *BB = L->getLoopPreheader();
4009a92be1bSSam Parker   if (UseLoopGuard && BB->getSinglePredecessor() &&
40106fef0b3SJinsong Ji       cast<BranchInst>(BB->getTerminator())->isUnconditional())
4029a92be1bSSam Parker     BB = BB->getSinglePredecessor();
40306fef0b3SJinsong Ji 
40406fef0b3SJinsong Ji   if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
40506fef0b3SJinsong Ji     LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
40606fef0b3SJinsong Ji                << *ExitCount << "\n");
40706fef0b3SJinsong Ji     return nullptr;
408c5ef502eSSam Parker   }
409c5ef502eSSam Parker 
410c5ef502eSSam Parker   Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
411c5ef502eSSam Parker                                      BB->getTerminator());
4129a92be1bSSam Parker 
4139a92be1bSSam Parker   // FIXME: We've expanded Count where we hope to insert the counter setting
4149a92be1bSSam Parker   // intrinsic. But, in the case of the 'test and set' form, we may fallback to
4159a92be1bSSam Parker   // the just 'set' form and in which case the insertion block is most likely
4169a92be1bSSam Parker   // different. It means there will be instruction(s) in a block that possibly
4179a92be1bSSam Parker   // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
4189a92be1bSSam Parker   // but it's doesn't appear to work in all cases.
4199a92be1bSSam Parker 
4209a92be1bSSam Parker   UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
4219a92be1bSSam Parker   BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
4229a92be1bSSam Parker   LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
4239a92be1bSSam Parker              << " - Expanded Count in " << BB->getName() << "\n"
4249a92be1bSSam Parker              << " - Will insert set counter intrinsic into: "
4259a92be1bSSam Parker              << BeginBB->getName() << "\n");
426c5ef502eSSam Parker   return Count;
427c5ef502eSSam Parker }
428c5ef502eSSam Parker 
4299a92be1bSSam Parker void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
4309a92be1bSSam Parker   IRBuilder<> Builder(BeginBB->getTerminator());
431c5ef502eSSam Parker   Type *Ty = LoopCountInit->getType();
4329a92be1bSSam Parker   Intrinsic::ID ID = UseLoopGuard ?
4339a92be1bSSam Parker     Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations;
4349a92be1bSSam Parker   Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
4359a92be1bSSam Parker   Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
4369a92be1bSSam Parker 
4379a92be1bSSam Parker   // Use the return value of the intrinsic to control the entry of the loop.
4389a92be1bSSam Parker   if (UseLoopGuard) {
4399a92be1bSSam Parker     assert((isa<BranchInst>(BeginBB->getTerminator()) &&
4409a92be1bSSam Parker             cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
4419a92be1bSSam Parker            "Expected conditional branch");
4429a92be1bSSam Parker     auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
4439a92be1bSSam Parker     LoopGuard->setCondition(SetCount);
4449a92be1bSSam Parker     if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
4459a92be1bSSam Parker       LoopGuard->swapSuccessors();
4469a92be1bSSam Parker   }
4479a92be1bSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
4489a92be1bSSam Parker              << *SetCount << "\n");
449c5ef502eSSam Parker }
450c5ef502eSSam Parker 
451c5ef502eSSam Parker void HardwareLoop::InsertLoopDec() {
452c5ef502eSSam Parker   IRBuilder<> CondBuilder(ExitBranch);
453c5ef502eSSam Parker 
454c5ef502eSSam Parker   Function *DecFunc =
455c5ef502eSSam Parker     Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
456c5ef502eSSam Parker                               LoopDecrement->getType());
457c5ef502eSSam Parker   Value *Ops[] = { LoopDecrement };
458c5ef502eSSam Parker   Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
459c5ef502eSSam Parker   Value *OldCond = ExitBranch->getCondition();
460c5ef502eSSam Parker   ExitBranch->setCondition(NewCond);
461c5ef502eSSam Parker 
462c5ef502eSSam Parker   // The false branch must exit the loop.
463c5ef502eSSam Parker   if (!L->contains(ExitBranch->getSuccessor(0)))
464c5ef502eSSam Parker     ExitBranch->swapSuccessors();
465c5ef502eSSam Parker 
466c5ef502eSSam Parker   // The old condition may be dead now, and may have even created a dead PHI
467c5ef502eSSam Parker   // (the original induction variable).
468c5ef502eSSam Parker   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
469c5ef502eSSam Parker 
470c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
471c5ef502eSSam Parker }
472c5ef502eSSam Parker 
473c5ef502eSSam Parker Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
474c5ef502eSSam Parker   IRBuilder<> CondBuilder(ExitBranch);
475c5ef502eSSam Parker 
476c5ef502eSSam Parker   Function *DecFunc =
477c5ef502eSSam Parker       Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
478c5ef502eSSam Parker                                 { EltsRem->getType(), EltsRem->getType(),
479c5ef502eSSam Parker                                   LoopDecrement->getType()
480c5ef502eSSam Parker                                 });
481c5ef502eSSam Parker   Value *Ops[] = { EltsRem, LoopDecrement };
482c5ef502eSSam Parker   Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
483c5ef502eSSam Parker 
484c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
485c5ef502eSSam Parker   return cast<Instruction>(Call);
486c5ef502eSSam Parker }
487c5ef502eSSam Parker 
488c5ef502eSSam Parker PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
489c5ef502eSSam Parker   BasicBlock *Preheader = L->getLoopPreheader();
490c5ef502eSSam Parker   BasicBlock *Header = L->getHeader();
491c5ef502eSSam Parker   BasicBlock *Latch = ExitBranch->getParent();
492c5ef502eSSam Parker   IRBuilder<> Builder(Header->getFirstNonPHI());
493c5ef502eSSam Parker   PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
494c5ef502eSSam Parker   Index->addIncoming(NumElts, Preheader);
495c5ef502eSSam Parker   Index->addIncoming(EltsRem, Latch);
496c5ef502eSSam Parker   LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
497c5ef502eSSam Parker   return Index;
498c5ef502eSSam Parker }
499c5ef502eSSam Parker 
500c5ef502eSSam Parker void HardwareLoop::UpdateBranch(Value *EltsRem) {
501c5ef502eSSam Parker   IRBuilder<> CondBuilder(ExitBranch);
502c5ef502eSSam Parker   Value *NewCond =
503c5ef502eSSam Parker     CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
504c5ef502eSSam Parker   Value *OldCond = ExitBranch->getCondition();
505c5ef502eSSam Parker   ExitBranch->setCondition(NewCond);
506c5ef502eSSam Parker 
507c5ef502eSSam Parker   // The false branch must exit the loop.
508c5ef502eSSam Parker   if (!L->contains(ExitBranch->getSuccessor(0)))
509c5ef502eSSam Parker     ExitBranch->swapSuccessors();
510c5ef502eSSam Parker 
511c5ef502eSSam Parker   // The old condition may be dead now, and may have even created a dead PHI
512c5ef502eSSam Parker   // (the original induction variable).
513c5ef502eSSam Parker   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
514c5ef502eSSam Parker }
515c5ef502eSSam Parker 
516c5ef502eSSam Parker INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
517c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
518c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
519c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
52092164cf2SSjoerd Meijer INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
521c5ef502eSSam Parker INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
522c5ef502eSSam Parker 
523c5ef502eSSam Parker FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
524