1c5ef502eSSam Parker //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2c5ef502eSSam Parker //
3c5ef502eSSam Parker // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c5ef502eSSam Parker // See https://llvm.org/LICENSE.txt for license information.
5c5ef502eSSam Parker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6c5ef502eSSam Parker //
7c5ef502eSSam Parker //===----------------------------------------------------------------------===//
8c5ef502eSSam Parker /// \file
9c5ef502eSSam Parker /// Insert hardware loop intrinsics into loops which are deemed profitable by
10c5ef502eSSam Parker /// the target, by querying TargetTransformInfo. A hardware loop comprises of
11c5ef502eSSam Parker /// two intrinsics: one, outside the loop, to set the loop iteration count and
12c5ef502eSSam Parker /// another, in the exit block, to decrement the counter. The decremented value
13c5ef502eSSam Parker /// can either be carried through the loop via a phi or handled in some opaque
14c5ef502eSSam Parker /// way by the target.
15c5ef502eSSam Parker ///
16c5ef502eSSam Parker //===----------------------------------------------------------------------===//
17c5ef502eSSam Parker
18c5ef502eSSam Parker #include "llvm/ADT/Statistic.h"
19c5ef502eSSam Parker #include "llvm/Analysis/AssumptionCache.h"
20c5ef502eSSam Parker #include "llvm/Analysis/LoopInfo.h"
2192164cf2SSjoerd Meijer #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22c5ef502eSSam Parker #include "llvm/Analysis/ScalarEvolution.h"
23a53dddb3SSimon Pilgrim #include "llvm/Analysis/TargetLibraryInfo.h"
24c5ef502eSSam Parker #include "llvm/Analysis/TargetTransformInfo.h"
25c5ef502eSSam Parker #include "llvm/CodeGen/Passes.h"
26c5ef502eSSam Parker #include "llvm/IR/BasicBlock.h"
2705da2fe5SReid Kleckner #include "llvm/IR/Constants.h"
28c5ef502eSSam Parker #include "llvm/IR/Dominators.h"
29c5ef502eSSam Parker #include "llvm/IR/IRBuilder.h"
30c5ef502eSSam Parker #include "llvm/IR/Instructions.h"
31c5ef502eSSam Parker #include "llvm/IR/IntrinsicInst.h"
32c5ef502eSSam Parker #include "llvm/IR/Value.h"
3305da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
3405da2fe5SReid Kleckner #include "llvm/Pass.h"
3505da2fe5SReid Kleckner #include "llvm/PassRegistry.h"
364c1a1d3cSReid Kleckner #include "llvm/Support/CommandLine.h"
37c5ef502eSSam Parker #include "llvm/Support/Debug.h"
3806fef0b3SJinsong Ji #include "llvm/Transforms/Utils.h"
39c5ef502eSSam Parker #include "llvm/Transforms/Utils/BasicBlockUtils.h"
40c5ef502eSSam Parker #include "llvm/Transforms/Utils/Local.h"
4106fef0b3SJinsong Ji #include "llvm/Transforms/Utils/LoopUtils.h"
42bcbd26bfSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
43c5ef502eSSam Parker
44c5ef502eSSam Parker #define DEBUG_TYPE "hardware-loops"
45c5ef502eSSam Parker
46c5ef502eSSam Parker #define HW_LOOPS_NAME "Hardware Loop Insertion"
47c5ef502eSSam Parker
48c5ef502eSSam Parker using namespace llvm;
49c5ef502eSSam Parker
50c5ef502eSSam Parker static cl::opt<bool>
51c5ef502eSSam Parker ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
52c5ef502eSSam Parker cl::desc("Force hardware loops intrinsics to be inserted"));
53c5ef502eSSam Parker
54c5ef502eSSam Parker static cl::opt<bool>
55c5ef502eSSam Parker ForceHardwareLoopPHI(
56c5ef502eSSam Parker "force-hardware-loop-phi", cl::Hidden, cl::init(false),
57c5ef502eSSam Parker cl::desc("Force hardware loop counter to be updated through a phi"));
58c5ef502eSSam Parker
59c5ef502eSSam Parker static cl::opt<bool>
60c5ef502eSSam Parker ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
61c5ef502eSSam Parker cl::desc("Force allowance of nested hardware loops"));
62c5ef502eSSam Parker
63c5ef502eSSam Parker static cl::opt<unsigned>
64c5ef502eSSam Parker LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
65c5ef502eSSam Parker cl::desc("Set the loop decrement value"));
66c5ef502eSSam Parker
67c5ef502eSSam Parker static cl::opt<unsigned>
68c5ef502eSSam Parker CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
69c5ef502eSSam Parker cl::desc("Set the loop counter bitwidth"));
70c5ef502eSSam Parker
719a92be1bSSam Parker static cl::opt<bool>
729a92be1bSSam Parker ForceGuardLoopEntry(
739a92be1bSSam Parker "force-hardware-loop-guard", cl::Hidden, cl::init(false),
749a92be1bSSam Parker cl::desc("Force generation of loop guard intrinsic"));
759a92be1bSSam Parker
76c5ef502eSSam Parker STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
77c5ef502eSSam Parker
7892164cf2SSjoerd Meijer #ifndef NDEBUG
debugHWLoopFailure(const StringRef DebugMsg,Instruction * I)7992164cf2SSjoerd Meijer static void debugHWLoopFailure(const StringRef DebugMsg,
8092164cf2SSjoerd Meijer Instruction *I) {
8192164cf2SSjoerd Meijer dbgs() << "HWLoops: " << DebugMsg;
8292164cf2SSjoerd Meijer if (I)
8392164cf2SSjoerd Meijer dbgs() << ' ' << *I;
8492164cf2SSjoerd Meijer else
8592164cf2SSjoerd Meijer dbgs() << '.';
8692164cf2SSjoerd Meijer dbgs() << '\n';
8792164cf2SSjoerd Meijer }
8892164cf2SSjoerd Meijer #endif
8992164cf2SSjoerd Meijer
9092164cf2SSjoerd Meijer static OptimizationRemarkAnalysis
createHWLoopAnalysis(StringRef RemarkName,Loop * L,Instruction * I)9192164cf2SSjoerd Meijer createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
9292164cf2SSjoerd Meijer Value *CodeRegion = L->getHeader();
9392164cf2SSjoerd Meijer DebugLoc DL = L->getStartLoc();
9492164cf2SSjoerd Meijer
9592164cf2SSjoerd Meijer if (I) {
9692164cf2SSjoerd Meijer CodeRegion = I->getParent();
9792164cf2SSjoerd Meijer // If there is no debug location attached to the instruction, revert back to
9892164cf2SSjoerd Meijer // using the loop's.
9992164cf2SSjoerd Meijer if (I->getDebugLoc())
10092164cf2SSjoerd Meijer DL = I->getDebugLoc();
10192164cf2SSjoerd Meijer }
10292164cf2SSjoerd Meijer
10392164cf2SSjoerd Meijer OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
10492164cf2SSjoerd Meijer R << "hardware-loop not created: ";
10592164cf2SSjoerd Meijer return R;
10692164cf2SSjoerd Meijer }
10792164cf2SSjoerd Meijer
108c5ef502eSSam Parker namespace {
109c5ef502eSSam Parker
reportHWLoopFailure(const StringRef Msg,const StringRef ORETag,OptimizationRemarkEmitter * ORE,Loop * TheLoop,Instruction * I=nullptr)11092164cf2SSjoerd Meijer void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
11192164cf2SSjoerd Meijer OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
11292164cf2SSjoerd Meijer LLVM_DEBUG(debugHWLoopFailure(Msg, I));
11392164cf2SSjoerd Meijer ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
11492164cf2SSjoerd Meijer }
11592164cf2SSjoerd Meijer
116c5ef502eSSam Parker using TTI = TargetTransformInfo;
117c5ef502eSSam Parker
118c5ef502eSSam Parker class HardwareLoops : public FunctionPass {
119c5ef502eSSam Parker public:
120c5ef502eSSam Parker static char ID;
121c5ef502eSSam Parker
HardwareLoops()122c5ef502eSSam Parker HardwareLoops() : FunctionPass(ID) {
123c5ef502eSSam Parker initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
124c5ef502eSSam Parker }
125c5ef502eSSam Parker
126c5ef502eSSam Parker bool runOnFunction(Function &F) override;
127c5ef502eSSam Parker
getAnalysisUsage(AnalysisUsage & AU) const128c5ef502eSSam Parker void getAnalysisUsage(AnalysisUsage &AU) const override {
129c5ef502eSSam Parker AU.addRequired<LoopInfoWrapperPass>();
130c5ef502eSSam Parker AU.addPreserved<LoopInfoWrapperPass>();
131c5ef502eSSam Parker AU.addRequired<DominatorTreeWrapperPass>();
132c5ef502eSSam Parker AU.addPreserved<DominatorTreeWrapperPass>();
133c5ef502eSSam Parker AU.addRequired<ScalarEvolutionWrapperPass>();
134c5ef502eSSam Parker AU.addRequired<AssumptionCacheTracker>();
135c5ef502eSSam Parker AU.addRequired<TargetTransformInfoWrapperPass>();
13692164cf2SSjoerd Meijer AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
137c5ef502eSSam Parker }
138c5ef502eSSam Parker
139c5ef502eSSam Parker // Try to convert the given Loop into a hardware loop.
140c5ef502eSSam Parker bool TryConvertLoop(Loop *L);
141c5ef502eSSam Parker
142c5ef502eSSam Parker // Given that the target believes the loop to be profitable, try to
143c5ef502eSSam Parker // convert it.
144c5b918deSChen Zheng bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
145c5ef502eSSam Parker
146c5ef502eSSam Parker private:
147c5ef502eSSam Parker ScalarEvolution *SE = nullptr;
148c5ef502eSSam Parker LoopInfo *LI = nullptr;
149c5ef502eSSam Parker const DataLayout *DL = nullptr;
15092164cf2SSjoerd Meijer OptimizationRemarkEmitter *ORE = nullptr;
151c5ef502eSSam Parker const TargetTransformInfo *TTI = nullptr;
152c5ef502eSSam Parker DominatorTree *DT = nullptr;
15306fef0b3SJinsong Ji bool PreserveLCSSA = false;
154c5ef502eSSam Parker AssumptionCache *AC = nullptr;
155c5ef502eSSam Parker TargetLibraryInfo *LibInfo = nullptr;
156c5ef502eSSam Parker Module *M = nullptr;
157c5ef502eSSam Parker bool MadeChange = false;
158c5ef502eSSam Parker };
159c5ef502eSSam Parker
160c5ef502eSSam Parker class HardwareLoop {
161c5ef502eSSam Parker // Expand the trip count scev into a value that we can use.
1629a92be1bSSam Parker Value *InitLoopCount();
163c5ef502eSSam Parker
164c5ef502eSSam Parker // Insert the set_loop_iteration intrinsic.
165b2ac9681SDavid Green Value *InsertIterationSetup(Value *LoopCountInit);
166c5ef502eSSam Parker
167c5ef502eSSam Parker // Insert the loop_decrement intrinsic.
168c5ef502eSSam Parker void InsertLoopDec();
169c5ef502eSSam Parker
170c5ef502eSSam Parker // Insert the loop_decrement_reg intrinsic.
171c5ef502eSSam Parker Instruction *InsertLoopRegDec(Value *EltsRem);
172c5ef502eSSam Parker
173c5ef502eSSam Parker // If the target requires the counter value to be updated in the loop,
174c5ef502eSSam Parker // insert a phi to hold the value. The intended purpose is for use by
175c5ef502eSSam Parker // loop_decrement_reg.
176c5ef502eSSam Parker PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
177c5ef502eSSam Parker
178c5ef502eSSam Parker // Create a new cmp, that checks the returned value of loop_decrement*,
179c5ef502eSSam Parker // and update the exit branch to use it.
180c5ef502eSSam Parker void UpdateBranch(Value *EltsRem);
181c5ef502eSSam Parker
182c5ef502eSSam Parker public:
HardwareLoop(HardwareLoopInfo & Info,ScalarEvolution & SE,const DataLayout & DL,OptimizationRemarkEmitter * ORE)183c5b918deSChen Zheng HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
18492164cf2SSjoerd Meijer const DataLayout &DL,
18592164cf2SSjoerd Meijer OptimizationRemarkEmitter *ORE) :
18692164cf2SSjoerd Meijer SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
18734badc40SChen Zheng ExitCount(Info.ExitCount),
188c5ef502eSSam Parker CountType(Info.CountType),
189c5ef502eSSam Parker ExitBranch(Info.ExitBranch),
190c5ef502eSSam Parker LoopDecrement(Info.LoopDecrement),
1919a92be1bSSam Parker UsePHICounter(Info.CounterInReg),
1929a92be1bSSam Parker UseLoopGuard(Info.PerformEntryTest) { }
193c5ef502eSSam Parker
194c5ef502eSSam Parker void Create();
195c5ef502eSSam Parker
196c5ef502eSSam Parker private:
197c5ef502eSSam Parker ScalarEvolution &SE;
198c5ef502eSSam Parker const DataLayout &DL;
19992164cf2SSjoerd Meijer OptimizationRemarkEmitter *ORE = nullptr;
200c5ef502eSSam Parker Loop *L = nullptr;
201c5ef502eSSam Parker Module *M = nullptr;
20234badc40SChen Zheng const SCEV *ExitCount = nullptr;
203c5ef502eSSam Parker Type *CountType = nullptr;
204c5ef502eSSam Parker BranchInst *ExitBranch = nullptr;
205c5ef502eSSam Parker Value *LoopDecrement = nullptr;
206c5ef502eSSam Parker bool UsePHICounter = false;
2079a92be1bSSam Parker bool UseLoopGuard = false;
2089a92be1bSSam Parker BasicBlock *BeginBB = nullptr;
209c5ef502eSSam Parker };
210c5ef502eSSam Parker }
211c5ef502eSSam Parker
212c5ef502eSSam Parker char HardwareLoops::ID = 0;
213c5ef502eSSam Parker
runOnFunction(Function & F)214c5ef502eSSam Parker bool HardwareLoops::runOnFunction(Function &F) {
215c5ef502eSSam Parker if (skipFunction(F))
216c5ef502eSSam Parker return false;
217c5ef502eSSam Parker
218c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
219c5ef502eSSam Parker
220c5ef502eSSam Parker LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
221c5ef502eSSam Parker SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
222c5ef502eSSam Parker DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
223c5ef502eSSam Parker TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
224c5ef502eSSam Parker DL = &F.getParent()->getDataLayout();
22592164cf2SSjoerd Meijer ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
226c5ef502eSSam Parker auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2279c27b59cSTeresa Johnson LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
22806fef0b3SJinsong Ji PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
229c5ef502eSSam Parker AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
230c5ef502eSSam Parker M = F.getParent();
231c5ef502eSSam Parker
232905cf88dSKazu Hirata for (Loop *L : *LI)
23389c1e35fSStefanos Baziotis if (L->isOutermost())
234c5ef502eSSam Parker TryConvertLoop(L);
235c5ef502eSSam Parker
236c5ef502eSSam Parker return MadeChange;
237c5ef502eSSam Parker }
238c5ef502eSSam Parker
239c5ef502eSSam Parker // Return true if the search should stop, which will be when an inner loop is
240c5ef502eSSam Parker // converted and the parent loop doesn't support containing a hardware loop.
TryConvertLoop(Loop * L)241c5ef502eSSam Parker bool HardwareLoops::TryConvertLoop(Loop *L) {
242c5ef502eSSam Parker // Process nested loops first.
2439e03547cSDavid Green bool AnyChanged = false;
2449e03547cSDavid Green for (Loop *SL : *L)
2459e03547cSDavid Green AnyChanged |= TryConvertLoop(SL);
2469e03547cSDavid Green if (AnyChanged) {
24792164cf2SSjoerd Meijer reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
24892164cf2SSjoerd Meijer ORE, L);
249c5ef502eSSam Parker return true; // Stop search.
25092164cf2SSjoerd Meijer }
2519e03547cSDavid Green
2529e03547cSDavid Green LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
253c5ef502eSSam Parker
254c5b918deSChen Zheng HardwareLoopInfo HWLoopInfo(L);
25592164cf2SSjoerd Meijer if (!HWLoopInfo.canAnalyze(*LI)) {
25692164cf2SSjoerd Meijer reportHWLoopFailure("cannot analyze loop, irreducible control flow",
25792164cf2SSjoerd Meijer "HWLoopCannotAnalyze", ORE, L);
258aa999528SChen Zheng return false;
25992164cf2SSjoerd Meijer }
260aa999528SChen Zheng
26192164cf2SSjoerd Meijer if (!ForceHardwareLoops &&
26292164cf2SSjoerd Meijer !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
26392164cf2SSjoerd Meijer reportHWLoopFailure("it's not profitable to create a hardware-loop",
26492164cf2SSjoerd Meijer "HWLoopNotProfitable", ORE, L);
26592164cf2SSjoerd Meijer return false;
26692164cf2SSjoerd Meijer }
267c5ef502eSSam Parker
268c5ef502eSSam Parker // Allow overriding of the counter width and loop decrement value.
269c5ef502eSSam Parker if (CounterBitWidth.getNumOccurrences())
270c5ef502eSSam Parker HWLoopInfo.CountType =
271c5ef502eSSam Parker IntegerType::get(M->getContext(), CounterBitWidth);
272c5ef502eSSam Parker
273c5ef502eSSam Parker if (LoopDecrement.getNumOccurrences())
274c5ef502eSSam Parker HWLoopInfo.LoopDecrement =
275c5ef502eSSam Parker ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
276c5ef502eSSam Parker
277c5ef502eSSam Parker MadeChange |= TryConvertLoop(HWLoopInfo);
278c5ef502eSSam Parker return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
279c5ef502eSSam Parker }
280c5ef502eSSam Parker
TryConvertLoop(HardwareLoopInfo & HWLoopInfo)281c5b918deSChen Zheng bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
282c5ef502eSSam Parker
28306fef0b3SJinsong Ji Loop *L = HWLoopInfo.L;
28406fef0b3SJinsong Ji LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
285c5ef502eSSam Parker
286c5b918deSChen Zheng if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
28792164cf2SSjoerd Meijer ForceHardwareLoopPHI)) {
28892164cf2SSjoerd Meijer // TODO: there can be many reasons a loop is not considered a
28992164cf2SSjoerd Meijer // candidate, so we should let isHardwareLoopCandidate fill in the
29092164cf2SSjoerd Meijer // reason and then report a better message here.
29192164cf2SSjoerd Meijer reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
292c5ef502eSSam Parker return false;
29392164cf2SSjoerd Meijer }
294c5ef502eSSam Parker
295c5b918deSChen Zheng assert(
29634badc40SChen Zheng (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
297c5b918deSChen Zheng "Hardware Loop must have set exit info.");
298c5b918deSChen Zheng
29906fef0b3SJinsong Ji BasicBlock *Preheader = L->getLoopPreheader();
30006fef0b3SJinsong Ji
30106fef0b3SJinsong Ji // If we don't have a preheader, then insert one.
30206fef0b3SJinsong Ji if (!Preheader)
30306fef0b3SJinsong Ji Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
30406fef0b3SJinsong Ji if (!Preheader)
30506fef0b3SJinsong Ji return false;
30606fef0b3SJinsong Ji
30792164cf2SSjoerd Meijer HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
308c5ef502eSSam Parker HWLoop.Create();
309c5ef502eSSam Parker ++NumHWLoops;
310c5ef502eSSam Parker return true;
311c5ef502eSSam Parker }
312c5ef502eSSam Parker
Create()313c5ef502eSSam Parker void HardwareLoop::Create() {
314c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
3159a92be1bSSam Parker
3169a92be1bSSam Parker Value *LoopCountInit = InitLoopCount();
31792164cf2SSjoerd Meijer if (!LoopCountInit) {
31892164cf2SSjoerd Meijer reportHWLoopFailure("could not safely create a loop count expression",
31992164cf2SSjoerd Meijer "HWLoopNotSafe", ORE, L);
32006fef0b3SJinsong Ji return;
32192164cf2SSjoerd Meijer }
322c5ef502eSSam Parker
323b2ac9681SDavid Green Value *Setup = InsertIterationSetup(LoopCountInit);
324c5ef502eSSam Parker
325c5ef502eSSam Parker if (UsePHICounter || ForceHardwareLoopPHI) {
326c5ef502eSSam Parker Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
327b2ac9681SDavid Green Value *EltsRem = InsertPHICounter(Setup, LoopDec);
328c5ef502eSSam Parker LoopDec->setOperand(0, EltsRem);
329c5ef502eSSam Parker UpdateBranch(LoopDec);
330c5ef502eSSam Parker } else
331c5ef502eSSam Parker InsertLoopDec();
332c5ef502eSSam Parker
333c5ef502eSSam Parker // Run through the basic blocks of the loop and see if any of them have dead
334c5ef502eSSam Parker // PHIs that can be removed.
335*9e6d1f4bSKazu Hirata for (auto *I : L->blocks())
336c5ef502eSSam Parker DeleteDeadPHIs(I);
337c5ef502eSSam Parker }
338c5ef502eSSam Parker
CanGenerateTest(Loop * L,Value * Count)3399a92be1bSSam Parker static bool CanGenerateTest(Loop *L, Value *Count) {
3409a92be1bSSam Parker BasicBlock *Preheader = L->getLoopPreheader();
3419a92be1bSSam Parker if (!Preheader->getSinglePredecessor())
3429a92be1bSSam Parker return false;
3439a92be1bSSam Parker
3449a92be1bSSam Parker BasicBlock *Pred = Preheader->getSinglePredecessor();
3459a92be1bSSam Parker if (!isa<BranchInst>(Pred->getTerminator()))
3469a92be1bSSam Parker return false;
3479a92be1bSSam Parker
3489a92be1bSSam Parker auto *BI = cast<BranchInst>(Pred->getTerminator());
3499a92be1bSSam Parker if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
3509a92be1bSSam Parker return false;
3519a92be1bSSam Parker
3529a92be1bSSam Parker // Check that the icmp is checking for equality of Count and zero and that
3539a92be1bSSam Parker // a non-zero value results in entering the loop.
3549a92be1bSSam Parker auto ICmp = cast<ICmpInst>(BI->getCondition());
35598722691SSam Parker LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
3569a92be1bSSam Parker if (!ICmp->isEquality())
3579a92be1bSSam Parker return false;
3589a92be1bSSam Parker
3599a92be1bSSam Parker auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
3609a92be1bSSam Parker if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
3619a92be1bSSam Parker return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
3629a92be1bSSam Parker return false;
3639a92be1bSSam Parker };
3649a92be1bSSam Parker
365c98a8a09SSam Parker // Check if Count is a zext.
366c98a8a09SSam Parker Value *CountBefZext =
367c98a8a09SSam Parker isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
368c98a8a09SSam Parker
369c98a8a09SSam Parker if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
370c98a8a09SSam Parker !IsCompareZero(ICmp, CountBefZext, 0) &&
371c98a8a09SSam Parker !IsCompareZero(ICmp, CountBefZext, 1))
3729a92be1bSSam Parker return false;
3739a92be1bSSam Parker
3749a92be1bSSam Parker unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
3759a92be1bSSam Parker if (BI->getSuccessor(SuccIdx) != Preheader)
3769a92be1bSSam Parker return false;
3779a92be1bSSam Parker
3789a92be1bSSam Parker return true;
3799a92be1bSSam Parker }
3809a92be1bSSam Parker
InitLoopCount()3819a92be1bSSam Parker Value *HardwareLoop::InitLoopCount() {
3829a92be1bSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
3839a92be1bSSam Parker // Can we replace a conditional branch with an intrinsic that sets the
3849a92be1bSSam Parker // loop counter and tests that is not zero?
3859a92be1bSSam Parker
386c5ef502eSSam Parker SCEVExpander SCEVE(SE, DL, "loopcnt");
38734badc40SChen Zheng if (!ExitCount->getType()->isPointerTy() &&
38834badc40SChen Zheng ExitCount->getType() != CountType)
38934badc40SChen Zheng ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
39034badc40SChen Zheng
39134badc40SChen Zheng ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
392c5ef502eSSam Parker
3939a92be1bSSam Parker // If we're trying to use the 'test and set' form of the intrinsic, we need
3949a92be1bSSam Parker // to replace a conditional branch that is controlling entry to the loop. It
3959a92be1bSSam Parker // is likely (guaranteed?) that the preheader has an unconditional branch to
3969a92be1bSSam Parker // the loop header, so also check if it has a single predecessor.
39734badc40SChen Zheng if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
39834badc40SChen Zheng SE.getZero(ExitCount->getType()))) {
39906fef0b3SJinsong Ji LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
4009a92be1bSSam Parker UseLoopGuard |= ForceGuardLoopEntry;
40106fef0b3SJinsong Ji } else
4029a92be1bSSam Parker UseLoopGuard = false;
4039a92be1bSSam Parker
4049a92be1bSSam Parker BasicBlock *BB = L->getLoopPreheader();
4059a92be1bSSam Parker if (UseLoopGuard && BB->getSinglePredecessor() &&
4066c348e40SSam Tebbs cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
4076c348e40SSam Tebbs BasicBlock *Predecessor = BB->getSinglePredecessor();
4086c348e40SSam Tebbs // If it's not safe to create a while loop then don't force it and create a
4096c348e40SSam Tebbs // do-while loop instead
410dcf4b733SNikita Popov if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
4116c348e40SSam Tebbs UseLoopGuard = false;
4126c348e40SSam Tebbs else
4136c348e40SSam Tebbs BB = Predecessor;
4146c348e40SSam Tebbs }
41506fef0b3SJinsong Ji
416dcf4b733SNikita Popov if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
41734badc40SChen Zheng LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
41834badc40SChen Zheng << *ExitCount << "\n");
41906fef0b3SJinsong Ji return nullptr;
420c5ef502eSSam Parker }
421c5ef502eSSam Parker
42234badc40SChen Zheng Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
423c5ef502eSSam Parker BB->getTerminator());
4249a92be1bSSam Parker
4259a92be1bSSam Parker // FIXME: We've expanded Count where we hope to insert the counter setting
4269a92be1bSSam Parker // intrinsic. But, in the case of the 'test and set' form, we may fallback to
4279a92be1bSSam Parker // the just 'set' form and in which case the insertion block is most likely
4289a92be1bSSam Parker // different. It means there will be instruction(s) in a block that possibly
4299a92be1bSSam Parker // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
4309a92be1bSSam Parker // but it's doesn't appear to work in all cases.
4319a92be1bSSam Parker
4329a92be1bSSam Parker UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
4339a92be1bSSam Parker BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
4349a92be1bSSam Parker LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
4359a92be1bSSam Parker << " - Expanded Count in " << BB->getName() << "\n"
4369a92be1bSSam Parker << " - Will insert set counter intrinsic into: "
4379a92be1bSSam Parker << BeginBB->getName() << "\n");
438c5ef502eSSam Parker return Count;
439c5ef502eSSam Parker }
440c5ef502eSSam Parker
InsertIterationSetup(Value * LoopCountInit)441b2ac9681SDavid Green Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
4429a92be1bSSam Parker IRBuilder<> Builder(BeginBB->getTerminator());
443c5ef502eSSam Parker Type *Ty = LoopCountInit->getType();
444b2ac9681SDavid Green bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
445fad70c30SDavid Green Intrinsic::ID ID = UseLoopGuard
446fad70c30SDavid Green ? (UsePhi ? Intrinsic::test_start_loop_iterations
447fad70c30SDavid Green : Intrinsic::test_set_loop_iterations)
448b2ac9681SDavid Green : (UsePhi ? Intrinsic::start_loop_iterations
449b2ac9681SDavid Green : Intrinsic::set_loop_iterations);
4509a92be1bSSam Parker Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
451fad70c30SDavid Green Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
4529a92be1bSSam Parker
4539a92be1bSSam Parker // Use the return value of the intrinsic to control the entry of the loop.
4549a92be1bSSam Parker if (UseLoopGuard) {
4559a92be1bSSam Parker assert((isa<BranchInst>(BeginBB->getTerminator()) &&
4569a92be1bSSam Parker cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
4579a92be1bSSam Parker "Expected conditional branch");
458fad70c30SDavid Green
459fad70c30SDavid Green Value *SetCount =
460fad70c30SDavid Green UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
4619a92be1bSSam Parker auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
4629a92be1bSSam Parker LoopGuard->setCondition(SetCount);
4639a92be1bSSam Parker if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
4649a92be1bSSam Parker LoopGuard->swapSuccessors();
4659a92be1bSSam Parker }
466fad70c30SDavid Green LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
467fad70c30SDavid Green << "\n");
468fad70c30SDavid Green if (UsePhi && UseLoopGuard)
469fad70c30SDavid Green LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
470fad70c30SDavid Green return !UsePhi ? LoopCountInit : LoopSetup;
471c5ef502eSSam Parker }
472c5ef502eSSam Parker
InsertLoopDec()473c5ef502eSSam Parker void HardwareLoop::InsertLoopDec() {
474c5ef502eSSam Parker IRBuilder<> CondBuilder(ExitBranch);
475c5ef502eSSam Parker
476c5ef502eSSam Parker Function *DecFunc =
477c5ef502eSSam Parker Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
478c5ef502eSSam Parker LoopDecrement->getType());
479c5ef502eSSam Parker Value *Ops[] = { LoopDecrement };
480c5ef502eSSam Parker Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
481c5ef502eSSam Parker Value *OldCond = ExitBranch->getCondition();
482c5ef502eSSam Parker ExitBranch->setCondition(NewCond);
483c5ef502eSSam Parker
484c5ef502eSSam Parker // The false branch must exit the loop.
485c5ef502eSSam Parker if (!L->contains(ExitBranch->getSuccessor(0)))
486c5ef502eSSam Parker ExitBranch->swapSuccessors();
487c5ef502eSSam Parker
488c5ef502eSSam Parker // The old condition may be dead now, and may have even created a dead PHI
489c5ef502eSSam Parker // (the original induction variable).
490c5ef502eSSam Parker RecursivelyDeleteTriviallyDeadInstructions(OldCond);
491c5ef502eSSam Parker
492c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
493c5ef502eSSam Parker }
494c5ef502eSSam Parker
InsertLoopRegDec(Value * EltsRem)495c5ef502eSSam Parker Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
496c5ef502eSSam Parker IRBuilder<> CondBuilder(ExitBranch);
497c5ef502eSSam Parker
498c5ef502eSSam Parker Function *DecFunc =
499c5ef502eSSam Parker Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
500b0614509SSjoerd Meijer { EltsRem->getType() });
501c5ef502eSSam Parker Value *Ops[] = { EltsRem, LoopDecrement };
502c5ef502eSSam Parker Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
503c5ef502eSSam Parker
504c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
505c5ef502eSSam Parker return cast<Instruction>(Call);
506c5ef502eSSam Parker }
507c5ef502eSSam Parker
InsertPHICounter(Value * NumElts,Value * EltsRem)508c5ef502eSSam Parker PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
509c5ef502eSSam Parker BasicBlock *Preheader = L->getLoopPreheader();
510c5ef502eSSam Parker BasicBlock *Header = L->getHeader();
511c5ef502eSSam Parker BasicBlock *Latch = ExitBranch->getParent();
512c5ef502eSSam Parker IRBuilder<> Builder(Header->getFirstNonPHI());
513c5ef502eSSam Parker PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
514c5ef502eSSam Parker Index->addIncoming(NumElts, Preheader);
515c5ef502eSSam Parker Index->addIncoming(EltsRem, Latch);
516c5ef502eSSam Parker LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
517c5ef502eSSam Parker return Index;
518c5ef502eSSam Parker }
519c5ef502eSSam Parker
UpdateBranch(Value * EltsRem)520c5ef502eSSam Parker void HardwareLoop::UpdateBranch(Value *EltsRem) {
521c5ef502eSSam Parker IRBuilder<> CondBuilder(ExitBranch);
522c5ef502eSSam Parker Value *NewCond =
523c5ef502eSSam Parker CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
524c5ef502eSSam Parker Value *OldCond = ExitBranch->getCondition();
525c5ef502eSSam Parker ExitBranch->setCondition(NewCond);
526c5ef502eSSam Parker
527c5ef502eSSam Parker // The false branch must exit the loop.
528c5ef502eSSam Parker if (!L->contains(ExitBranch->getSuccessor(0)))
529c5ef502eSSam Parker ExitBranch->swapSuccessors();
530c5ef502eSSam Parker
531c5ef502eSSam Parker // The old condition may be dead now, and may have even created a dead PHI
532c5ef502eSSam Parker // (the original induction variable).
533c5ef502eSSam Parker RecursivelyDeleteTriviallyDeadInstructions(OldCond);
534c5ef502eSSam Parker }
535c5ef502eSSam Parker
INITIALIZE_PASS_BEGIN(HardwareLoops,DEBUG_TYPE,HW_LOOPS_NAME,false,false)536c5ef502eSSam Parker INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
537c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
538c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
539c5ef502eSSam Parker INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
54092164cf2SSjoerd Meijer INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
541c5ef502eSSam Parker INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
542c5ef502eSSam Parker
543c5ef502eSSam Parker FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
544