1 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass identifies loops where we can generate the PPC branch instructions
10 // that decrement and test the count register (CTR) (bdnz and friends).
11 //
12 // The pattern that defines the induction variable can changed depending on
13 // prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
14 // normalizes induction variables, and the Loop Strength Reduction pass
15 // run by 'llc' may also make changes to the induction variable.
16 //
17 // Criteria for CTR loops:
18 //  - Countable loops (w/ ind. var for a trip count)
19 //  - Try inner-most loops first
20 //  - No nested CTR loops.
21 //  - No function calls in loops.
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #include "PPC.h"
26 #include "PPCSubtarget.h"
27 #include "PPCTargetMachine.h"
28 #include "PPCTargetTransformInfo.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AssumptionCache.h"
32 #include "llvm/Analysis/CFG.h"
33 #include "llvm/Analysis/CodeMetrics.h"
34 #include "llvm/Analysis/LoopInfo.h"
35 #include "llvm/Analysis/LoopIterator.h"
36 #include "llvm/Analysis/ScalarEvolutionExpander.h"
37 #include "llvm/Analysis/TargetLibraryInfo.h"
38 #include "llvm/Analysis/TargetTransformInfo.h"
39 #include "llvm/Transforms/Utils/Local.h"
40 #include "llvm/CodeGen/TargetPassConfig.h"
41 #include "llvm/CodeGen/TargetSchedule.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/IntrinsicInst.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/ValueHandle.h"
50 #include "llvm/PassSupport.h"
51 #include "llvm/Support/CommandLine.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include "llvm/Transforms/Scalar.h"
55 #include "llvm/Transforms/Utils.h"
56 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
57 #include "llvm/Transforms/Utils/LoopUtils.h"
58 
59 #ifndef NDEBUG
60 #include "llvm/CodeGen/MachineDominators.h"
61 #include "llvm/CodeGen/MachineFunction.h"
62 #include "llvm/CodeGen/MachineFunctionPass.h"
63 #include "llvm/CodeGen/MachineRegisterInfo.h"
64 #endif
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "ctrloops"
69 
70 #ifndef NDEBUG
71 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
72 #endif
73 
74 // The latency of mtctr is only justified if there are more than 4
75 // comparisons that will be removed as a result.
76 static cl::opt<unsigned>
77 SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
78                       cl::desc("Loops with a constant trip count smaller than "
79                                "this value will not use the count register."));
80 
81 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
82 
namespace llvm {
  // Forward declarations of the pass initializers generated by the
  // INITIALIZE_PASS macros below; the pass constructors call these to
  // register with the PassRegistry.
  void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
}
89 
90 namespace {
  // IR-level pass that rewrites eligible countable loops to use the CTR
  // register (mtctr + decrement-and-branch) instead of an explicit induction
  // variable compare.
  struct PPCCTRLoops : public FunctionPass {

#ifndef NDEBUG
    // Count of loops converted so far across the compilation; used together
    // with -ppc-max-ctrloop to bisect problems in debug builds.
    static int Counter;
#endif

  public:
    static char ID;

    PPCCTRLoops() : FunctionPass(ID) {
      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
    }

  private:
    // Returns true if anything in BB might use or clobber CTR once lowered
    // (most importantly, anything that may become a function call).
    bool mightUseCTR(BasicBlock *BB);
    // Attempts the conversion on L, recursing into subloops first.
    bool convertToCTRLoop(Loop *L);

  private:
    // Target and analysis state cached by runOnFunction for the helpers.
    const PPCTargetMachine *TM;
    const PPCSubtarget *STI;
    const PPCTargetLowering *TLI;
    const DataLayout *DL;
    const TargetLibraryInfo *LibInfo;
    const TargetTransformInfo *TTI;
    LoopInfo *LI;
    ScalarEvolution *SE;
    DominatorTree *DT;
    bool PreserveLCSSA;
    TargetSchedModel SchedModel;
  };

  char PPCCTRLoops::ID = 0;
#ifndef NDEBUG
  int PPCCTRLoops::Counter = 0;
#endif
138 
139 #ifndef NDEBUG
  // Debug-build machine-level pass that verifies every CTR branch (bdnz/bdz)
  // is preceded on all paths by a loop mtctr with no intervening CTR clobber.
  struct PPCCTRLoopsVerify : public MachineFunctionPass {
  public:
    static char ID;

    PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
      initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    MachineDominatorTree *MDT;
  };

  char PPCCTRLoopsVerify::ID = 0;
160 #endif // NDEBUG
161 } // end anonymous namespace
162 
// Register the IR-level pass and the analyses it depends on.
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                    false, false)

// Factory used by the PPC target to add this pass to the codegen pipeline.
FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
172 
173 #ifndef NDEBUG
// Register the debug-only verification pass (only built when NDEBUG is unset).
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                      "PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                    "PowerPC CTR Loops Verify", false, false)

// Factory for the verification pass.
FunctionPass *llvm::createPPCCTRLoopsVerify() {
  return new PPCCTRLoopsVerify();
}
183 #endif // NDEBUG
184 
185 bool PPCCTRLoops::runOnFunction(Function &F) {
186   if (skipFunction(F))
187     return false;
188 
189   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
190   if (!TPC)
191     return false;
192 
193   TM = &TPC->getTM<PPCTargetMachine>();
194   STI = TM->getSubtargetImpl(F);
195   TLI = STI->getTargetLowering();
196 
197   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
198   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
199   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
200   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
201   DL = &F.getParent()->getDataLayout();
202   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
203   LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
204   PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
205 
206   bool MadeChange = false;
207 
208   for (LoopInfo::iterator I = LI->begin(), E = LI->end();
209        I != E; ++I) {
210     Loop *L = *I;
211     if (!L->getParentLoop())
212       MadeChange |= convertToCTRLoop(L);
213   }
214 
215   return MadeChange;
216 }
217 
218 static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
219   if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
220     return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
221 
222   return false;
223 }
224 
225 // Determining the address of a TLS variable results in a function call in
226 // certain TLS models.
227 static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
228   const auto *GV = dyn_cast<GlobalValue>(MemAddr);
229   if (!GV) {
230     // Recurse to check for constants that refer to TLS global variables.
231     if (const auto *CV = dyn_cast<Constant>(MemAddr))
232       for (const auto &CO : CV->operands())
233         if (memAddrUsesCTR(TM, CO))
234           return true;
235 
236     return false;
237   }
238 
239   if (!GV->isThreadLocal())
240     return false;
241   TLSModel::Model Model = TM.getTLSModel(GV);
242   return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
243 }
244 
245 // Loop through the inline asm constraints and look for something that clobbers
246 // ctr.
247 static bool asmClobbersCTR(InlineAsm *IA) {
248   InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
249   for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
250     InlineAsm::ConstraintInfo &C = CIV[i];
251     if (C.Type != InlineAsm::isInput)
252       for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
253         if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
254           return true;
255   }
256   return false;
257 }
258 
// Conservatively determine whether anything in BB might use or clobber the
// counter register once lowered.  The dominant concern is anything that may
// become a function call (calls clobber CTR); we also reject operations that
// lower to indirect branches or runtime-library calls.  Returns true if BB
// is unsafe to place inside a CTR loop.
bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          // Intrinsics not listed below never lower to calls on PPC.
          default: continue;
          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
          // we're definitely using CTR.
          case Intrinsic::ppc_is_decremented_ctr_nonzero:
          case Intrinsic::ppc_mtctr:
            return true;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
                       !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
 // let's return it to _setjmp state
#  pragma pop_macro("setjmp")
#  undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          // These intrinsics all lower to library calls on PPC.
          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          // For the rest, whether a call results depends on whether the
          // corresponding ISD node is legal for the operand type; that is
          // checked below under "if (Opcode)".
          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          // No call results if the operation is legal (or custom-lowered)
          // for this type, directly or after vector scalarization.
          EVT EVTy =
              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      // Any other call (indirect, or a direct call not proven call-free
      // above) will clobber the counter register.
      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_f128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      // Conversions involving ppc_fp128 or integers wider than a GPR are
      // runtime-library calls.
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM->isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      // Division/remainder wider than a GPR is a runtime-library call.
      return true;
    } else if (!TM->isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      // A switch large enough to become a jump table lowers to an indirect
      // jump, which uses CTR.
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (STI->useSoftFloat()) {
      // With soft float, every floating-point operation is a library call.
      switch(J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    // Dynamic-TLS address computations are calls as well.
    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(*TM, Operand))
        return true;
  }

  return false;
}
488 bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
489   bool MadeChange = false;
490 
491   // Do not convert small short loops to CTR loop.
492   unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
493   if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
494     SmallPtrSet<const Value *, 32> EphValues;
495     auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
496         *L->getHeader()->getParent());
497     CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
498     CodeMetrics Metrics;
499     for (BasicBlock *BB : L->blocks())
500       Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
501     // 6 is an approximate latency for the mtctr instruction.
502     if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
503       return false;
504   }
505 
506   // Process nested loops first.
507   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
508     MadeChange |= convertToCTRLoop(*I);
509     LLVM_DEBUG(dbgs() << "Nested loop converted\n");
510   }
511 
512   // If a nested loop has been converted, then we can't convert this loop.
513   if (MadeChange)
514     return MadeChange;
515 
516   // Bail out if the loop has irreducible control flow.
517   LoopBlocksRPO RPOT(L);
518   RPOT.perform(LI);
519   if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
520     return false;
521 
522 #ifndef NDEBUG
523   // Stop trying after reaching the limit (if any).
524   int Limit = CTRLoopLimit;
525   if (Limit >= 0) {
526     if (Counter >= CTRLoopLimit)
527       return false;
528     Counter++;
529   }
530 #endif
531 
532   // We don't want to spill/restore the counter register, and so we don't
533   // want to use the counter register if the loop contains calls.
534   for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
535        I != IE; ++I)
536     if (mightUseCTR(*I))
537       return MadeChange;
538 
539   SmallVector<BasicBlock*, 4> ExitingBlocks;
540   L->getExitingBlocks(ExitingBlocks);
541 
542   // If there is an exit edge known to be frequently taken,
543   // we should not transform this loop.
544   for (auto &BB : ExitingBlocks) {
545     Instruction *TI = BB->getTerminator();
546     if (!TI) continue;
547 
548     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
549       uint64_t TrueWeight = 0, FalseWeight = 0;
550       if (!BI->isConditional() ||
551           !BI->extractProfMetadata(TrueWeight, FalseWeight))
552         continue;
553 
554       // If the exit path is more frequent than the loop path,
555       // we return here without further analysis for this loop.
556       bool TrueIsExit = !L->contains(BI->getSuccessor(0));
557       if (( TrueIsExit && FalseWeight < TrueWeight) ||
558           (!TrueIsExit && FalseWeight > TrueWeight))
559         return MadeChange;
560     }
561   }
562 
563   BasicBlock *CountedExitBlock = nullptr;
564   const SCEV *ExitCount = nullptr;
565   BranchInst *CountedExitBranch = nullptr;
566   for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
567        IE = ExitingBlocks.end(); I != IE; ++I) {
568     const SCEV *EC = SE->getExitCount(L, *I);
569     LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
570                       << (*I)->getName() << ": " << *EC << "\n");
571     if (isa<SCEVCouldNotCompute>(EC))
572       continue;
573     if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
574       if (ConstEC->getValue()->isZero())
575         continue;
576     } else if (!SE->isLoopInvariant(EC, L))
577       continue;
578 
579     if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
580       continue;
581 
582     // If this exiting block is contained in a nested loop, it is not eligible
583     // for insertion of the branch-and-decrement since the inner loop would
584     // end up messing up the value in the CTR.
585     if (LI->getLoopFor(*I) != L)
586       continue;
587 
588     // We now have a loop-invariant count of loop iterations (which is not the
589     // constant zero) for which we know that this loop will not exit via this
590     // existing block.
591 
592     // We need to make sure that this block will run on every loop iteration.
593     // For this to be true, we must dominate all blocks with backedges. Such
594     // blocks are in-loop predecessors to the header block.
595     bool NotAlways = false;
596     for (pred_iterator PI = pred_begin(L->getHeader()),
597          PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
598       if (!L->contains(*PI))
599         continue;
600 
601       if (!DT->dominates(*I, *PI)) {
602         NotAlways = true;
603         break;
604       }
605     }
606 
607     if (NotAlways)
608       continue;
609 
610     // Make sure this blocks ends with a conditional branch.
611     Instruction *TI = (*I)->getTerminator();
612     if (!TI)
613       continue;
614 
615     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
616       if (!BI->isConditional())
617         continue;
618 
619       CountedExitBranch = BI;
620     } else
621       continue;
622 
623     // Note that this block may not be the loop latch block, even if the loop
624     // has a latch block.
625     CountedExitBlock = *I;
626     ExitCount = EC;
627     break;
628   }
629 
630   if (!CountedExitBlock)
631     return MadeChange;
632 
633   BasicBlock *Preheader = L->getLoopPreheader();
634 
635   // If we don't have a preheader, then insert one. If we already have a
636   // preheader, then we can use it (except if the preheader contains a use of
637   // the CTR register because some such uses might be reordered by the
638   // selection DAG after the mtctr instruction).
639   if (!Preheader || mightUseCTR(Preheader))
640     Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
641   if (!Preheader)
642     return MadeChange;
643 
644   LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
645                     << "\n");
646 
647   // Insert the count into the preheader and replace the condition used by the
648   // selected branch.
649   MadeChange = true;
650 
651   SCEVExpander SCEVE(*SE, *DL, "loopcnt");
652   LLVMContext &C = SE->getContext();
653   Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
654   if (!ExitCount->getType()->isPointerTy() &&
655       ExitCount->getType() != CountType)
656     ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
657   ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
658   Value *ECValue =
659       SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
660 
661   IRBuilder<> CountBuilder(Preheader->getTerminator());
662   Module *M = Preheader->getParent()->getParent();
663   Function *MTCTRFunc =
664       Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
665   CountBuilder.CreateCall(MTCTRFunc, ECValue);
666 
667   IRBuilder<> CondBuilder(CountedExitBranch);
668   Function *DecFunc =
669       Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
670   Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
671   Value *OldCond = CountedExitBranch->getCondition();
672   CountedExitBranch->setCondition(NewCond);
673 
674   // The false branch must exit the loop.
675   if (!L->contains(CountedExitBranch->getSuccessor(0)))
676     CountedExitBranch->swapSuccessors();
677 
678   // The old condition may be dead now, and may have even created a dead PHI
679   // (the original induction variable).
680   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
681   // Run through the basic blocks of the loop and see if any of them have dead
682   // PHIs that can be removed.
683   for (auto I : L->blocks())
684     DeleteDeadPHIs(I);
685 
686   ++NumCTRLoops;
687   return MadeChange;
688 }
689 
690 #ifndef NDEBUG
691 static bool clobbersCTR(const MachineInstr &MI) {
692   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
693     const MachineOperand &MO = MI.getOperand(i);
694     if (MO.isReg()) {
695       if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
696         return true;
697     } else if (MO.isRegMask()) {
698       if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
699         return true;
700     }
701   }
702 
703   return false;
704 }
705 
// Walk backwards from the CTR branch at (MBB, I) across all predecessor
// paths, checking that a MTCTRloop/MTCTR8loop is reached on every path
// before any other instruction that clobbers CTR.  Returns false if some
// path clobbers CTR first, or reaches the function entry without finding
// the mtctr.
static bool verifyCTRBranch(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator I) {
  // BI remembers the branch being verified so it can be skipped when
  // checking for clobbers and named in diagnostics.
  MachineBasicBlock::iterator BI = I;
  SmallSet<MachineBasicBlock *, 16>   Visited;
  SmallVector<MachineBasicBlock *, 8> Preds;
  bool CheckPreds;

  if (I == MBB->begin()) {
    Visited.insert(MBB);
    goto queue_preds;
  } else
    --I;

check_block:
  // Scan the current block backwards starting at I.
  Visited.insert(MBB);
  if (I == MBB->end())
    goto queue_preds;

  CheckPreds = true;
  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
    unsigned Opc = I->getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      // Found the loop mtctr on this path; no need to look further back.
      CheckPreds = false;
      break;
    }

    if (I != BI && clobbersCTR(*I)) {
      LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
                        << ") instruction " << *I
                        << " clobbers CTR, invalidating "
                        << printMBBReference(*BI->getParent()) << " ("
                        << BI->getParent()->getFullName() << ") instruction "
                        << *BI << "\n");
      return false;
    }

    if (I == IE)
      break;
  }

  if (!CheckPreds && Preds.empty())
    return true;

  if (CheckPreds) {
queue_preds:
    // Reaching the entry block without an mtctr means some path misses it.
    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
      LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
                        << printMBBReference(*BI->getParent()) << " ("
                        << BI->getParent()->getFullName() << ") instruction "
                        << *BI << "\n");
      return false;
    }

    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
         PIE = MBB->pred_end(); PI != PIE; ++PI)
      Preds.push_back(*PI);
  }

  do {
    // Continue the backwards walk into each not-yet-visited predecessor.
    MBB = Preds.pop_back_val();
    if (!Visited.count(MBB)) {
      I = MBB->getLastNonDebugInstr();
      goto check_block;
    }
  } while (!Preds.empty());

  return true;
}
774 
775 bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
776   MDT = &getAnalysis<MachineDominatorTree>();
777 
778   // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
779   // any other instructions that might clobber the ctr register.
780   for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
781        I != IE; ++I) {
782     MachineBasicBlock *MBB = &*I;
783     if (!MDT->isReachableFromEntry(MBB))
784       continue;
785 
786     for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
787       MIIE = MBB->end(); MII != MIIE; ++MII) {
788       unsigned Opc = MII->getOpcode();
789       if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
790           Opc == PPC::BDZ8  || Opc == PPC::BDZ)
791         if (!verifyCTRBranch(MBB, MII))
792           llvm_unreachable("Invalid PPC CTR loop!");
793     }
794   }
795 
796   return false;
797 }
798 #endif // NDEBUG
799