1 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the PPC branch instructions
11 // that decrement and test the count register (CTR) (bdnz and friends).
12 //
13 // The pattern that defines the induction variable can changed depending on
14 // prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
15 // normalizes induction variables, and the Loop Strength Reduction pass
16 // run by 'llc' may also make changes to the induction variable.
17 //
18 // Criteria for CTR loops:
19 //  - Countable loops (w/ ind. var for a trip count)
20 //  - Try inner-most loops first
21 //  - No nested CTR loops.
22 //  - No function calls in loops.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "PPC.h"
27 #include "PPCSubtarget.h"
28 #include "PPCTargetMachine.h"
29 #include "PPCTargetTransformInfo.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AssumptionCache.h"
33 #include "llvm/Analysis/CodeMetrics.h"
34 #include "llvm/Analysis/LoopInfo.h"
35 #include "llvm/Analysis/ScalarEvolutionExpander.h"
36 #include "llvm/Analysis/TargetLibraryInfo.h"
37 #include "llvm/Analysis/TargetTransformInfo.h"
38 #include "llvm/Analysis/Utils/Local.h"
39 #include "llvm/CodeGen/TargetPassConfig.h"
40 #include "llvm/CodeGen/TargetSchedule.h"
41 #include "llvm/IR/Constants.h"
42 #include "llvm/IR/DerivedTypes.h"
43 #include "llvm/IR/Dominators.h"
44 #include "llvm/IR/InlineAsm.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/IR/ValueHandle.h"
49 #include "llvm/PassSupport.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include "llvm/Transforms/Scalar.h"
54 #include "llvm/Transforms/Utils.h"
55 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
56 #include "llvm/Transforms/Utils/LoopUtils.h"
57 
58 #ifndef NDEBUG
59 #include "llvm/CodeGen/MachineDominators.h"
60 #include "llvm/CodeGen/MachineFunction.h"
61 #include "llvm/CodeGen/MachineFunctionPass.h"
62 #include "llvm/CodeGen/MachineRegisterInfo.h"
63 #endif
64 
65 using namespace llvm;
66 
67 #define DEBUG_TYPE "ctrloops"
68 
69 #ifndef NDEBUG
70 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
71 #endif
72 
73 // The latency of mtctr is only justified if there are more than 4
74 // comparisons that will be removed as a result.
75 static cl::opt<unsigned>
76 SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
77                       cl::desc("Loops with a constant trip count smaller than "
78                                "this value will not use the count register."));
79 
80 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
81 
82 namespace llvm {
83   void initializePPCCTRLoopsPass(PassRegistry&);
84 #ifndef NDEBUG
85   void initializePPCCTRLoopsVerifyPass(PassRegistry&);
86 #endif
87 }
88 
89 namespace {
  // IR-level pass that rewrites eligible countable loops so that their trip
  // count is placed in the PPC count register (CTR) via the ppc_mtctr
  // intrinsic, with the exit branch testing ppc_is_decremented_ctr_nonzero.
  struct PPCCTRLoops : public FunctionPass {

#ifndef NDEBUG
    // Number of loops converted so far; compared against -ppc-max-ctrloop
    // to allow bisection of miscompiles in debug builds.
    static int Counter;
#endif

  public:
    static char ID; // Pass identification.

    PPCCTRLoops() : FunctionPass(ID) {
      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
    }

  private:
    // Returns true if BB contains anything that might use or clobber the
    // CTR: calls, library-call expansions, indirect branches, etc.
    bool mightUseCTR(BasicBlock *BB);
    // Tries to convert L (recursing into subloops first); returns true if
    // any loop in the nest was converted.
    bool convertToCTRLoop(Loop *L);

  private:
    // Per-function state, (re)initialized at the start of runOnFunction.
    const PPCTargetMachine *TM;
    const PPCSubtarget *STI;
    const PPCTargetLowering *TLI;
    const DataLayout *DL;
    const TargetLibraryInfo *LibInfo; // May be null if the TLI pass is absent.
    const TargetTransformInfo *TTI;
    LoopInfo *LI;
    ScalarEvolution *SE;
    DominatorTree *DT;
    bool PreserveLCSSA;
    // NOTE(review): SchedModel is never init()'d in this pass, so
    // getIssueWidth() appears to use the default model — confirm intended.
    TargetSchedModel SchedModel;
  };

  char PPCCTRLoops::ID = 0;
#ifndef NDEBUG
  int PPCCTRLoops::Counter = 0;
#endif
137 
138 #ifndef NDEBUG
  // Debug-only MachineFunction pass that verifies, late in the pipeline,
  // that every bdnz/bdz branch is preceded on all paths by a loop mtctr
  // with no intervening instruction that clobbers the CTR.
  struct PPCCTRLoopsVerify : public MachineFunctionPass {
  public:
    static char ID; // Pass identification.

    PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
      initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    // Used only to skip blocks unreachable from the entry.
    MachineDominatorTree *MDT;
  };

  char PPCCTRLoopsVerify::ID = 0;
159 #endif // NDEBUG
160 } // end anonymous namespace
161 
// Legacy-pass-manager registration for the IR pass, listing the analyses it
// transitively depends on so they are initialized before this pass.
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                    false, false)

// Factory used by the PPC target to add this pass to the codegen pipeline.
FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
171 
172 #ifndef NDEBUG
// Registration for the debug-only machine verification pass.
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                      "PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                    "PowerPC CTR Loops Verify", false, false)

// Factory for the verification pass (only built in asserts builds).
FunctionPass *llvm::createPPCCTRLoopsVerify() {
  return new PPCCTRLoopsVerify();
}
182 #endif // NDEBUG
183 
184 bool PPCCTRLoops::runOnFunction(Function &F) {
185   if (skipFunction(F))
186     return false;
187 
188   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
189   if (!TPC)
190     return false;
191 
192   TM = &TPC->getTM<PPCTargetMachine>();
193   STI = TM->getSubtargetImpl(F);
194   TLI = STI->getTargetLowering();
195 
196   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
197   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
198   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
199   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
200   DL = &F.getParent()->getDataLayout();
201   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
202   LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
203   PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
204 
205   bool MadeChange = false;
206 
207   for (LoopInfo::iterator I = LI->begin(), E = LI->end();
208        I != E; ++I) {
209     Loop *L = *I;
210     if (!L->getParentLoop())
211       MadeChange |= convertToCTRLoop(L);
212   }
213 
214   return MadeChange;
215 }
216 
217 static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
218   if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
219     return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
220 
221   return false;
222 }
223 
224 // Determining the address of a TLS variable results in a function call in
225 // certain TLS models.
226 static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
227   const auto *GV = dyn_cast<GlobalValue>(MemAddr);
228   if (!GV) {
229     // Recurse to check for constants that refer to TLS global variables.
230     if (const auto *CV = dyn_cast<Constant>(MemAddr))
231       for (const auto &CO : CV->operands())
232         if (memAddrUsesCTR(TM, CO))
233           return true;
234 
235     return false;
236   }
237 
238   if (!GV->isThreadLocal())
239     return false;
240   TLSModel::Model Model = TM.getTLSModel(GV);
241   return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
242 }
243 
244 // Loop through the inline asm constraints and look for something that clobbers
245 // ctr.
246 static bool asmClobbersCTR(InlineAsm *IA) {
247   InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
248   for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
249     InlineAsm::ConstraintInfo &C = CIV[i];
250     if (C.Type != InlineAsm::isInput)
251       for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
252         if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
253           return true;
254   }
255   return false;
256 }
257 
// Returns true if BB contains any instruction that might use or clobber the
// CTR register, or that might be lowered into something that does: calls
// (including intrinsics and library functions that expand to calls),
// CTR-clobbering inline asm, operations on ppc_fp128 or over-wide integers,
// indirect branches, and large switches that may become jump tables.
bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
          // we're definitely using CTR.
          case Intrinsic::ppc_is_decremented_ctr_nonzero:
          case Intrinsic::ppc_mtctr:
            return true;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
                       !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
 // let's return it to _setjmp state
#  pragma pop_macro("setjmp")
#  undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          // These intrinsics may become calls, so conservatively reject
          // the loop.
          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          // For the remaining intrinsics, whether they become library calls
          // depends on the legality of the corresponding ISD opcode for the
          // argument type; that check happens below under "if (Opcode)".
          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          // Map recognized libcalls to ISD opcodes; the legality check
          // below decides whether they stay as ISD nodes or become calls.
          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          // If the operation is legal or custom for the argument type (or
          // its scalar type, for vectors), no call is emitted and the loop
          // remains eligible; otherwise it expands to a library call.
          EVT EVTy =
              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      // Any other call (including indirect calls, where getCalledFunction()
      // is null) is conservatively treated as using the CTR.
      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_f128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      // FP <-> int conversions involving ppc_fp128, or integers wider than
      // a GPR, may be lowered to runtime calls.
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM->isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      // Division/remainder on integers wider than a GPR may become a call.
      return true;
    } else if (!TM->isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      // A switch big enough to be lowered as a jump table would use an
      // indirect branch, which uses the CTR.
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (STI->useSoftFloat()) {
      // Under soft float, all of these FP operations become library calls.
      switch(J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    // Computing the address of a TLS variable may require a function call
    // in some TLS models; check every operand.
    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(*TM, Operand))
        return true;
  }

  return false;
}
487 bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
488   bool MadeChange = false;
489 
490   // Do not convert small short loops to CTR loop.
491   unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
492   if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
493     SmallPtrSet<const Value *, 32> EphValues;
494     auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
495         *L->getHeader()->getParent());
496     CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
497     CodeMetrics Metrics;
498     for (BasicBlock *BB : L->blocks())
499       Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
500     // 6 is an approximate latency for the mtctr instruction.
501     if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
502       return false;
503   }
504 
505   // Process nested loops first.
506   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
507     MadeChange |= convertToCTRLoop(*I);
508     DEBUG(dbgs() << "Nested loop converted\n");
509   }
510 
511   // If a nested loop has been converted, then we can't convert this loop.
512   if (MadeChange)
513     return MadeChange;
514 
515 #ifndef NDEBUG
516   // Stop trying after reaching the limit (if any).
517   int Limit = CTRLoopLimit;
518   if (Limit >= 0) {
519     if (Counter >= CTRLoopLimit)
520       return false;
521     Counter++;
522   }
523 #endif
524 
525   // We don't want to spill/restore the counter register, and so we don't
526   // want to use the counter register if the loop contains calls.
527   for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
528        I != IE; ++I)
529     if (mightUseCTR(*I))
530       return MadeChange;
531 
532   SmallVector<BasicBlock*, 4> ExitingBlocks;
533   L->getExitingBlocks(ExitingBlocks);
534 
535   // If there is an exit edge known to be frequently taken,
536   // we should not transform this loop.
537   for (auto &BB : ExitingBlocks) {
538     Instruction *TI = BB->getTerminator();
539     if (!TI) continue;
540 
541     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
542       uint64_t TrueWeight = 0, FalseWeight = 0;
543       if (!BI->isConditional() ||
544           !BI->extractProfMetadata(TrueWeight, FalseWeight))
545         continue;
546 
547       // If the exit path is more frequent than the loop path,
548       // we return here without further analysis for this loop.
549       bool TrueIsExit = !L->contains(BI->getSuccessor(0));
550       if (( TrueIsExit && FalseWeight < TrueWeight) ||
551           (!TrueIsExit && FalseWeight > TrueWeight))
552         return MadeChange;
553     }
554   }
555 
556   BasicBlock *CountedExitBlock = nullptr;
557   const SCEV *ExitCount = nullptr;
558   BranchInst *CountedExitBranch = nullptr;
559   for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
560        IE = ExitingBlocks.end(); I != IE; ++I) {
561     const SCEV *EC = SE->getExitCount(L, *I);
562     DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
563                     (*I)->getName() << ": " << *EC << "\n");
564     if (isa<SCEVCouldNotCompute>(EC))
565       continue;
566     if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
567       if (ConstEC->getValue()->isZero())
568         continue;
569     } else if (!SE->isLoopInvariant(EC, L))
570       continue;
571 
572     if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
573       continue;
574 
575     // We now have a loop-invariant count of loop iterations (which is not the
576     // constant zero) for which we know that this loop will not exit via this
577     // exisiting block.
578 
579     // We need to make sure that this block will run on every loop iteration.
580     // For this to be true, we must dominate all blocks with backedges. Such
581     // blocks are in-loop predecessors to the header block.
582     bool NotAlways = false;
583     for (pred_iterator PI = pred_begin(L->getHeader()),
584          PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
585       if (!L->contains(*PI))
586         continue;
587 
588       if (!DT->dominates(*I, *PI)) {
589         NotAlways = true;
590         break;
591       }
592     }
593 
594     if (NotAlways)
595       continue;
596 
597     // Make sure this blocks ends with a conditional branch.
598     Instruction *TI = (*I)->getTerminator();
599     if (!TI)
600       continue;
601 
602     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
603       if (!BI->isConditional())
604         continue;
605 
606       CountedExitBranch = BI;
607     } else
608       continue;
609 
610     // Note that this block may not be the loop latch block, even if the loop
611     // has a latch block.
612     CountedExitBlock = *I;
613     ExitCount = EC;
614     break;
615   }
616 
617   if (!CountedExitBlock)
618     return MadeChange;
619 
620   BasicBlock *Preheader = L->getLoopPreheader();
621 
622   // If we don't have a preheader, then insert one. If we already have a
623   // preheader, then we can use it (except if the preheader contains a use of
624   // the CTR register because some such uses might be reordered by the
625   // selection DAG after the mtctr instruction).
626   if (!Preheader || mightUseCTR(Preheader))
627     Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
628   if (!Preheader)
629     return MadeChange;
630 
631   DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
632 
633   // Insert the count into the preheader and replace the condition used by the
634   // selected branch.
635   MadeChange = true;
636 
637   SCEVExpander SCEVE(*SE, *DL, "loopcnt");
638   LLVMContext &C = SE->getContext();
639   Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
640   if (!ExitCount->getType()->isPointerTy() &&
641       ExitCount->getType() != CountType)
642     ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
643   ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
644   Value *ECValue =
645       SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
646 
647   IRBuilder<> CountBuilder(Preheader->getTerminator());
648   Module *M = Preheader->getParent()->getParent();
649   Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
650                                                CountType);
651   CountBuilder.CreateCall(MTCTRFunc, ECValue);
652 
653   IRBuilder<> CondBuilder(CountedExitBranch);
654   Value *DecFunc =
655     Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
656   Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
657   Value *OldCond = CountedExitBranch->getCondition();
658   CountedExitBranch->setCondition(NewCond);
659 
660   // The false branch must exit the loop.
661   if (!L->contains(CountedExitBranch->getSuccessor(0)))
662     CountedExitBranch->swapSuccessors();
663 
664   // The old condition may be dead now, and may have even created a dead PHI
665   // (the original induction variable).
666   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
667   // Run through the basic blocks of the loop and see if any of them have dead
668   // PHIs that can be removed.
669   for (auto I : L->blocks())
670     DeleteDeadPHIs(I);
671 
672   ++NumCTRLoops;
673   return MadeChange;
674 }
675 
676 #ifndef NDEBUG
677 static bool clobbersCTR(const MachineInstr &MI) {
678   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
679     const MachineOperand &MO = MI.getOperand(i);
680     if (MO.isReg()) {
681       if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
682         return true;
683     } else if (MO.isRegMask()) {
684       if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
685         return true;
686     }
687   }
688 
689   return false;
690 }
691 
// Verify that the CTR-decrementing branch at (MBB, I) is reached on every
// path by a MTCTRloop/MTCTR8loop with no intervening CTR clobber. The
// search walks backwards through each block; when the top of a block is
// reached without finding an mtctr, all predecessors are queued (Preds),
// with Visited preventing re-scans. The gotos let the two entry points
// (mid-block and block-start) share the single scan loop.
static bool verifyCTRBranch(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator BI = I;
  SmallSet<MachineBasicBlock *, 16>   Visited;
  SmallVector<MachineBasicBlock *, 8> Preds;
  bool CheckPreds;

  if (I == MBB->begin()) {
    // Nothing before the branch in this block: start with the predecessors.
    Visited.insert(MBB);
    goto queue_preds;
  } else
    --I;

check_block:
  Visited.insert(MBB);
  if (I == MBB->end())
    goto queue_preds;

  CheckPreds = true;
  // Scan backwards from I to the beginning of the block.
  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
    unsigned Opc = I->getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      // Found the mtctr that feeds the branch; this path is satisfied.
      CheckPreds = false;
      break;
    }

    if (I != BI && clobbersCTR(*I)) {
      DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
                   << ") instruction " << *I << " clobbers CTR, invalidating "
                   << printMBBReference(*BI->getParent()) << " ("
                   << BI->getParent()->getFullName() << ") instruction " << *BI
                   << "\n");
      return false;
    }

    if (I == IE)
      break;
  }

  // No mtctr needed on this path and nothing left to visit: success.
  if (!CheckPreds && Preds.empty())
    return true;

  if (CheckPreds) {
queue_preds:
    // Reached the top of a block without an mtctr. If this is the entry
    // block, no mtctr can precede the branch at all.
    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
      DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
                   << printMBBReference(*BI->getParent()) << " ("
                   << BI->getParent()->getFullName() << ") instruction " << *BI
                   << "\n");
      return false;
    }

    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
         PIE = MBB->pred_end(); PI != PIE; ++PI)
      Preds.push_back(*PI);
  }

  // Continue with the next unvisited block on the worklist.
  do {
    MBB = Preds.pop_back_val();
    if (!Visited.count(MBB)) {
      I = MBB->getLastNonDebugInstr();
      goto check_block;
    }
  } while (!Preds.empty());

  return true;
}
759 
760 bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
761   MDT = &getAnalysis<MachineDominatorTree>();
762 
763   // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
764   // any other instructions that might clobber the ctr register.
765   for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
766        I != IE; ++I) {
767     MachineBasicBlock *MBB = &*I;
768     if (!MDT->isReachableFromEntry(MBB))
769       continue;
770 
771     for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
772       MIIE = MBB->end(); MII != MIIE; ++MII) {
773       unsigned Opc = MII->getOpcode();
774       if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
775           Opc == PPC::BDZ8  || Opc == PPC::BDZ)
776         if (!verifyCTRBranch(MBB, MII))
777           llvm_unreachable("Invalid PPC CTR loop!");
778     }
779   }
780 
781   return false;
782 }
783 #endif // NDEBUG
784