//===-- PPCCTRLoops.cpp - Identify and generate CTR loops ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass identifies loops where we can generate the PPC branch instructions
// that decrement and test the count register (CTR) (bdnz and friends).
//
// The pattern that defines the induction variable can change depending on
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
// normalizes induction variables, and the Loop Strength Reduction pass
// run by 'llc' may also make changes to the induction variable.
//
// Criteria for CTR loops:
//  - Countable loops (w/ ind. var for a trip count)
//  - Try inner-most loops first
//  - No nested CTR loops.
//  - No function calls in loops.
//
//===----------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetTransformInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#endif

using namespace llvm;

#define DEBUG_TYPE "ctrloops"

#ifndef NDEBUG
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif

// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
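//
// As an illustration (not from the original source): with the default
// threshold of 4, a two-iteration loop such as
//
//   for (int i = 0; i < 2; ++i)
//     Sum += A[i];
//
// removes at most two compare-and-branch sequences, which cannot amortize the
// latency of the mtctr, so convertToCTRLoop() may leave it in its original
// form.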
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");

namespace llvm {
  void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
}

namespace {
  struct PPCCTRLoops : public FunctionPass {
#ifndef NDEBUG
    static int Counter;
#endif

  public:
    static char ID;

    PPCCTRLoops() : FunctionPass(ID) {
      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
    }

  private:
    bool mightUseCTR(BasicBlock *BB);
    bool convertToCTRLoop(Loop *L);

  private:
    const PPCTargetMachine *TM;
    const PPCSubtarget *STI;
    const PPCTargetLowering *TLI;
    const DataLayout *DL;
    const TargetLibraryInfo *LibInfo;
    const TargetTransformInfo *TTI;
    LoopInfo *LI;
    ScalarEvolution *SE;
    DominatorTree *DT;
    bool PreserveLCSSA;
    TargetSchedModel SchedModel;
  };

  char PPCCTRLoops::ID = 0;
#ifndef NDEBUG
  int PPCCTRLoops::Counter = 0;
#endif

#ifndef NDEBUG
  struct PPCCTRLoopsVerify : public MachineFunctionPass {
  public:
    static char ID;

    PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
      initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    MachineDominatorTree *MDT;
  };

  char PPCCTRLoopsVerify::ID = 0;
#endif // NDEBUG
} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                    false, false)

FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }

#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                      "PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                    "PowerPC CTR Loops Verify", false, false)

FunctionPass *llvm::createPPCCTRLoopsVerify() {
  return new PPCCTRLoopsVerify();
}
#endif // NDEBUG
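
// Usage sketch (an assumption for illustration, not from the original source):
// when built into llc, the effect of this pass on the IR can be inspected with
// the legacy pass manager's print facility, e.g.
//
//   llc -mtriple=powerpc64-unknown-linux-gnu -print-after=ppc-ctr-loops in.ll
//
// where a converted loop's counted exit branches on a call to
// @llvm.ppc.is.decremented.ctr.nonzero instead of comparing the induction
// variable.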
bool PPCCTRLoops::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  TM = &TPC->getTM<PPCTargetMachine>();
  STI = TM->getSubtargetImpl(F);
  TLI = STI->getTargetLowering();

  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  DL = &F.getParent()->getDataLayout();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
  // Initialize the scheduling model; its issue width feeds the small-loop
  // size heuristic in convertToCTRLoop. (This call was missing here.)
  SchedModel.init(STI);

  bool MadeChange = false;

  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
       I != E; ++I) {
    Loop *L = *I;
    if (!L->getParentLoop())
      MadeChange |= convertToCTRLoop(L);
  }

  return MadeChange;
}

static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
    return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);

  return false;
}

// Determining the address of a TLS variable results in a function call in
// certain TLS models.
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
  if (!GV) {
    // Recurse to check for constants that refer to TLS global variables.
    if (const auto *CV = dyn_cast<Constant>(MemAddr))
      for (const auto &CO : CV->operands())
        if (memAddrUsesCTR(TM, CO))
          return true;

    return false;
  }

  if (!GV->isThreadLocal())
    return false;
  TLSModel::Model Model = TM.getTLSModel(GV);
  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}

// Loop through the inline asm constraints and look for something that clobbers
// ctr.
static bool asmClobbersCTR(InlineAsm *IA) {
  InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
    InlineAsm::ConstraintInfo &C = CIV[i];
    if (C.Type != InlineAsm::isInput)
      for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
        if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
          return true;
  }
  return false;
}
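
// For example (hypothetical input, not from the original source), inline asm
// that names the counter in its clobber list,
//
//   asm volatile("mtctr %0" : : "r"(N) : "ctr");
//
// reaches the IR with a "~{ctr}" constraint; ParseConstraints() reports it
// with a code of "{ctr}" and a type other than isInput, so asmClobbersCTR()
// above returns true for it.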
bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to ppc_is_decremented_ctr_nonzero or ppc_mtctr,
          // we're definitely using CTR.
          case Intrinsic::ppc_is_decremented_ctr_nonzero:
          case Intrinsic::ppc_mtctr:
            return true;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
    !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
  // let's return it to _setjmp state
#  pragma pop_macro("setjmp")
#  undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
          case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
          case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
          case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
          case Intrinsic::rint: Opcode = ISD::FRINT; break;
          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round: Opcode = ISD::FROUND; break;
          case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
          case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.
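
        // Illustrative case (an assumption, not from the original source): a
        // readonly call to floorf() recognized below as LibFunc_floorf maps to
        // ISD::FFLOOR; if the subtarget lowers FFLOOR without a library call,
        // the legality check that follows lets the loop keep using the CTR.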
        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }
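
      // Any other call, direct but unrecognized above or indirect, is
      // conservatively assumed to end up using the CTR, matching the "no
      // function calls in loops" criterion from the file header.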
      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_f128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM->isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      return true;
    } else if (!TM->isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (STI->useSoftFloat()) {
      switch (J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(*TM, Operand))
        return true;
  }

  return false;
}

bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  // Do not convert small loops with a short trip count to CTR loops.
  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }
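
  // Worked example of the check above (illustrative): with an issue width of
  // 4, a loop whose constant trip count is below the threshold is left
  // unconverted unless it contains more than 6 * 4 = 24 non-ephemeral
  // instructions.
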
  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
    DEBUG(dbgs() << "Nested loop converted\n");
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return MadeChange;
    }
  }

  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
                    (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(Preheader))
    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
  if (!Preheader)
    return MadeChange;

  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TM->isPPC64() ?
    Type::getInt64Ty(C) : Type::getInt32Ty(C);
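  // The exit count is the backedge-taken count for this exit, one less than
  // the number of times the loop body runs, so the CTR is preloaded with
  // EC + 1: bdnz decrements the count before testing it.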
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
                                               CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Value *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  // Run through the basic blocks of the loop and see if any of them have dead
  // PHIs that can be removed.
  for (auto I : L->blocks())
    DeleteDeadPHIs(I);

  ++NumCTRLoops;
  return MadeChange;
}

#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr &MI) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg()) {
      if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
        return true;
    } else if (MO.isRegMask()) {
      if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
        return true;
    }
  }

  return false;
}

static bool verifyCTRBranch(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator BI = I;
  SmallSet<MachineBasicBlock *, 16> Visited;
  SmallVector<MachineBasicBlock *, 8> Preds;
  bool CheckPreds;

  if (I == MBB->begin()) {
    Visited.insert(MBB);
    goto queue_preds;
  } else
    --I;

check_block:
  Visited.insert(MBB);
  if (I == MBB->end())
    goto queue_preds;

  CheckPreds = true;
  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
    unsigned Opc = I->getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      CheckPreds = false;
      break;
    }

    if (I != BI && clobbersCTR(*I)) {
      DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
                   << ") instruction " << *I << " clobbers CTR, invalidating "
                   << printMBBReference(*BI->getParent()) << " ("
                   << BI->getParent()->getFullName() << ") instruction " << *BI
                   << "\n");
      return false;
    }

    if (I == IE)
      break;
  }

  if (!CheckPreds && Preds.empty())
    return true;

  if (CheckPreds) {
queue_preds:
    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
      DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
                   << printMBBReference(*BI->getParent()) << " ("
                   << BI->getParent()->getFullName() << ") instruction " << *BI
                   << "\n");
      return false;
    }

    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
         PIE = MBB->pred_end(); PI != PIE; ++PI)
      Preds.push_back(*PI);
  }
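
  // Scan each queued predecessor backwards from its last non-debug
  // instruction, resuming the search above; already-visited blocks are
  // skipped.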
  do {
    MBB = Preds.pop_back_val();
    if (!Visited.count(MBB)) {
      I = MBB->getLastNonDebugInstr();
      goto check_block;
    }
  } while (!Preds.empty());

  return true;
}

bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
  MDT = &getAnalysis<MachineDominatorTree>();

  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
  // any other instructions that might clobber the ctr register.
  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
       I != IE; ++I) {
    MachineBasicBlock *MBB = &*I;
    if (!MDT->isReachableFromEntry(MBB))
      continue;

    for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
         MIIE = MBB->end(); MII != MIIE; ++MII) {
      unsigned Opc = MII->getOpcode();
      if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
          Opc == PPC::BDZ8  || Opc == PPC::BDZ)
        if (!verifyCTRBranch(MBB, MII))
          llvm_unreachable("Invalid PPC CTR loop!");
    }
  }

  return false;
}
#endif // NDEBUG