//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass identifies loops where we can generate the PPC branch instructions
// that decrement and test the count register (CTR) (bdnz and friends).
//
// The pattern that defines the induction variable can change depending on
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
// normalizes induction variables, and the Loop Strength Reduction pass
// run by 'llc' may also make changes to the induction variable.
//
// Criteria for CTR loops:
//  - Countable loops (w/ ind. var for a trip count)
//  - Try inner-most loops first
//  - No nested CTR loops.
//  - No function calls in loops.
//
//===----------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetTransformInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

// Machine-level headers are only needed by the debug-build-only
// PPCCTRLoopsVerify pass defined below.
#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#endif

using namespace llvm;

#define DEBUG_TYPE "ctrloops"

#ifndef NDEBUG
// Debug-only knob for bisecting miscompiles: stop converting loops once this
// many conversions have been performed across the run (-1 means no limit).
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif

// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");

namespace llvm {
  void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
}

namespace {
  // IR-level function pass: rewrites eligible countable loops so that the
  // trip count is loaded into the CTR register (via the ppc_mtctr intrinsic
  // in the preheader) and the counted exit branch tests
  // ppc_is_decremented_ctr_nonzero, which later lowers to bdnz/bdz.
  struct PPCCTRLoops : public FunctionPass {

#ifndef NDEBUG
    // Running count of conversions, compared against CTRLoopLimit.
    static int Counter;
#endif

  public:
    static char ID;

    PPCCTRLoops() : FunctionPass(ID) {
      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
    }

  private:
    // Conservatively report whether any instruction in BB might end up using
    // or clobbering the CTR register after lowering (calls, ppc_fp128
    // operations, large-integer division, jump tables, indirect branches...).
    bool mightUseCTR(BasicBlock *BB);
    // Try to convert L (innermost loops first) into a CTR loop; returns true
    // if any change was made.
    bool convertToCTRLoop(Loop *L);

  private:
    // Cached analysis/target pointers, populated in runOnFunction.
    const PPCTargetMachine *TM;
    const PPCSubtarget *STI;
    const PPCTargetLowering *TLI;
    const DataLayout *DL;
    const TargetLibraryInfo *LibInfo;
    const TargetTransformInfo *TTI;
    LoopInfo *LI;
    ScalarEvolution *SE;
    DominatorTree *DT;
    bool PreserveLCSSA;
    TargetSchedModel SchedModel;
  };

  char PPCCTRLoops::ID = 0;
#ifndef NDEBUG
  int PPCCTRLoops::Counter = 0;
#endif

#ifndef NDEBUG
  // Debug-build machine-level checker: verifies that every CTR-decrementing
  // branch (bdnz/bdz) is reached only along paths on which a loop mtctr set
  // the CTR and no intervening instruction clobbered it.
  struct PPCCTRLoopsVerify : public MachineFunctionPass {
  public:
    static char ID;

    PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
      initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    MachineDominatorTree *MDT;
  };

  char PPCCTRLoopsVerify::ID = 0;
#endif // NDEBUG
} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                    false, false)

FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }

#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                      "PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                    "PowerPC CTR Loops Verify", false, false)

FunctionPass *llvm::createPPCCTRLoopsVerify() {
  return new PPCCTRLoopsVerify();
}
#endif // NDEBUG
// Entry point: cache the target/analysis pointers and attempt conversion on
// each outermost loop. convertToCTRLoop recurses into nested loops itself
// (innermost first), so only top-level loops are visited here.
bool PPCCTRLoops::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // This pass needs the TargetMachine; bail out when run outside a codegen
  // pipeline (e.g. from opt without a TargetPassConfig).
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  TM = &TPC->getTM<PPCTargetMachine>();
  STI = TM->getSubtargetImpl(F);
  TLI = STI->getTargetLowering();

  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  DL = &F.getParent()->getDataLayout();
  // TargetLibraryInfo is optional; LibInfo may be null below.
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
  // NOTE(review): SchedModel is never init()'d anywhere in this file, yet
  // convertToCTRLoop calls SchedModel.getIssueWidth() — confirm a
  // default-constructed TargetSchedModel is intended there.

  bool MadeChange = false;

  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
       I != E; ++I) {
    Loop *L = *I;
    if (!L->getParentLoop())
      MadeChange |= convertToCTRLoop(L);
  }

  return MadeChange;
}

// Returns true if Ty is an integer type wider than the target's native GPR
// width (32 bits in 32-bit mode, 64 bits in 64-bit mode). Operations on such
// types may lower to runtime library calls.
static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
    return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);

  return false;
}

// Determining the address of a TLS variable results in a function call in
// certain TLS models.
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
  if (!GV) {
    // Recurse to check for constants that refer to TLS global variables.
    if (const auto *CV = dyn_cast<Constant>(MemAddr))
      for (const auto &CO : CV->operands())
        if (memAddrUsesCTR(TM, CO))
          return true;

    return false;
  }

  if (!GV->isThreadLocal())
    return false;
  // Only the dynamic TLS models go through __tls_get_addr (a call); the
  // initial-exec and local-exec models do not.
  TLSModel::Model Model = TM.getTLSModel(GV);
  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}

// Loop through the inline asm constraints and look for something that clobbers
// ctr.
static bool asmClobbersCTR(InlineAsm *IA) {
  InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
    InlineAsm::ConstraintInfo &C = CIV[i];
    // Any non-input constraint (output or clobber) mentioning {ctr} counts.
    if (C.Type != InlineAsm::isInput)
      for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
        if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
          return true;
  }
  return false;
}

// Conservative scan of BB: returns true if any instruction might, after
// lowering, turn into a call or otherwise use/clobber the CTR register,
// which would make the containing loop ineligible for CTR conversion.
bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        // Opcode stays 0 unless a case below maps the call to an ISD opcode
        // whose legality must then be checked against the target.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          // Unlisted intrinsics are assumed not to become calls.
          default: continue;
          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
          // we're definitely using CTR.
          case Intrinsic::ppc_is_decremented_ctr_nonzero:
          case Intrinsic::ppc_mtctr:
            return true;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
                       !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
 // let's return it to _setjmp state
#  pragma pop_macro("setjmp")
#  undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        // A plain call to a recognized, optimizable libm-style function may
        // lower to an ISD node instead of a real call; classify it the same
        // way as the intrinsics above.
        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        // For a mapped ISD opcode, the call becomes a real library call only
        // if the operation is not legal/custom for the argument type.
        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      // Indirect calls (no called Function) and anything not filtered above
      // are assumed to clobber CTR.
      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_f128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      // int<->fp conversions involving ppc_fp128 or over-wide integers may
      // lower to runtime calls.
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM->isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      // Division/remainder on integers wider than a GPR becomes a call.
      return true;
    } else if (!TM->isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      // A switch big enough to become a jump table uses an indirect jump,
      // which (per above) uses the counter register.
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    // With soft-float, every FP arithmetic/conversion/compare is a libcall.
    if (STI->useSoftFloat()) {
      switch(J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    // Any operand referring to a dynamic-model TLS variable implies a call
    // to compute its address (see memAddrUsesCTR).
    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(*TM, Operand))
        return true;
  }

  return false;
}

// Try to turn L into a CTR loop: find an exiting block with a computable,
// loop-invariant exit count that runs on every iteration, emit ppc_mtctr in
// the preheader, and redirect the counted exit branch through
// ppc_is_decremented_ctr_nonzero. Nested loops are processed first; if any
// nested loop converts, this loop is left alone (no nested CTR loops).
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  // Do not convert small short loops to CTR loop.
  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    // Exclude ephemeral (assume-only) values from the size estimate.
    SmallPtrSet<const Value *, 32> EphValues;
    auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
    // NOTE(review): this debug line prints for every nested loop regardless
    // of whether it actually converted — consider gating on the result.
    DEBUG(dbgs() << "Nested loop converted\n");
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    // NOTE(review): compares against CTRLoopLimit rather than the local
    // Limit; behavior is identical since Limit == CTRLoopLimit here.
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return MadeChange;
    }
  }

  // Find an exiting block whose exit count can drive the CTR: the count must
  // be computable, non-zero, loop-invariant, narrow enough for the count
  // register, and the block must execute on every iteration.
  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
                    (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    // The count must fit in the CTR register (64-bit on ppc64, else 32-bit).
    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(Preheader))
    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
  if (!Preheader)
    return MadeChange;

  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  // The CTR holds the trip count, which is exit count + 1.
  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  // mtctr in the preheader loads the trip count into CTR.
  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
                                               CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  // Replace the selected branch's condition with the decrement-and-test
  // intrinsic; true means "CTR still nonzero", i.e. keep looping.
  IRBuilder<> CondBuilder(CountedExitBranch);
  Value *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  // Run through the basic blocks of the loop and see if any of them have dead
  // PHIs that can be removed.
  for (auto I : L->blocks())
    DeleteDeadPHIs(I);

  ++NumCTRLoops;
  return MadeChange;
}

#ifndef NDEBUG
// Returns true if MI writes CTR/CTR8, either via an explicit register def or
// via a regmask (e.g. a call) that clobbers it.
static bool clobbersCTR(const MachineInstr &MI) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg()) {
      if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
        return true;
    } else if (MO.isRegMask()) {
      if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
        return true;
    }
  }

  return false;
}

// Walk backward from the CTR branch at I (in MBB) over all paths, checking
// that every path hits an MTCTRloop/MTCTR8loop before any other instruction
// that clobbers CTR. Implemented as an explicit worklist (Preds) with gotos:
// check_block scans one block backward; queue_preds enqueues its
// predecessors when no mtctr was found in it.
static bool verifyCTRBranch(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator I) {
  // BI remembers the branch under inspection; I becomes the scan cursor.
  MachineBasicBlock::iterator BI = I;
  SmallSet<MachineBasicBlock *, 16>   Visited;
  SmallVector<MachineBasicBlock *, 8> Preds;
  bool CheckPreds;

  if (I == MBB->begin()) {
    Visited.insert(MBB);
    goto queue_preds;
  } else
    --I;

check_block:
  Visited.insert(MBB);
  if (I == MBB->end())
    goto queue_preds;

  CheckPreds = true;
  // Scan the block backward from I down to its first instruction.
  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
    unsigned Opc = I->getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      // Found the CTR setter on this path; no need to look at predecessors.
      CheckPreds = false;
      break;
    }

    if (I != BI && clobbersCTR(*I)) {
      DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
                   << ") instruction " << *I << " clobbers CTR, invalidating "
                   << printMBBReference(*BI->getParent()) << " ("
                   << BI->getParent()->getFullName() << ") instruction " << *BI
                   << "\n");
      return false;
    }

    if (I == IE)
      break;
  }

  if (!CheckPreds && Preds.empty())
    return true;

  if (CheckPreds) {
queue_preds:
    // Reaching the function entry without an mtctr means verification fails.
    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
      DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
                   << printMBBReference(*BI->getParent()) << " ("
                   << BI->getParent()->getFullName() << ") instruction " << *BI
                   << "\n");
      return false;
    }

    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
         PIE = MBB->pred_end(); PI != PIE; ++PI)
      Preds.push_back(*PI);
  }

  // Pop unvisited predecessors off the worklist and scan each in turn.
  do {
    MBB = Preds.pop_back_val();
    if (!Visited.count(MBB)) {
      I = MBB->getLastNonDebugInstr();
      goto check_block;
    }
  } while (!Preds.empty());

  return true;
}

// Verify every reachable block's terminators: each bdnz/bdz must pass
// verifyCTRBranch. Never modifies the function (always returns false).
bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
  MDT = &getAnalysis<MachineDominatorTree>();

  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
  // any other instructions that might clobber the ctr register.
  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
       I != IE; ++I) {
    MachineBasicBlock *MBB = &*I;
    if (!MDT->isReachableFromEntry(MBB))
      continue;

    for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
         MIIE = MBB->end(); MII != MIIE; ++MII) {
      unsigned Opc = MII->getOpcode();
      if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
          Opc == PPC::BDZ8  || Opc == PPC::BDZ)
        if (!verifyCTRBranch(MBB, MII))
          llvm_unreachable("Invalid PPC CTR loop!");
    }
  }

  return false;
}
#endif // NDEBUG