//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass identifies loops where we can generate the PPC branch instructions
// that decrement and test the count register (CTR) (bdnz and friends).
//
// The pattern that defines the induction variable can change depending on
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
// normalizes induction variables, and the Loop Strength Reduction pass
// run by 'llc' may also make changes to the induction variable.
//
// Criteria for CTR loops:
//  - Countable loops (w/ ind. var for a trip count)
//  - Try inner-most loops first
//  - No nested CTR loops.
//  - No function calls in loops.
//
//===----------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetTransformInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#endif

using namespace llvm;

#define DEBUG_TYPE "ctrloops"

#ifndef NDEBUG
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif

// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
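// (Each iteration of a conventional counted loop ends with a compare and a
// conditional branch; converting to a CTR loop folds both into a single
// bdnz, saving one comparison per iteration.)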
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");

namespace llvm {
  void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
}

namespace {
  struct PPCCTRLoops : public FunctionPass {
#ifndef NDEBUG
    static int Counter;
#endif

  public:
    static char ID;

    PPCCTRLoops() : FunctionPass(ID) {
      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
    }

  private:
    bool mightUseCTR(BasicBlock *BB);
    bool convertToCTRLoop(Loop *L);

    const PPCTargetMachine *TM;
    const PPCSubtarget *STI;
    const PPCTargetLowering *TLI;
    const DataLayout *DL;
    const TargetLibraryInfo *LibInfo;
    const TargetTransformInfo *TTI;
    LoopInfo *LI;
    ScalarEvolution *SE;
    DominatorTree *DT;
    bool PreserveLCSSA;
    TargetSchedModel SchedModel;
  };

  char PPCCTRLoops::ID = 0;
#ifndef NDEBUG
  int PPCCTRLoops::Counter = 0;
#endif

#ifndef NDEBUG
  struct PPCCTRLoopsVerify : public MachineFunctionPass {
  public:
    static char ID;

    PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
      initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    MachineDominatorTree *MDT;
  };

  char PPCCTRLoopsVerify::ID = 0;
#endif // NDEBUG
} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                    false, false)

FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }

#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                      "PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                    "PowerPC CTR Loops Verify", false, false)

FunctionPass *llvm::createPPCCTRLoopsVerify() {
  return new PPCCTRLoopsVerify();
}
#endif // NDEBUG

bool PPCCTRLoops::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  TM = &TPC->getTM<PPCTargetMachine>();
  STI = TM->getSubtargetImpl(F);
  TLI = STI->getTargetLowering();
  // Initialize the scheduling model so that getIssueWidth() below reflects
  // the actual subtarget rather than the default model.
  SchedModel.init(STI);

  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  DL = &F.getParent()->getDataLayout();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

  bool MadeChange = false;

  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
       I != E; ++I) {
    Loop *L = *I;
    if (!L->getParentLoop())
      MadeChange |= convertToCTRLoop(L);
  }

  return MadeChange;
}

static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
    return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);

  return false;
}

// Determining the address of a TLS variable results in a function call in
// certain TLS models.
static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
  if (!GV) {
    // Recurse to check for constants that refer to TLS global variables.
    if (const auto *CV = dyn_cast<Constant>(MemAddr))
      for (const auto &CO : CV->operands())
        if (memAddrUsesCTR(TM, CO))
          return true;

    return false;
  }

  if (!GV->isThreadLocal())
    return false;
  TLSModel::Model Model = TM.getTLSModel(GV);
  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}

// Loop through the inline asm constraints and look for something that clobbers
// ctr.
static bool asmClobbersCTR(InlineAsm *IA) {
  InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
    InlineAsm::ConstraintInfo &C = CIV[i];
    if (C.Type != InlineAsm::isInput)
      for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
        if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
          return true;
  }
  return false;
}

bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
          // we're definitely using CTR.
          case Intrinsic::ppc_is_decremented_ctr_nonzero:
          case Intrinsic::ppc_mtctr:
            return true;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
    !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
// Restore setjmp to its original _setjmp definition.
#  pragma pop_macro("setjmp")
#  undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::sqrt:      Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:     Opcode = ISD::FROUND;     break;
          case Intrinsic::minnum:    Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:    Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
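          // The remaining math-library routines may lower to native
          // instructions; map each to its ISD opcode and defer to the
          // operation-legality check below.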
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_fp128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM->isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      return true;
    } else if (!TM->isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
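    // (PowerPC has no floating-point remainder instruction; ISD::FREM is
    // expanded into a call to the fmod family of library functions.)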
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (STI->useSoftFloat()) {
      switch (J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(*TM, Operand))
        return true;
  }

  return false;
}

bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  // Do not convert loops with a small constant trip count to CTR loops.
  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
        *L->getHeader()->getParent());
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    if (convertToCTRLoop(*I)) {
      MadeChange = true;
      LLVM_DEBUG(dbgs() << "Nested loop converted\n");
    }
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

  // Bail out if the loop has irreducible control flow.
  LoopBlocksRPO RPOT(L);
  RPOT.perform(LI);
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
    return false;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= Limit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(*I))
      return MadeChange;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !BI->extractProfMetadata(TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
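      // Successor 0 is the "true" successor, so TrueWeight belongs to it;
      // whichever weight lies on the edge leaving the loop is the exit
      // frequency.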
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return MadeChange;
    }
  }

  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
                      << (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
      continue;

    // If this exiting block is contained in a nested loop, it is not eligible
    // for insertion of the branch-and-decrement since the inner loop would
    // end up messing up the value in the CTR.
    if (LI->getLoopFor(*I) != L)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(Preheader))
    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
  if (!Preheader)
    return MadeChange;

  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
                    << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
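  // The SCEV exit count is the number of times the backedge is taken, so
  // widen it to the CTR width and add one to form the trip count that will
  // be loaded into the CTR via mtctr.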
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
  Value *ECValue =
      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Function *MTCTRFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Function *DecFunc =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  // Run through the basic blocks of the loop and see if any of them have dead
  // PHIs that can be removed.
  for (auto I : L->blocks())
    DeleteDeadPHIs(I);

  ++NumCTRLoops;
  return MadeChange;
}

#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr &MI) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg()) {
      if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
        return true;
    } else if (MO.isRegMask()) {
      if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
        return true;
    }
  }

  return false;
}

static bool verifyCTRBranch(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator BI = I;
  SmallSet<MachineBasicBlock *, 16> Visited;
  SmallVector<MachineBasicBlock *, 8> Preds;
  bool CheckPreds;

  if (I == MBB->begin()) {
    Visited.insert(MBB);
    goto queue_preds;
  } else
    --I;

check_block:
  Visited.insert(MBB);
  if (I == MBB->end())
    goto queue_preds;

  CheckPreds = true;
  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
    unsigned Opc = I->getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      CheckPreds = false;
      break;
    }

    if (I != BI && clobbersCTR(*I)) {
      LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
                        << ") instruction " << *I
                        << " clobbers CTR, invalidating "
                        << printMBBReference(*BI->getParent()) << " ("
                        << BI->getParent()->getFullName() << ") instruction "
                        << *BI << "\n");
      return false;
    }

    if (I == IE)
      break;
  }

  if (!CheckPreds && Preds.empty())
    return true;

  if (CheckPreds) {
queue_preds:
    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
      LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
                        << printMBBReference(*BI->getParent()) << " ("
                        << BI->getParent()->getFullName() << ") instruction "
                        << *BI << "\n");
      return false;
    }

    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
         PIE = MBB->pred_end(); PI != PIE; ++PI)
      Preds.push_back(*PI);
  }

  do {
    MBB = Preds.pop_back_val();
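    // Scan each predecessor at most once, resuming the backward walk at its
    // last non-debug instruction.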
    if (!Visited.count(MBB)) {
      I = MBB->getLastNonDebugInstr();
      goto check_block;
    }
  } while (!Preds.empty());

  return true;
}

bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
  MDT = &getAnalysis<MachineDominatorTree>();

  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
  // any other instructions that might clobber the ctr register.
  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
       I != IE; ++I) {
    MachineBasicBlock *MBB = &*I;
    if (!MDT->isReachableFromEntry(MBB))
      continue;

    for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
         MIIE = MBB->end(); MII != MIIE; ++MII) {
      unsigned Opc = MII->getOpcode();
      if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
          Opc == PPC::BDZ8  || Opc == PPC::BDZ)
        if (!verifyCTRBranch(MBB, MII))
          llvm_unreachable("Invalid PPC CTR loop!");
    }
  }

  return false;
}
#endif // NDEBUG