1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This transformation analyzes and transforms the induction variables (and 11 // computations derived from them) into simpler forms suitable for subsequent 12 // analysis and transformation. 13 // 14 // This transformation makes the following changes to each loop with an 15 // identifiable induction variable: 16 // 1. All loops are transformed to have a SINGLE canonical induction variable 17 // which starts at zero and steps by one. 18 // 2. The canonical induction variable is guaranteed to be the first PHI node 19 // in the loop header block. 20 // 3. The canonical induction variable is guaranteed to be in a wide enough 21 // type so that IV expressions need not be (directly) zero-extended or 22 // sign-extended. 23 // 4. Any pointer arithmetic recurrences are raised to use array subscripts. 24 // 25 // If the trip count of a loop is computable, this pass also makes the following 26 // changes: 27 // 1. The exit condition for the loop is canonicalized to compare the 28 // induction value against the exit value. This turns loops like: 29 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 30 // 2. Any use outside of the loop of an expression derived from the indvar 31 // is changed to compute the derived value outside of the loop, eliminating 32 // the dependence on the exit value of the induction variable. If the only 33 // purpose of the loop is to compute the exit value of some derived 34 // expression, this transformation will make the loop dead. 35 // 36 // This transformation should be followed by strength reduction after all of the 37 // desired loop transformations have been performed. 38 // 39 //===----------------------------------------------------------------------===// 40 41 #define DEBUG_TYPE "indvars" 42 #include "llvm/Transforms/Scalar.h" 43 #include "llvm/BasicBlock.h" 44 #include "llvm/Constants.h" 45 #include "llvm/Instructions.h" 46 #include "llvm/IntrinsicInst.h" 47 #include "llvm/LLVMContext.h" 48 #include "llvm/Type.h" 49 #include "llvm/Analysis/Dominators.h" 50 #include "llvm/Analysis/IVUsers.h" 51 #include "llvm/Analysis/ScalarEvolutionExpander.h" 52 #include "llvm/Analysis/LoopInfo.h" 53 #include "llvm/Analysis/LoopPass.h" 54 #include "llvm/Support/CFG.h" 55 #include "llvm/Support/Debug.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include "llvm/Transforms/Utils/Local.h" 58 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 59 #include "llvm/Target/TargetData.h" 60 #include "llvm/ADT/SmallVector.h" 61 #include "llvm/ADT/Statistic.h" 62 #include "llvm/ADT/STLExtras.h" 63 using namespace llvm; 64 65 STATISTIC(NumRemoved , "Number of aux indvars removed"); 66 STATISTIC(NumWidened , "Number of indvars widened"); 67 STATISTIC(NumInserted, "Number of canonical indvars added"); 68 STATISTIC(NumReplaced, "Number of exit values replaced"); 69 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 70 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 71 STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); 72 STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); 73 74 // DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis 75 // and referenced here. 76 namespace llvm { 77 extern bool DisableIVRewrite; 78 } 79 80 namespace { 81 class IndVarSimplify : public LoopPass { 82 IVUsers *IU; 83 LoopInfo *LI; 84 ScalarEvolution *SE; 85 DominatorTree *DT; 86 TargetData *TD; 87 SmallVector<WeakVH, 16> DeadInsts; 88 bool Changed; 89 public: 90 91 static char ID; // Pass identification, replacement for typeid 92 IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) { 93 initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); 94 } 95 96 virtual bool runOnLoop(Loop *L, LPPassManager &LPM); 97 98 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 99 AU.addRequired<DominatorTree>(); 100 AU.addRequired<LoopInfo>(); 101 AU.addRequired<ScalarEvolution>(); 102 AU.addRequiredID(LoopSimplifyID); 103 AU.addRequiredID(LCSSAID); 104 AU.addRequired<IVUsers>(); 105 AU.addPreserved<ScalarEvolution>(); 106 AU.addPreservedID(LoopSimplifyID); 107 AU.addPreservedID(LCSSAID); 108 AU.addPreserved<IVUsers>(); 109 AU.setPreservesCFG(); 110 } 111 112 private: 113 bool isValidRewrite(Value *FromVal, Value *ToVal); 114 115 void SimplifyIVUsers(SCEVExpander &Rewriter); 116 void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); 117 void EliminateIVRemainder(BinaryOperator *Rem, 118 Value *IVOperand, 119 bool IsSigned, 120 PHINode *IVPhi); 121 void RewriteNonIntegerIVs(Loop *L); 122 123 ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, 124 PHINode *IndVar, 125 SCEVExpander &Rewriter); 126 127 void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); 128 129 void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); 130 131 void SinkUnusedInvariants(Loop *L); 132 133 void HandleFloatingPointIV(Loop *L, PHINode *PH); 134 }; 135 } 136 137 char IndVarSimplify::ID = 0; 138 INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", 139 "Induction Variable Simplification", false, false) 140 INITIALIZE_PASS_DEPENDENCY(DominatorTree) 141 INITIALIZE_PASS_DEPENDENCY(LoopInfo) 142 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 143 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 144 INITIALIZE_PASS_DEPENDENCY(LCSSA) 145 INITIALIZE_PASS_DEPENDENCY(IVUsers) 146 INITIALIZE_PASS_END(IndVarSimplify, "indvars", 147 "Induction Variable Simplification", false, false) 148 149 Pass *llvm::createIndVarSimplifyPass() { 150 return new IndVarSimplify(); 151 } 152 153 /// isValidRewrite - Return true if the SCEV expansion generated by the 154 /// rewriter can replace the original value. SCEV guarantees that it 155 /// produces the same value, but the way it is produced may be illegal IR. 156 /// Ideally, this function will only be called for verification. 157 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { 158 // If an SCEV expression subsumed multiple pointers, its expansion could 159 // reassociate the GEP changing the base pointer. This is illegal because the 160 // final address produced by a GEP chain must be inbounds relative to its 161 // underlying object. Otherwise basic alias analysis, among other things, 162 // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid 163 // producing an expression involving multiple pointers. Until then, we must 164 // bail out here. 165 // 166 // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject 167 // because it understands lcssa phis while SCEV does not. 168 Value *FromPtr = FromVal; 169 Value *ToPtr = ToVal; 170 if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) { 171 FromPtr = GEP->getPointerOperand(); 172 } 173 if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) { 174 ToPtr = GEP->getPointerOperand(); 175 } 176 if (FromPtr != FromVal || ToPtr != ToVal) { 177 // Quickly check the common case 178 if (FromPtr == ToPtr) 179 return true; 180 181 // SCEV may have rewritten an expression that produces the GEP's pointer 182 // operand. That's ok as long as the pointer operand has the same base 183 // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the 184 // base of a recurrence. This handles the case in which SCEV expansion 185 // converts a pointer type recurrence into a nonrecurrent pointer base 186 // indexed by an integer recurrence. 187 const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); 188 const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); 189 if (FromBase == ToBase) 190 return true; 191 192 DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " 193 << *FromBase << " != " << *ToBase << "\n"); 194 195 return false; 196 } 197 return true; 198 } 199 200 /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken 201 /// count expression can be safely and cheaply expanded into an instruction 202 /// sequence that can be used by LinearFunctionTestReplace. 203 static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { 204 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 205 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || 206 BackedgeTakenCount->isZero()) 207 return false; 208 209 if (!L->getExitingBlock()) 210 return false; 211 212 // Can't rewrite non-branch yet. 213 BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); 214 if (!BI) 215 return false; 216 217 // Special case: If the backedge-taken count is a UDiv, it's very likely a 218 // UDiv that ScalarEvolution produced in order to compute a precise 219 // expression, rather than a UDiv from the user's code. If we can't find a 220 // UDiv in the code with some simple searching, assume the former and forego 221 // rewriting the loop. 222 if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { 223 ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); 224 if (!OrigCond) return false; 225 const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); 226 R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); 227 if (R != BackedgeTakenCount) { 228 const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); 229 L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); 230 if (L != BackedgeTakenCount) 231 return false; 232 } 233 } 234 return true; 235 } 236 237 /// getBackedgeIVType - Get the widest type used by the loop test after peeking 238 /// through Truncs. 239 /// 240 /// TODO: Unnecessary once LinearFunctionTestReplace is removed. 241 static const Type *getBackedgeIVType(Loop *L) { 242 if (!L->getExitingBlock()) 243 return 0; 244 245 // Can't rewrite non-branch yet. 246 BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); 247 if (!BI) 248 return 0; 249 250 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 251 if (!Cond) 252 return 0; 253 254 const Type *Ty = 0; 255 for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); 256 OI != OE; ++OI) { 257 assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); 258 TruncInst *Trunc = dyn_cast<TruncInst>(*OI); 259 if (!Trunc) 260 continue; 261 262 return Trunc->getSrcTy(); 263 } 264 return Ty; 265 } 266 267 /// LinearFunctionTestReplace - This method rewrites the exit condition of the 268 /// loop to be a canonical != comparison against the incremented loop induction 269 /// variable. This pass is able to rewrite the exit tests of any loop where the 270 /// SCEV analysis can determine a loop-invariant trip count of the loop, which 271 /// is actually a much broader range than just linear tests. 272 ICmpInst *IndVarSimplify:: 273 LinearFunctionTestReplace(Loop *L, 274 const SCEV *BackedgeTakenCount, 275 PHINode *IndVar, 276 SCEVExpander &Rewriter) { 277 assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); 278 BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); 279 280 // If the exiting block is not the same as the backedge block, we must compare 281 // against the preincremented value, otherwise we prefer to compare against 282 // the post-incremented value. 283 Value *CmpIndVar; 284 const SCEV *RHS = BackedgeTakenCount; 285 if (L->getExitingBlock() == L->getLoopLatch()) { 286 // Add one to the "backedge-taken" count to get the trip count. 287 // If this addition may overflow, we have to be more pessimistic and 288 // cast the induction variable before doing the add. 289 const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); 290 const SCEV *N = 291 SE->getAddExpr(BackedgeTakenCount, 292 SE->getConstant(BackedgeTakenCount->getType(), 1)); 293 if ((isa<SCEVConstant>(N) && !N->isZero()) || 294 SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { 295 // No overflow. Cast the sum. 296 RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); 297 } else { 298 // Potential overflow. Cast before doing the add. 299 RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, 300 IndVar->getType()); 301 RHS = SE->getAddExpr(RHS, 302 SE->getConstant(IndVar->getType(), 1)); 303 } 304 305 // The BackedgeTaken expression contains the number of times that the 306 // backedge branches to the loop header. This is one less than the 307 // number of times the loop executes, so use the incremented indvar. 308 CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); 309 } else { 310 // We have to use the preincremented value... 311 RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, 312 IndVar->getType()); 313 CmpIndVar = IndVar; 314 } 315 316 // Expand the code for the iteration count. 317 assert(SE->isLoopInvariant(RHS, L) && 318 "Computed iteration count is not loop invariant!"); 319 Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); 320 321 // Insert a new icmp_ne or icmp_eq instruction before the branch. 322 ICmpInst::Predicate Opcode; 323 if (L->contains(BI->getSuccessor(0))) 324 Opcode = ICmpInst::ICMP_NE; 325 else 326 Opcode = ICmpInst::ICMP_EQ; 327 328 DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 329 << " LHS:" << *CmpIndVar << '\n' 330 << " op:\t" 331 << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" 332 << " RHS:\t" << *RHS << "\n"); 333 334 ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); 335 336 Value *OrigCond = BI->getCondition(); 337 // It's tempting to use replaceAllUsesWith here to fully replace the old 338 // comparison, but that's not immediately safe, since users of the old 339 // comparison may not be dominated by the new comparison. Instead, just 340 // update the branch to use the new comparison; in the common case this 341 // will make old comparison dead. 342 BI->setCondition(Cond); 343 DeadInsts.push_back(OrigCond); 344 345 ++NumLFTR; 346 Changed = true; 347 return Cond; 348 } 349 350 /// RewriteLoopExitValues - Check to see if this loop has a computable 351 /// loop-invariant execution count. If so, this means that we can compute the 352 /// final value of any expressions that are recurrent in the loop, and 353 /// substitute the exit values from the loop into any instructions outside of 354 /// the loop that use the final values of the current expressions. 355 /// 356 /// This is mostly redundant with the regular IndVarSimplify activities that 357 /// happen later, except that it's more powerful in some cases, because it's 358 /// able to brute-force evaluate arbitrary instructions as long as they have 359 /// constant operands at the beginning of the loop. 360 void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { 361 // Verify the input to the pass in already in LCSSA form. 362 assert(L->isLCSSAForm(*DT)); 363 364 SmallVector<BasicBlock*, 8> ExitBlocks; 365 L->getUniqueExitBlocks(ExitBlocks); 366 367 // Find all values that are computed inside the loop, but used outside of it. 368 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan 369 // the exit blocks of the loop to find them. 370 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 371 BasicBlock *ExitBB = ExitBlocks[i]; 372 373 // If there are no PHI nodes in this exit block, then no values defined 374 // inside the loop are used on this path, skip it. 375 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); 376 if (!PN) continue; 377 378 unsigned NumPreds = PN->getNumIncomingValues(); 379 380 // Iterate over all of the PHI nodes. 381 BasicBlock::iterator BBI = ExitBB->begin(); 382 while ((PN = dyn_cast<PHINode>(BBI++))) { 383 if (PN->use_empty()) 384 continue; // dead use, don't replace it 385 386 // SCEV only supports integer expressions for now. 387 if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy()) 388 continue; 389 390 // It's necessary to tell ScalarEvolution about this explicitly so that 391 // it can walk the def-use list and forget all SCEVs, as it may not be 392 // watching the PHI itself. Once the new exit value is in place, there 393 // may not be a def-use connection between the loop and every instruction 394 // which got a SCEVAddRecExpr for that loop. 395 SE->forgetValue(PN); 396 397 // Iterate over all of the values in all the PHI nodes. 398 for (unsigned i = 0; i != NumPreds; ++i) { 399 // If the value being merged in is not integer or is not defined 400 // in the loop, skip it. 401 Value *InVal = PN->getIncomingValue(i); 402 if (!isa<Instruction>(InVal)) 403 continue; 404 405 // If this pred is for a subloop, not L itself, skip it. 406 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) 407 continue; // The Block is in a subloop, skip it. 408 409 // Check that InVal is defined in the loop. 410 Instruction *Inst = cast<Instruction>(InVal); 411 if (!L->contains(Inst)) 412 continue; 413 414 // Okay, this instruction has a user outside of the current loop 415 // and varies predictably *inside* the loop. Evaluate the value it 416 // contains when the loop exits, if possible. 417 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); 418 if (!SE->isLoopInvariant(ExitValue, L)) 419 continue; 420 421 Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); 422 423 DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' 424 << " LoopVal = " << *Inst << "\n"); 425 426 if (!isValidRewrite(Inst, ExitVal)) { 427 DeadInsts.push_back(ExitVal); 428 continue; 429 } 430 Changed = true; 431 ++NumReplaced; 432 433 PN->setIncomingValue(i, ExitVal); 434 435 // If this instruction is dead now, delete it. 436 RecursivelyDeleteTriviallyDeadInstructions(Inst); 437 438 if (NumPreds == 1) { 439 // Completely replace a single-pred PHI. This is safe, because the 440 // NewVal won't be variant in the loop, so we don't need an LCSSA phi 441 // node anymore. 442 PN->replaceAllUsesWith(ExitVal); 443 RecursivelyDeleteTriviallyDeadInstructions(PN); 444 } 445 } 446 if (NumPreds != 1) { 447 // Clone the PHI and delete the original one. This lets IVUsers and 448 // any other maps purge the original user from their records. 449 PHINode *NewPN = cast<PHINode>(PN->clone()); 450 NewPN->takeName(PN); 451 NewPN->insertBefore(PN); 452 PN->replaceAllUsesWith(NewPN); 453 PN->eraseFromParent(); 454 } 455 } 456 } 457 458 // The insertion point instruction may have been deleted; clear it out 459 // so that the rewriter doesn't trip over it later. 460 Rewriter.clearInsertPoint(); 461 } 462 463 void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { 464 // First step. Check to see if there are any floating-point recurrences. 465 // If there are, change them into integer recurrences, permitting analysis by 466 // the SCEV routines. 467 // 468 BasicBlock *Header = L->getHeader(); 469 470 SmallVector<WeakVH, 8> PHIs; 471 for (BasicBlock::iterator I = Header->begin(); 472 PHINode *PN = dyn_cast<PHINode>(I); ++I) 473 PHIs.push_back(PN); 474 475 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 476 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 477 HandleFloatingPointIV(L, PN); 478 479 // If the loop previously had floating-point IV, ScalarEvolution 480 // may not have been able to compute a trip count. Now that we've done some 481 // re-writing, the trip count may be computable. 482 if (Changed) 483 SE->forgetLoop(L); 484 } 485 486 namespace { 487 // Collect information about induction variables that are used by sign/zero 488 // extend operations. This information is recorded by CollectExtend and 489 // provides the input to WidenIV. 490 struct WideIVInfo { 491 const Type *WidestNativeType; // Widest integer type created [sz]ext 492 bool IsSigned; // Was an sext user seen before a zext? 493 494 WideIVInfo() : WidestNativeType(0), IsSigned(false) {} 495 }; 496 typedef std::map<PHINode *, WideIVInfo> WideIVMap; 497 } 498 499 /// CollectExtend - Update information about the induction variable that is 500 /// extended by this sign or zero extend operation. This is used to determine 501 /// the final width of the IV before actually widening it. 502 static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned, 503 WideIVMap &IVMap, ScalarEvolution *SE, 504 const TargetData *TD) { 505 const Type *Ty = Cast->getType(); 506 uint64_t Width = SE->getTypeSizeInBits(Ty); 507 if (TD && !TD->isLegalInteger(Width)) 508 return; 509 510 WideIVInfo &IVInfo = IVMap[Phi]; 511 if (!IVInfo.WidestNativeType) { 512 IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty); 513 IVInfo.IsSigned = IsSigned; 514 return; 515 } 516 517 // We extend the IV to satisfy the sign of its first user, arbitrarily. 518 if (IVInfo.IsSigned != IsSigned) 519 return; 520 521 if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType)) 522 IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty); 523 } 524 525 namespace { 526 /// WidenIV - The goal of this transform is to remove sign and zero extends 527 /// without creating any new induction variables. To do this, it creates a new 528 /// phi of the wider type and redirects all users, either removing extends or 529 /// inserting truncs whenever we stop propagating the type. 530 /// 531 class WidenIV { 532 PHINode *OrigPhi; 533 const Type *WideType; 534 bool IsSigned; 535 536 IVUsers *IU; 537 LoopInfo *LI; 538 Loop *L; 539 ScalarEvolution *SE; 540 DominatorTree *DT; 541 SmallVectorImpl<WeakVH> &DeadInsts; 542 543 PHINode *WidePhi; 544 Instruction *WideInc; 545 const SCEV *WideIncExpr; 546 547 SmallPtrSet<Instruction*,16> Processed; 548 549 public: 550 WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers, 551 LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, 552 SmallVectorImpl<WeakVH> &DI) : 553 OrigPhi(PN), 554 WideType(IVInfo.WidestNativeType), 555 IsSigned(IVInfo.IsSigned), 556 IU(IUsers), 557 LI(LInfo), 558 L(LI->getLoopFor(OrigPhi->getParent())), 559 SE(SEv), 560 DT(DTree), 561 DeadInsts(DI), 562 WidePhi(0), 563 WideInc(0), 564 WideIncExpr(0) { 565 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); 566 } 567 568 bool CreateWideIV(SCEVExpander &Rewriter); 569 570 protected: 571 Instruction *CloneIVUser(Instruction *NarrowUse, 572 Instruction *NarrowDef, 573 Instruction *WideDef); 574 575 const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); 576 577 Instruction *WidenIVUse(Instruction *NarrowUse, 578 Instruction *NarrowDef, 579 Instruction *WideDef); 580 }; 581 } // anonymous namespace 582 583 /// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this 584 /// loop. IVUsers is treated as a worklist. Each successive simplification may 585 /// push more users which may themselves be candidates for simplification. 586 /// 587 void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { 588 WideIVMap IVMap; 589 590 // Each round of simplification involves a round of eliminating operations 591 // followed by a round of widening IVs. A single IVUsers worklist is used 592 // across all rounds. The inner loop advances the user. If widening exposes 593 // more uses, then another pass through the outer loop is triggered. 594 for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) { 595 for(; I != E; ++I) { 596 Instruction *UseInst = I->getUser(); 597 Value *IVOperand = I->getOperandValToReplace(); 598 599 if (DisableIVRewrite) { 600 if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) { 601 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 602 if (IsSigned || Cast->getOpcode() == Instruction::ZExt) { 603 CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD); 604 continue; 605 } 606 } 607 } 608 if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { 609 EliminateIVComparison(ICmp, IVOperand); 610 continue; 611 } 612 if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { 613 bool IsSigned = Rem->getOpcode() == Instruction::SRem; 614 if (IsSigned || Rem->getOpcode() == Instruction::URem) { 615 EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi()); 616 continue; 617 } 618 } 619 } 620 for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end(); 621 I != E; ++I) { 622 WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts); 623 if (Widener.CreateWideIV(Rewriter)) 624 Changed = true; 625 } 626 } 627 } 628 629 static Value *getExtend( Value *NarrowOper, const Type *WideType, 630 bool IsSigned, IRBuilder<> &Builder) { 631 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : 632 Builder.CreateZExt(NarrowOper, WideType); 633 } 634 635 /// CloneIVUser - Instantiate a wide operation to replace a narrow 636 /// operation. This only needs to handle operations that can evaluation to 637 /// SCEVAddRec. It can safely return 0 for any operation we decide not to clone. 638 Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, 639 Instruction *NarrowDef, 640 Instruction *WideDef) { 641 unsigned Opcode = NarrowUse->getOpcode(); 642 switch (Opcode) { 643 default: 644 return 0; 645 case Instruction::Add: 646 case Instruction::Mul: 647 case Instruction::UDiv: 648 case Instruction::Sub: 649 case Instruction::And: 650 case Instruction::Or: 651 case Instruction::Xor: 652 case Instruction::Shl: 653 case Instruction::LShr: 654 case Instruction::AShr: 655 DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n"); 656 657 IRBuilder<> Builder(NarrowUse); 658 659 // Replace NarrowDef operands with WideDef. Otherwise, we don't know 660 // anything about the narrow operand yet so must insert a [sz]ext. It is 661 // probably loop invariant and will be folded or hoisted. If it actually 662 // comes from a widened IV, it should be removed during a future call to 663 // WidenIVUse. 664 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : 665 getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder); 666 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef : 667 getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder); 668 669 BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse); 670 BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), 671 LHS, RHS, 672 NarrowBO->getName()); 673 Builder.Insert(WideBO); 674 if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); 675 if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); 676 677 return WideBO; 678 } 679 llvm_unreachable(0); 680 } 681 682 // GetWideRecurrence - Is this instruction potentially interesting from IVUsers' 683 // perspective after widening it's type? In other words, can the extend be 684 // safely hoisted out of the loop with SCEV reducing the value to a recurrence 685 // on the same loop. If so, return the sign or zero extended 686 // recurrence. Otherwise return NULL. 687 const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { 688 if (!SE->isSCEVable(NarrowUse->getType())) 689 return 0; 690 691 const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); 692 const SCEV *WideExpr = IsSigned ? 693 SE->getSignExtendExpr(NarrowExpr, WideType) : 694 SE->getZeroExtendExpr(NarrowExpr, WideType); 695 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); 696 if (!AddRec || AddRec->getLoop() != L) 697 return 0; 698 699 return AddRec; 700 } 701 702 /// HoistStep - Attempt to hoist an IV increment above a potential use. 703 /// 704 /// To successfully hoist, two criteria must be met: 705 /// - IncV operands dominate InsertPos and 706 /// - InsertPos dominates IncV 707 /// 708 /// Meeting the second condition means that we don't need to check all of IncV's 709 /// existing uses (it's moving up in the domtree). 710 /// 711 /// This does not yet recursively hoist the operands, although that would 712 /// not be difficult. 713 static bool HoistStep(Instruction *IncV, Instruction *InsertPos, 714 const DominatorTree *DT) 715 { 716 if (DT->dominates(IncV, InsertPos)) 717 return true; 718 719 if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) 720 return false; 721 722 if (IncV->mayHaveSideEffects()) 723 return false; 724 725 // Attempt to hoist IncV 726 for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); 727 OI != OE; ++OI) { 728 Instruction *OInst = dyn_cast<Instruction>(OI); 729 if (OInst && !DT->dominates(OInst, InsertPos)) 730 return false; 731 } 732 IncV->moveBefore(InsertPos); 733 return true; 734 } 735 736 /// WidenIVUse - Determine whether an individual user of the narrow IV can be 737 /// widened. If so, return the wide clone of the user. 738 Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, 739 Instruction *NarrowDef, 740 Instruction *WideDef) { 741 // To be consistent with IVUsers, stop traversing the def-use chain at 742 // inner-loop phis or post-loop phis. 743 if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L) 744 return 0; 745 746 // Handle data flow merges and bizarre phi cycles. 747 if (!Processed.insert(NarrowUse)) 748 return 0; 749 750 // Our raison d'etre! Eliminate sign and zero extension. 751 if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) { 752 Value *NewDef = WideDef; 753 if (NarrowUse->getType() != WideType) { 754 unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType()); 755 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 756 if (CastWidth < IVWidth) { 757 // The cast isn't as wide as the IV, so insert a Trunc. 758 IRBuilder<> Builder(NarrowUse); 759 NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType()); 760 } 761 else { 762 // A wider extend was hidden behind a narrower one. This may induce 763 // another round of IV widening in which the intermediate IV becomes 764 // dead. It should be very rare. 765 DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 766 << " not wide enough to subsume " << *NarrowUse << "\n"); 767 NarrowUse->replaceUsesOfWith(NarrowDef, WideDef); 768 NewDef = NarrowUse; 769 } 770 } 771 if (NewDef != NarrowUse) { 772 DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse 773 << " replaced by " << *WideDef << "\n"); 774 ++NumElimExt; 775 NarrowUse->replaceAllUsesWith(NewDef); 776 DeadInsts.push_back(NarrowUse); 777 } 778 // Now that the extend is gone, expose it's uses to IVUsers for potential 779 // further simplification within SimplifyIVUsers. 780 IU->AddUsersIfInteresting(WideDef, WidePhi); 781 782 // No further widening is needed. The deceased [sz]ext had done it for us. 783 return 0; 784 } 785 const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse); 786 if (!WideAddRec) { 787 // This user does not evaluate to a recurence after widening, so don't 788 // follow it. Instead insert a Trunc to kill off the original use, 789 // eventually isolating the original narrow IV so it can be removed. 790 IRBuilder<> Builder(NarrowUse); 791 Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType()); 792 NarrowUse->replaceUsesOfWith(NarrowDef, Trunc); 793 return 0; 794 } 795 // Reuse the IV increment that SCEVExpander created as long as it dominates 796 // NarrowUse. 797 Instruction *WideUse = 0; 798 if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) { 799 WideUse = WideInc; 800 } 801 else { 802 WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef); 803 if (!WideUse) 804 return 0; 805 } 806 // GetWideRecurrence ensured that the narrow expression could be extended 807 // outside the loop without overflow. This suggests that the wide use 808 // evaluates to the same expression as the extended narrow use, but doesn't 809 // absolutely guarantee it. Hence the following failsafe check. In rare cases 810 // where it fails, we simple throw away the newly created wide use. 811 if (WideAddRec != SE->getSCEV(WideUse)) { 812 DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse 813 << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); 814 DeadInsts.push_back(WideUse); 815 return 0; 816 } 817 818 // Returning WideUse pushes it on the worklist. 819 return WideUse; 820 } 821 822 /// CreateWideIV - Process a single induction variable. First use the 823 /// SCEVExpander to create a wide induction variable that evaluates to the same 824 /// recurrence as the original narrow IV. Then use a worklist to forward 825 /// traverse the narrow IV's def-use chain. After WidenIVUse as processed all 826 /// interesting IV users, the narrow IV will be isolated for removal by 827 /// DeleteDeadPHIs. 828 /// 829 /// It would be simpler to delete uses as they are processed, but we must avoid 830 /// invalidating SCEV expressions. 831 /// 832 bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) { 833 // Is this phi an induction variable? 834 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 835 if (!AddRec) 836 return false; 837 838 // Widen the induction variable expression. 839 const SCEV *WideIVExpr = IsSigned ? 840 SE->getSignExtendExpr(AddRec, WideType) : 841 SE->getZeroExtendExpr(AddRec, WideType); 842 843 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 844 "Expect the new IV expression to preserve its type"); 845 846 // Can the IV be extended outside the loop without overflow? 847 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 848 if (!AddRec || AddRec->getLoop() != L) 849 return false; 850 851 // An AddRec must have loop-invariant operands. Since this AddRec it 852 // materialized by a loop header phi, the expression cannot have any post-loop 853 // operands, so they must dominate the loop header. 854 assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 855 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) 856 && "Loop header phi recurrence inputs do not dominate the loop"); 857 858 // The rewriter provides a value for the desired IV expression. This may 859 // either find an existing phi or materialize a new one. Either way, we 860 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 861 // of the phi-SCC dominates the loop entry. 862 Instruction *InsertPt = L->getHeader()->begin(); 863 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 864 865 // Remembering the WideIV increment generated by SCEVExpander allows 866 // WidenIVUse to reuse it when widening the narrow IV's increment. We don't 867 // employ a general reuse mechanism because the call above is the only call to 868 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 869 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 870 WideInc = 871 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 872 WideIncExpr = SE->getSCEV(WideInc); 873 } 874 875 DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 876 ++NumWidened; 877 878 // Traverse the def-use chain using a worklist starting at the original IV. 879 assert(Processed.empty() && "expect initial state" ); 880 881 // Each worklist entry has a Narrow def-use link and Wide def. 882 SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers; 883 for (Value::use_iterator UI = OrigPhi->use_begin(), 884 UE = OrigPhi->use_end(); UI != UE; ++UI) { 885 NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi)); 886 } 887 while (!NarrowIVUsers.empty()) { 888 Use *NarrowDefUse; 889 Instruction *WideDef; 890 tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val(); 891 892 // Process a def-use edge. This may replace the use, so don't hold a 893 // use_iterator across it. 894 Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get()); 895 Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser()); 896 Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef); 897 898 // Follow all def-use edges from the previous narrow use. 899 if (WideUse) { 900 for (Value::use_iterator UI = NarrowUse->use_begin(), 901 UE = NarrowUse->use_end(); UI != UE; ++UI) { 902 NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse)); 903 } 904 } 905 // WidenIVUse may have removed the def-use edge. 906 if (NarrowDef->use_empty()) 907 DeadInsts.push_back(NarrowDef); 908 } 909 return true; 910 } 911 912 void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { 913 unsigned IVOperIdx = 0; 914 ICmpInst::Predicate Pred = ICmp->getPredicate(); 915 if (IVOperand != ICmp->getOperand(0)) { 916 // Swapped 917 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); 918 IVOperIdx = 1; 919 Pred = ICmpInst::getSwappedPredicate(Pred); 920 } 921 922 // Get the SCEVs for the ICmp operands. 923 const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); 924 const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); 925 926 // Simplify unnecessary loops away. 927 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); 928 S = SE->getSCEVAtScope(S, ICmpLoop); 929 X = SE->getSCEVAtScope(X, ICmpLoop); 930 931 // If the condition is always true or always false, replace it with 932 // a constant value. 933 if (SE->isKnownPredicate(Pred, S, X)) 934 ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); 935 else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) 936 ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); 937 else 938 return; 939 940 DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); 941 ++NumElimCmp; 942 Changed = true; 943 DeadInsts.push_back(ICmp); 944 } 945 946 void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, 947 Value *IVOperand, 948 bool IsSigned, 949 PHINode *IVPhi) { 950 // We're only interested in the case where we know something about 951 // the numerator. 952 if (IVOperand != Rem->getOperand(0)) 953 return; 954 955 // Get the SCEVs for the ICmp operands. 956 const SCEV *S = SE->getSCEV(Rem->getOperand(0)); 957 const SCEV *X = SE->getSCEV(Rem->getOperand(1)); 958 959 // Simplify unnecessary loops away. 960 const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); 961 S = SE->getSCEVAtScope(S, ICmpLoop); 962 X = SE->getSCEVAtScope(X, ICmpLoop); 963 964 // i % n --> i if i is in [0,n). 965 if ((!IsSigned || SE->isKnownNonNegative(S)) && 966 SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, 967 S, X)) 968 Rem->replaceAllUsesWith(Rem->getOperand(0)); 969 else { 970 // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). 971 const SCEV *LessOne = 972 SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); 973 if (IsSigned && !SE->isKnownNonNegative(LessOne)) 974 return; 975 976 if (!SE->isKnownPredicate(IsSigned ? 977 ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, 978 LessOne, X)) 979 return; 980 981 ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, 982 Rem->getOperand(0), Rem->getOperand(1), 983 "tmp"); 984 SelectInst *Sel = 985 SelectInst::Create(ICmp, 986 ConstantInt::get(Rem->getType(), 0), 987 Rem->getOperand(0), "tmp", Rem); 988 Rem->replaceAllUsesWith(Sel); 989 } 990 991 // Inform IVUsers about the new users. 992 if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) 993 IU->AddUsersIfInteresting(I, IVPhi); 994 995 DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); 996 ++NumElimRem; 997 Changed = true; 998 DeadInsts.push_back(Rem); 999 } 1000 1001 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { 1002 // If LoopSimplify form is not available, stay out of trouble. Some notes: 1003 // - LSR currently only supports LoopSimplify-form loops. Indvars' 1004 // canonicalization can be a pessimization without LSR to "clean up" 1005 // afterwards. 1006 // - We depend on having a preheader; in particular, 1007 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 1008 // and we're in trouble if we can't find the induction variable even when 1009 // we've manually inserted one. 1010 if (!L->isLoopSimplifyForm()) 1011 return false; 1012 1013 IU = &getAnalysis<IVUsers>(); 1014 LI = &getAnalysis<LoopInfo>(); 1015 SE = &getAnalysis<ScalarEvolution>(); 1016 DT = &getAnalysis<DominatorTree>(); 1017 TD = getAnalysisIfAvailable<TargetData>(); 1018 1019 DeadInsts.clear(); 1020 Changed = false; 1021 1022 // If there are any floating-point recurrences, attempt to 1023 // transform them to use integer recurrences. 1024 RewriteNonIntegerIVs(L); 1025 1026 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 1027 1028 // Create a rewriter object which we'll use to transform the code with. 1029 SCEVExpander Rewriter(*SE); 1030 if (DisableIVRewrite) 1031 Rewriter.disableCanonicalMode(); 1032 1033 // Check to see if this loop has a computable loop-invariant execution count. 1034 // If so, this means that we can compute the final value of any expressions 1035 // that are recurrent in the loop, and substitute the exit values from the 1036 // loop into any instructions outside of the loop that use the final values of 1037 // the current expressions. 1038 // 1039 if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 1040 RewriteLoopExitValues(L, Rewriter); 1041 1042 // Eliminate redundant IV users. 1043 SimplifyIVUsers(Rewriter); 1044 1045 // Compute the type of the largest recurrence expression, and decide whether 1046 // a canonical induction variable should be inserted. 1047 const Type *LargestType = 0; 1048 bool NeedCannIV = false; 1049 bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); 1050 if (ExpandBECount) { 1051 // If we have a known trip count and a single exit block, we'll be 1052 // rewriting the loop exit test condition below, which requires a 1053 // canonical induction variable. 1054 NeedCannIV = true; 1055 const Type *Ty = BackedgeTakenCount->getType(); 1056 if (DisableIVRewrite) { 1057 // In this mode, SimplifyIVUsers may have already widened the IV used by 1058 // the backedge test and inserted a Trunc on the compare's operand. Get 1059 // the wider type to avoid creating a redundant narrow IV only used by the 1060 // loop test. 1061 LargestType = getBackedgeIVType(L); 1062 } 1063 if (!LargestType || 1064 SE->getTypeSizeInBits(Ty) > 1065 SE->getTypeSizeInBits(LargestType)) 1066 LargestType = SE->getEffectiveSCEVType(Ty); 1067 } 1068 if (!DisableIVRewrite) { 1069 for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { 1070 NeedCannIV = true; 1071 const Type *Ty = 1072 SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); 1073 if (!LargestType || 1074 SE->getTypeSizeInBits(Ty) > 1075 SE->getTypeSizeInBits(LargestType)) 1076 LargestType = Ty; 1077 } 1078 } 1079 1080 // Now that we know the largest of the induction variable expressions 1081 // in this loop, insert a canonical induction variable of the largest size. 1082 PHINode *IndVar = 0; 1083 if (NeedCannIV) { 1084 // Check to see if the loop already has any canonical-looking induction 1085 // variables. If any are present and wider than the planned canonical 1086 // induction variable, temporarily remove them, so that the Rewriter 1087 // doesn't attempt to reuse them. 1088 SmallVector<PHINode *, 2> OldCannIVs; 1089 while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) { 1090 if (SE->getTypeSizeInBits(OldCannIV->getType()) > 1091 SE->getTypeSizeInBits(LargestType)) 1092 OldCannIV->removeFromParent(); 1093 else 1094 break; 1095 OldCannIVs.push_back(OldCannIV); 1096 } 1097 1098 IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType); 1099 1100 ++NumInserted; 1101 Changed = true; 1102 DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n'); 1103 1104 // Now that the official induction variable is established, reinsert 1105 // any old canonical-looking variables after it so that the IR remains 1106 // consistent. They will be deleted as part of the dead-PHI deletion at 1107 // the end of the pass. 1108 while (!OldCannIVs.empty()) { 1109 PHINode *OldCannIV = OldCannIVs.pop_back_val(); 1110 OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI()); 1111 } 1112 } 1113 1114 // If we have a trip count expression, rewrite the loop's exit condition 1115 // using it. We can currently only handle loops with a single exit. 1116 ICmpInst *NewICmp = 0; 1117 if (ExpandBECount) { 1118 assert(canExpandBackedgeTakenCount(L, SE) && 1119 "canonical IV disrupted BackedgeTaken expansion"); 1120 assert(NeedCannIV && 1121 "LinearFunctionTestReplace requires a canonical induction variable"); 1122 NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, 1123 Rewriter); 1124 } 1125 // Rewrite IV-derived expressions. 1126 if (!DisableIVRewrite) 1127 RewriteIVExpressions(L, Rewriter); 1128 1129 // Clear the rewriter cache, because values that are in the rewriter's cache 1130 // can be deleted in the loop below, causing the AssertingVH in the cache to 1131 // trigger. 1132 Rewriter.clear(); 1133 1134 // Now that we're done iterating through lists, clean up any instructions 1135 // which are now dead. 1136 while (!DeadInsts.empty()) 1137 if (Instruction *Inst = 1138 dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) 1139 RecursivelyDeleteTriviallyDeadInstructions(Inst); 1140 1141 // The Rewriter may not be used from this point on. 1142 1143 // Loop-invariant instructions in the preheader that aren't used in the 1144 // loop may be sunk below the loop to reduce register pressure. 1145 SinkUnusedInvariants(L); 1146 1147 // For completeness, inform IVUsers of the IV use in the newly-created 1148 // loop exit test instruction. 1149 if (NewICmp) 1150 IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)), 1151 IndVar); 1152 1153 // Clean up dead instructions. 1154 Changed |= DeleteDeadPHIs(L->getHeader()); 1155 // Check a post-condition. 1156 assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); 1157 return Changed; 1158 } 1159 1160 // FIXME: It is an extremely bad idea to indvar substitute anything more 1161 // complex than affine induction variables. Doing so will put expensive 1162 // polynomial evaluations inside of the loop, and the str reduction pass 1163 // currently can only reduce affine polynomials. For now just disable 1164 // indvar subst on anything more complex than an affine addrec, unless 1165 // it can be expanded to a trivial value. 1166 static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { 1167 // Loop-invariant values are safe. 1168 if (SE->isLoopInvariant(S, L)) return true; 1169 1170 // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how 1171 // to transform them into efficient code. 1172 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) 1173 return AR->isAffine(); 1174 1175 // An add is safe it all its operands are safe. 1176 if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { 1177 for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), 1178 E = Commutative->op_end(); I != E; ++I) 1179 if (!isSafe(*I, L, SE)) return false; 1180 return true; 1181 } 1182 1183 // A cast is safe if its operand is. 1184 if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) 1185 return isSafe(C->getOperand(), L, SE); 1186 1187 // A udiv is safe if its operands are. 1188 if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) 1189 return isSafe(UD->getLHS(), L, SE) && 1190 isSafe(UD->getRHS(), L, SE); 1191 1192 // SCEVUnknown is always safe. 1193 if (isa<SCEVUnknown>(S)) 1194 return true; 1195 1196 // Nothing else is safe. 1197 return false; 1198 } 1199 1200 void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { 1201 // Rewrite all induction variable expressions in terms of the canonical 1202 // induction variable. 1203 // 1204 // If there were induction variables of other sizes or offsets, manually 1205 // add the offsets to the primary induction variable and cast, avoiding 1206 // the need for the code evaluation methods to insert induction variables 1207 // of different sizes. 1208 for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { 1209 Value *Op = UI->getOperandValToReplace(); 1210 const Type *UseTy = Op->getType(); 1211 Instruction *User = UI->getUser(); 1212 1213 // Compute the final addrec to expand into code. 1214 const SCEV *AR = IU->getReplacementExpr(*UI); 1215 1216 // Evaluate the expression out of the loop, if possible. 1217 if (!L->contains(UI->getUser())) { 1218 const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); 1219 if (SE->isLoopInvariant(ExitVal, L)) 1220 AR = ExitVal; 1221 } 1222 1223 // FIXME: It is an extremely bad idea to indvar substitute anything more 1224 // complex than affine induction variables. Doing so will put expensive 1225 // polynomial evaluations inside of the loop, and the str reduction pass 1226 // currently can only reduce affine polynomials. For now just disable 1227 // indvar subst on anything more complex than an affine addrec, unless 1228 // it can be expanded to a trivial value. 1229 if (!isSafe(AR, L, SE)) 1230 continue; 1231 1232 // Determine the insertion point for this user. By default, insert 1233 // immediately before the user. The SCEVExpander class will automatically 1234 // hoist loop invariants out of the loop. For PHI nodes, there may be 1235 // multiple uses, so compute the nearest common dominator for the 1236 // incoming blocks. 1237 Instruction *InsertPt = User; 1238 if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) 1239 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) 1240 if (PHI->getIncomingValue(i) == Op) { 1241 if (InsertPt == User) 1242 InsertPt = PHI->getIncomingBlock(i)->getTerminator(); 1243 else 1244 InsertPt = 1245 DT->findNearestCommonDominator(InsertPt->getParent(), 1246 PHI->getIncomingBlock(i)) 1247 ->getTerminator(); 1248 } 1249 1250 // Now expand it into actual Instructions and patch it into place. 1251 Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); 1252 1253 DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' 1254 << " into = " << *NewVal << "\n"); 1255 1256 if (!isValidRewrite(Op, NewVal)) { 1257 DeadInsts.push_back(NewVal); 1258 continue; 1259 } 1260 // Inform ScalarEvolution that this value is changing. The change doesn't 1261 // affect its value, but it does potentially affect which use lists the 1262 // value will be on after the replacement, which affects ScalarEvolution's 1263 // ability to walk use lists and drop dangling pointers when a value is 1264 // deleted. 1265 SE->forgetValue(User); 1266 1267 // Patch the new value into place. 1268 if (Op->hasName()) 1269 NewVal->takeName(Op); 1270 User->replaceUsesOfWith(Op, NewVal); 1271 UI->setOperandValToReplace(NewVal); 1272 1273 ++NumRemoved; 1274 Changed = true; 1275 1276 // The old value may be dead now. 1277 DeadInsts.push_back(Op); 1278 } 1279 } 1280 1281 /// If there's a single exit block, sink any loop-invariant values that 1282 /// were defined in the preheader but not used inside the loop into the 1283 /// exit block to reduce register pressure in the loop. 1284 void IndVarSimplify::SinkUnusedInvariants(Loop *L) { 1285 BasicBlock *ExitBlock = L->getExitBlock(); 1286 if (!ExitBlock) return; 1287 1288 BasicBlock *Preheader = L->getLoopPreheader(); 1289 if (!Preheader) return; 1290 1291 Instruction *InsertPt = ExitBlock->getFirstNonPHI(); 1292 BasicBlock::iterator I = Preheader->getTerminator(); 1293 while (I != Preheader->begin()) { 1294 --I; 1295 // New instructions were inserted at the end of the preheader. 1296 if (isa<PHINode>(I)) 1297 break; 1298 1299 // Don't move instructions which might have side effects, since the side 1300 // effects need to complete before instructions inside the loop. Also don't 1301 // move instructions which might read memory, since the loop may modify 1302 // memory. Note that it's okay if the instruction might have undefined 1303 // behavior: LoopSimplify guarantees that the preheader dominates the exit 1304 // block. 1305 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 1306 continue; 1307 1308 // Skip debug info intrinsics. 1309 if (isa<DbgInfoIntrinsic>(I)) 1310 continue; 1311 1312 // Don't sink static AllocaInsts out of the entry block, which would 1313 // turn them into dynamic allocas! 1314 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) 1315 if (AI->isStaticAlloca()) 1316 continue; 1317 1318 // Determine if there is a use in or before the loop (direct or 1319 // otherwise). 1320 bool UsedInLoop = false; 1321 for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); 1322 UI != UE; ++UI) { 1323 User *U = *UI; 1324 BasicBlock *UseBB = cast<Instruction>(U)->getParent(); 1325 if (PHINode *P = dyn_cast<PHINode>(U)) { 1326 unsigned i = 1327 PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); 1328 UseBB = P->getIncomingBlock(i); 1329 } 1330 if (UseBB == Preheader || L->contains(UseBB)) { 1331 UsedInLoop = true; 1332 break; 1333 } 1334 } 1335 1336 // If there is, the def must remain in the preheader. 1337 if (UsedInLoop) 1338 continue; 1339 1340 // Otherwise, sink it to the exit block. 1341 Instruction *ToMove = I; 1342 bool Done = false; 1343 1344 if (I != Preheader->begin()) { 1345 // Skip debug info intrinsics. 1346 do { 1347 --I; 1348 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 1349 1350 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 1351 Done = true; 1352 } else { 1353 Done = true; 1354 } 1355 1356 ToMove->moveBefore(InsertPt); 1357 if (Done) break; 1358 InsertPt = ToMove; 1359 } 1360 } 1361 1362 /// ConvertToSInt - Convert APF to an integer, if possible. 1363 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 1364 bool isExact = false; 1365 if (&APF.getSemantics() == &APFloat::PPCDoubleDouble) 1366 return false; 1367 // See if we can convert this to an int64_t 1368 uint64_t UIntVal; 1369 if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero, 1370 &isExact) != APFloat::opOK || !isExact) 1371 return false; 1372 IntVal = UIntVal; 1373 return true; 1374 } 1375 1376 /// HandleFloatingPointIV - If the loop has floating induction variable 1377 /// then insert corresponding integer induction variable if possible. 1378 /// For example, 1379 /// for(double i = 0; i < 10000; ++i) 1380 /// bar(i) 1381 /// is converted into 1382 /// for(int i = 0; i < 10000; ++i) 1383 /// bar((double)i); 1384 /// 1385 void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { 1386 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 1387 unsigned BackEdge = IncomingEdge^1; 1388 1389 // Check incoming value. 1390 ConstantFP *InitValueVal = 1391 dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 1392 1393 int64_t InitValue; 1394 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 1395 return; 1396 1397 // Check IV increment. Reject this PN if increment operation is not 1398 // an add or increment value can not be represented by an integer. 1399 BinaryOperator *Incr = 1400 dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 1401 if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; 1402 1403 // If this is not an add of the PHI with a constantfp, or if the constant fp 1404 // is not an integer, bail out. 1405 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 1406 int64_t IncValue; 1407 if (IncValueVal == 0 || Incr->getOperand(0) != PN || 1408 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 1409 return; 1410 1411 // Check Incr uses. One user is PN and the other user is an exit condition 1412 // used by the conditional terminator. 1413 Value::use_iterator IncrUse = Incr->use_begin(); 1414 Instruction *U1 = cast<Instruction>(*IncrUse++); 1415 if (IncrUse == Incr->use_end()) return; 1416 Instruction *U2 = cast<Instruction>(*IncrUse++); 1417 if (IncrUse != Incr->use_end()) return; 1418 1419 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 1420 // only used by a branch, we can't transform it. 1421 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 1422 if (!Compare) 1423 Compare = dyn_cast<FCmpInst>(U2); 1424 if (Compare == 0 || !Compare->hasOneUse() || 1425 !isa<BranchInst>(Compare->use_back())) 1426 return; 1427 1428 BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); 1429 1430 // We need to verify that the branch actually controls the iteration count 1431 // of the loop. If not, the new IV can overflow and no one will notice. 1432 // The branch block must be in the loop and one of the successors must be out 1433 // of the loop. 1434 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 1435 if (!L->contains(TheBr->getParent()) || 1436 (L->contains(TheBr->getSuccessor(0)) && 1437 L->contains(TheBr->getSuccessor(1)))) 1438 return; 1439 1440 1441 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 1442 // transform it. 1443 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 1444 int64_t ExitValue; 1445 if (ExitValueVal == 0 || 1446 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 1447 return; 1448 1449 // Find new predicate for integer comparison. 1450 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 1451 switch (Compare->getPredicate()) { 1452 default: return; // Unknown comparison. 1453 case CmpInst::FCMP_OEQ: 1454 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 1455 case CmpInst::FCMP_ONE: 1456 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 1457 case CmpInst::FCMP_OGT: 1458 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 1459 case CmpInst::FCMP_OGE: 1460 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 1461 case CmpInst::FCMP_OLT: 1462 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 1463 case CmpInst::FCMP_OLE: 1464 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 1465 } 1466 1467 // We convert the floating point induction variable to a signed i32 value if 1468 // we can. This is only safe if the comparison will not overflow in a way 1469 // that won't be trapped by the integer equivalent operations. Check for this 1470 // now. 1471 // TODO: We could use i64 if it is native and the range requires it. 1472 1473 // The start/stride/exit values must all fit in signed i32. 1474 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 1475 return; 1476 1477 // If not actually striding (add x, 0.0), avoid touching the code. 1478 if (IncValue == 0) 1479 return; 1480 1481 // Positive and negative strides have different safety conditions. 1482 if (IncValue > 0) { 1483 // If we have a positive stride, we require the init to be less than the 1484 // exit value and an equality or less than comparison. 1485 if (InitValue >= ExitValue || 1486 NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) 1487 return; 1488 1489 uint32_t Range = uint32_t(ExitValue-InitValue); 1490 if (NewPred == CmpInst::ICMP_SLE) { 1491 // Normalize SLE -> SLT, check for infinite loop. 1492 if (++Range == 0) return; // Range overflows. 1493 } 1494 1495 unsigned Leftover = Range % uint32_t(IncValue); 1496 1497 // If this is an equality comparison, we require that the strided value 1498 // exactly land on the exit value, otherwise the IV condition will wrap 1499 // around and do things the fp IV wouldn't. 1500 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 1501 Leftover != 0) 1502 return; 1503 1504 // If the stride would wrap around the i32 before exiting, we can't 1505 // transform the IV. 1506 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 1507 return; 1508 1509 } else { 1510 // If we have a negative stride, we require the init to be greater than the 1511 // exit value and an equality or greater than comparison. 1512 if (InitValue >= ExitValue || 1513 NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) 1514 return; 1515 1516 uint32_t Range = uint32_t(InitValue-ExitValue); 1517 if (NewPred == CmpInst::ICMP_SGE) { 1518 // Normalize SGE -> SGT, check for infinite loop. 1519 if (++Range == 0) return; // Range overflows. 1520 } 1521 1522 unsigned Leftover = Range % uint32_t(-IncValue); 1523 1524 // If this is an equality comparison, we require that the strided value 1525 // exactly land on the exit value, otherwise the IV condition will wrap 1526 // around and do things the fp IV wouldn't. 1527 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 1528 Leftover != 0) 1529 return; 1530 1531 // If the stride would wrap around the i32 before exiting, we can't 1532 // transform the IV. 1533 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 1534 return; 1535 } 1536 1537 const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 1538 1539 // Insert new integer induction variable. 1540 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 1541 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 1542 PN->getIncomingBlock(IncomingEdge)); 1543 1544 Value *NewAdd = 1545 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 1546 Incr->getName()+".int", Incr); 1547 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 1548 1549 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 1550 ConstantInt::get(Int32Ty, ExitValue), 1551 Compare->getName()); 1552 1553 // In the following deletions, PN may become dead and may be deleted. 1554 // Use a WeakVH to observe whether this happens. 1555 WeakVH WeakPH = PN; 1556 1557 // Delete the old floating point exit comparison. The branch starts using the 1558 // new comparison. 1559 NewCompare->takeName(Compare); 1560 Compare->replaceAllUsesWith(NewCompare); 1561 RecursivelyDeleteTriviallyDeadInstructions(Compare); 1562 1563 // Delete the old floating point increment. 1564 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 1565 RecursivelyDeleteTriviallyDeadInstructions(Incr); 1566 1567 // If the FP induction variable still has uses, this is because something else 1568 // in the loop uses its value. In order to canonicalize the induction 1569 // variable, we chose to eliminate the IV and rewrite it in terms of an 1570 // int->fp cast. 1571 // 1572 // We give preference to sitofp over uitofp because it is faster on most 1573 // platforms. 1574 if (WeakPH) { 1575 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 1576 PN->getParent()->getFirstNonPHI()); 1577 PN->replaceAllUsesWith(Conv); 1578 RecursivelyDeleteTriviallyDeadInstructions(PN); 1579 } 1580 1581 // Add a new IVUsers entry for the newly-created integer PHI. 1582 IU->AddUsersIfInteresting(NewPHI, NewPHI); 1583 } 1584