1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This transformation analyzes and transforms the induction variables (and 10 // computations derived from them) into simpler forms suitable for subsequent 11 // analysis and transformation. 12 // 13 // If the trip count of a loop is computable, this pass also makes the following 14 // changes: 15 // 1. The exit condition for the loop is canonicalized to compare the 16 // induction value against the exit value. This turns loops like: 17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 18 // 2. Any use outside of the loop of an expression derived from the indvar 19 // is changed to compute the derived value outside of the loop, eliminating 20 // the dependence on the exit value of the induction variable. If the only 21 // purpose of the loop is to compute the exit value of some derived 22 // expression, this transformation will make the loop dead. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 27 #include "llvm/ADT/APFloat.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/None.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/SmallPtrSet.h" 35 #include "llvm/ADT/SmallSet.h" 36 #include "llvm/ADT/SmallVector.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/ADT/iterator_range.h" 39 #include "llvm/Analysis/LoopInfo.h" 40 #include "llvm/Analysis/LoopPass.h" 41 #include "llvm/Analysis/MemorySSA.h" 42 #include "llvm/Analysis/MemorySSAUpdater.h" 43 #include "llvm/Analysis/ScalarEvolution.h" 44 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 45 #include "llvm/Analysis/TargetLibraryInfo.h" 46 #include "llvm/Analysis/TargetTransformInfo.h" 47 #include "llvm/Analysis/ValueTracking.h" 48 #include "llvm/IR/BasicBlock.h" 49 #include "llvm/IR/Constant.h" 50 #include "llvm/IR/ConstantRange.h" 51 #include "llvm/IR/Constants.h" 52 #include "llvm/IR/DataLayout.h" 53 #include "llvm/IR/DerivedTypes.h" 54 #include "llvm/IR/Dominators.h" 55 #include "llvm/IR/Function.h" 56 #include "llvm/IR/IRBuilder.h" 57 #include "llvm/IR/InstrTypes.h" 58 #include "llvm/IR/Instruction.h" 59 #include "llvm/IR/Instructions.h" 60 #include "llvm/IR/IntrinsicInst.h" 61 #include "llvm/IR/Intrinsics.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/IR/PassManager.h" 65 #include "llvm/IR/PatternMatch.h" 66 #include "llvm/IR/Type.h" 67 #include "llvm/IR/Use.h" 68 #include "llvm/IR/User.h" 69 #include "llvm/IR/Value.h" 70 #include "llvm/IR/ValueHandle.h" 71 #include "llvm/InitializePasses.h" 72 #include "llvm/Pass.h" 73 #include "llvm/Support/Casting.h" 74 #include "llvm/Support/CommandLine.h" 75 #include "llvm/Support/Compiler.h" 76 #include "llvm/Support/Debug.h" 77 #include "llvm/Support/ErrorHandling.h" 78 #include "llvm/Support/MathExtras.h" 79 #include "llvm/Support/raw_ostream.h" 80 #include "llvm/Transforms/Scalar.h" 81 #include "llvm/Transforms/Scalar/LoopPassManager.h" 82 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 83 #include "llvm/Transforms/Utils/Local.h" 
84 #include "llvm/Transforms/Utils/LoopUtils.h" 85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 86 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 87 #include <cassert> 88 #include <cstdint> 89 #include <utility> 90 91 using namespace llvm; 92 93 #define DEBUG_TYPE "indvars" 94 95 STATISTIC(NumWidened , "Number of indvars widened"); 96 STATISTIC(NumReplaced , "Number of exit values replaced"); 97 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 98 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 99 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 100 101 // Trip count verification can be enabled by default under NDEBUG if we 102 // implement a strong expression equivalence checker in SCEV. Until then, we 103 // use the verify-indvars flag, which may assert in some cases. 104 static cl::opt<bool> VerifyIndvars( 105 "verify-indvars", cl::Hidden, 106 cl::desc("Verify the ScalarEvolution result after running indvars. Has no " 107 "effect in release builds. (Note: this adds additional SCEV " 108 "queries potentially changing the analysis result)")); 109 110 static cl::opt<ReplaceExitVal> ReplaceExitValue( 111 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 112 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 113 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 114 clEnumValN(OnlyCheapRepl, "cheap", 115 "only replace exit value when the cost is cheap"), 116 clEnumValN(NoHardUse, "noharduse", 117 "only replace exit values when loop def likely dead"), 118 clEnumValN(AlwaysRepl, "always", 119 "always replace exit value whenever possible"))); 120 121 static cl::opt<bool> UsePostIncrementRanges( 122 "indvars-post-increment-ranges", cl::Hidden, 123 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 124 cl::init(true)); 125 126 static cl::opt<bool> 127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 128 cl::desc("Disable Linear Function Test Replace optimization")); 129 130 static cl::opt<bool> 131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 132 cl::desc("Predicate conditions in read only loops")); 133 134 static cl::opt<bool> 135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), 136 cl::desc("Allow widening of indvars to eliminate s/zext")); 137 138 namespace { 139 140 struct RewritePhi; 141 142 class IndVarSimplify { 143 LoopInfo *LI; 144 ScalarEvolution *SE; 145 DominatorTree *DT; 146 const DataLayout &DL; 147 TargetLibraryInfo *TLI; 148 const TargetTransformInfo *TTI; 149 std::unique_ptr<MemorySSAUpdater> MSSAU; 150 151 SmallVector<WeakTrackingVH, 16> DeadInsts; 152 153 bool handleFloatingPointIV(Loop *L, PHINode *PH); 154 bool rewriteNonIntegerIVs(Loop *L); 155 156 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 157 /// Try to eliminate loop exits based on analyzeable exit counts 158 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 159 /// Try to form loop invariant tests for loop exits by changing how many 160 /// iterations of the loop run when that is unobservable. 
161 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 162 163 bool rewriteFirstIterationLoopExitValues(Loop *L); 164 165 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 166 const SCEV *ExitCount, 167 PHINode *IndVar, SCEVExpander &Rewriter); 168 169 bool sinkUnusedInvariants(Loop *L); 170 171 public: 172 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 173 const DataLayout &DL, TargetLibraryInfo *TLI, 174 TargetTransformInfo *TTI, MemorySSA *MSSA) 175 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) { 176 if (MSSA) 177 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 178 } 179 180 bool run(Loop *L); 181 }; 182 183 } // end anonymous namespace 184 185 /// Determine the insertion point for this user. By default, insert immediately 186 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the 187 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest 188 /// common dominator for the incoming blocks. A nullptr can be returned if no 189 /// viable location is found: it may happen if User is a PHI and Def only comes 190 /// to this PHI from unreachable blocks. 191 static Instruction *getInsertPointForUses(Instruction *User, Value *Def, 192 DominatorTree *DT, LoopInfo *LI) { 193 PHINode *PHI = dyn_cast<PHINode>(User); 194 if (!PHI) 195 return User; 196 197 Instruction *InsertPt = nullptr; 198 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { 199 if (PHI->getIncomingValue(i) != Def) 200 continue; 201 202 BasicBlock *InsertBB = PHI->getIncomingBlock(i); 203 204 if (!DT->isReachableFromEntry(InsertBB)) 205 continue; 206 207 if (!InsertPt) { 208 InsertPt = InsertBB->getTerminator(); 209 continue; 210 } 211 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); 212 InsertPt = InsertBB->getTerminator(); 213 } 214 215 // If we have skipped all inputs, it means that Def only comes to Phi from 216 // unreachable blocks. 217 if (!InsertPt) 218 return nullptr; 219 220 auto *DefI = dyn_cast<Instruction>(Def); 221 if (!DefI) 222 return InsertPt; 223 224 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); 225 226 auto *L = LI->getLoopFor(DefI->getParent()); 227 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); 228 229 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) 230 if (LI->getLoopFor(DTN->getBlock()) == L) 231 return DTN->getBlock()->getTerminator(); 232 233 llvm_unreachable("DefI dominates InsertPt!"); 234 } 235 236 //===----------------------------------------------------------------------===// 237 // rewriteNonIntegerIVs and helpers. Prefer integer IVs. 238 //===----------------------------------------------------------------------===// 239 240 /// Convert APF to an integer, if possible. 241 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 242 bool isExact = false; 243 // See if we can convert this to an int64_t 244 uint64_t UIntVal; 245 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true, 246 APFloat::rmTowardZero, &isExact) != APFloat::opOK || 247 !isExact) 248 return false; 249 IntVal = UIntVal; 250 return true; 251 } 252 253 /// If the loop has floating induction variable then insert corresponding 254 /// integer induction variable if possible. 
255 /// For example, 256 /// for(double i = 0; i < 10000; ++i) 257 /// bar(i) 258 /// is converted into 259 /// for(int i = 0; i < 10000; ++i) 260 /// bar((double)i); 261 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { 262 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 263 unsigned BackEdge = IncomingEdge^1; 264 265 // Check incoming value. 266 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 267 268 int64_t InitValue; 269 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 270 return false; 271 272 // Check IV increment. Reject this PN if increment operation is not 273 // an add or increment value can not be represented by an integer. 274 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 275 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false; 276 277 // If this is not an add of the PHI with a constantfp, or if the constant fp 278 // is not an integer, bail out. 279 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 280 int64_t IncValue; 281 if (IncValueVal == nullptr || Incr->getOperand(0) != PN || 282 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 283 return false; 284 285 // Check Incr uses. One user is PN and the other user is an exit condition 286 // used by the conditional terminator. 287 Value::user_iterator IncrUse = Incr->user_begin(); 288 Instruction *U1 = cast<Instruction>(*IncrUse++); 289 if (IncrUse == Incr->user_end()) return false; 290 Instruction *U2 = cast<Instruction>(*IncrUse++); 291 if (IncrUse != Incr->user_end()) return false; 292 293 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 294 // only used by a branch, we can't transform it. 295 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 296 if (!Compare) 297 Compare = dyn_cast<FCmpInst>(U2); 298 if (!Compare || !Compare->hasOneUse() || 299 !isa<BranchInst>(Compare->user_back())) 300 return false; 301 302 BranchInst *TheBr = cast<BranchInst>(Compare->user_back()); 303 304 // We need to verify that the branch actually controls the iteration count 305 // of the loop. If not, the new IV can overflow and no one will notice. 306 // The branch block must be in the loop and one of the successors must be out 307 // of the loop. 308 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 309 if (!L->contains(TheBr->getParent()) || 310 (L->contains(TheBr->getSuccessor(0)) && 311 L->contains(TheBr->getSuccessor(1)))) 312 return false; 313 314 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 315 // transform it. 316 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 317 int64_t ExitValue; 318 if (ExitValueVal == nullptr || 319 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 320 return false; 321 322 // Find new predicate for integer comparison. 323 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 324 switch (Compare->getPredicate()) { 325 default: return false; // Unknown comparison. 
326 case CmpInst::FCMP_OEQ: 327 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 328 case CmpInst::FCMP_ONE: 329 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 330 case CmpInst::FCMP_OGT: 331 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 332 case CmpInst::FCMP_OGE: 333 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 334 case CmpInst::FCMP_OLT: 335 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 336 case CmpInst::FCMP_OLE: 337 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 338 } 339 340 // We convert the floating point induction variable to a signed i32 value if 341 // we can. This is only safe if the comparison will not overflow in a way 342 // that won't be trapped by the integer equivalent operations. Check for this 343 // now. 344 // TODO: We could use i64 if it is native and the range requires it. 345 346 // The start/stride/exit values must all fit in signed i32. 347 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 348 return false; 349 350 // If not actually striding (add x, 0.0), avoid touching the code. 351 if (IncValue == 0) 352 return false; 353 354 // Positive and negative strides have different safety conditions. 355 if (IncValue > 0) { 356 // If we have a positive stride, we require the init to be less than the 357 // exit value. 358 if (InitValue >= ExitValue) 359 return false; 360 361 uint32_t Range = uint32_t(ExitValue-InitValue); 362 // Check for infinite loop, either: 363 // while (i <= Exit) or until (i > Exit) 364 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 365 if (++Range == 0) return false; // Range overflows. 366 } 367 368 unsigned Leftover = Range % uint32_t(IncValue); 369 370 // If this is an equality comparison, we require that the strided value 371 // exactly land on the exit value, otherwise the IV condition will wrap 372 // around and do things the fp IV wouldn't. 373 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 374 Leftover != 0) 375 return false; 376 377 // If the stride would wrap around the i32 before exiting, we can't 378 // transform the IV. 379 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 380 return false; 381 } else { 382 // If we have a negative stride, we require the init to be greater than the 383 // exit value. 384 if (InitValue <= ExitValue) 385 return false; 386 387 uint32_t Range = uint32_t(InitValue-ExitValue); 388 // Check for infinite loop, either: 389 // while (i >= Exit) or until (i < Exit) 390 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 391 if (++Range == 0) return false; // Range overflows. 392 } 393 394 unsigned Leftover = Range % uint32_t(-IncValue); 395 396 // If this is an equality comparison, we require that the strided value 397 // exactly land on the exit value, otherwise the IV condition will wrap 398 // around and do things the fp IV wouldn't. 399 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 400 Leftover != 0) 401 return false; 402 403 // If the stride would wrap around the i32 before exiting, we can't 404 // transform the IV. 405 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 406 return false; 407 } 408 409 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 410 411 // Insert new integer induction variable. 
412 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 413 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 414 PN->getIncomingBlock(IncomingEdge)); 415 416 Value *NewAdd = 417 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 418 Incr->getName()+".int", Incr); 419 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 420 421 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 422 ConstantInt::get(Int32Ty, ExitValue), 423 Compare->getName()); 424 425 // In the following deletions, PN may become dead and may be deleted. 426 // Use a WeakTrackingVH to observe whether this happens. 427 WeakTrackingVH WeakPH = PN; 428 429 // Delete the old floating point exit comparison. The branch starts using the 430 // new comparison. 431 NewCompare->takeName(Compare); 432 Compare->replaceAllUsesWith(NewCompare); 433 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); 434 435 // Delete the old floating point increment. 436 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 437 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); 438 439 // If the FP induction variable still has uses, this is because something else 440 // in the loop uses its value. In order to canonicalize the induction 441 // variable, we chose to eliminate the IV and rewrite it in terms of an 442 // int->fp cast. 443 // 444 // We give preference to sitofp over uitofp because it is faster on most 445 // platforms. 446 if (WeakPH) { 447 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 448 &*PN->getParent()->getFirstInsertionPt()); 449 PN->replaceAllUsesWith(Conv); 450 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get()); 451 } 452 return true; 453 } 454 455 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 456 // First step. Check to see if there are any floating-point recurrences. 457 // If there are, change them into integer recurrences, permitting analysis by 458 // the SCEV routines. 459 BasicBlock *Header = L->getHeader(); 460 461 SmallVector<WeakTrackingVH, 8> PHIs; 462 for (PHINode &PN : Header->phis()) 463 PHIs.push_back(&PN); 464 465 bool Changed = false; 466 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 467 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 468 Changed |= handleFloatingPointIV(L, PN); 469 470 // If the loop previously had floating-point IV, ScalarEvolution 471 // may not have been able to compute a trip count. Now that we've done some 472 // re-writing, the trip count may be computable. 473 if (Changed) 474 SE->forgetLoop(L); 475 return Changed; 476 } 477 478 //===---------------------------------------------------------------------===// 479 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 480 // they will exit at the first iteration. 481 //===---------------------------------------------------------------------===// 482 483 /// Check to see if this loop has loop invariant conditions which lead to loop 484 /// exits. If so, we know that if the exit path is taken, it is at the first 485 /// loop iteration. This lets us predict exit values of PHI nodes that live in 486 /// loop header. 487 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 488 // Verify the input to the pass is already in LCSSA form. 
489 assert(L->isLCSSAForm(*DT)); 490 491 SmallVector<BasicBlock *, 8> ExitBlocks; 492 L->getUniqueExitBlocks(ExitBlocks); 493 494 bool MadeAnyChanges = false; 495 for (auto *ExitBB : ExitBlocks) { 496 // If there are no more PHI nodes in this exit block, then no more 497 // values defined inside the loop are used on this path. 498 for (PHINode &PN : ExitBB->phis()) { 499 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); 500 IncomingValIdx != E; ++IncomingValIdx) { 501 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); 502 503 // Can we prove that the exit must run on the first iteration if it 504 // runs at all? (i.e. early exits are fine for our purposes, but 505 // traces which lead to this exit being taken on the 2nd iteration 506 // aren't.) Note that this is about whether the exit branch is 507 // executed, not about whether it is taken. 508 if (!L->getLoopLatch() || 509 !DT->dominates(IncomingBB, L->getLoopLatch())) 510 continue; 511 512 // Get condition that leads to the exit path. 513 auto *TermInst = IncomingBB->getTerminator(); 514 515 Value *Cond = nullptr; 516 if (auto *BI = dyn_cast<BranchInst>(TermInst)) { 517 // Must be a conditional branch, otherwise the block 518 // should not be in the loop. 519 Cond = BI->getCondition(); 520 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst)) 521 Cond = SI->getCondition(); 522 else 523 continue; 524 525 if (!L->isLoopInvariant(Cond)) 526 continue; 527 528 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx)); 529 530 // Only deal with PHIs in the loop header. 531 if (!ExitVal || ExitVal->getParent() != L->getHeader()) 532 continue; 533 534 // If ExitVal is a PHI on the loop header, then we know its 535 // value along this exit because the exit can only be taken 536 // on the first iteration. 537 auto *LoopPreheader = L->getLoopPreheader(); 538 assert(LoopPreheader && "Invalid loop"); 539 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader); 540 if (PreheaderIdx != -1) { 541 assert(ExitVal->getParent() == L->getHeader() && 542 "ExitVal must be in loop header"); 543 MadeAnyChanges = true; 544 PN.setIncomingValue(IncomingValIdx, 545 ExitVal->getIncomingValue(PreheaderIdx)); 546 } 547 } 548 } 549 } 550 return MadeAnyChanges; 551 } 552 553 //===----------------------------------------------------------------------===// 554 // IV Widening - Extend the width of an IV to cover its widest uses. 555 //===----------------------------------------------------------------------===// 556 557 namespace { 558 559 // Collect information about induction variables that are used by sign/zero 560 // extend operations. This information is recorded by CollectExtend and provides 561 // the input to WidenIV. 562 struct WideIVInfo { 563 PHINode *NarrowIV = nullptr; 564 565 // Widest integer type created [sz]ext 566 Type *WidestNativeType = nullptr; 567 568 // Was a sext user seen before a zext? 569 bool IsSigned = false; 570 }; 571 572 } // end anonymous namespace 573 574 /// Update information about the induction variable that is extended by this 575 /// sign or zero extend operation. This is used to determine the final width of 576 /// the IV before actually widening it. 
577 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, 578 const TargetTransformInfo *TTI) { 579 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 580 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) 581 return; 582 583 Type *Ty = Cast->getType(); 584 uint64_t Width = SE->getTypeSizeInBits(Ty); 585 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) 586 return; 587 588 // Check that `Cast` actually extends the induction variable (we rely on this 589 // later). This takes care of cases where `Cast` is extending a truncation of 590 // the narrow induction variable, and thus can end up being narrower than the 591 // "narrow" induction variable. 592 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); 593 if (NarrowIVWidth >= Width) 594 return; 595 596 // Cast is either an sext or zext up to this point. 597 // We should not widen an indvar if arithmetics on the wider indvar are more 598 // expensive than those on the narrower indvar. We check only the cost of ADD 599 // because at least an ADD is required to increment the induction variable. We 600 // could compute more comprehensively the cost of all instructions on the 601 // induction variable when necessary. 602 if (TTI && 603 TTI->getArithmeticInstrCost(Instruction::Add, Ty) > 604 TTI->getArithmeticInstrCost(Instruction::Add, 605 Cast->getOperand(0)->getType())) { 606 return; 607 } 608 609 if (!WI.WidestNativeType) { 610 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 611 WI.IsSigned = IsSigned; 612 return; 613 } 614 615 // We extend the IV to satisfy the sign of its first user, arbitrarily. 616 if (WI.IsSigned != IsSigned) 617 return; 618 619 if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) 620 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 621 } 622 623 namespace { 624 625 /// Record a link in the Narrow IV def-use chain along with the WideIV that 626 /// computes the same value as the Narrow IV def. This avoids caching Use* 627 /// pointers. 628 struct NarrowIVDefUse { 629 Instruction *NarrowDef = nullptr; 630 Instruction *NarrowUse = nullptr; 631 Instruction *WideDef = nullptr; 632 633 // True if the narrow def is never negative. Tracking this information lets 634 // us use a sign extension instead of a zero extension or vice versa, when 635 // profitable and legal. 636 bool NeverNegative = false; 637 638 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, 639 bool NeverNegative) 640 : NarrowDef(ND), NarrowUse(NU), WideDef(WD), 641 NeverNegative(NeverNegative) {} 642 }; 643 644 /// The goal of this transform is to remove sign and zero extends without 645 /// creating any new induction variables. To do this, it creates a new phi of 646 /// the wider type and redirects all users, either removing extends or inserting 647 /// truncs whenever we stop propagating the type. 648 class WidenIV { 649 // Parameters 650 PHINode *OrigPhi; 651 Type *WideType; 652 653 // Context 654 LoopInfo *LI; 655 Loop *L; 656 ScalarEvolution *SE; 657 DominatorTree *DT; 658 659 // Does the module have any calls to the llvm.experimental.guard intrinsic 660 // at all? If not we can avoid scanning instructions looking for guards. 
661 bool HasGuards; 662 663 // Result 664 PHINode *WidePhi = nullptr; 665 Instruction *WideInc = nullptr; 666 const SCEV *WideIncExpr = nullptr; 667 SmallVectorImpl<WeakTrackingVH> &DeadInsts; 668 669 SmallPtrSet<Instruction *,16> Widened; 670 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; 671 672 enum ExtendKind { ZeroExtended, SignExtended, Unknown }; 673 674 // A map tracking the kind of extension used to widen each narrow IV 675 // and narrow IV user. 676 // Key: pointer to a narrow IV or IV user. 677 // Value: the kind of extension used to widen this Instruction. 678 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; 679 680 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; 681 682 // A map with control-dependent ranges for post increment IV uses. The key is 683 // a pair of IV def and a use of this def denoting the context. The value is 684 // a ConstantRange representing possible values of the def at the given 685 // context. 686 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; 687 688 Optional<ConstantRange> getPostIncRangeInfo(Value *Def, 689 Instruction *UseI) { 690 DefUserPair Key(Def, UseI); 691 auto It = PostIncRangeInfos.find(Key); 692 return It == PostIncRangeInfos.end() 693 ? Optional<ConstantRange>(None) 694 : Optional<ConstantRange>(It->second); 695 } 696 697 void calculatePostIncRanges(PHINode *OrigPhi); 698 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); 699 700 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { 701 DefUserPair Key(Def, UseI); 702 auto It = PostIncRangeInfos.find(Key); 703 if (It == PostIncRangeInfos.end()) 704 PostIncRangeInfos.insert({Key, R}); 705 else 706 It->second = R.intersectWith(It->second); 707 } 708 709 public: 710 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, 711 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, 712 bool HasGuards) 713 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), 714 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), 715 HasGuards(HasGuards), DeadInsts(DI) { 716 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); 717 ExtendKindMap[OrigPhi] = WI.IsSigned ? 
SignExtended : ZeroExtended; 718 } 719 720 PHINode *createWideIV(SCEVExpander &Rewriter); 721 722 protected: 723 Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, 724 Instruction *Use); 725 726 Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); 727 Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, 728 const SCEVAddRecExpr *WideAR); 729 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); 730 731 ExtendKind getExtendKind(Instruction *I); 732 733 using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>; 734 735 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); 736 737 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); 738 739 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 740 unsigned OpCode) const; 741 742 Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); 743 744 bool widenLoopCompare(NarrowIVDefUse DU); 745 bool widenWithVariantUse(NarrowIVDefUse DU); 746 void widenWithVariantUseCodegen(NarrowIVDefUse DU); 747 748 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); 749 }; 750 751 } // end anonymous namespace 752 753 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, 754 bool IsSigned, Instruction *Use) { 755 // Set the debug location and conservative insertion point. 756 IRBuilder<> Builder(Use); 757 // Hoist the insertion point into loop preheaders as far as possible. 758 for (const Loop *L = LI->getLoopFor(Use->getParent()); 759 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); 760 L = L->getParentLoop()) 761 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); 762 763 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : 764 Builder.CreateZExt(NarrowOper, WideType); 765 } 766 767 /// Instantiate a wide operation to replace a narrow operation. This only needs 768 /// to handle operations that can evaluation to SCEVAddRec. It can safely return 769 /// 0 for any operation we decide not to clone. 770 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU, 771 const SCEVAddRecExpr *WideAR) { 772 unsigned Opcode = DU.NarrowUse->getOpcode(); 773 switch (Opcode) { 774 default: 775 return nullptr; 776 case Instruction::Add: 777 case Instruction::Mul: 778 case Instruction::UDiv: 779 case Instruction::Sub: 780 return cloneArithmeticIVUser(DU, WideAR); 781 782 case Instruction::And: 783 case Instruction::Or: 784 case Instruction::Xor: 785 case Instruction::Shl: 786 case Instruction::LShr: 787 case Instruction::AShr: 788 return cloneBitwiseIVUser(DU); 789 } 790 } 791 792 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) { 793 Instruction *NarrowUse = DU.NarrowUse; 794 Instruction *NarrowDef = DU.NarrowDef; 795 Instruction *WideDef = DU.WideDef; 796 797 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); 798 799 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything 800 // about the narrow operand yet so must insert a [sz]ext. It is probably loop 801 // invariant and will be folded or hoisted. If it actually comes from a 802 // widened IV, it should be removed during a future call to widenIVUse. 803 bool IsSigned = getExtendKind(NarrowDef) == SignExtended; 804 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 805 ? WideDef 806 : createExtendInst(NarrowUse->getOperand(0), WideType, 807 IsSigned, NarrowUse); 808 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 809 ? 
WideDef 810 : createExtendInst(NarrowUse->getOperand(1), WideType, 811 IsSigned, NarrowUse); 812 813 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 814 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 815 NarrowBO->getName()); 816 IRBuilder<> Builder(NarrowUse); 817 Builder.Insert(WideBO); 818 WideBO->copyIRFlags(NarrowBO); 819 return WideBO; 820 } 821 822 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU, 823 const SCEVAddRecExpr *WideAR) { 824 Instruction *NarrowUse = DU.NarrowUse; 825 Instruction *NarrowDef = DU.NarrowDef; 826 Instruction *WideDef = DU.WideDef; 827 828 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 829 830 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; 831 832 // We're trying to find X such that 833 // 834 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X 835 // 836 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), 837 // and check using SCEV if any of them are correct. 838 839 // Returns true if extending NonIVNarrowDef according to `SignExt` is a 840 // correct solution to X. 841 auto GuessNonIVOperand = [&](bool SignExt) { 842 const SCEV *WideLHS; 843 const SCEV *WideRHS; 844 845 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { 846 if (SignExt) 847 return SE->getSignExtendExpr(S, Ty); 848 return SE->getZeroExtendExpr(S, Ty); 849 }; 850 851 if (IVOpIdx == 0) { 852 WideLHS = SE->getSCEV(WideDef); 853 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); 854 WideRHS = GetExtend(NarrowRHS, WideType); 855 } else { 856 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); 857 WideLHS = GetExtend(NarrowLHS, WideType); 858 WideRHS = SE->getSCEV(WideDef); 859 } 860 861 // WideUse is "WideDef `op.wide` X" as described in the comment. 862 const SCEV *WideUse = nullptr; 863 864 switch (NarrowUse->getOpcode()) { 865 default: 866 llvm_unreachable("No other possibility!"); 867 868 case Instruction::Add: 869 WideUse = SE->getAddExpr(WideLHS, WideRHS); 870 break; 871 872 case Instruction::Mul: 873 WideUse = SE->getMulExpr(WideLHS, WideRHS); 874 break; 875 876 case Instruction::UDiv: 877 WideUse = SE->getUDivExpr(WideLHS, WideRHS); 878 break; 879 880 case Instruction::Sub: 881 WideUse = SE->getMinusSCEV(WideLHS, WideRHS); 882 break; 883 } 884 885 return WideUse == WideAR; 886 }; 887 888 bool SignExtend = getExtendKind(NarrowDef) == SignExtended; 889 if (!GuessNonIVOperand(SignExtend)) { 890 SignExtend = !SignExtend; 891 if (!GuessNonIVOperand(SignExtend)) 892 return nullptr; 893 } 894 895 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 896 ? WideDef 897 : createExtendInst(NarrowUse->getOperand(0), WideType, 898 SignExtend, NarrowUse); 899 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 900 ? 
WideDef 901 : createExtendInst(NarrowUse->getOperand(1), WideType, 902 SignExtend, NarrowUse); 903 904 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 905 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 906 NarrowBO->getName()); 907 908 IRBuilder<> Builder(NarrowUse); 909 Builder.Insert(WideBO); 910 WideBO->copyIRFlags(NarrowBO); 911 return WideBO; 912 } 913 914 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { 915 auto It = ExtendKindMap.find(I); 916 assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); 917 return It->second; 918 } 919 920 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 921 unsigned OpCode) const { 922 if (OpCode == Instruction::Add) 923 return SE->getAddExpr(LHS, RHS); 924 if (OpCode == Instruction::Sub) 925 return SE->getMinusSCEV(LHS, RHS); 926 if (OpCode == Instruction::Mul) 927 return SE->getMulExpr(LHS, RHS); 928 929 llvm_unreachable("Unsupported opcode."); 930 } 931 932 /// No-wrap operations can transfer sign extension of their result to their 933 /// operands. Generate the SCEV value for the widened operation without 934 /// actually modifying the IR yet. If the expression after extending the 935 /// operands is an AddRec for this loop, return the AddRec and the kind of 936 /// extension used. 937 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { 938 // Handle the common case of add<nsw/nuw> 939 const unsigned OpCode = DU.NarrowUse->getOpcode(); 940 // Only Add/Sub/Mul instructions supported yet. 941 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 942 OpCode != Instruction::Mul) 943 return {nullptr, Unknown}; 944 945 // One operand (NarrowDef) has already been extended to WideDef. Now determine 946 // if extending the other will lead to a recurrence. 947 const unsigned ExtendOperIdx = 948 DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; 949 assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); 950 951 const SCEV *ExtendOperExpr = nullptr; 952 const OverflowingBinaryOperator *OBO = 953 cast<OverflowingBinaryOperator>(DU.NarrowUse); 954 ExtendKind ExtKind = getExtendKind(DU.NarrowDef); 955 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 956 ExtendOperExpr = SE->getSignExtendExpr( 957 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 958 else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 959 ExtendOperExpr = SE->getZeroExtendExpr( 960 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 961 else 962 return {nullptr, Unknown}; 963 964 // When creating this SCEV expr, don't apply the current operations NSW or NUW 965 // flags. This instruction may be guarded by control flow that the no-wrap 966 // behavior depends on. Non-control-equivalent instructions can be mapped to 967 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW 968 // semantics to those operations. 969 const SCEV *lhs = SE->getSCEV(DU.WideDef); 970 const SCEV *rhs = ExtendOperExpr; 971 972 // Let's swap operands to the initial order for the case of non-commutative 973 // operations, like SUB. See PR21014. 974 if (ExtendOperIdx == 0) 975 std::swap(lhs, rhs); 976 const SCEVAddRecExpr *AddRec = 977 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); 978 979 if (!AddRec || AddRec->getLoop() != L) 980 return {nullptr, Unknown}; 981 982 return {AddRec, ExtKind}; 983 } 984 985 /// Is this instruction potentially interesting for further simplification after 986 /// widening it's type? 
In other words, can the extend be safely hoisted out of 987 /// the loop with SCEV reducing the value to a recurrence on the same loop. If 988 /// so, return the extended recurrence and the kind of extension used. Otherwise 989 /// return {nullptr, Unknown}. 990 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) { 991 if (!SE->isSCEVable(DU.NarrowUse->getType())) 992 return {nullptr, Unknown}; 993 994 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); 995 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= 996 SE->getTypeSizeInBits(WideType)) { 997 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow 998 // index. So don't follow this use. 999 return {nullptr, Unknown}; 1000 } 1001 1002 const SCEV *WideExpr; 1003 ExtendKind ExtKind; 1004 if (DU.NeverNegative) { 1005 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1006 if (isa<SCEVAddRecExpr>(WideExpr)) 1007 ExtKind = SignExtended; 1008 else { 1009 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1010 ExtKind = ZeroExtended; 1011 } 1012 } else if (getExtendKind(DU.NarrowDef) == SignExtended) { 1013 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1014 ExtKind = SignExtended; 1015 } else { 1016 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1017 ExtKind = ZeroExtended; 1018 } 1019 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); 1020 if (!AddRec || AddRec->getLoop() != L) 1021 return {nullptr, Unknown}; 1022 return {AddRec, ExtKind}; 1023 } 1024 1025 /// This IV user cannot be widened. Replace this use of the original narrow IV 1026 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. 1027 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { 1028 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1029 if (!InsertPt) 1030 return; 1031 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " 1032 << *DU.NarrowUse << "\n"); 1033 IRBuilder<> Builder(InsertPt); 1034 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); 1035 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); 1036 } 1037 1038 /// If the narrow use is a compare instruction, then widen the compare 1039 // (and possibly the other operand). The extend operation is hoisted into the 1040 // loop preheader as far as possible. 1041 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { 1042 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); 1043 if (!Cmp) 1044 return false; 1045 1046 // We can legally widen the comparison in the following two cases: 1047 // 1048 // - The signedness of the IV extension and comparison match 1049 // 1050 // - The narrow IV is always positive (and thus its sign extension is equal 1051 // to its zero extension). For instance, let's say we're zero extending 1052 // %narrow for the following use 1053 // 1054 // icmp slt i32 %narrow, %val ... (A) 1055 // 1056 // and %narrow is always positive. Then 1057 // 1058 // (A) == icmp slt i32 sext(%narrow), sext(%val) 1059 // == icmp slt i32 zext(%narrow), sext(%val) 1060 bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; 1061 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) 1062 return false; 1063 1064 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 
1 : 0); 1065 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); 1066 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1067 assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); 1068 1069 // Widen the compare instruction. 1070 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1071 if (!InsertPt) 1072 return false; 1073 IRBuilder<> Builder(InsertPt); 1074 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1075 1076 // Widen the other operand of the compare, if necessary. 1077 if (CastWidth < IVWidth) { 1078 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); 1079 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); 1080 } 1081 return true; 1082 } 1083 1084 // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this 1085 // will not work when: 1086 // 1) SCEV traces back to an instruction inside the loop that SCEV can not 1087 // expand, eg. add %indvar, (load %addr) 1088 // 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant 1089 // While SCEV fails to avoid trunc, we can still try to use instruction 1090 // combining approach to prove trunc is not required. This can be further 1091 // extended with other instruction combining checks, but for now we handle the 1092 // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext") 1093 // 1094 // Src: 1095 // %c = sub nsw %b, %indvar 1096 // %d = sext %c to i64 1097 // Dst: 1098 // %indvar.ext1 = sext %indvar to i64 1099 // %m = sext %b to i64 1100 // %d = sub nsw i64 %m, %indvar.ext1 1101 // Therefore, as long as the result of add/sub/mul is extended to wide type, no 1102 // trunc is required regardless of how %b is generated. This pattern is common 1103 // when calculating address in 64 bit architecture 1104 bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) { 1105 Instruction *NarrowUse = DU.NarrowUse; 1106 Instruction *NarrowDef = DU.NarrowDef; 1107 Instruction *WideDef = DU.WideDef; 1108 1109 // Handle the common case of add<nsw/nuw> 1110 const unsigned OpCode = NarrowUse->getOpcode(); 1111 // Only Add/Sub/Mul instructions are supported. 1112 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1113 OpCode != Instruction::Mul) 1114 return false; 1115 1116 // The operand that is not defined by NarrowDef of DU. Let's call it the 1117 // other operand. 1118 unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0; 1119 assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef && 1120 "bad DU"); 1121 1122 const SCEV *ExtendOperExpr = nullptr; 1123 const OverflowingBinaryOperator *OBO = 1124 cast<OverflowingBinaryOperator>(NarrowUse); 1125 ExtendKind ExtKind = getExtendKind(NarrowDef); 1126 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 1127 ExtendOperExpr = SE->getSignExtendExpr( 1128 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1129 else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 1130 ExtendOperExpr = SE->getZeroExtendExpr( 1131 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1132 else 1133 return false; 1134 1135 // Verifying that Defining operand is an AddRec 1136 const SCEV *Op1 = SE->getSCEV(WideDef); 1137 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); 1138 if (!AddRecOp1 || AddRecOp1->getLoop() != L) 1139 return false; 1140 // Verifying that other operand is an Extend. 
1141 if (ExtKind == SignExtended) { 1142 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr)) 1143 return false; 1144 } else { 1145 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr)) 1146 return false; 1147 } 1148 1149 if (ExtKind == SignExtended) { 1150 for (Use &U : NarrowUse->uses()) { 1151 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1152 if (!User || User->getType() != WideType) 1153 return false; 1154 } 1155 } else { // ExtKind == ZeroExtended 1156 for (Use &U : NarrowUse->uses()) { 1157 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1158 if (!User || User->getType() != WideType) 1159 return false; 1160 } 1161 } 1162 1163 return true; 1164 } 1165 1166 /// Special Case for widening with loop variant (see 1167 /// WidenIV::widenWithVariant). This is the code generation part. 1168 void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) { 1169 Instruction *NarrowUse = DU.NarrowUse; 1170 Instruction *NarrowDef = DU.NarrowDef; 1171 Instruction *WideDef = DU.WideDef; 1172 1173 ExtendKind ExtKind = getExtendKind(NarrowDef); 1174 1175 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1176 1177 // Generating a widening use instruction. 1178 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1179 ? WideDef 1180 : createExtendInst(NarrowUse->getOperand(0), WideType, 1181 ExtKind, NarrowUse); 1182 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1183 ? WideDef 1184 : createExtendInst(NarrowUse->getOperand(1), WideType, 1185 ExtKind, NarrowUse); 1186 1187 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1188 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1189 NarrowBO->getName()); 1190 IRBuilder<> Builder(NarrowUse); 1191 Builder.Insert(WideBO); 1192 WideBO->copyIRFlags(NarrowBO); 1193 1194 assert(ExtKind != Unknown && "Unknown ExtKind not handled"); 1195 1196 ExtendKindMap[NarrowUse] = ExtKind; 1197 1198 for (Use &U : NarrowUse->uses()) { 1199 Instruction *User = nullptr; 1200 if (ExtKind == SignExtended) 1201 User = dyn_cast<SExtInst>(U.getUser()); 1202 else 1203 User = dyn_cast<ZExtInst>(U.getUser()); 1204 if (User && User->getType() == WideType) { 1205 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1206 << *WideBO << "\n"); 1207 ++NumElimExt; 1208 User->replaceAllUsesWith(WideBO); 1209 DeadInsts.emplace_back(User); 1210 } 1211 } 1212 } 1213 1214 /// Determine whether an individual user of the narrow IV can be widened. If so, 1215 /// return the wide clone of the user. 1216 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1217 assert(ExtendKindMap.count(DU.NarrowDef) && 1218 "Should already know the kind of extension used to widen NarrowDef"); 1219 1220 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1221 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1222 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1223 // For LCSSA phis, sink the truncate outside the loop. 1224 // After SimplifyCFG most loop exit targets have a single predecessor. 1225 // Otherwise fall back to a truncate within the loop. 1226 if (UsePhi->getNumOperands() != 1) 1227 truncateIVUse(DU, DT, LI); 1228 else { 1229 // Widening the PHI requires us to insert a trunc. The logical place 1230 // for this trunc is in the same BB as the PHI. This is not possible if 1231 // the BB is terminated by a catchswitch. 
1232 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1233 return nullptr; 1234 1235 PHINode *WidePhi = 1236 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1237 UsePhi); 1238 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1239 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1240 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1241 UsePhi->replaceAllUsesWith(Trunc); 1242 DeadInsts.emplace_back(UsePhi); 1243 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1244 << *WidePhi << "\n"); 1245 } 1246 return nullptr; 1247 } 1248 } 1249 1250 // This narrow use can be widened by a sext if it's non-negative or its narrow 1251 // def was widended by a sext. Same for zext. 1252 auto canWidenBySExt = [&]() { 1253 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1254 }; 1255 auto canWidenByZExt = [&]() { 1256 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1257 }; 1258 1259 // Our raison d'etre! Eliminate sign and zero extension. 1260 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || 1261 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { 1262 Value *NewDef = DU.WideDef; 1263 if (DU.NarrowUse->getType() != WideType) { 1264 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); 1265 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1266 if (CastWidth < IVWidth) { 1267 // The cast isn't as wide as the IV, so insert a Trunc. 1268 IRBuilder<> Builder(DU.NarrowUse); 1269 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); 1270 } 1271 else { 1272 // A wider extend was hidden behind a narrower one. This may induce 1273 // another round of IV widening in which the intermediate IV becomes 1274 // dead. It should be very rare. 1275 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 1276 << " not wide enough to subsume " << *DU.NarrowUse 1277 << "\n"); 1278 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1279 NewDef = DU.NarrowUse; 1280 } 1281 } 1282 if (NewDef != DU.NarrowUse) { 1283 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse 1284 << " replaced by " << *DU.WideDef << "\n"); 1285 ++NumElimExt; 1286 DU.NarrowUse->replaceAllUsesWith(NewDef); 1287 DeadInsts.emplace_back(DU.NarrowUse); 1288 } 1289 // Now that the extend is gone, we want to expose it's uses for potential 1290 // further simplification. We don't need to directly inform SimplifyIVUsers 1291 // of the new users, because their parent IV will be processed later as a 1292 // new loop phi. If we preserved IVUsers analysis, we would also want to 1293 // push the uses of WideDef here. 1294 1295 // No further widening is needed. The deceased [sz]ext had done it for us. 1296 return nullptr; 1297 } 1298 1299 // Does this user itself evaluate to a recurrence after widening? 1300 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1301 if (!WideAddRec.first) 1302 WideAddRec = getWideRecurrence(DU); 1303 1304 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1305 if (!WideAddRec.first) { 1306 // If use is a loop condition, try to promote the condition instead of 1307 // truncating the IV first. 1308 if (widenLoopCompare(DU)) 1309 return nullptr; 1310 1311 // We are here about to generate a truncate instruction that may hurt 1312 // performance because the scalar evolution expression computed earlier 1313 // in WideAddRec.first does not indicate a polynomial induction expression. 
1314 // In that case, look at the operands of the use instruction to determine 1315 // if we can still widen the use instead of truncating its operand. 1316 if (widenWithVariantUse(DU)) { 1317 widenWithVariantUseCodegen(DU); 1318 return nullptr; 1319 } 1320 1321 // This user does not evaluate to a recurrence after widening, so don't 1322 // follow it. Instead insert a Trunc to kill off the original use, 1323 // eventually isolating the original narrow IV so it can be removed. 1324 truncateIVUse(DU, DT, LI); 1325 return nullptr; 1326 } 1327 // Assume block terminators cannot evaluate to a recurrence. We can't to 1328 // insert a Trunc after a terminator if there happens to be a critical edge. 1329 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1330 "SCEV is not expected to evaluate a block terminator"); 1331 1332 // Reuse the IV increment that SCEVExpander created as long as it dominates 1333 // NarrowUse. 1334 Instruction *WideUse = nullptr; 1335 if (WideAddRec.first == WideIncExpr && 1336 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1337 WideUse = WideInc; 1338 else { 1339 WideUse = cloneIVUser(DU, WideAddRec.first); 1340 if (!WideUse) 1341 return nullptr; 1342 } 1343 // Evaluation of WideAddRec ensured that the narrow expression could be 1344 // extended outside the loop without overflow. This suggests that the wide use 1345 // evaluates to the same expression as the extended narrow use, but doesn't 1346 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1347 // where it fails, we simply throw away the newly created wide use. 1348 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1349 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1350 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1351 << "\n"); 1352 DeadInsts.emplace_back(WideUse); 1353 return nullptr; 1354 } 1355 1356 // if we reached this point then we are going to replace 1357 // DU.NarrowUse with WideUse. Reattach DbgValue then. 1358 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); 1359 1360 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1361 // Returning WideUse pushes it on the worklist. 1362 return WideUse; 1363 } 1364 1365 /// Add eligible users of NarrowDef to NarrowIVUsers. 1366 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1367 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1368 bool NonNegativeDef = 1369 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1370 SE->getZero(NarrowSCEV->getType())); 1371 for (User *U : NarrowDef->users()) { 1372 Instruction *NarrowUser = cast<Instruction>(U); 1373 1374 // Handle data flow merges and bizarre phi cycles. 1375 if (!Widened.insert(NarrowUser).second) 1376 continue; 1377 1378 bool NonNegativeUse = false; 1379 if (!NonNegativeDef) { 1380 // We might have a control-dependent range information for this context. 1381 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1382 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1383 } 1384 1385 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1386 NonNegativeDef || NonNegativeUse); 1387 } 1388 } 1389 1390 /// Process a single induction variable. First use the SCEVExpander to create a 1391 /// wide induction variable that evaluates to the same recurrence as the 1392 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1393 /// def-use chain. 
After widenIVUse has processed all interesting IV users, the 1394 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1395 /// 1396 /// It would be simpler to delete uses as they are processed, but we must avoid 1397 /// invalidating SCEV expressions. 1398 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1399 // Bail if we disallowed widening. 1400 if(!AllowIVWidening) 1401 return nullptr; 1402 1403 // Is this phi an induction variable? 1404 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1405 if (!AddRec) 1406 return nullptr; 1407 1408 // Widen the induction variable expression. 1409 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1410 ? SE->getSignExtendExpr(AddRec, WideType) 1411 : SE->getZeroExtendExpr(AddRec, WideType); 1412 1413 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1414 "Expect the new IV expression to preserve its type"); 1415 1416 // Can the IV be extended outside the loop without overflow? 1417 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1418 if (!AddRec || AddRec->getLoop() != L) 1419 return nullptr; 1420 1421 // An AddRec must have loop-invariant operands. Since this AddRec is 1422 // materialized by a loop header phi, the expression cannot have any post-loop 1423 // operands, so they must dominate the loop header. 1424 assert( 1425 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1426 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1427 "Loop header phi recurrence inputs do not dominate the loop"); 1428 1429 // Iterate over IV uses (including transitive ones) looking for IV increments 1430 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1431 // the increment calculate control-dependent range information basing on 1432 // dominating conditions inside of the loop (e.g. a range check inside of the 1433 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1434 // 1435 // Control-dependent range information is later used to prove that a narrow 1436 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1437 // this on demand because when pushNarrowIVUsers needs this information some 1438 // of the dominating conditions might be already widened. 1439 if (UsePostIncrementRanges) 1440 calculatePostIncRanges(OrigPhi); 1441 1442 // The rewriter provides a value for the desired IV expression. This may 1443 // either find an existing phi or materialize a new one. Either way, we 1444 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1445 // of the phi-SCC dominates the loop entry. 1446 Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); 1447 Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); 1448 // If the wide phi is not a phi node, for example a cast node, like bitcast, 1449 // inttoptr, ptrtoint, just skip for now. 1450 if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) { 1451 // if the cast node is an inserted instruction without any user, we should 1452 // remove it to make sure the pass don't touch the function as we can not 1453 // wide the phi. 1454 if (ExpandInst->hasNUses(0) && 1455 Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst))) 1456 DeadInsts.emplace_back(ExpandInst); 1457 return nullptr; 1458 } 1459 1460 // Remembering the WideIV increment generated by SCEVExpander allows 1461 // widenIVUse to reuse it when widening the narrow IV's increment. 
We don't 1462 // employ a general reuse mechanism because the call above is the only call to 1463 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1464 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1465 WideInc = 1466 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1467 WideIncExpr = SE->getSCEV(WideInc); 1468 // Propagate the debug location associated with the original loop increment 1469 // to the new (widened) increment. 1470 auto *OrigInc = 1471 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1472 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1473 } 1474 1475 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1476 ++NumWidened; 1477 1478 // Traverse the def-use chain using a worklist starting at the original IV. 1479 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1480 1481 Widened.insert(OrigPhi); 1482 pushNarrowIVUsers(OrigPhi, WidePhi); 1483 1484 while (!NarrowIVUsers.empty()) { 1485 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1486 1487 // Process a def-use edge. This may replace the use, so don't hold a 1488 // use_iterator across it. 1489 Instruction *WideUse = widenIVUse(DU, Rewriter); 1490 1491 // Follow all def-use edges from the previous narrow use. 1492 if (WideUse) 1493 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1494 1495 // widenIVUse may have removed the def-use edge. 1496 if (DU.NarrowDef->use_empty()) 1497 DeadInsts.emplace_back(DU.NarrowDef); 1498 } 1499 1500 // Attach any debug information to the new PHI. 1501 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1502 1503 return WidePhi; 1504 } 1505 1506 /// Calculates control-dependent range for the given def at the given context 1507 /// by looking at dominating conditions inside of the loop 1508 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1509 Instruction *NarrowUser) { 1510 using namespace llvm::PatternMatch; 1511 1512 Value *NarrowDefLHS; 1513 const APInt *NarrowDefRHS; 1514 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1515 m_APInt(NarrowDefRHS))) || 1516 !NarrowDefRHS->isNonNegative()) 1517 return; 1518 1519 auto UpdateRangeFromCondition = [&] (Value *Condition, 1520 bool TrueDest) { 1521 CmpInst::Predicate Pred; 1522 Value *CmpRHS; 1523 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1524 m_Value(CmpRHS)))) 1525 return; 1526 1527 CmpInst::Predicate P = 1528 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1529 1530 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1531 auto CmpConstrainedLHSRange = 1532 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1533 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1534 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1535 1536 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1537 }; 1538 1539 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1540 if (!HasGuards) 1541 return; 1542 1543 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1544 Ctx->getParent()->rend())) { 1545 Value *C = nullptr; 1546 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1547 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1548 } 1549 }; 1550 1551 UpdateRangeFromGuards(NarrowUser); 1552 1553 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1554 // If NarrowUserBB is statically unreachable asking dominator queries may 1555 // yield surprising results. (e.g. 
the block may not have a dom tree node) 1556 if (!DT->isReachableFromEntry(NarrowUserBB)) 1557 return; 1558 1559 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1560 L->contains(DTB->getBlock()); 1561 DTB = DTB->getIDom()) { 1562 auto *BB = DTB->getBlock(); 1563 auto *TI = BB->getTerminator(); 1564 UpdateRangeFromGuards(TI); 1565 1566 auto *BI = dyn_cast<BranchInst>(TI); 1567 if (!BI || !BI->isConditional()) 1568 continue; 1569 1570 auto *TrueSuccessor = BI->getSuccessor(0); 1571 auto *FalseSuccessor = BI->getSuccessor(1); 1572 1573 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1574 return BBE.isSingleEdge() && 1575 DT->dominates(BBE, NarrowUser->getParent()); 1576 }; 1577 1578 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1579 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1580 1581 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1582 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1583 } 1584 } 1585 1586 /// Calculates PostIncRangeInfos map for the given IV 1587 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1588 SmallPtrSet<Instruction *, 16> Visited; 1589 SmallVector<Instruction *, 6> Worklist; 1590 Worklist.push_back(OrigPhi); 1591 Visited.insert(OrigPhi); 1592 1593 while (!Worklist.empty()) { 1594 Instruction *NarrowDef = Worklist.pop_back_val(); 1595 1596 for (Use &U : NarrowDef->uses()) { 1597 auto *NarrowUser = cast<Instruction>(U.getUser()); 1598 1599 // Don't go looking outside the current loop. 1600 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1601 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1602 continue; 1603 1604 if (!Visited.insert(NarrowUser).second) 1605 continue; 1606 1607 Worklist.push_back(NarrowUser); 1608 1609 calculatePostIncRange(NarrowDef, NarrowUser); 1610 } 1611 } 1612 } 1613 1614 //===----------------------------------------------------------------------===// 1615 // Live IV Reduction - Minimize IVs live across the loop. 1616 //===----------------------------------------------------------------------===// 1617 1618 //===----------------------------------------------------------------------===// 1619 // Simplification of IV users based on SCEV evaluation. 1620 //===----------------------------------------------------------------------===// 1621 1622 namespace { 1623 1624 class IndVarSimplifyVisitor : public IVVisitor { 1625 ScalarEvolution *SE; 1626 const TargetTransformInfo *TTI; 1627 PHINode *IVPhi; 1628 1629 public: 1630 WideIVInfo WI; 1631 1632 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1633 const TargetTransformInfo *TTI, 1634 const DominatorTree *DTree) 1635 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1636 DT = DTree; 1637 WI.NarrowIV = IVPhi; 1638 } 1639 1640 // Implement the interface used by simplifyUsersOfIV. 1641 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1642 }; 1643 1644 } // end anonymous namespace 1645 1646 /// Iteratively perform simplification on a worklist of IV users. Each 1647 /// successive simplification may push more users which may themselves be 1648 /// candidates for simplification. 1649 /// 1650 /// Sign/Zero extend elimination is interleaved with IV simplification. 
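///
/// As a sketch of that interaction (illustrative IR; %iv and %idx are made-up
/// names): once a 32-bit counter %iv has been widened to a 64-bit %iv.wide, a
/// user such as
///   %idx = sext i32 %iv to i64
/// can be replaced by %iv.wide directly, leaving the sext dead and letting the
/// narrow recurrence be removed.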
1651 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1652 SCEVExpander &Rewriter, 1653 LoopInfo *LI) { 1654 SmallVector<WideIVInfo, 8> WideIVs; 1655 1656 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1657 Intrinsic::getName(Intrinsic::experimental_guard)); 1658 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1659 1660 SmallVector<PHINode*, 8> LoopPhis; 1661 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1662 LoopPhis.push_back(cast<PHINode>(I)); 1663 } 1664 // Each round of simplification iterates through the SimplifyIVUsers worklist 1665 // for all current phis, then determines whether any IVs can be 1666 // widened. Widening adds new phis to LoopPhis, inducing another round of 1667 // simplification on the wide IVs. 1668 bool Changed = false; 1669 while (!LoopPhis.empty()) { 1670 // Evaluate as many IV expressions as possible before widening any IVs. This 1671 // forces SCEV to set no-wrap flags before evaluating sign/zero 1672 // extension. The first time SCEV attempts to normalize sign/zero extension, 1673 // the result becomes final. So for the most predictable results, we delay 1674 // evaluation of sign/zero extend evaluation until needed, and avoid running 1675 // other SCEV based analysis prior to simplifyAndExtend. 1676 do { 1677 PHINode *CurrIV = LoopPhis.pop_back_val(); 1678 1679 // Information about sign/zero extensions of CurrIV. 1680 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1681 1682 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, 1683 &Visitor); 1684 1685 if (Visitor.WI.WidestNativeType) { 1686 WideIVs.push_back(Visitor.WI); 1687 } 1688 } while(!LoopPhis.empty()); 1689 1690 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1691 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1692 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1693 Changed = true; 1694 LoopPhis.push_back(WidePhi); 1695 } 1696 } 1697 } 1698 return Changed; 1699 } 1700 1701 //===----------------------------------------------------------------------===// 1702 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1703 //===----------------------------------------------------------------------===// 1704 1705 /// Given an Value which is hoped to be part of an add recurance in the given 1706 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1707 /// that this is less general than SCEVs AddRec checking. 1708 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 1709 Instruction *IncI = dyn_cast<Instruction>(IncV); 1710 if (!IncI) 1711 return nullptr; 1712 1713 switch (IncI->getOpcode()) { 1714 case Instruction::Add: 1715 case Instruction::Sub: 1716 break; 1717 case Instruction::GetElementPtr: 1718 // An IV counter must preserve its type. 1719 if (IncI->getNumOperands() == 2) 1720 break; 1721 LLVM_FALLTHROUGH; 1722 default: 1723 return nullptr; 1724 } 1725 1726 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 1727 if (Phi && Phi->getParent() == L->getHeader()) { 1728 if (L->isLoopInvariant(IncI->getOperand(1))) 1729 return Phi; 1730 return nullptr; 1731 } 1732 if (IncI->getOpcode() == Instruction::GetElementPtr) 1733 return nullptr; 1734 1735 // Allow add/sub to be commuted. 
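  // e.g. (illustrative) the increment may have the counter phi as its second
  // operand:
  //   %iv.next = add i32 %stride, %iv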
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.
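  //
  // As an illustration (hypothetical scenario, not from the original
  // comments): if Root feeds the address of a load that executes
  // unconditionally before OnPathTo, then poison at Root already implies UB on
  // every path to OnPathTo, so adding one more use of Root there cannot
  // introduce new UB.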
1820 1821 // The set of all recursive users we've visited (which are assumed to all be 1822 // poison because of said visit) 1823 SmallSet<const Value *, 16> KnownPoison; 1824 SmallVector<const Instruction*, 16> Worklist; 1825 Worklist.push_back(Root); 1826 while (!Worklist.empty()) { 1827 const Instruction *I = Worklist.pop_back_val(); 1828 1829 // If we know this must trigger UB on a path leading our target. 1830 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 1831 return true; 1832 1833 // If we can't analyze propagation through this instruction, just skip it 1834 // and transitive users. Safe as false is a conservative result. 1835 if (!propagatesPoison(cast<Operator>(I)) && I != Root) 1836 continue; 1837 1838 if (KnownPoison.insert(I).second) 1839 for (const User *User : I->users()) 1840 Worklist.push_back(cast<Instruction>(User)); 1841 } 1842 1843 // Might be non-UB, or might have a path we couldn't prove must execute on 1844 // way to exiting bb. 1845 return false; 1846 } 1847 1848 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 1849 /// down to checking that all operands are constant and listing instructions 1850 /// that may hide undef. 1851 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited, 1852 unsigned Depth) { 1853 if (isa<Constant>(V)) 1854 return !isa<UndefValue>(V); 1855 1856 if (Depth >= 6) 1857 return false; 1858 1859 // Conservatively handle non-constant non-instructions. For example, Arguments 1860 // may be undef. 1861 Instruction *I = dyn_cast<Instruction>(V); 1862 if (!I) 1863 return false; 1864 1865 // Load and return values may be undef. 1866 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 1867 return false; 1868 1869 // Optimistically handle other instructions. 1870 for (Value *Op : I->operands()) { 1871 if (!Visited.insert(Op).second) 1872 continue; 1873 if (!hasConcreteDefImpl(Op, Visited, Depth+1)) 1874 return false; 1875 } 1876 return true; 1877 } 1878 1879 /// Return true if the given value is concrete. We must prove that undef can 1880 /// never reach it. 1881 /// 1882 /// TODO: If we decide that this is a good approach to checking for undef, we 1883 /// may factor it into a common location. 1884 static bool hasConcreteDef(Value *V) { 1885 SmallPtrSet<Value*, 8> Visited; 1886 Visited.insert(V); 1887 return hasConcreteDefImpl(V, Visited, 0); 1888 } 1889 1890 /// Return true if this IV has any uses other than the (soon to be rewritten) 1891 /// loop exit test. 1892 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { 1893 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); 1894 Value *IncV = Phi->getIncomingValue(LatchIdx); 1895 1896 for (User *U : Phi->users()) 1897 if (U != Cond && U != IncV) return false; 1898 1899 for (User *U : IncV->users()) 1900 if (U != Cond && U != Phi) return false; 1901 return true; 1902 } 1903 1904 /// Return true if the given phi is a "counter" in L. A counter is an 1905 /// add recurance (of integer or pointer type) with an arbitrary start, and a 1906 /// step of 1. Note that L must have exactly one latch. 
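/// For illustration (made-up IR), a typical counter phi looks like
///   %i      = phi i64 [ %start, %preheader ], [ %i.next, %latch ]
///   %i.next = add nuw nsw i64 %i, 1
/// i.e. any affine add recurrence in L whose step is the constant one.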
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi);
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs.
For integers, we strip and reinfer as needed, 1985 // see code in linearFunctionTestReplace. For pointers, we restrict 1986 // transforms as there is no good way to reinfer inbounds once lost. 1987 if (!Phi->getType()->isIntegerTy() && 1988 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 1989 continue; 1990 1991 const SCEV *Init = AR->getStart(); 1992 1993 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 1994 // Don't force a live loop counter if another IV can be used. 1995 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 1996 continue; 1997 1998 // Prefer to count-from-zero. This is a more "canonical" counter form. It 1999 // also prefers integer to pointer IVs. 2000 if (BestInit->isZero() != Init->isZero()) { 2001 if (BestInit->isZero()) 2002 continue; 2003 } 2004 // If two IVs both count from zero or both count from nonzero then the 2005 // narrower is likely a dead phi that has been widened. Use the wider phi 2006 // to allow the other to be eliminated. 2007 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2008 continue; 2009 } 2010 BestPhi = Phi; 2011 BestInit = Init; 2012 } 2013 return BestPhi; 2014 } 2015 2016 /// Insert an IR expression which computes the value held by the IV IndVar 2017 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2018 /// is taken ExitCount times. 2019 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2020 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2021 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2022 assert(isLoopCounter(IndVar, L, SE)); 2023 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2024 const SCEV *IVInit = AR->getStart(); 2025 2026 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2027 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2028 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2029 // the existing GEPs whenever possible. 2030 if (IndVar->getType()->isPointerTy() && 2031 !ExitCount->getType()->isPointerTy()) { 2032 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2033 // signed value. ExitCount on the other hand represents the loop trip count, 2034 // which is an unsigned value. FindLoopCounter only allows induction 2035 // variables that have a positive unit stride of one. This means we don't 2036 // have to handle the case of negative offsets (yet) and just need to zero 2037 // extend ExitCount. 2038 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2039 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2040 if (UsePostInc) 2041 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2042 2043 // Expand the code for the iteration count. 2044 assert(SE->isLoopInvariant(IVOffset, L) && 2045 "Computed iteration count is not loop invariant!"); 2046 2047 // We could handle pointer IVs other than i8*, but we need to compensate for 2048 // gep index scaling. 2049 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2050 cast<PointerType>(IndVar->getType()) 2051 ->getElementType())->isOne() && 2052 "unit stride pointer IV must be i8*"); 2053 2054 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2055 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2056 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2057 } else { 2058 // In any other case, convert both IVInit and ExitCount to integers before 2059 // comparing. 
This may result in SCEV expansion of pointers, but in practice 2060 // SCEV will fold the pointer arithmetic away as such: 2061 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2062 // 2063 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2064 // for simple memset-style loops. 2065 // 2066 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2067 // were generated on top of case #2, which is not expected. 2068 2069 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2070 // For unit stride, IVCount = Start + ExitCount with 2's complement 2071 // overflow. 2072 2073 // For integer IVs, truncate the IV before computing IVInit + BECount, 2074 // unless we know apriori that the limit must be a constant when evaluated 2075 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2076 // of the IV in the loop over a (potentially) expensive expansion of the 2077 // widened exit count add(zext(add)) expression. 2078 if (SE->getTypeSizeInBits(IVInit->getType()) 2079 > SE->getTypeSizeInBits(ExitCount->getType())) { 2080 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2081 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2082 else 2083 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2084 } 2085 2086 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2087 2088 if (UsePostInc) 2089 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2090 2091 // Expand the code for the iteration count. 2092 assert(SE->isLoopInvariant(IVLimit, L) && 2093 "Computed iteration count is not loop invariant!"); 2094 // Ensure that we generate the same type as IndVar, or a smaller integer 2095 // type. In the presence of null pointer values, we have an integer type 2096 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2097 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2098 IndVar->getType() : ExitCount->getType(); 2099 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2100 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2101 } 2102 } 2103 2104 /// This method rewrites the exit condition of the loop to be a canonical != 2105 /// comparison against the incremented loop induction variable. This pass is 2106 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2107 /// determine a loop-invariant trip count of the loop, which is actually a much 2108 /// broader range than just linear tests. 2109 bool IndVarSimplify:: 2110 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2111 const SCEV *ExitCount, 2112 PHINode *IndVar, SCEVExpander &Rewriter) { 2113 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2114 assert(isLoopCounter(IndVar, L, SE)); 2115 Instruction * const IncVar = 2116 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2117 2118 // Initialize CmpIndVar to the preincremented IV. 2119 Value *CmpIndVar = IndVar; 2120 bool UsePostInc = false; 2121 2122 // If the exiting block is the same as the backedge block, we prefer to 2123 // compare against the post-incremented value, otherwise we must compare 2124 // against the preincremented value. 2125 if (ExitingBB == L->getLoopLatch()) { 2126 // For pointer IVs, we chose to not strip inbounds which requires us not 2127 // to add a potentially UB introducing use. 
We need to either a) show 2128 // the loop test we're modifying is already in post-inc form, or b) show 2129 // that adding a use must not introduce UB. 2130 bool SafeToPostInc = 2131 IndVar->getType()->isIntegerTy() || 2132 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2133 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2134 if (SafeToPostInc) { 2135 UsePostInc = true; 2136 CmpIndVar = IncVar; 2137 } 2138 } 2139 2140 // It may be necessary to drop nowrap flags on the incrementing instruction 2141 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2142 // case the increment might have previously been poison on the last iteration 2143 // only) or if LFTR switches to a different IV that was previously dynamically 2144 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2145 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2146 // check), because the pre-inc addrec flags may be adopted from the original 2147 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2148 // TODO: This handling is inaccurate for one case: If we switch to a 2149 // dynamically dead IV that wraps on the first loop iteration only, which is 2150 // not covered by the post-inc addrec. (If the new IV was not dynamically 2151 // dead, it could not be poison on the first iteration in the first place.) 2152 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2153 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2154 if (BO->hasNoUnsignedWrap()) 2155 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2156 if (BO->hasNoSignedWrap()) 2157 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2158 } 2159 2160 Value *ExitCnt = genLoopLimit( 2161 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2162 assert(ExitCnt->getType()->isPointerTy() == 2163 IndVar->getType()->isPointerTy() && 2164 "genLoopLimit missed a cast"); 2165 2166 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2167 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2168 ICmpInst::Predicate P; 2169 if (L->contains(BI->getSuccessor(0))) 2170 P = ICmpInst::ICMP_NE; 2171 else 2172 P = ICmpInst::ICMP_EQ; 2173 2174 IRBuilder<> Builder(BI); 2175 2176 // The new loop exit condition should reuse the debug location of the 2177 // original loop exit condition. 2178 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2179 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2180 2181 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2182 // avoid the expensive expansion of the limit expression in the wider type, 2183 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2184 // since we know (from the exit count bitwidth), that we can't self-wrap in 2185 // the narrower type. 2186 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2187 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2188 if (CmpIndVarSize > ExitCntSize) { 2189 assert(!CmpIndVar->getType()->isPointerTy() && 2190 !ExitCnt->getType()->isPointerTy()); 2191 2192 // Before resorting to actually inserting the truncate, use the same 2193 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2194 // the other side of the comparison instead. We still evaluate the limit 2195 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2196 // a truncate within in. 
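    // For example (illustrative types and names): with an i64 CmpIndVar and an
    // i32 ExitCnt, proving zext(trunc(IV)) == IV lets us emit
    //   %wide.trip.count = zext i32 %exit.cnt to i64
    // once (hoisted out of the loop below) instead of truncating CmpIndVar on
    // every iteration.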
2197 bool Extended = false; 2198 const SCEV *IV = SE->getSCEV(CmpIndVar); 2199 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2200 ExitCnt->getType()); 2201 const SCEV *ZExtTrunc = 2202 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2203 2204 if (ZExtTrunc == IV) { 2205 Extended = true; 2206 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2207 "wide.trip.count"); 2208 } else { 2209 const SCEV *SExtTrunc = 2210 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2211 if (SExtTrunc == IV) { 2212 Extended = true; 2213 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2214 "wide.trip.count"); 2215 } 2216 } 2217 2218 if (Extended) { 2219 bool Discard; 2220 L->makeLoopInvariant(ExitCnt, Discard); 2221 } else 2222 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2223 "lftr.wideiv"); 2224 } 2225 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2226 << " LHS:" << *CmpIndVar << '\n' 2227 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2228 << "\n" 2229 << " RHS:\t" << *ExitCnt << "\n" 2230 << "ExitCount:\t" << *ExitCount << "\n" 2231 << " was: " << *BI->getCondition() << "\n"); 2232 2233 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2234 Value *OrigCond = BI->getCondition(); 2235 // It's tempting to use replaceAllUsesWith here to fully replace the old 2236 // comparison, but that's not immediately safe, since users of the old 2237 // comparison may not be dominated by the new comparison. Instead, just 2238 // update the branch to use the new comparison; in the common case this 2239 // will make old comparison dead. 2240 BI->setCondition(Cond); 2241 DeadInsts.emplace_back(OrigCond); 2242 2243 ++NumLFTR; 2244 return true; 2245 } 2246 2247 //===----------------------------------------------------------------------===// 2248 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2249 //===----------------------------------------------------------------------===// 2250 2251 /// If there's a single exit block, sink any loop-invariant values that 2252 /// were defined in the preheader but not used inside the loop into the 2253 /// exit block to reduce register pressure in the loop. 2254 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2255 BasicBlock *ExitBlock = L->getExitBlock(); 2256 if (!ExitBlock) return false; 2257 2258 BasicBlock *Preheader = L->getLoopPreheader(); 2259 if (!Preheader) return false; 2260 2261 bool MadeAnyChanges = false; 2262 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2263 BasicBlock::iterator I(Preheader->getTerminator()); 2264 while (I != Preheader->begin()) { 2265 --I; 2266 // New instructions were inserted at the end of the preheader. 2267 if (isa<PHINode>(I)) 2268 break; 2269 2270 // Don't move instructions which might have side effects, since the side 2271 // effects need to complete before instructions inside the loop. Also don't 2272 // move instructions which might read memory, since the loop may modify 2273 // memory. Note that it's okay if the instruction might have undefined 2274 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2275 // block. 2276 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2277 continue; 2278 2279 // Skip debug info intrinsics. 2280 if (isa<DbgInfoIntrinsic>(I)) 2281 continue; 2282 2283 // Skip eh pad instructions. 
2284 if (I->isEHPad()) 2285 continue; 2286 2287 // Don't sink alloca: we never want to sink static alloca's out of the 2288 // entry block, and correctly sinking dynamic alloca's requires 2289 // checks for stacksave/stackrestore intrinsics. 2290 // FIXME: Refactor this check somehow? 2291 if (isa<AllocaInst>(I)) 2292 continue; 2293 2294 // Determine if there is a use in or before the loop (direct or 2295 // otherwise). 2296 bool UsedInLoop = false; 2297 for (Use &U : I->uses()) { 2298 Instruction *User = cast<Instruction>(U.getUser()); 2299 BasicBlock *UseBB = User->getParent(); 2300 if (PHINode *P = dyn_cast<PHINode>(User)) { 2301 unsigned i = 2302 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2303 UseBB = P->getIncomingBlock(i); 2304 } 2305 if (UseBB == Preheader || L->contains(UseBB)) { 2306 UsedInLoop = true; 2307 break; 2308 } 2309 } 2310 2311 // If there is, the def must remain in the preheader. 2312 if (UsedInLoop) 2313 continue; 2314 2315 // Otherwise, sink it to the exit block. 2316 Instruction *ToMove = &*I; 2317 bool Done = false; 2318 2319 if (I != Preheader->begin()) { 2320 // Skip debug info intrinsics. 2321 do { 2322 --I; 2323 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2324 2325 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2326 Done = true; 2327 } else { 2328 Done = true; 2329 } 2330 2331 MadeAnyChanges = true; 2332 ToMove->moveBefore(*ExitBlock, InsertPt); 2333 if (Done) break; 2334 InsertPt = ToMove->getIterator(); 2335 } 2336 2337 return MadeAnyChanges; 2338 } 2339 2340 // Returns true if the condition of \p BI being checked is invariant and can be 2341 // proved to be trivially true. 2342 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE, 2343 bool ProvingLoopExit) { 2344 ICmpInst::Predicate Pred; 2345 Value *LHS, *RHS; 2346 using namespace PatternMatch; 2347 BasicBlock *TrueSucc, *FalseSucc; 2348 if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), 2349 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) 2350 return false; 2351 2352 assert((L->contains(TrueSucc) != L->contains(FalseSucc)) && 2353 "Not a loop exit!"); 2354 2355 // 'LHS pred RHS' should now mean that we stay in loop. 2356 if (L->contains(FalseSucc)) 2357 Pred = CmpInst::getInversePredicate(Pred); 2358 2359 // If we are proving loop exit, invert the predicate. 2360 if (ProvingLoopExit) 2361 Pred = CmpInst::getInversePredicate(Pred); 2362 2363 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L); 2364 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L); 2365 // Can we prove it to be trivially true? 2366 if (SE->isKnownPredicate(Pred, LHSS, RHSS)) 2367 return true; 2368 2369 return false; 2370 } 2371 2372 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 2373 SmallVector<BasicBlock*, 16> ExitingBlocks; 2374 L->getExitingBlocks(ExitingBlocks); 2375 2376 // Remove all exits which aren't both rewriteable and execute on every 2377 // iteration. 2378 auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 2379 // If our exitting block exits multiple loops, we can only rewrite the 2380 // innermost one. Otherwise, we're changing how many times the innermost 2381 // loop runs before it exits. 2382 if (LI->getLoopFor(ExitingBB) != L) 2383 return true; 2384 2385 // Can't rewrite non-branch yet. 2386 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2387 if (!BI) 2388 return true; 2389 2390 // If already constant, nothing to do. 
2391 if (isa<Constant>(BI->getCondition())) 2392 return true; 2393 2394 // Likewise, the loop latch must be dominated by the exiting BB. 2395 if (!DT->dominates(ExitingBB, L->getLoopLatch())) 2396 return true; 2397 2398 return false; 2399 }); 2400 ExitingBlocks.erase(NewEnd, ExitingBlocks.end()); 2401 2402 if (ExitingBlocks.empty()) 2403 return false; 2404 2405 // Get a symbolic upper bound on the loop backedge taken count. 2406 const SCEV *MaxExitCount = SE->computeMaxBackedgeTakenCount(L); 2407 if (isa<SCEVCouldNotCompute>(MaxExitCount)) 2408 return false; 2409 2410 // Visit our exit blocks in order of dominance. We know from the fact that 2411 // all exits must dominate the latch, so there is a total dominance order 2412 // between them. 2413 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { 2414 // std::sort sorts in ascending order, so we want the inverse of 2415 // the normal dominance relation. 2416 if (A == B) return false; 2417 if (DT->properlyDominates(A, B)) 2418 return true; 2419 else { 2420 assert(DT->properlyDominates(B, A) && 2421 "expected total dominance order!"); 2422 return false; 2423 } 2424 }); 2425 #ifdef ASSERT 2426 for (unsigned i = 1; i < ExitingBlocks.size(); i++) { 2427 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); 2428 } 2429 #endif 2430 2431 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) { 2432 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2433 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2434 auto *OldCond = BI->getCondition(); 2435 auto *NewCond = ConstantInt::get(OldCond->getType(), 2436 IsTaken ? ExitIfTrue : !ExitIfTrue); 2437 BI->setCondition(NewCond); 2438 if (OldCond->use_empty()) 2439 DeadInsts.emplace_back(OldCond); 2440 }; 2441 2442 bool Changed = false; 2443 SmallSet<const SCEV*, 8> DominatingExitCounts; 2444 for (BasicBlock *ExitingBB : ExitingBlocks) { 2445 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2446 if (isa<SCEVCouldNotCompute>(ExitCount)) { 2447 // Okay, we do not know the exit count here. Can we at least prove that it 2448 // will remain the same within iteration space? 2449 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2450 if (isTrivialCond(L, BI, SE, false)) { 2451 FoldExit(ExitingBB, false); 2452 Changed = true; 2453 } 2454 if (isTrivialCond(L, BI, SE, true)) { 2455 FoldExit(ExitingBB, true); 2456 Changed = true; 2457 } 2458 continue; 2459 } 2460 2461 // If we know we'd exit on the first iteration, rewrite the exit to 2462 // reflect this. This does not imply the loop must exit through this 2463 // exit; there may be an earlier one taken on the first iteration. 2464 // TODO: Given we know the backedge can't be taken, we should go ahead 2465 // and break it. Or at least, kill all the header phis and simplify. 2466 if (ExitCount->isZero()) { 2467 FoldExit(ExitingBB, true); 2468 Changed = true; 2469 continue; 2470 } 2471 2472 // If we end up with a pointer exit count, bail. Note that we can end up 2473 // with a pointer exit count for one exiting block, and not for another in 2474 // the same loop. 
2475 if (!ExitCount->getType()->isIntegerTy() || 2476 !MaxExitCount->getType()->isIntegerTy()) 2477 continue; 2478 2479 Type *WiderType = 2480 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); 2481 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); 2482 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); 2483 assert(MaxExitCount->getType() == ExitCount->getType()); 2484 2485 // Can we prove that some other exit must be taken strictly before this 2486 // one? 2487 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, 2488 MaxExitCount, ExitCount)) { 2489 FoldExit(ExitingBB, false); 2490 Changed = true; 2491 continue; 2492 } 2493 2494 // As we run, keep track of which exit counts we've encountered. If we 2495 // find a duplicate, we've found an exit which would have exited on the 2496 // exiting iteration, but (from the visit order) strictly follows another 2497 // which does the same and is thus dead. 2498 if (!DominatingExitCounts.insert(ExitCount).second) { 2499 FoldExit(ExitingBB, false); 2500 Changed = true; 2501 continue; 2502 } 2503 2504 // TODO: There might be another oppurtunity to leverage SCEV's reasoning 2505 // here. If we kept track of the min of dominanting exits so far, we could 2506 // discharge exits with EC >= MDEC. This is less powerful than the existing 2507 // transform (since later exits aren't considered), but potentially more 2508 // powerful for any case where SCEV can prove a >=u b, but neither a == b 2509 // or a >u b. Such a case is not currently known. 2510 } 2511 return Changed; 2512 } 2513 2514 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 2515 SmallVector<BasicBlock*, 16> ExitingBlocks; 2516 L->getExitingBlocks(ExitingBlocks); 2517 2518 // Finally, see if we can rewrite our exit conditions into a loop invariant 2519 // form. If we have a read-only loop, and we can tell that we must exit down 2520 // a path which does not need any of the values computed within the loop, we 2521 // can rewrite the loop to exit on the first iteration. Note that this 2522 // doesn't either a) tell us the loop exits on the first iteration (unless 2523 // *all* exits are predicateable) or b) tell us *which* exit might be taken. 2524 // This transformation looks a lot like a restricted form of dead loop 2525 // elimination, but restricted to read-only loops and without neccesssarily 2526 // needing to kill the loop entirely. 2527 if (!LoopPredication) 2528 return false; 2529 2530 if (!SE->hasLoopInvariantBackedgeTakenCount(L)) 2531 return false; 2532 2533 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 2534 // through *explicit* control flow. We have to eliminate the possibility of 2535 // implicit exits (see below) before we know it's truly exact. 2536 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 2537 if (isa<SCEVCouldNotCompute>(ExactBTC) || 2538 !SE->isLoopInvariant(ExactBTC, L) || 2539 !isSafeToExpand(ExactBTC, *SE)) 2540 return false; 2541 2542 // If we end up with a pointer exit count, bail. It may be unsized. 2543 if (!ExactBTC->getType()->isIntegerTy()) 2544 return false; 2545 2546 auto BadExit = [&](BasicBlock *ExitingBB) { 2547 // If our exiting block exits multiple loops, we can only rewrite the 2548 // innermost one. Otherwise, we're changing how many times the innermost 2549 // loop runs before it exits. 2550 if (LI->getLoopFor(ExitingBB) != L) 2551 return true; 2552 2553 // Can't rewrite non-branch yet. 
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
      BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
    if (!SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail. It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known to be
  // unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this?
I can obviously bail out for outer 2631 // loops, but that seems less than ideal. MemorySSA can find memory writes, 2632 // is that enough for *all* side effects? 2633 for (BasicBlock *BB : L->blocks()) 2634 for (auto &I : *BB) 2635 // TODO:isGuaranteedToTransfer 2636 if (I.mayHaveSideEffects() || I.mayThrow()) 2637 return false; 2638 2639 bool Changed = false; 2640 // Finally, do the actual predication for all predicatable blocks. A couple 2641 // of notes here: 2642 // 1) We don't bother to constant fold dominated exits with identical exit 2643 // counts; that's simply a form of CSE/equality propagation and we leave 2644 // it for dedicated passes. 2645 // 2) We insert the comparison at the branch. Hoisting introduces additional 2646 // legality constraints and we leave that to dedicated logic. We want to 2647 // predicate even if we can't insert a loop invariant expression as 2648 // peeling or unrolling will likely reduce the cost of the otherwise loop 2649 // varying check. 2650 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 2651 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 2652 Value *ExactBTCV = nullptr; // Lazily generated if needed. 2653 for (BasicBlock *ExitingBB : ExitingBlocks) { 2654 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2655 2656 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2657 Value *NewCond; 2658 if (ExitCount == ExactBTC) { 2659 NewCond = L->contains(BI->getSuccessor(0)) ? 2660 B.getFalse() : B.getTrue(); 2661 } else { 2662 Value *ECV = Rewriter.expandCodeFor(ExitCount); 2663 if (!ExactBTCV) 2664 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 2665 Value *RHS = ExactBTCV; 2666 if (ECV->getType() != RHS->getType()) { 2667 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 2668 ECV = B.CreateZExt(ECV, WiderTy); 2669 RHS = B.CreateZExt(RHS, WiderTy); 2670 } 2671 auto Pred = L->contains(BI->getSuccessor(0)) ? 2672 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 2673 NewCond = B.CreateICmp(Pred, ECV, RHS); 2674 } 2675 Value *OldCond = BI->getCondition(); 2676 BI->setCondition(NewCond); 2677 if (OldCond->use_empty()) 2678 DeadInsts.emplace_back(OldCond); 2679 Changed = true; 2680 } 2681 2682 return Changed; 2683 } 2684 2685 //===----------------------------------------------------------------------===// 2686 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2687 //===----------------------------------------------------------------------===// 2688 2689 bool IndVarSimplify::run(Loop *L) { 2690 // We need (and expect!) the incoming loop to be in LCSSA. 2691 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2692 "LCSSA required to run indvars!"); 2693 2694 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2695 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2696 // canonicalization can be a pessimization without LSR to "clean up" 2697 // afterwards. 2698 // - We depend on having a preheader; in particular, 2699 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2700 // and we're in trouble if we can't find the induction variable even when 2701 // we've manually inserted one. 2702 // - LFTR relies on having a single backedge. 
2703 if (!L->isLoopSimplifyForm()) 2704 return false; 2705 2706 #ifndef NDEBUG 2707 // Used below for a consistency check only 2708 // Note: Since the result returned by ScalarEvolution may depend on the order 2709 // in which previous results are added to its cache, the call to 2710 // getBackedgeTakenCount() may change following SCEV queries. 2711 const SCEV *BackedgeTakenCount; 2712 if (VerifyIndvars) 2713 BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2714 #endif 2715 2716 bool Changed = false; 2717 // If there are any floating-point recurrences, attempt to 2718 // transform them to use integer recurrences. 2719 Changed |= rewriteNonIntegerIVs(L); 2720 2721 // Create a rewriter object which we'll use to transform the code with. 2722 SCEVExpander Rewriter(*SE, DL, "indvars"); 2723 #ifndef NDEBUG 2724 Rewriter.setDebugType(DEBUG_TYPE); 2725 #endif 2726 2727 // Eliminate redundant IV users. 2728 // 2729 // Simplification works best when run before other consumers of SCEV. We 2730 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2731 // other expressions involving loop IVs have been evaluated. This helps SCEV 2732 // set no-wrap flags before normalizing sign/zero extension. 2733 Rewriter.disableCanonicalMode(); 2734 Changed |= simplifyAndExtend(L, Rewriter, LI); 2735 2736 // Check to see if we can compute the final value of any expressions 2737 // that are recurrent in the loop, and substitute the exit values from the 2738 // loop into any instructions outside of the loop that use the final values 2739 // of the current expressions. 2740 if (ReplaceExitValue != NeverRepl) { 2741 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, 2742 ReplaceExitValue, DeadInsts)) { 2743 NumReplaced += Rewrites; 2744 Changed = true; 2745 } 2746 } 2747 2748 // Eliminate redundant IV cycles. 2749 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2750 2751 // Try to eliminate loop exits based on analyzeable exit counts 2752 if (optimizeLoopExits(L, Rewriter)) { 2753 Changed = true; 2754 // Given we've changed exit counts, notify SCEV 2755 SE->forgetLoop(L); 2756 } 2757 2758 // Try to form loop invariant tests for loop exits by changing how many 2759 // iterations of the loop run when that is unobservable. 2760 if (predicateLoopExits(L, Rewriter)) { 2761 Changed = true; 2762 // Given we've changed exit counts, notify SCEV 2763 SE->forgetLoop(L); 2764 } 2765 2766 // If we have a trip count expression, rewrite the loop's exit condition 2767 // using it. 2768 if (!DisableLFTR) { 2769 BasicBlock *PreHeader = L->getLoopPreheader(); 2770 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); 2771 2772 SmallVector<BasicBlock*, 16> ExitingBlocks; 2773 L->getExitingBlocks(ExitingBlocks); 2774 for (BasicBlock *ExitingBB : ExitingBlocks) { 2775 // Can't rewrite non-branch yet. 2776 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2777 continue; 2778 2779 // If our exitting block exits multiple loops, we can only rewrite the 2780 // innermost one. Otherwise, we're changing how many times the innermost 2781 // loop runs before it exits. 
2782 if (LI->getLoopFor(ExitingBB) != L) 2783 continue; 2784 2785 if (!needsLFTR(L, ExitingBB)) 2786 continue; 2787 2788 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2789 if (isa<SCEVCouldNotCompute>(ExitCount)) 2790 continue; 2791 2792 // This was handled above, but as we form SCEVs, we can sometimes refine 2793 // existing ones; this allows exit counts to be folded to zero which 2794 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2795 // until stable to handle cases like this better. 2796 if (ExitCount->isZero()) 2797 continue; 2798 2799 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2800 if (!IndVar) 2801 continue; 2802 2803 // Avoid high cost expansions. Note: This heuristic is questionable in 2804 // that our definition of "high cost" is not exactly principled. 2805 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, 2806 TTI, PreHeaderBR)) 2807 continue; 2808 2809 // Check preconditions for proper SCEVExpander operation. SCEV does not 2810 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2811 // any pass that uses the SCEVExpander must do it. This does not work 2812 // well for loop passes because SCEVExpander makes assumptions about 2813 // all loops, while LoopPassManager only forces the current loop to be 2814 // simplified. 2815 // 2816 // FIXME: SCEV expansion has no way to bail out, so the caller must 2817 // explicitly check any assumptions made by SCEV. Brittle. 2818 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2819 if (!AR || AR->getLoop()->getLoopPreheader()) 2820 Changed |= linearFunctionTestReplace(L, ExitingBB, 2821 ExitCount, IndVar, 2822 Rewriter); 2823 } 2824 } 2825 // Clear the rewriter cache, because values that are in the rewriter's cache 2826 // can be deleted in the loop below, causing the AssertingVH in the cache to 2827 // trigger. 2828 Rewriter.clear(); 2829 2830 // Now that we're done iterating through lists, clean up any instructions 2831 // which are now dead. 2832 while (!DeadInsts.empty()) 2833 if (Instruction *Inst = 2834 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2835 Changed |= 2836 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); 2837 2838 // The Rewriter may not be used from this point on. 2839 2840 // Loop-invariant instructions in the preheader that aren't used in the 2841 // loop may be sunk below the loop to reduce register pressure. 2842 Changed |= sinkUnusedInvariants(L); 2843 2844 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2845 // trip count and therefore can further simplify exit values in addition to 2846 // rewriteLoopExitValues. 2847 Changed |= rewriteFirstIterationLoopExitValues(L); 2848 2849 // Clean up dead instructions. 2850 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get()); 2851 2852 // Check a post-condition. 2853 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2854 "Indvars did not preserve LCSSA!"); 2855 2856 // Verify that LFTR, and any other change have not interfered with SCEV's 2857 // ability to compute trip count. We may have *changed* the exit count, but 2858 // only by reducing it. 
2859 #ifndef NDEBUG 2860 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2861 SE->forgetLoop(L); 2862 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2863 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2864 SE->getTypeSizeInBits(NewBECount->getType())) 2865 NewBECount = SE->getTruncateOrNoop(NewBECount, 2866 BackedgeTakenCount->getType()); 2867 else 2868 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2869 NewBECount->getType()); 2870 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, 2871 NewBECount) && "indvars must preserve SCEV"); 2872 } 2873 if (VerifyMemorySSA && MSSAU) 2874 MSSAU->getMemorySSA()->verifyMemorySSA(); 2875 #endif 2876 2877 return Changed; 2878 } 2879 2880 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2881 LoopStandardAnalysisResults &AR, 2882 LPMUpdater &) { 2883 Function *F = L.getHeader()->getParent(); 2884 const DataLayout &DL = F->getParent()->getDataLayout(); 2885 2886 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA); 2887 if (!IVS.run(&L)) 2888 return PreservedAnalyses::all(); 2889 2890 auto PA = getLoopPassPreservedAnalyses(); 2891 PA.preserveSet<CFGAnalyses>(); 2892 if (AR.MSSA) 2893 PA.preserve<MemorySSAAnalysis>(); 2894 return PA; 2895 } 2896 2897 namespace { 2898 2899 struct IndVarSimplifyLegacyPass : public LoopPass { 2900 static char ID; // Pass identification, replacement for typeid 2901 2902 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2903 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2904 } 2905 2906 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2907 if (skipLoop(L)) 2908 return false; 2909 2910 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2911 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2912 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2913 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2914 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 2915 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2916 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2917 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2918 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 2919 MemorySSA *MSSA = nullptr; 2920 if (MSSAAnalysis) 2921 MSSA = &MSSAAnalysis->getMSSA(); 2922 2923 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA); 2924 return IVS.run(L); 2925 } 2926 2927 void getAnalysisUsage(AnalysisUsage &AU) const override { 2928 AU.setPreservesCFG(); 2929 AU.addPreserved<MemorySSAWrapperPass>(); 2930 getLoopAnalysisUsage(AU); 2931 } 2932 }; 2933 2934 } // end anonymous namespace 2935 2936 char IndVarSimplifyLegacyPass::ID = 0; 2937 2938 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2939 "Induction Variable Simplification", false, false) 2940 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2941 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2942 "Induction Variable Simplification", false, false) 2943 2944 Pass *llvm::createIndVarSimplifyPass() { 2945 return new IndVarSimplifyLegacyPass(); 2946 } 2947
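
// Example invocation of this pass (assumed typical 'opt' usage; illustrative
// only, not part of the pass itself):
//   opt -passes=indvars -S input.ll   ; new pass manager
//   opt -indvars -S input.ll          ; legacy pass manager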