1 //===-- SimplifyIndVar.cpp - Induction variable simplification ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements induction variable simplification. It does 10 // not define any actual pass or policy, but provides a single function to 11 // simplify a loop's induction variables based on ScalarEvolution. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/Analysis/LoopInfo.h" 19 #include "llvm/IR/Dominators.h" 20 #include "llvm/IR/IRBuilder.h" 21 #include "llvm/IR/Instructions.h" 22 #include "llvm/IR/IntrinsicInst.h" 23 #include "llvm/IR/PatternMatch.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include "llvm/Transforms/Utils/Local.h" 27 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "indvars" 32 33 STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); 34 STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); 35 STATISTIC(NumFoldedUser, "Number of IV users folded into a constant"); 36 STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); 37 STATISTIC( 38 NumSimplifiedSDiv, 39 "Number of IV signed division operations converted to unsigned division"); 40 STATISTIC( 41 NumSimplifiedSRem, 42 "Number of IV signed remainder operations converted to unsigned remainder"); 43 STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); 44 45 namespace { 46 /// This is a utility for simplifying induction variables 47 /// based on ScalarEvolution. It is the primary instrument of the 48 /// IndvarSimplify pass, but it may also be directly invoked to cleanup after 49 /// other loop passes that preserve SCEV. 50 class SimplifyIndvar { 51 Loop *L; 52 LoopInfo *LI; 53 ScalarEvolution *SE; 54 DominatorTree *DT; 55 const TargetTransformInfo *TTI; 56 SCEVExpander &Rewriter; 57 SmallVectorImpl<WeakTrackingVH> &DeadInsts; 58 59 bool Changed = false; 60 61 public: 62 SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, 63 LoopInfo *LI, const TargetTransformInfo *TTI, 64 SCEVExpander &Rewriter, 65 SmallVectorImpl<WeakTrackingVH> &Dead) 66 : L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter), 67 DeadInsts(Dead) { 68 assert(LI && "IV simplification requires LoopInfo"); 69 } 70 71 bool hasChanged() const { return Changed; } 72 73 /// Iteratively perform simplification on a worklist of users of the 74 /// specified induction variable. This is the top-level driver that applies 75 /// all simplifications to users of an IV. 76 void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); 77 78 Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); 79 80 bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); 81 bool replaceIVUserWithLoopInvariant(Instruction *UseInst); 82 83 bool eliminateOverflowIntrinsic(WithOverflowInst *WO); 84 bool eliminateSaturatingIntrinsic(SaturatingInst *SI); 85 bool eliminateTrunc(TruncInst *TI); 86 bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); 87 bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand); 88 void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); 89 void simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, 90 bool IsSigned); 91 void replaceRemWithNumerator(BinaryOperator *Rem); 92 void replaceRemWithNumeratorOrZero(BinaryOperator *Rem); 93 void replaceSRemWithURem(BinaryOperator *Rem); 94 bool eliminateSDiv(BinaryOperator *SDiv); 95 bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand); 96 bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand); 97 }; 98 } 99 100 /// Find a point in code which dominates all given instructions. We can safely 101 /// assume that, whatever fact we can prove at the found point, this fact is 102 /// also true for each of the given instructions. 103 static Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions, 104 DominatorTree &DT) { 105 Instruction *CommonDom = nullptr; 106 for (auto *Insn : Instructions) 107 if (!CommonDom || DT.dominates(Insn, CommonDom)) 108 CommonDom = Insn; 109 else if (!DT.dominates(CommonDom, Insn)) 110 // If there is no dominance relation, use common dominator. 111 CommonDom = 112 DT.findNearestCommonDominator(CommonDom->getParent(), 113 Insn->getParent())->getTerminator(); 114 assert(CommonDom && "Common dominator not found?"); 115 return CommonDom; 116 } 117 118 /// Fold an IV operand into its use. This removes increments of an 119 /// aligned IV when used by a instruction that ignores the low bits. 120 /// 121 /// IVOperand is guaranteed SCEVable, but UseInst may not be. 122 /// 123 /// Return the operand of IVOperand for this induction variable if IVOperand can 124 /// be folded (in case more folding opportunities have been exposed). 125 /// Otherwise return null. 126 Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { 127 Value *IVSrc = nullptr; 128 const unsigned OperIdx = 0; 129 const SCEV *FoldedExpr = nullptr; 130 bool MustDropExactFlag = false; 131 switch (UseInst->getOpcode()) { 132 default: 133 return nullptr; 134 case Instruction::UDiv: 135 case Instruction::LShr: 136 // We're only interested in the case where we know something about 137 // the numerator and have a constant denominator. 138 if (IVOperand != UseInst->getOperand(OperIdx) || 139 !isa<ConstantInt>(UseInst->getOperand(1))) 140 return nullptr; 141 142 // Attempt to fold a binary operator with constant operand. 143 // e.g. ((I + 1) >> 2) => I >> 2 144 if (!isa<BinaryOperator>(IVOperand) 145 || !isa<ConstantInt>(IVOperand->getOperand(1))) 146 return nullptr; 147 148 IVSrc = IVOperand->getOperand(0); 149 // IVSrc must be the (SCEVable) IV, since the other operand is const. 150 assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand"); 151 152 ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1)); 153 if (UseInst->getOpcode() == Instruction::LShr) { 154 // Get a constant for the divisor. See createSCEV. 155 uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); 156 if (D->getValue().uge(BitWidth)) 157 return nullptr; 158 159 D = ConstantInt::get(UseInst->getContext(), 160 APInt::getOneBitSet(BitWidth, D->getZExtValue())); 161 } 162 FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D)); 163 // We might have 'exact' flag set at this point which will no longer be 164 // correct after we make the replacement. 165 if (UseInst->isExact() && 166 SE->getSCEV(IVSrc) != SE->getMulExpr(FoldedExpr, SE->getSCEV(D))) 167 MustDropExactFlag = true; 168 } 169 // We have something that might fold it's operand. Compare SCEVs. 170 if (!SE->isSCEVable(UseInst->getType())) 171 return nullptr; 172 173 // Bypass the operand if SCEV can prove it has no effect. 174 if (SE->getSCEV(UseInst) != FoldedExpr) 175 return nullptr; 176 177 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand 178 << " -> " << *UseInst << '\n'); 179 180 UseInst->setOperand(OperIdx, IVSrc); 181 assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); 182 183 if (MustDropExactFlag) 184 UseInst->dropPoisonGeneratingFlags(); 185 186 ++NumElimOperand; 187 Changed = true; 188 if (IVOperand->use_empty()) 189 DeadInsts.emplace_back(IVOperand); 190 return IVSrc; 191 } 192 193 bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, 194 Value *IVOperand) { 195 unsigned IVOperIdx = 0; 196 ICmpInst::Predicate Pred = ICmp->getPredicate(); 197 if (IVOperand != ICmp->getOperand(0)) { 198 // Swapped 199 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); 200 IVOperIdx = 1; 201 Pred = ICmpInst::getSwappedPredicate(Pred); 202 } 203 204 // Get the SCEVs for the ICmp operands (in the specific context of the 205 // current loop) 206 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); 207 const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); 208 const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); 209 210 auto *PN = dyn_cast<PHINode>(IVOperand); 211 if (!PN) 212 return false; 213 auto LIP = SE->getLoopInvariantPredicate(Pred, S, X, L); 214 if (!LIP) 215 return false; 216 ICmpInst::Predicate InvariantPredicate = LIP->Pred; 217 const SCEV *InvariantLHS = LIP->LHS; 218 const SCEV *InvariantRHS = LIP->RHS; 219 220 // Rewrite the comparison to a loop invariant comparison if it can be done 221 // cheaply, where cheaply means "we don't need to emit any new 222 // instructions". 223 224 SmallDenseMap<const SCEV*, Value*> CheapExpansions; 225 CheapExpansions[S] = ICmp->getOperand(IVOperIdx); 226 CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx); 227 228 // TODO: Support multiple entry loops? (We currently bail out of these in 229 // the IndVarSimplify pass) 230 if (auto *BB = L->getLoopPredecessor()) { 231 const int Idx = PN->getBasicBlockIndex(BB); 232 if (Idx >= 0) { 233 Value *Incoming = PN->getIncomingValue(Idx); 234 const SCEV *IncomingS = SE->getSCEV(Incoming); 235 CheapExpansions[IncomingS] = Incoming; 236 } 237 } 238 Value *NewLHS = CheapExpansions[InvariantLHS]; 239 Value *NewRHS = CheapExpansions[InvariantRHS]; 240 241 if (!NewLHS) 242 if (auto *ConstLHS = dyn_cast<SCEVConstant>(InvariantLHS)) 243 NewLHS = ConstLHS->getValue(); 244 if (!NewRHS) 245 if (auto *ConstRHS = dyn_cast<SCEVConstant>(InvariantRHS)) 246 NewRHS = ConstRHS->getValue(); 247 248 if (!NewLHS || !NewRHS) 249 // We could not find an existing value to replace either LHS or RHS. 250 // Generating new instructions has subtler tradeoffs, so avoid doing that 251 // for now. 252 return false; 253 254 LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); 255 ICmp->setPredicate(InvariantPredicate); 256 ICmp->setOperand(0, NewLHS); 257 ICmp->setOperand(1, NewRHS); 258 return true; 259 } 260 261 /// SimplifyIVUsers helper for eliminating useless 262 /// comparisons against an induction variable. 263 void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { 264 unsigned IVOperIdx = 0; 265 ICmpInst::Predicate Pred = ICmp->getPredicate(); 266 ICmpInst::Predicate OriginalPred = Pred; 267 if (IVOperand != ICmp->getOperand(0)) { 268 // Swapped 269 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); 270 IVOperIdx = 1; 271 Pred = ICmpInst::getSwappedPredicate(Pred); 272 } 273 274 // Get the SCEVs for the ICmp operands (in the specific context of the 275 // current loop) 276 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); 277 const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); 278 const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); 279 280 // If the condition is always true or always false in the given context, 281 // replace it with a constant value. 282 SmallVector<Instruction *, 4> Users; 283 for (auto *U : ICmp->users()) 284 Users.push_back(cast<Instruction>(U)); 285 const Instruction *CtxI = findCommonDominator(Users, *DT); 286 if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) { 287 ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev)); 288 DeadInsts.emplace_back(ICmp); 289 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); 290 } else if (makeIVComparisonInvariant(ICmp, IVOperand)) { 291 // fallthrough to end of function 292 } else if (ICmpInst::isSigned(OriginalPred) && 293 SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) { 294 // If we were unable to make anything above, all we can is to canonicalize 295 // the comparison hoping that it will open the doors for other 296 // optimizations. If we find out that we compare two non-negative values, 297 // we turn the instruction's predicate to its unsigned version. Note that 298 // we cannot rely on Pred here unless we check if we have swapped it. 299 assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?"); 300 LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp 301 << '\n'); 302 ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred)); 303 } else 304 return; 305 306 ++NumElimCmp; 307 Changed = true; 308 } 309 310 bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) { 311 // Get the SCEVs for the ICmp operands. 312 auto *N = SE->getSCEV(SDiv->getOperand(0)); 313 auto *D = SE->getSCEV(SDiv->getOperand(1)); 314 315 // Simplify unnecessary loops away. 316 const Loop *L = LI->getLoopFor(SDiv->getParent()); 317 N = SE->getSCEVAtScope(N, L); 318 D = SE->getSCEVAtScope(D, L); 319 320 // Replace sdiv by udiv if both of the operands are non-negative 321 if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) { 322 auto *UDiv = BinaryOperator::Create( 323 BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1), 324 SDiv->getName() + ".udiv", SDiv); 325 UDiv->setIsExact(SDiv->isExact()); 326 SDiv->replaceAllUsesWith(UDiv); 327 LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n'); 328 ++NumSimplifiedSDiv; 329 Changed = true; 330 DeadInsts.push_back(SDiv); 331 return true; 332 } 333 334 return false; 335 } 336 337 // i %s n -> i %u n if i >= 0 and n >= 0 338 void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) { 339 auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); 340 auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D, 341 Rem->getName() + ".urem", Rem); 342 Rem->replaceAllUsesWith(URem); 343 LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n'); 344 ++NumSimplifiedSRem; 345 Changed = true; 346 DeadInsts.emplace_back(Rem); 347 } 348 349 // i % n --> i if i is in [0,n). 350 void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) { 351 Rem->replaceAllUsesWith(Rem->getOperand(0)); 352 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); 353 ++NumElimRem; 354 Changed = true; 355 DeadInsts.emplace_back(Rem); 356 } 357 358 // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). 359 void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) { 360 auto *T = Rem->getType(); 361 auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); 362 ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D); 363 SelectInst *Sel = 364 SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem); 365 Rem->replaceAllUsesWith(Sel); 366 LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); 367 ++NumElimRem; 368 Changed = true; 369 DeadInsts.emplace_back(Rem); 370 } 371 372 /// SimplifyIVUsers helper for eliminating useless remainder operations 373 /// operating on an induction variable or replacing srem by urem. 374 void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, 375 bool IsSigned) { 376 auto *NValue = Rem->getOperand(0); 377 auto *DValue = Rem->getOperand(1); 378 // We're only interested in the case where we know something about 379 // the numerator, unless it is a srem, because we want to replace srem by urem 380 // in general. 381 bool UsedAsNumerator = IVOperand == NValue; 382 if (!UsedAsNumerator && !IsSigned) 383 return; 384 385 const SCEV *N = SE->getSCEV(NValue); 386 387 // Simplify unnecessary loops away. 388 const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); 389 N = SE->getSCEVAtScope(N, ICmpLoop); 390 391 bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N); 392 393 // Do not proceed if the Numerator may be negative 394 if (!IsNumeratorNonNegative) 395 return; 396 397 const SCEV *D = SE->getSCEV(DValue); 398 D = SE->getSCEVAtScope(D, ICmpLoop); 399 400 if (UsedAsNumerator) { 401 auto LT = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; 402 if (SE->isKnownPredicate(LT, N, D)) { 403 replaceRemWithNumerator(Rem); 404 return; 405 } 406 407 auto *T = Rem->getType(); 408 const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T)); 409 if (SE->isKnownPredicate(LT, NLessOne, D)) { 410 replaceRemWithNumeratorOrZero(Rem); 411 return; 412 } 413 } 414 415 // Try to replace SRem with URem, if both N and D are known non-negative. 416 // Since we had already check N, we only need to check D now 417 if (!IsSigned || !SE->isKnownNonNegative(D)) 418 return; 419 420 replaceSRemWithURem(Rem); 421 } 422 423 bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) { 424 const SCEV *LHS = SE->getSCEV(WO->getLHS()); 425 const SCEV *RHS = SE->getSCEV(WO->getRHS()); 426 if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS)) 427 return false; 428 429 // Proved no overflow, nuke the overflow check and, if possible, the overflow 430 // intrinsic as well. 431 432 BinaryOperator *NewResult = BinaryOperator::Create( 433 WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO); 434 435 if (WO->isSigned()) 436 NewResult->setHasNoSignedWrap(true); 437 else 438 NewResult->setHasNoUnsignedWrap(true); 439 440 SmallVector<ExtractValueInst *, 4> ToDelete; 441 442 for (auto *U : WO->users()) { 443 if (auto *EVI = dyn_cast<ExtractValueInst>(U)) { 444 if (EVI->getIndices()[0] == 1) 445 EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext())); 446 else { 447 assert(EVI->getIndices()[0] == 0 && "Only two possibilities!"); 448 EVI->replaceAllUsesWith(NewResult); 449 } 450 ToDelete.push_back(EVI); 451 } 452 } 453 454 for (auto *EVI : ToDelete) 455 EVI->eraseFromParent(); 456 457 if (WO->use_empty()) 458 WO->eraseFromParent(); 459 460 Changed = true; 461 return true; 462 } 463 464 bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) { 465 const SCEV *LHS = SE->getSCEV(SI->getLHS()); 466 const SCEV *RHS = SE->getSCEV(SI->getRHS()); 467 if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS)) 468 return false; 469 470 BinaryOperator *BO = BinaryOperator::Create( 471 SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI); 472 if (SI->isSigned()) 473 BO->setHasNoSignedWrap(); 474 else 475 BO->setHasNoUnsignedWrap(); 476 477 SI->replaceAllUsesWith(BO); 478 DeadInsts.emplace_back(SI); 479 Changed = true; 480 return true; 481 } 482 483 bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { 484 // It is always legal to replace 485 // icmp <pred> i32 trunc(iv), n 486 // with 487 // icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate. 488 // Or with 489 // icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate. 490 // Or with either of these if pred is an equality predicate. 491 // 492 // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for 493 // every comparison which uses trunc, it means that we can replace each of 494 // them with comparison of iv against sext/zext(n). We no longer need trunc 495 // after that. 496 // 497 // TODO: Should we do this if we can widen *some* comparisons, but not all 498 // of them? Sometimes it is enough to enable other optimizations, but the 499 // trunc instruction will stay in the loop. 500 Value *IV = TI->getOperand(0); 501 Type *IVTy = IV->getType(); 502 const SCEV *IVSCEV = SE->getSCEV(IV); 503 const SCEV *TISCEV = SE->getSCEV(TI); 504 505 // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can 506 // get rid of trunc 507 bool DoesSExtCollapse = false; 508 bool DoesZExtCollapse = false; 509 if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy)) 510 DoesSExtCollapse = true; 511 if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy)) 512 DoesZExtCollapse = true; 513 514 // If neither sext nor zext does collapse, it is not profitable to do any 515 // transform. Bail. 516 if (!DoesSExtCollapse && !DoesZExtCollapse) 517 return false; 518 519 // Collect users of the trunc that look like comparisons against invariants. 520 // Bail if we find something different. 521 SmallVector<ICmpInst *, 4> ICmpUsers; 522 for (auto *U : TI->users()) { 523 // We don't care about users in unreachable blocks. 524 if (isa<Instruction>(U) && 525 !DT->isReachableFromEntry(cast<Instruction>(U)->getParent())) 526 continue; 527 ICmpInst *ICI = dyn_cast<ICmpInst>(U); 528 if (!ICI) return false; 529 assert(L->contains(ICI->getParent()) && "LCSSA form broken?"); 530 if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) && 531 !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0)))) 532 return false; 533 // If we cannot get rid of trunc, bail. 534 if (ICI->isSigned() && !DoesSExtCollapse) 535 return false; 536 if (ICI->isUnsigned() && !DoesZExtCollapse) 537 return false; 538 // For equality, either signed or unsigned works. 539 ICmpUsers.push_back(ICI); 540 } 541 542 auto CanUseZExt = [&](ICmpInst *ICI) { 543 // Unsigned comparison can be widened as unsigned. 544 if (ICI->isUnsigned()) 545 return true; 546 // Is it profitable to do zext? 547 if (!DoesZExtCollapse) 548 return false; 549 // For equality, we can safely zext both parts. 550 if (ICI->isEquality()) 551 return true; 552 // Otherwise we can only use zext when comparing two non-negative or two 553 // negative values. But in practice, we will never pass DoesZExtCollapse 554 // check for a negative value, because zext(trunc(x)) is non-negative. So 555 // it only make sense to check for non-negativity here. 556 const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0)); 557 const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1)); 558 return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2); 559 }; 560 // Replace all comparisons against trunc with comparisons against IV. 561 for (auto *ICI : ICmpUsers) { 562 bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0)); 563 auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1); 564 Instruction *Ext = nullptr; 565 // For signed/unsigned predicate, replace the old comparison with comparison 566 // of immediate IV against sext/zext of the invariant argument. If we can 567 // use either sext or zext (i.e. we are dealing with equality predicate), 568 // then prefer zext as a more canonical form. 569 // TODO: If we see a signed comparison which can be turned into unsigned, 570 // we can do it here for canonicalization purposes. 571 ICmpInst::Predicate Pred = ICI->getPredicate(); 572 if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred); 573 if (CanUseZExt(ICI)) { 574 assert(DoesZExtCollapse && "Unprofitable zext?"); 575 Ext = new ZExtInst(Op1, IVTy, "zext", ICI); 576 Pred = ICmpInst::getUnsignedPredicate(Pred); 577 } else { 578 assert(DoesSExtCollapse && "Unprofitable sext?"); 579 Ext = new SExtInst(Op1, IVTy, "sext", ICI); 580 assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!"); 581 } 582 bool Changed; 583 L->makeLoopInvariant(Ext, Changed); 584 (void)Changed; 585 ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext); 586 ICI->replaceAllUsesWith(NewICI); 587 DeadInsts.emplace_back(ICI); 588 } 589 590 // Trunc no longer needed. 591 TI->replaceAllUsesWith(UndefValue::get(TI->getType())); 592 DeadInsts.emplace_back(TI); 593 return true; 594 } 595 596 /// Eliminate an operation that consumes a simple IV and has no observable 597 /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, 598 /// but UseInst may not be. 599 bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, 600 Instruction *IVOperand) { 601 if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { 602 eliminateIVComparison(ICmp, IVOperand); 603 return true; 604 } 605 if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) { 606 bool IsSRem = Bin->getOpcode() == Instruction::SRem; 607 if (IsSRem || Bin->getOpcode() == Instruction::URem) { 608 simplifyIVRemainder(Bin, IVOperand, IsSRem); 609 return true; 610 } 611 612 if (Bin->getOpcode() == Instruction::SDiv) 613 return eliminateSDiv(Bin); 614 } 615 616 if (auto *WO = dyn_cast<WithOverflowInst>(UseInst)) 617 if (eliminateOverflowIntrinsic(WO)) 618 return true; 619 620 if (auto *SI = dyn_cast<SaturatingInst>(UseInst)) 621 if (eliminateSaturatingIntrinsic(SI)) 622 return true; 623 624 if (auto *TI = dyn_cast<TruncInst>(UseInst)) 625 if (eliminateTrunc(TI)) 626 return true; 627 628 if (eliminateIdentitySCEV(UseInst, IVOperand)) 629 return true; 630 631 return false; 632 } 633 634 static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) { 635 if (auto *BB = L->getLoopPreheader()) 636 return BB->getTerminator(); 637 638 return Hint; 639 } 640 641 /// Replace the UseInst with a loop invariant expression if it is safe. 642 bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { 643 if (!SE->isSCEVable(I->getType())) 644 return false; 645 646 // Get the symbolic expression for this instruction. 647 const SCEV *S = SE->getSCEV(I); 648 649 if (!SE->isLoopInvariant(S, L)) 650 return false; 651 652 // Do not generate something ridiculous even if S is loop invariant. 653 if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I)) 654 return false; 655 656 auto *IP = GetLoopInvariantInsertPosition(L, I); 657 658 if (!isSafeToExpandAt(S, IP, *SE)) { 659 LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I 660 << " with non-speculable loop invariant: " << *S << '\n'); 661 return false; 662 } 663 664 auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP); 665 666 I->replaceAllUsesWith(Invariant); 667 LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I 668 << " with loop invariant: " << *S << '\n'); 669 ++NumFoldedUser; 670 Changed = true; 671 DeadInsts.emplace_back(I); 672 return true; 673 } 674 675 /// Eliminate any operation that SCEV can prove is an identity function. 676 bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, 677 Instruction *IVOperand) { 678 if (!SE->isSCEVable(UseInst->getType()) || 679 (UseInst->getType() != IVOperand->getType()) || 680 (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) 681 return false; 682 683 // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the 684 // dominator tree, even if X is an operand to Y. For instance, in 685 // 686 // %iv = phi i32 {0,+,1} 687 // br %cond, label %left, label %merge 688 // 689 // left: 690 // %X = add i32 %iv, 0 691 // br label %merge 692 // 693 // merge: 694 // %M = phi (%X, %iv) 695 // 696 // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and 697 // %M.replaceAllUsesWith(%X) would be incorrect. 698 699 if (isa<PHINode>(UseInst)) 700 // If UseInst is not a PHI node then we know that IVOperand dominates 701 // UseInst directly from the legality of SSA. 702 if (!DT || !DT->dominates(IVOperand, UseInst)) 703 return false; 704 705 if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand)) 706 return false; 707 708 LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); 709 710 UseInst->replaceAllUsesWith(IVOperand); 711 ++NumElimIdentity; 712 Changed = true; 713 DeadInsts.emplace_back(UseInst); 714 return true; 715 } 716 717 /// Annotate BO with nsw / nuw if it provably does not signed-overflow / 718 /// unsigned-overflow. Returns true if anything changed, false otherwise. 719 bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, 720 Value *IVOperand) { 721 SCEV::NoWrapFlags Flags; 722 bool Deduced; 723 std::tie(Flags, Deduced) = SE->getStrengthenedNoWrapFlagsFromBinOp( 724 cast<OverflowingBinaryOperator>(BO)); 725 726 if (!Deduced) 727 return Deduced; 728 729 BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNUW) == 730 SCEV::FlagNUW); 731 BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNSW) == 732 SCEV::FlagNSW); 733 734 // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap 735 // flags on addrecs while performing zero/sign extensions. We could call 736 // forgetValue() here to make sure those flags also propagate to any other 737 // SCEV expressions based on the addrec. However, this can have pathological 738 // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384. 739 return Deduced; 740 } 741 742 /// Annotate the Shr in (X << IVOperand) >> C as exact using the 743 /// information from the IV's range. Returns true if anything changed, false 744 /// otherwise. 745 bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, 746 Value *IVOperand) { 747 using namespace llvm::PatternMatch; 748 749 if (BO->getOpcode() == Instruction::Shl) { 750 bool Changed = false; 751 ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand)); 752 for (auto *U : BO->users()) { 753 const APInt *C; 754 if (match(U, 755 m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) || 756 match(U, 757 m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) { 758 BinaryOperator *Shr = cast<BinaryOperator>(U); 759 if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) { 760 Shr->setIsExact(true); 761 Changed = true; 762 } 763 } 764 } 765 return Changed; 766 } 767 768 return false; 769 } 770 771 /// Add all uses of Def to the current IV's worklist. 772 static void pushIVUsers( 773 Instruction *Def, Loop *L, 774 SmallPtrSet<Instruction*,16> &Simplified, 775 SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { 776 777 for (User *U : Def->users()) { 778 Instruction *UI = cast<Instruction>(U); 779 780 // Avoid infinite or exponential worklist processing. 781 // Also ensure unique worklist users. 782 // If Def is a LoopPhi, it may not be in the Simplified set, so check for 783 // self edges first. 784 if (UI == Def) 785 continue; 786 787 // Only change the current Loop, do not change the other parts (e.g. other 788 // Loops). 789 if (!L->contains(UI)) 790 continue; 791 792 // Do not push the same instruction more than once. 793 if (!Simplified.insert(UI).second) 794 continue; 795 796 SimpleIVUsers.push_back(std::make_pair(UI, Def)); 797 } 798 } 799 800 /// Return true if this instruction generates a simple SCEV 801 /// expression in terms of that IV. 802 /// 803 /// This is similar to IVUsers' isInteresting() but processes each instruction 804 /// non-recursively when the operand is already known to be a simpleIVUser. 805 /// 806 static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { 807 if (!SE->isSCEVable(I->getType())) 808 return false; 809 810 // Get the symbolic expression for this instruction. 811 const SCEV *S = SE->getSCEV(I); 812 813 // Only consider affine recurrences. 814 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); 815 if (AR && AR->getLoop() == L) 816 return true; 817 818 return false; 819 } 820 821 /// Iteratively perform simplification on a worklist of users 822 /// of the specified induction variable. Each successive simplification may push 823 /// more users which may themselves be candidates for simplification. 824 /// 825 /// This algorithm does not require IVUsers analysis. Instead, it simplifies 826 /// instructions in-place during analysis. Rather than rewriting induction 827 /// variables bottom-up from their users, it transforms a chain of IVUsers 828 /// top-down, updating the IR only when it encounters a clear optimization 829 /// opportunity. 830 /// 831 /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. 832 /// 833 void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { 834 if (!SE->isSCEVable(CurrIV->getType())) 835 return; 836 837 // Instructions processed by SimplifyIndvar for CurrIV. 838 SmallPtrSet<Instruction*,16> Simplified; 839 840 // Use-def pairs if IV users waiting to be processed for CurrIV. 841 SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; 842 843 // Push users of the current LoopPhi. In rare cases, pushIVUsers may be 844 // called multiple times for the same LoopPhi. This is the proper thing to 845 // do for loop header phis that use each other. 846 pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers); 847 848 while (!SimpleIVUsers.empty()) { 849 std::pair<Instruction*, Instruction*> UseOper = 850 SimpleIVUsers.pop_back_val(); 851 Instruction *UseInst = UseOper.first; 852 853 // If a user of the IndVar is trivially dead, we prefer just to mark it dead 854 // rather than try to do some complex analysis or transformation (such as 855 // widening) basing on it. 856 // TODO: Propagate TLI and pass it here to handle more cases. 857 if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) { 858 DeadInsts.emplace_back(UseInst); 859 continue; 860 } 861 862 // Bypass back edges to avoid extra work. 863 if (UseInst == CurrIV) continue; 864 865 // Try to replace UseInst with a loop invariant before any other 866 // simplifications. 867 if (replaceIVUserWithLoopInvariant(UseInst)) 868 continue; 869 870 Instruction *IVOperand = UseOper.second; 871 for (unsigned N = 0; IVOperand; ++N) { 872 assert(N <= Simplified.size() && "runaway iteration"); 873 874 Value *NewOper = foldIVUser(UseInst, IVOperand); 875 if (!NewOper) 876 break; // done folding 877 IVOperand = dyn_cast<Instruction>(NewOper); 878 } 879 if (!IVOperand) 880 continue; 881 882 if (eliminateIVUser(UseInst, IVOperand)) { 883 pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); 884 continue; 885 } 886 887 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) { 888 if ((isa<OverflowingBinaryOperator>(BO) && 889 strengthenOverflowingOperation(BO, IVOperand)) || 890 (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) { 891 // re-queue uses of the now modified binary operator and fall 892 // through to the checks that remain. 893 pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); 894 } 895 } 896 897 CastInst *Cast = dyn_cast<CastInst>(UseInst); 898 if (V && Cast) { 899 V->visitCast(Cast); 900 continue; 901 } 902 if (isSimpleIVUser(UseInst, L, SE)) { 903 pushIVUsers(UseInst, L, Simplified, SimpleIVUsers); 904 } 905 } 906 } 907 908 namespace llvm { 909 910 void IVVisitor::anchor() { } 911 912 /// Simplify instructions that use this induction variable 913 /// by using ScalarEvolution to analyze the IV's recurrence. 914 bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, 915 LoopInfo *LI, const TargetTransformInfo *TTI, 916 SmallVectorImpl<WeakTrackingVH> &Dead, 917 SCEVExpander &Rewriter, IVVisitor *V) { 918 SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI, 919 Rewriter, Dead); 920 SIV.simplifyUsers(CurrIV, V); 921 return SIV.hasChanged(); 922 } 923 924 /// Simplify users of induction variables within this 925 /// loop. This does not actually change or add IVs. 926 bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, 927 LoopInfo *LI, const TargetTransformInfo *TTI, 928 SmallVectorImpl<WeakTrackingVH> &Dead) { 929 SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars"); 930 #ifndef NDEBUG 931 Rewriter.setDebugType(DEBUG_TYPE); 932 #endif 933 bool Changed = false; 934 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 935 Changed |= 936 simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter); 937 } 938 return Changed; 939 } 940 941 } // namespace llvm 942 943 namespace { 944 //===----------------------------------------------------------------------===// 945 // Widen Induction Variables - Extend the width of an IV to cover its 946 // widest uses. 947 //===----------------------------------------------------------------------===// 948 949 class WidenIV { 950 // Parameters 951 PHINode *OrigPhi; 952 Type *WideType; 953 954 // Context 955 LoopInfo *LI; 956 Loop *L; 957 ScalarEvolution *SE; 958 DominatorTree *DT; 959 960 // Does the module have any calls to the llvm.experimental.guard intrinsic 961 // at all? If not we can avoid scanning instructions looking for guards. 962 bool HasGuards; 963 964 bool UsePostIncrementRanges; 965 966 // Statistics 967 unsigned NumElimExt = 0; 968 unsigned NumWidened = 0; 969 970 // Result 971 PHINode *WidePhi = nullptr; 972 Instruction *WideInc = nullptr; 973 const SCEV *WideIncExpr = nullptr; 974 SmallVectorImpl<WeakTrackingVH> &DeadInsts; 975 976 SmallPtrSet<Instruction *,16> Widened; 977 978 enum ExtendKind { ZeroExtended, SignExtended, Unknown }; 979 980 // A map tracking the kind of extension used to widen each narrow IV 981 // and narrow IV user. 982 // Key: pointer to a narrow IV or IV user. 983 // Value: the kind of extension used to widen this Instruction. 984 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; 985 986 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; 987 988 // A map with control-dependent ranges for post increment IV uses. The key is 989 // a pair of IV def and a use of this def denoting the context. The value is 990 // a ConstantRange representing possible values of the def at the given 991 // context. 992 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; 993 994 Optional<ConstantRange> getPostIncRangeInfo(Value *Def, 995 Instruction *UseI) { 996 DefUserPair Key(Def, UseI); 997 auto It = PostIncRangeInfos.find(Key); 998 return It == PostIncRangeInfos.end() 999 ? Optional<ConstantRange>(None) 1000 : Optional<ConstantRange>(It->second); 1001 } 1002 1003 void calculatePostIncRanges(PHINode *OrigPhi); 1004 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); 1005 1006 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { 1007 DefUserPair Key(Def, UseI); 1008 auto It = PostIncRangeInfos.find(Key); 1009 if (It == PostIncRangeInfos.end()) 1010 PostIncRangeInfos.insert({Key, R}); 1011 else 1012 It->second = R.intersectWith(It->second); 1013 } 1014 1015 public: 1016 /// Record a link in the Narrow IV def-use chain along with the WideIV that 1017 /// computes the same value as the Narrow IV def. This avoids caching Use* 1018 /// pointers. 1019 struct NarrowIVDefUse { 1020 Instruction *NarrowDef = nullptr; 1021 Instruction *NarrowUse = nullptr; 1022 Instruction *WideDef = nullptr; 1023 1024 // True if the narrow def is never negative. Tracking this information lets 1025 // us use a sign extension instead of a zero extension or vice versa, when 1026 // profitable and legal. 1027 bool NeverNegative = false; 1028 1029 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, 1030 bool NeverNegative) 1031 : NarrowDef(ND), NarrowUse(NU), WideDef(WD), 1032 NeverNegative(NeverNegative) {} 1033 }; 1034 1035 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, 1036 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, 1037 bool HasGuards, bool UsePostIncrementRanges = true); 1038 1039 PHINode *createWideIV(SCEVExpander &Rewriter); 1040 1041 unsigned getNumElimExt() { return NumElimExt; }; 1042 unsigned getNumWidened() { return NumWidened; }; 1043 1044 protected: 1045 Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, 1046 Instruction *Use); 1047 1048 Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); 1049 Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, 1050 const SCEVAddRecExpr *WideAR); 1051 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); 1052 1053 ExtendKind getExtendKind(Instruction *I); 1054 1055 using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>; 1056 1057 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); 1058 1059 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); 1060 1061 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 1062 unsigned OpCode) const; 1063 1064 Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); 1065 1066 bool widenLoopCompare(NarrowIVDefUse DU); 1067 bool widenWithVariantUse(NarrowIVDefUse DU); 1068 1069 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); 1070 1071 private: 1072 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; 1073 }; 1074 } // namespace 1075 1076 /// Determine the insertion point for this user. By default, insert immediately 1077 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the 1078 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest 1079 /// common dominator for the incoming blocks. A nullptr can be returned if no 1080 /// viable location is found: it may happen if User is a PHI and Def only comes 1081 /// to this PHI from unreachable blocks. 1082 static Instruction *getInsertPointForUses(Instruction *User, Value *Def, 1083 DominatorTree *DT, LoopInfo *LI) { 1084 PHINode *PHI = dyn_cast<PHINode>(User); 1085 if (!PHI) 1086 return User; 1087 1088 Instruction *InsertPt = nullptr; 1089 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { 1090 if (PHI->getIncomingValue(i) != Def) 1091 continue; 1092 1093 BasicBlock *InsertBB = PHI->getIncomingBlock(i); 1094 1095 if (!DT->isReachableFromEntry(InsertBB)) 1096 continue; 1097 1098 if (!InsertPt) { 1099 InsertPt = InsertBB->getTerminator(); 1100 continue; 1101 } 1102 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); 1103 InsertPt = InsertBB->getTerminator(); 1104 } 1105 1106 // If we have skipped all inputs, it means that Def only comes to Phi from 1107 // unreachable blocks. 1108 if (!InsertPt) 1109 return nullptr; 1110 1111 auto *DefI = dyn_cast<Instruction>(Def); 1112 if (!DefI) 1113 return InsertPt; 1114 1115 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); 1116 1117 auto *L = LI->getLoopFor(DefI->getParent()); 1118 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); 1119 1120 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) 1121 if (LI->getLoopFor(DTN->getBlock()) == L) 1122 return DTN->getBlock()->getTerminator(); 1123 1124 llvm_unreachable("DefI dominates InsertPt!"); 1125 } 1126 1127 WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, 1128 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, 1129 bool HasGuards, bool UsePostIncrementRanges) 1130 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), 1131 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), 1132 HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges), 1133 DeadInsts(DI) { 1134 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); 1135 ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; 1136 } 1137 1138 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, 1139 bool IsSigned, Instruction *Use) { 1140 // Set the debug location and conservative insertion point. 1141 IRBuilder<> Builder(Use); 1142 // Hoist the insertion point into loop preheaders as far as possible. 1143 for (const Loop *L = LI->getLoopFor(Use->getParent()); 1144 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); 1145 L = L->getParentLoop()) 1146 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); 1147 1148 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : 1149 Builder.CreateZExt(NarrowOper, WideType); 1150 } 1151 1152 /// Instantiate a wide operation to replace a narrow operation. This only needs 1153 /// to handle operations that can evaluation to SCEVAddRec. It can safely return 1154 /// 0 for any operation we decide not to clone. 1155 Instruction *WidenIV::cloneIVUser(WidenIV::NarrowIVDefUse DU, 1156 const SCEVAddRecExpr *WideAR) { 1157 unsigned Opcode = DU.NarrowUse->getOpcode(); 1158 switch (Opcode) { 1159 default: 1160 return nullptr; 1161 case Instruction::Add: 1162 case Instruction::Mul: 1163 case Instruction::UDiv: 1164 case Instruction::Sub: 1165 return cloneArithmeticIVUser(DU, WideAR); 1166 1167 case Instruction::And: 1168 case Instruction::Or: 1169 case Instruction::Xor: 1170 case Instruction::Shl: 1171 case Instruction::LShr: 1172 case Instruction::AShr: 1173 return cloneBitwiseIVUser(DU); 1174 } 1175 } 1176 1177 Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { 1178 Instruction *NarrowUse = DU.NarrowUse; 1179 Instruction *NarrowDef = DU.NarrowDef; 1180 Instruction *WideDef = DU.WideDef; 1181 1182 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); 1183 1184 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything 1185 // about the narrow operand yet so must insert a [sz]ext. It is probably loop 1186 // invariant and will be folded or hoisted. If it actually comes from a 1187 // widened IV, it should be removed during a future call to widenIVUse. 1188 bool IsSigned = getExtendKind(NarrowDef) == SignExtended; 1189 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1190 ? WideDef 1191 : createExtendInst(NarrowUse->getOperand(0), WideType, 1192 IsSigned, NarrowUse); 1193 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1194 ? WideDef 1195 : createExtendInst(NarrowUse->getOperand(1), WideType, 1196 IsSigned, NarrowUse); 1197 1198 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1199 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1200 NarrowBO->getName()); 1201 IRBuilder<> Builder(NarrowUse); 1202 Builder.Insert(WideBO); 1203 WideBO->copyIRFlags(NarrowBO); 1204 return WideBO; 1205 } 1206 1207 Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, 1208 const SCEVAddRecExpr *WideAR) { 1209 Instruction *NarrowUse = DU.NarrowUse; 1210 Instruction *NarrowDef = DU.NarrowDef; 1211 Instruction *WideDef = DU.WideDef; 1212 1213 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1214 1215 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; 1216 1217 // We're trying to find X such that 1218 // 1219 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X 1220 // 1221 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), 1222 // and check using SCEV if any of them are correct. 1223 1224 // Returns true if extending NonIVNarrowDef according to `SignExt` is a 1225 // correct solution to X. 1226 auto GuessNonIVOperand = [&](bool SignExt) { 1227 const SCEV *WideLHS; 1228 const SCEV *WideRHS; 1229 1230 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { 1231 if (SignExt) 1232 return SE->getSignExtendExpr(S, Ty); 1233 return SE->getZeroExtendExpr(S, Ty); 1234 }; 1235 1236 if (IVOpIdx == 0) { 1237 WideLHS = SE->getSCEV(WideDef); 1238 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); 1239 WideRHS = GetExtend(NarrowRHS, WideType); 1240 } else { 1241 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); 1242 WideLHS = GetExtend(NarrowLHS, WideType); 1243 WideRHS = SE->getSCEV(WideDef); 1244 } 1245 1246 // WideUse is "WideDef `op.wide` X" as described in the comment. 1247 const SCEV *WideUse = 1248 getSCEVByOpCode(WideLHS, WideRHS, NarrowUse->getOpcode()); 1249 1250 return WideUse == WideAR; 1251 }; 1252 1253 bool SignExtend = getExtendKind(NarrowDef) == SignExtended; 1254 if (!GuessNonIVOperand(SignExtend)) { 1255 SignExtend = !SignExtend; 1256 if (!GuessNonIVOperand(SignExtend)) 1257 return nullptr; 1258 } 1259 1260 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1261 ? WideDef 1262 : createExtendInst(NarrowUse->getOperand(0), WideType, 1263 SignExtend, NarrowUse); 1264 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1265 ? WideDef 1266 : createExtendInst(NarrowUse->getOperand(1), WideType, 1267 SignExtend, NarrowUse); 1268 1269 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1270 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1271 NarrowBO->getName()); 1272 1273 IRBuilder<> Builder(NarrowUse); 1274 Builder.Insert(WideBO); 1275 WideBO->copyIRFlags(NarrowBO); 1276 return WideBO; 1277 } 1278 1279 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { 1280 auto It = ExtendKindMap.find(I); 1281 assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); 1282 return It->second; 1283 } 1284 1285 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 1286 unsigned OpCode) const { 1287 switch (OpCode) { 1288 case Instruction::Add: 1289 return SE->getAddExpr(LHS, RHS); 1290 case Instruction::Sub: 1291 return SE->getMinusSCEV(LHS, RHS); 1292 case Instruction::Mul: 1293 return SE->getMulExpr(LHS, RHS); 1294 case Instruction::UDiv: 1295 return SE->getUDivExpr(LHS, RHS); 1296 default: 1297 llvm_unreachable("Unsupported opcode."); 1298 }; 1299 } 1300 1301 /// No-wrap operations can transfer sign extension of their result to their 1302 /// operands. Generate the SCEV value for the widened operation without 1303 /// actually modifying the IR yet. If the expression after extending the 1304 /// operands is an AddRec for this loop, return the AddRec and the kind of 1305 /// extension used. 1306 WidenIV::WidenedRecTy 1307 WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { 1308 // Handle the common case of add<nsw/nuw> 1309 const unsigned OpCode = DU.NarrowUse->getOpcode(); 1310 // Only Add/Sub/Mul instructions supported yet. 1311 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1312 OpCode != Instruction::Mul) 1313 return {nullptr, Unknown}; 1314 1315 // One operand (NarrowDef) has already been extended to WideDef. Now determine 1316 // if extending the other will lead to a recurrence. 1317 const unsigned ExtendOperIdx = 1318 DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; 1319 assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); 1320 1321 const SCEV *ExtendOperExpr = nullptr; 1322 const OverflowingBinaryOperator *OBO = 1323 cast<OverflowingBinaryOperator>(DU.NarrowUse); 1324 ExtendKind ExtKind = getExtendKind(DU.NarrowDef); 1325 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 1326 ExtendOperExpr = SE->getSignExtendExpr( 1327 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 1328 else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 1329 ExtendOperExpr = SE->getZeroExtendExpr( 1330 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 1331 else 1332 return {nullptr, Unknown}; 1333 1334 // When creating this SCEV expr, don't apply the current operations NSW or NUW 1335 // flags. This instruction may be guarded by control flow that the no-wrap 1336 // behavior depends on. Non-control-equivalent instructions can be mapped to 1337 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW 1338 // semantics to those operations. 1339 const SCEV *lhs = SE->getSCEV(DU.WideDef); 1340 const SCEV *rhs = ExtendOperExpr; 1341 1342 // Let's swap operands to the initial order for the case of non-commutative 1343 // operations, like SUB. See PR21014. 1344 if (ExtendOperIdx == 0) 1345 std::swap(lhs, rhs); 1346 const SCEVAddRecExpr *AddRec = 1347 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); 1348 1349 if (!AddRec || AddRec->getLoop() != L) 1350 return {nullptr, Unknown}; 1351 1352 return {AddRec, ExtKind}; 1353 } 1354 1355 /// Is this instruction potentially interesting for further simplification after 1356 /// widening it's type? In other words, can the extend be safely hoisted out of 1357 /// the loop with SCEV reducing the value to a recurrence on the same loop. If 1358 /// so, return the extended recurrence and the kind of extension used. Otherwise 1359 /// return {nullptr, Unknown}. 1360 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { 1361 if (!DU.NarrowUse->getType()->isIntegerTy()) 1362 return {nullptr, Unknown}; 1363 1364 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); 1365 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= 1366 SE->getTypeSizeInBits(WideType)) { 1367 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow 1368 // index. So don't follow this use. 1369 return {nullptr, Unknown}; 1370 } 1371 1372 const SCEV *WideExpr; 1373 ExtendKind ExtKind; 1374 if (DU.NeverNegative) { 1375 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1376 if (isa<SCEVAddRecExpr>(WideExpr)) 1377 ExtKind = SignExtended; 1378 else { 1379 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1380 ExtKind = ZeroExtended; 1381 } 1382 } else if (getExtendKind(DU.NarrowDef) == SignExtended) { 1383 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1384 ExtKind = SignExtended; 1385 } else { 1386 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1387 ExtKind = ZeroExtended; 1388 } 1389 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); 1390 if (!AddRec || AddRec->getLoop() != L) 1391 return {nullptr, Unknown}; 1392 return {AddRec, ExtKind}; 1393 } 1394 1395 /// This IV user cannot be widened. Replace this use of the original narrow IV 1396 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. 1397 static void truncateIVUse(WidenIV::NarrowIVDefUse DU, DominatorTree *DT, 1398 LoopInfo *LI) { 1399 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1400 if (!InsertPt) 1401 return; 1402 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " 1403 << *DU.NarrowUse << "\n"); 1404 IRBuilder<> Builder(InsertPt); 1405 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); 1406 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); 1407 } 1408 1409 /// If the narrow use is a compare instruction, then widen the compare 1410 // (and possibly the other operand). The extend operation is hoisted into the 1411 // loop preheader as far as possible. 1412 bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { 1413 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); 1414 if (!Cmp) 1415 return false; 1416 1417 // We can legally widen the comparison in the following two cases: 1418 // 1419 // - The signedness of the IV extension and comparison match 1420 // 1421 // - The narrow IV is always positive (and thus its sign extension is equal 1422 // to its zero extension). For instance, let's say we're zero extending 1423 // %narrow for the following use 1424 // 1425 // icmp slt i32 %narrow, %val ... (A) 1426 // 1427 // and %narrow is always positive. Then 1428 // 1429 // (A) == icmp slt i32 sext(%narrow), sext(%val) 1430 // == icmp slt i32 zext(%narrow), sext(%val) 1431 bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; 1432 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) 1433 return false; 1434 1435 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); 1436 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); 1437 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1438 assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); 1439 1440 // Widen the compare instruction. 1441 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1442 if (!InsertPt) 1443 return false; 1444 IRBuilder<> Builder(InsertPt); 1445 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1446 1447 // Widen the other operand of the compare, if necessary. 1448 if (CastWidth < IVWidth) { 1449 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); 1450 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); 1451 } 1452 return true; 1453 } 1454 1455 // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this 1456 // will not work when: 1457 // 1) SCEV traces back to an instruction inside the loop that SCEV can not 1458 // expand, eg. add %indvar, (load %addr) 1459 // 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant 1460 // While SCEV fails to avoid trunc, we can still try to use instruction 1461 // combining approach to prove trunc is not required. This can be further 1462 // extended with other instruction combining checks, but for now we handle the 1463 // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext") 1464 // 1465 // Src: 1466 // %c = sub nsw %b, %indvar 1467 // %d = sext %c to i64 1468 // Dst: 1469 // %indvar.ext1 = sext %indvar to i64 1470 // %m = sext %b to i64 1471 // %d = sub nsw i64 %m, %indvar.ext1 1472 // Therefore, as long as the result of add/sub/mul is extended to wide type, no 1473 // trunc is required regardless of how %b is generated. This pattern is common 1474 // when calculating address in 64 bit architecture 1475 bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { 1476 Instruction *NarrowUse = DU.NarrowUse; 1477 Instruction *NarrowDef = DU.NarrowDef; 1478 Instruction *WideDef = DU.WideDef; 1479 1480 // Handle the common case of add<nsw/nuw> 1481 const unsigned OpCode = NarrowUse->getOpcode(); 1482 // Only Add/Sub/Mul instructions are supported. 1483 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1484 OpCode != Instruction::Mul) 1485 return false; 1486 1487 // The operand that is not defined by NarrowDef of DU. Let's call it the 1488 // other operand. 1489 assert((NarrowUse->getOperand(0) == NarrowDef || 1490 NarrowUse->getOperand(1) == NarrowDef) && 1491 "bad DU"); 1492 1493 const OverflowingBinaryOperator *OBO = 1494 cast<OverflowingBinaryOperator>(NarrowUse); 1495 ExtendKind ExtKind = getExtendKind(NarrowDef); 1496 bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); 1497 bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); 1498 auto AnotherOpExtKind = ExtKind; 1499 1500 // Check that all uses are either: 1501 // - narrow def (in case of we are widening the IV increment); 1502 // - single-input LCSSA Phis; 1503 // - comparison of the chosen type; 1504 // - extend of the chosen type (raison d'etre). 1505 SmallVector<Instruction *, 4> ExtUsers; 1506 SmallVector<PHINode *, 4> LCSSAPhiUsers; 1507 SmallVector<ICmpInst *, 4> ICmpUsers; 1508 for (Use &U : NarrowUse->uses()) { 1509 Instruction *User = cast<Instruction>(U.getUser()); 1510 if (User == NarrowDef) 1511 continue; 1512 if (!L->contains(User)) { 1513 auto *LCSSAPhi = cast<PHINode>(User); 1514 // Make sure there is only 1 input, so that we don't have to split 1515 // critical edges. 1516 if (LCSSAPhi->getNumOperands() != 1) 1517 return false; 1518 LCSSAPhiUsers.push_back(LCSSAPhi); 1519 continue; 1520 } 1521 if (auto *ICmp = dyn_cast<ICmpInst>(User)) { 1522 auto Pred = ICmp->getPredicate(); 1523 // We have 3 types of predicates: signed, unsigned and equality 1524 // predicates. For equality, it's legal to widen icmp for either sign and 1525 // zero extend. For sign extend, we can also do so for signed predicates, 1526 // likeweise for zero extend we can widen icmp for unsigned predicates. 1527 if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred)) 1528 return false; 1529 if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred)) 1530 return false; 1531 ICmpUsers.push_back(ICmp); 1532 continue; 1533 } 1534 if (ExtKind == SignExtended) 1535 User = dyn_cast<SExtInst>(User); 1536 else 1537 User = dyn_cast<ZExtInst>(User); 1538 if (!User || User->getType() != WideType) 1539 return false; 1540 ExtUsers.push_back(User); 1541 } 1542 if (ExtUsers.empty()) { 1543 DeadInsts.emplace_back(NarrowUse); 1544 return true; 1545 } 1546 1547 // We'll prove some facts that should be true in the context of ext users. If 1548 // there is no users, we are done now. If there are some, pick their common 1549 // dominator as context. 1550 const Instruction *CtxI = findCommonDominator(ExtUsers, *DT); 1551 1552 if (!CanSignExtend && !CanZeroExtend) { 1553 // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we 1554 // will most likely not see it. Let's try to prove it. 1555 if (OpCode != Instruction::Add) 1556 return false; 1557 if (ExtKind != ZeroExtended) 1558 return false; 1559 const SCEV *LHS = SE->getSCEV(OBO->getOperand(0)); 1560 const SCEV *RHS = SE->getSCEV(OBO->getOperand(1)); 1561 // TODO: Support case for NarrowDef = NarrowUse->getOperand(1). 1562 if (NarrowUse->getOperand(0) != NarrowDef) 1563 return false; 1564 if (!SE->isKnownNegative(RHS)) 1565 return false; 1566 bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS, 1567 SE->getNegativeSCEV(RHS), CtxI); 1568 if (!ProvedSubNUW) 1569 return false; 1570 // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as 1571 // neg(zext(neg(op))), which is basically sext(op). 1572 AnotherOpExtKind = SignExtended; 1573 } 1574 1575 // Verifying that Defining operand is an AddRec 1576 const SCEV *Op1 = SE->getSCEV(WideDef); 1577 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); 1578 if (!AddRecOp1 || AddRecOp1->getLoop() != L) 1579 return false; 1580 1581 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1582 1583 // Generating a widening use instruction. 1584 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1585 ? WideDef 1586 : createExtendInst(NarrowUse->getOperand(0), WideType, 1587 AnotherOpExtKind, NarrowUse); 1588 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1589 ? WideDef 1590 : createExtendInst(NarrowUse->getOperand(1), WideType, 1591 AnotherOpExtKind, NarrowUse); 1592 1593 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1594 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1595 NarrowBO->getName()); 1596 IRBuilder<> Builder(NarrowUse); 1597 Builder.Insert(WideBO); 1598 WideBO->copyIRFlags(NarrowBO); 1599 ExtendKindMap[NarrowUse] = ExtKind; 1600 1601 for (Instruction *User : ExtUsers) { 1602 assert(User->getType() == WideType && "Checked before!"); 1603 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1604 << *WideBO << "\n"); 1605 ++NumElimExt; 1606 User->replaceAllUsesWith(WideBO); 1607 DeadInsts.emplace_back(User); 1608 } 1609 1610 for (PHINode *User : LCSSAPhiUsers) { 1611 assert(User->getNumOperands() == 1 && "Checked before!"); 1612 Builder.SetInsertPoint(User); 1613 auto *WidePN = 1614 Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide"); 1615 BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor(); 1616 assert(LoopExitingBlock && L->contains(LoopExitingBlock) && 1617 "Not a LCSSA Phi?"); 1618 WidePN->addIncoming(WideBO, LoopExitingBlock); 1619 Builder.SetInsertPoint(&*User->getParent()->getFirstInsertionPt()); 1620 auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType()); 1621 User->replaceAllUsesWith(TruncPN); 1622 DeadInsts.emplace_back(User); 1623 } 1624 1625 for (ICmpInst *User : ICmpUsers) { 1626 Builder.SetInsertPoint(User); 1627 auto ExtendedOp = [&](Value * V)->Value * { 1628 if (V == NarrowUse) 1629 return WideBO; 1630 if (ExtKind == ZeroExtended) 1631 return Builder.CreateZExt(V, WideBO->getType()); 1632 else 1633 return Builder.CreateSExt(V, WideBO->getType()); 1634 }; 1635 auto Pred = User->getPredicate(); 1636 auto *LHS = ExtendedOp(User->getOperand(0)); 1637 auto *RHS = ExtendedOp(User->getOperand(1)); 1638 auto *WideCmp = 1639 Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide"); 1640 User->replaceAllUsesWith(WideCmp); 1641 DeadInsts.emplace_back(User); 1642 } 1643 1644 return true; 1645 } 1646 1647 /// Determine whether an individual user of the narrow IV can be widened. If so, 1648 /// return the wide clone of the user. 1649 Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1650 assert(ExtendKindMap.count(DU.NarrowDef) && 1651 "Should already know the kind of extension used to widen NarrowDef"); 1652 1653 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1654 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1655 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1656 // For LCSSA phis, sink the truncate outside the loop. 1657 // After SimplifyCFG most loop exit targets have a single predecessor. 1658 // Otherwise fall back to a truncate within the loop. 1659 if (UsePhi->getNumOperands() != 1) 1660 truncateIVUse(DU, DT, LI); 1661 else { 1662 // Widening the PHI requires us to insert a trunc. The logical place 1663 // for this trunc is in the same BB as the PHI. This is not possible if 1664 // the BB is terminated by a catchswitch. 1665 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1666 return nullptr; 1667 1668 PHINode *WidePhi = 1669 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1670 UsePhi); 1671 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1672 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1673 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1674 UsePhi->replaceAllUsesWith(Trunc); 1675 DeadInsts.emplace_back(UsePhi); 1676 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1677 << *WidePhi << "\n"); 1678 } 1679 return nullptr; 1680 } 1681 } 1682 1683 // This narrow use can be widened by a sext if it's non-negative or its narrow 1684 // def was widended by a sext. Same for zext. 1685 auto canWidenBySExt = [&]() { 1686 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1687 }; 1688 auto canWidenByZExt = [&]() { 1689 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1690 }; 1691 1692 // Our raison d'etre! Eliminate sign and zero extension. 1693 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || 1694 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { 1695 Value *NewDef = DU.WideDef; 1696 if (DU.NarrowUse->getType() != WideType) { 1697 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); 1698 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1699 if (CastWidth < IVWidth) { 1700 // The cast isn't as wide as the IV, so insert a Trunc. 1701 IRBuilder<> Builder(DU.NarrowUse); 1702 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); 1703 } 1704 else { 1705 // A wider extend was hidden behind a narrower one. This may induce 1706 // another round of IV widening in which the intermediate IV becomes 1707 // dead. It should be very rare. 1708 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 1709 << " not wide enough to subsume " << *DU.NarrowUse 1710 << "\n"); 1711 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1712 NewDef = DU.NarrowUse; 1713 } 1714 } 1715 if (NewDef != DU.NarrowUse) { 1716 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse 1717 << " replaced by " << *DU.WideDef << "\n"); 1718 ++NumElimExt; 1719 DU.NarrowUse->replaceAllUsesWith(NewDef); 1720 DeadInsts.emplace_back(DU.NarrowUse); 1721 } 1722 // Now that the extend is gone, we want to expose it's uses for potential 1723 // further simplification. We don't need to directly inform SimplifyIVUsers 1724 // of the new users, because their parent IV will be processed later as a 1725 // new loop phi. If we preserved IVUsers analysis, we would also want to 1726 // push the uses of WideDef here. 1727 1728 // No further widening is needed. The deceased [sz]ext had done it for us. 1729 return nullptr; 1730 } 1731 1732 // Does this user itself evaluate to a recurrence after widening? 1733 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1734 if (!WideAddRec.first) 1735 WideAddRec = getWideRecurrence(DU); 1736 1737 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1738 if (!WideAddRec.first) { 1739 // If use is a loop condition, try to promote the condition instead of 1740 // truncating the IV first. 1741 if (widenLoopCompare(DU)) 1742 return nullptr; 1743 1744 // We are here about to generate a truncate instruction that may hurt 1745 // performance because the scalar evolution expression computed earlier 1746 // in WideAddRec.first does not indicate a polynomial induction expression. 1747 // In that case, look at the operands of the use instruction to determine 1748 // if we can still widen the use instead of truncating its operand. 1749 if (widenWithVariantUse(DU)) 1750 return nullptr; 1751 1752 // This user does not evaluate to a recurrence after widening, so don't 1753 // follow it. Instead insert a Trunc to kill off the original use, 1754 // eventually isolating the original narrow IV so it can be removed. 1755 truncateIVUse(DU, DT, LI); 1756 return nullptr; 1757 } 1758 // Assume block terminators cannot evaluate to a recurrence. We can't to 1759 // insert a Trunc after a terminator if there happens to be a critical edge. 1760 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1761 "SCEV is not expected to evaluate a block terminator"); 1762 1763 // Reuse the IV increment that SCEVExpander created as long as it dominates 1764 // NarrowUse. 1765 Instruction *WideUse = nullptr; 1766 if (WideAddRec.first == WideIncExpr && 1767 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1768 WideUse = WideInc; 1769 else { 1770 WideUse = cloneIVUser(DU, WideAddRec.first); 1771 if (!WideUse) 1772 return nullptr; 1773 } 1774 // Evaluation of WideAddRec ensured that the narrow expression could be 1775 // extended outside the loop without overflow. This suggests that the wide use 1776 // evaluates to the same expression as the extended narrow use, but doesn't 1777 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1778 // where it fails, we simply throw away the newly created wide use. 1779 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1780 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1781 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1782 << "\n"); 1783 DeadInsts.emplace_back(WideUse); 1784 return nullptr; 1785 } 1786 1787 // if we reached this point then we are going to replace 1788 // DU.NarrowUse with WideUse. Reattach DbgValue then. 1789 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); 1790 1791 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1792 // Returning WideUse pushes it on the worklist. 1793 return WideUse; 1794 } 1795 1796 /// Add eligible users of NarrowDef to NarrowIVUsers. 1797 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1798 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1799 bool NonNegativeDef = 1800 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1801 SE->getZero(NarrowSCEV->getType())); 1802 for (User *U : NarrowDef->users()) { 1803 Instruction *NarrowUser = cast<Instruction>(U); 1804 1805 // Handle data flow merges and bizarre phi cycles. 1806 if (!Widened.insert(NarrowUser).second) 1807 continue; 1808 1809 bool NonNegativeUse = false; 1810 if (!NonNegativeDef) { 1811 // We might have a control-dependent range information for this context. 1812 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1813 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1814 } 1815 1816 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1817 NonNegativeDef || NonNegativeUse); 1818 } 1819 } 1820 1821 /// Process a single induction variable. First use the SCEVExpander to create a 1822 /// wide induction variable that evaluates to the same recurrence as the 1823 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1824 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1825 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1826 /// 1827 /// It would be simpler to delete uses as they are processed, but we must avoid 1828 /// invalidating SCEV expressions. 1829 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1830 // Is this phi an induction variable? 1831 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1832 if (!AddRec) 1833 return nullptr; 1834 1835 // Widen the induction variable expression. 1836 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1837 ? SE->getSignExtendExpr(AddRec, WideType) 1838 : SE->getZeroExtendExpr(AddRec, WideType); 1839 1840 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1841 "Expect the new IV expression to preserve its type"); 1842 1843 // Can the IV be extended outside the loop without overflow? 1844 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1845 if (!AddRec || AddRec->getLoop() != L) 1846 return nullptr; 1847 1848 // An AddRec must have loop-invariant operands. Since this AddRec is 1849 // materialized by a loop header phi, the expression cannot have any post-loop 1850 // operands, so they must dominate the loop header. 1851 assert( 1852 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1853 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1854 "Loop header phi recurrence inputs do not dominate the loop"); 1855 1856 // Iterate over IV uses (including transitive ones) looking for IV increments 1857 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1858 // the increment calculate control-dependent range information basing on 1859 // dominating conditions inside of the loop (e.g. a range check inside of the 1860 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1861 // 1862 // Control-dependent range information is later used to prove that a narrow 1863 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1864 // this on demand because when pushNarrowIVUsers needs this information some 1865 // of the dominating conditions might be already widened. 1866 if (UsePostIncrementRanges) 1867 calculatePostIncRanges(OrigPhi); 1868 1869 // The rewriter provides a value for the desired IV expression. This may 1870 // either find an existing phi or materialize a new one. Either way, we 1871 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1872 // of the phi-SCC dominates the loop entry. 1873 Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); 1874 Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); 1875 // If the wide phi is not a phi node, for example a cast node, like bitcast, 1876 // inttoptr, ptrtoint, just skip for now. 1877 if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) { 1878 // if the cast node is an inserted instruction without any user, we should 1879 // remove it to make sure the pass don't touch the function as we can not 1880 // wide the phi. 1881 if (ExpandInst->hasNUses(0) && 1882 Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst))) 1883 DeadInsts.emplace_back(ExpandInst); 1884 return nullptr; 1885 } 1886 1887 // Remembering the WideIV increment generated by SCEVExpander allows 1888 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1889 // employ a general reuse mechanism because the call above is the only call to 1890 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1891 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1892 WideInc = 1893 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1894 WideIncExpr = SE->getSCEV(WideInc); 1895 // Propagate the debug location associated with the original loop increment 1896 // to the new (widened) increment. 1897 auto *OrigInc = 1898 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1899 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1900 } 1901 1902 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1903 ++NumWidened; 1904 1905 // Traverse the def-use chain using a worklist starting at the original IV. 1906 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1907 1908 Widened.insert(OrigPhi); 1909 pushNarrowIVUsers(OrigPhi, WidePhi); 1910 1911 while (!NarrowIVUsers.empty()) { 1912 WidenIV::NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1913 1914 // Process a def-use edge. This may replace the use, so don't hold a 1915 // use_iterator across it. 1916 Instruction *WideUse = widenIVUse(DU, Rewriter); 1917 1918 // Follow all def-use edges from the previous narrow use. 1919 if (WideUse) 1920 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1921 1922 // widenIVUse may have removed the def-use edge. 1923 if (DU.NarrowDef->use_empty()) 1924 DeadInsts.emplace_back(DU.NarrowDef); 1925 } 1926 1927 // Attach any debug information to the new PHI. 1928 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1929 1930 return WidePhi; 1931 } 1932 1933 /// Calculates control-dependent range for the given def at the given context 1934 /// by looking at dominating conditions inside of the loop 1935 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1936 Instruction *NarrowUser) { 1937 using namespace llvm::PatternMatch; 1938 1939 Value *NarrowDefLHS; 1940 const APInt *NarrowDefRHS; 1941 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1942 m_APInt(NarrowDefRHS))) || 1943 !NarrowDefRHS->isNonNegative()) 1944 return; 1945 1946 auto UpdateRangeFromCondition = [&] (Value *Condition, 1947 bool TrueDest) { 1948 CmpInst::Predicate Pred; 1949 Value *CmpRHS; 1950 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1951 m_Value(CmpRHS)))) 1952 return; 1953 1954 CmpInst::Predicate P = 1955 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1956 1957 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1958 auto CmpConstrainedLHSRange = 1959 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1960 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1961 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1962 1963 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1964 }; 1965 1966 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1967 if (!HasGuards) 1968 return; 1969 1970 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1971 Ctx->getParent()->rend())) { 1972 Value *C = nullptr; 1973 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1974 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1975 } 1976 }; 1977 1978 UpdateRangeFromGuards(NarrowUser); 1979 1980 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1981 // If NarrowUserBB is statically unreachable asking dominator queries may 1982 // yield surprising results. (e.g. the block may not have a dom tree node) 1983 if (!DT->isReachableFromEntry(NarrowUserBB)) 1984 return; 1985 1986 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1987 L->contains(DTB->getBlock()); 1988 DTB = DTB->getIDom()) { 1989 auto *BB = DTB->getBlock(); 1990 auto *TI = BB->getTerminator(); 1991 UpdateRangeFromGuards(TI); 1992 1993 auto *BI = dyn_cast<BranchInst>(TI); 1994 if (!BI || !BI->isConditional()) 1995 continue; 1996 1997 auto *TrueSuccessor = BI->getSuccessor(0); 1998 auto *FalseSuccessor = BI->getSuccessor(1); 1999 2000 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 2001 return BBE.isSingleEdge() && 2002 DT->dominates(BBE, NarrowUser->getParent()); 2003 }; 2004 2005 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 2006 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 2007 2008 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 2009 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 2010 } 2011 } 2012 2013 /// Calculates PostIncRangeInfos map for the given IV 2014 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 2015 SmallPtrSet<Instruction *, 16> Visited; 2016 SmallVector<Instruction *, 6> Worklist; 2017 Worklist.push_back(OrigPhi); 2018 Visited.insert(OrigPhi); 2019 2020 while (!Worklist.empty()) { 2021 Instruction *NarrowDef = Worklist.pop_back_val(); 2022 2023 for (Use &U : NarrowDef->uses()) { 2024 auto *NarrowUser = cast<Instruction>(U.getUser()); 2025 2026 // Don't go looking outside the current loop. 2027 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 2028 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 2029 continue; 2030 2031 if (!Visited.insert(NarrowUser).second) 2032 continue; 2033 2034 Worklist.push_back(NarrowUser); 2035 2036 calculatePostIncRange(NarrowDef, NarrowUser); 2037 } 2038 } 2039 } 2040 2041 PHINode *llvm::createWideIV(const WideIVInfo &WI, 2042 LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, 2043 DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts, 2044 unsigned &NumElimExt, unsigned &NumWidened, 2045 bool HasGuards, bool UsePostIncrementRanges) { 2046 WidenIV Widener(WI, LI, SE, DT, DeadInsts, HasGuards, UsePostIncrementRanges); 2047 PHINode *WidePHI = Widener.createWideIV(Rewriter); 2048 NumElimExt = Widener.getNumElimExt(); 2049 NumWidened = Widener.getNumWidened(); 2050 return WidePHI; 2051 } 2052