1 //===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the Dead Loop Deletion Pass. This pass is responsible 10 // for eliminating loops with non-infinite computable trip counts that have no 11 // side effects or volatile instructions, and do not contribute to the 12 // computation of the function's return value. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/Scalar/LoopDeletion.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/GlobalsModRef.h" 20 #include "llvm/Analysis/LoopIterator.h" 21 #include "llvm/Analysis/LoopPass.h" 22 #include "llvm/Analysis/MemorySSA.h" 23 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 24 #include "llvm/IR/Dominators.h" 25 #include "llvm/IR/PatternMatch.h" 26 #include "llvm/InitializePasses.h" 27 #include "llvm/Transforms/Scalar.h" 28 #include "llvm/Transforms/Scalar/LoopPassManager.h" 29 #include "llvm/Transforms/Utils/LoopUtils.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "loop-delete" 34 35 STATISTIC(NumDeleted, "Number of loops deleted"); 36 37 enum class LoopDeletionResult { 38 Unmodified, 39 Modified, 40 Deleted, 41 }; 42 43 static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) { 44 if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted) 45 return LoopDeletionResult::Deleted; 46 if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified) 47 return LoopDeletionResult::Modified; 48 return LoopDeletionResult::Unmodified; 49 } 50 51 /// Determines if a loop is dead. 52 /// 53 /// This assumes that we've already checked for unique exit and exiting blocks, 54 /// and that the code is in LCSSA form. 55 static bool isLoopDead(Loop *L, ScalarEvolution &SE, 56 SmallVectorImpl<BasicBlock *> &ExitingBlocks, 57 BasicBlock *ExitBlock, bool &Changed, 58 BasicBlock *Preheader) { 59 // Make sure that all PHI entries coming from the loop are loop invariant. 60 // Because the code is in LCSSA form, any values used outside of the loop 61 // must pass through a PHI in the exit block, meaning that this check is 62 // sufficient to guarantee that no loop-variant values are used outside 63 // of the loop. 64 bool AllEntriesInvariant = true; 65 bool AllOutgoingValuesSame = true; 66 if (!L->hasNoExitBlocks()) { 67 for (PHINode &P : ExitBlock->phis()) { 68 Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]); 69 70 // Make sure all exiting blocks produce the same incoming value for the 71 // block. If there are different incoming values for different exiting 72 // blocks, then it is impossible to statically determine which value 73 // should be used. 74 AllOutgoingValuesSame = 75 all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) { 76 return incoming == P.getIncomingValueForBlock(BB); 77 }); 78 79 if (!AllOutgoingValuesSame) 80 break; 81 82 if (Instruction *I = dyn_cast<Instruction>(incoming)) 83 if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) { 84 AllEntriesInvariant = false; 85 break; 86 } 87 } 88 } 89 90 if (Changed) 91 SE.forgetLoopDispositions(L); 92 93 if (!AllEntriesInvariant || !AllOutgoingValuesSame) 94 return false; 95 96 // Make sure that no instructions in the block have potential side-effects. 97 // This includes instructions that could write to memory, and loads that are 98 // marked volatile. 99 for (auto &I : L->blocks()) 100 if (any_of(*I, [](Instruction &I) { 101 return I.mayHaveSideEffects() && !I.isDroppable(); 102 })) 103 return false; 104 return true; 105 } 106 107 /// This function returns true if there is no viable path from the 108 /// entry block to the header of \p L. Right now, it only does 109 /// a local search to save compile time. 110 static bool isLoopNeverExecuted(Loop *L) { 111 using namespace PatternMatch; 112 113 auto *Preheader = L->getLoopPreheader(); 114 // TODO: We can relax this constraint, since we just need a loop 115 // predecessor. 116 assert(Preheader && "Needs preheader!"); 117 118 if (Preheader->isEntryBlock()) 119 return false; 120 // All predecessors of the preheader should have a constant conditional 121 // branch, with the loop's preheader as not-taken. 122 for (auto *Pred: predecessors(Preheader)) { 123 BasicBlock *Taken, *NotTaken; 124 ConstantInt *Cond; 125 if (!match(Pred->getTerminator(), 126 m_Br(m_ConstantInt(Cond), Taken, NotTaken))) 127 return false; 128 if (!Cond->getZExtValue()) 129 std::swap(Taken, NotTaken); 130 if (Taken == Preheader) 131 return false; 132 } 133 assert(!pred_empty(Preheader) && 134 "Preheader should have predecessors at this point!"); 135 // All the predecessors have the loop preheader as not-taken target. 136 return true; 137 } 138 139 static const SCEV * 140 getSCEVOnFirstIteration(Value *V, Loop *L, ScalarEvolution &SE, 141 DenseMap<Value *, const SCEV *> &FirstIterSCEV) { 142 // Fist, check in cache. 143 auto Existing = FirstIterSCEV.find(V); 144 if (Existing != FirstIterSCEV.end()) 145 return Existing->second; 146 const SCEV *S = nullptr; 147 // TODO: Once ScalarEvolution supports getValueOnNthIteration for anything 148 // else but AddRecs, it's a good use case for it. So far, just consider some 149 // simple cases, like arithmetic operations. 150 Value *LHS, *RHS; 151 using namespace PatternMatch; 152 if (match(V, m_Add(m_Value(LHS), m_Value(RHS)))) { 153 const SCEV *LHSS = getSCEVOnFirstIteration(LHS, L, SE, FirstIterSCEV); 154 const SCEV *RHSS = getSCEVOnFirstIteration(RHS, L, SE, FirstIterSCEV); 155 S = SE.getAddExpr(LHSS, RHSS); 156 } else if (match(V, m_Sub(m_Value(LHS), m_Value(RHS)))) { 157 const SCEV *LHSS = getSCEVOnFirstIteration(LHS, L, SE, FirstIterSCEV); 158 const SCEV *RHSS = getSCEVOnFirstIteration(RHS, L, SE, FirstIterSCEV); 159 S = SE.getMinusSCEV(LHSS, RHSS); 160 } else if (match(V, m_Mul(m_Value(LHS), m_Value(RHS)))) { 161 const SCEV *LHSS = getSCEVOnFirstIteration(LHS, L, SE, FirstIterSCEV); 162 const SCEV *RHSS = getSCEVOnFirstIteration(RHS, L, SE, FirstIterSCEV); 163 S = SE.getMulExpr(LHSS, RHSS); 164 } else 165 S = SE.getSCEV(V); 166 assert(S && "Case not handled?"); 167 FirstIterSCEV[V] = S; 168 return S; 169 } 170 171 // Try to prove that one of conditions that dominates the latch must exit on 1st 172 // iteration. 173 static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT, 174 ScalarEvolution &SE, LoopInfo &LI) { 175 BasicBlock *Latch = L->getLoopLatch(); 176 177 if (!Latch) 178 return false; 179 180 LoopBlocksRPO RPOT(L); 181 RPOT.perform(&LI); 182 183 BasicBlock *Header = L->getHeader(); 184 // Blocks that are reachable on the 1st iteration. 185 SmallPtrSet<BasicBlock *, 4> LiveBlocks; 186 // Edges that are reachable on the 1st iteration. 187 DenseSet<BasicBlockEdge> LiveEdges; 188 LiveBlocks.insert(L->getHeader()); 189 190 auto MarkLiveEdge = [&](BasicBlock *From, BasicBlock *To) { 191 assert(LiveBlocks.count(From) && "Must be live!"); 192 LiveBlocks.insert(To); 193 LiveEdges.insert({ From, To }); 194 }; 195 196 auto MarkAllSuccessorsLive = [&](BasicBlock *BB) { 197 for (auto *Succ : successors(BB)) 198 MarkLiveEdge(BB, Succ); 199 }; 200 201 // Check if there is only one predecessor on 1st iteration. Note that because 202 // we iterate in RPOT, we have already visited all its (non-latch) 203 // predecessors. 204 auto GetSolePredecessorOnFirstIteration = [&](BasicBlock * BB)->BasicBlock * { 205 if (BB == Header) 206 return L->getLoopPredecessor(); 207 BasicBlock *OnlyPred = nullptr; 208 for (auto *Pred : predecessors(BB)) 209 if (OnlyPred != Pred && LiveEdges.count({ Pred, BB })) { 210 // 2 live preds. 211 if (OnlyPred) 212 return nullptr; 213 OnlyPred = Pred; 214 } 215 216 assert(OnlyPred && "No live predecessors?"); 217 return OnlyPred; 218 }; 219 DenseMap<Value *, const SCEV *> FirstIterSCEV; 220 SmallPtrSet<BasicBlock *, 4> Visited; 221 222 // Use the following algorithm to prove we never take the latch on the 1st 223 // iteration: 224 // 1. Traverse in topological order, so that whenever we visit a block, all 225 // its predecessors are already visited. 226 // 2. If we can prove that the block may have only 1 predecessor on the 1st 227 // iteration, map all its phis onto input from this predecessor. 228 // 3a. If we can prove which successor of out block is taken on the 1st 229 // iteration, mark this successor live. 230 // 3b. If we cannot prove it, conservatively assume that all successors are 231 // live. 232 for (auto *BB : RPOT) { 233 Visited.insert(BB); 234 235 // This block is not reachable on the 1st iterations. 236 if (!LiveBlocks.count(BB)) 237 continue; 238 239 // Skip inner loops. 240 if (LI.getLoopFor(BB) != L) { 241 MarkAllSuccessorsLive(BB); 242 continue; 243 } 244 245 // If RPOT exists, we should never visit a block before all of its 246 // predecessors are visited. The only situation when this can be broken is 247 // irreducible CFG. Do not deal with such cases. 248 if (BB != Header) 249 for (auto *Pred : predecessors(BB)) 250 if (!Visited.count(Pred)) 251 return false; 252 253 // If this block has only one live pred, map its phis onto their SCEVs. 254 if (auto *OnlyPred = GetSolePredecessorOnFirstIteration(BB)) 255 for (auto &PN : BB->phis()) { 256 if (!SE.isSCEVable(PN.getType())) 257 continue; 258 auto *Incoming = PN.getIncomingValueForBlock(OnlyPred); 259 if (DT.dominates(Incoming, BB->getTerminator())) { 260 const SCEV *IncSCEV = 261 getSCEVOnFirstIteration(Incoming, L, SE, FirstIterSCEV); 262 FirstIterSCEV[&PN] = IncSCEV; 263 } 264 } 265 266 using namespace PatternMatch; 267 ICmpInst::Predicate Pred; 268 Value *LHS, *RHS; 269 BasicBlock *IfTrue, *IfFalse; 270 auto *Term = BB->getTerminator(); 271 // TODO: Handle switches. 272 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), 273 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) { 274 MarkAllSuccessorsLive(BB); 275 continue; 276 } 277 278 if (!SE.isSCEVable(LHS->getType())) { 279 MarkAllSuccessorsLive(BB); 280 continue; 281 } 282 283 // Can we prove constant true or false for this condition? 284 const SCEV *LHSS = getSCEVOnFirstIteration(LHS, L, SE, FirstIterSCEV); 285 const SCEV *RHSS = getSCEVOnFirstIteration(RHS, L, SE, FirstIterSCEV); 286 // Only query for liveness of in-loop edge if another successor is also 287 // in-loop. 288 // TODO: isKnownPredicateAt is more powerful, but it's too compile time 289 // consuming. So we avoid using it here. 290 if (L->contains(IfFalse) && SE.isKnownPredicate(Pred, LHSS, RHSS)) 291 MarkLiveEdge(BB, IfTrue); 292 else if (L->contains(IfTrue) && 293 SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LHSS, 294 RHSS)) 295 MarkLiveEdge(BB, IfFalse); 296 else 297 MarkAllSuccessorsLive(BB); 298 } 299 300 // We can break the latch if it wasn't live. 301 return !LiveEdges.count({ Latch, Header }); 302 } 303 304 /// If we can prove the backedge is untaken, remove it. This destroys the 305 /// loop, but leaves the (now trivially loop invariant) control flow and 306 /// side effects (if any) in place. 307 static LoopDeletionResult 308 breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE, 309 LoopInfo &LI, MemorySSA *MSSA, 310 OptimizationRemarkEmitter &ORE) { 311 assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); 312 313 if (!L->getLoopLatch()) 314 return LoopDeletionResult::Unmodified; 315 316 auto *BTC = SE.getBackedgeTakenCount(L); 317 if (!isa<SCEVCouldNotCompute>(BTC) && SE.isKnownNonZero(BTC)) 318 return LoopDeletionResult::Unmodified; 319 if (!BTC->isZero() && !canProveExitOnFirstIteration(L, DT, SE, LI)) 320 return LoopDeletionResult::Unmodified; 321 322 breakLoopBackedge(L, DT, SE, LI, MSSA); 323 return LoopDeletionResult::Deleted; 324 } 325 326 /// Remove a loop if it is dead. 327 /// 328 /// A loop is considered dead either if it does not impact the observable 329 /// behavior of the program other than finite running time, or if it is 330 /// required to make progress by an attribute such as 'mustprogress' or 331 /// 'llvm.loop.mustprogress' and does not make any. This may remove 332 /// infinite loops that have been required to make progress. 333 /// 334 /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in 335 /// order to make various safety checks work. 336 /// 337 /// \returns true if any changes were made. This may mutate the loop even if it 338 /// is unable to delete it due to hoisting trivially loop invariant 339 /// instructions out of the loop. 340 static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, 341 ScalarEvolution &SE, LoopInfo &LI, 342 MemorySSA *MSSA, 343 OptimizationRemarkEmitter &ORE) { 344 assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); 345 346 // We can only remove the loop if there is a preheader that we can branch from 347 // after removing it. Also, if LoopSimplify form is not available, stay out 348 // of trouble. 349 BasicBlock *Preheader = L->getLoopPreheader(); 350 if (!Preheader || !L->hasDedicatedExits()) { 351 LLVM_DEBUG( 352 dbgs() 353 << "Deletion requires Loop with preheader and dedicated exits.\n"); 354 return LoopDeletionResult::Unmodified; 355 } 356 357 BasicBlock *ExitBlock = L->getUniqueExitBlock(); 358 359 if (ExitBlock && isLoopNeverExecuted(L)) { 360 LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!"); 361 // We need to forget the loop before setting the incoming values of the exit 362 // phis to undef, so we properly invalidate the SCEV expressions for those 363 // phis. 364 SE.forgetLoop(L); 365 // Set incoming value to undef for phi nodes in the exit block. 366 for (PHINode &P : ExitBlock->phis()) { 367 std::fill(P.incoming_values().begin(), P.incoming_values().end(), 368 UndefValue::get(P.getType())); 369 } 370 ORE.emit([&]() { 371 return OptimizationRemark(DEBUG_TYPE, "NeverExecutes", L->getStartLoc(), 372 L->getHeader()) 373 << "Loop deleted because it never executes"; 374 }); 375 deleteDeadLoop(L, &DT, &SE, &LI, MSSA); 376 ++NumDeleted; 377 return LoopDeletionResult::Deleted; 378 } 379 380 // The remaining checks below are for a loop being dead because all statements 381 // in the loop are invariant. 382 SmallVector<BasicBlock *, 4> ExitingBlocks; 383 L->getExitingBlocks(ExitingBlocks); 384 385 // We require that the loop has at most one exit block. Otherwise, we'd be in 386 // the situation of needing to be able to solve statically which exit block 387 // will be branched to, or trying to preserve the branching logic in a loop 388 // invariant manner. 389 if (!ExitBlock && !L->hasNoExitBlocks()) { 390 LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n"); 391 return LoopDeletionResult::Unmodified; 392 } 393 // Finally, we have to check that the loop really is dead. 394 bool Changed = false; 395 if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) { 396 LLVM_DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n"); 397 return Changed ? LoopDeletionResult::Modified 398 : LoopDeletionResult::Unmodified; 399 } 400 401 // Don't remove loops for which we can't solve the trip count unless the loop 402 // was required to make progress but has been determined to be dead. 403 const SCEV *S = SE.getConstantMaxBackedgeTakenCount(L); 404 if (isa<SCEVCouldNotCompute>(S) && 405 !L->getHeader()->getParent()->mustProgress() && !hasMustProgress(L)) { 406 LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was " 407 "not required to make progress.\n"); 408 return Changed ? LoopDeletionResult::Modified 409 : LoopDeletionResult::Unmodified; 410 } 411 412 LLVM_DEBUG(dbgs() << "Loop is invariant, delete it!"); 413 ORE.emit([&]() { 414 return OptimizationRemark(DEBUG_TYPE, "Invariant", L->getStartLoc(), 415 L->getHeader()) 416 << "Loop deleted because it is invariant"; 417 }); 418 deleteDeadLoop(L, &DT, &SE, &LI, MSSA); 419 ++NumDeleted; 420 421 return LoopDeletionResult::Deleted; 422 } 423 424 PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, 425 LoopStandardAnalysisResults &AR, 426 LPMUpdater &Updater) { 427 428 LLVM_DEBUG(dbgs() << "Analyzing Loop for deletion: "); 429 LLVM_DEBUG(L.dump()); 430 std::string LoopName = std::string(L.getName()); 431 // For the new PM, we can't use OptimizationRemarkEmitter as an analysis 432 // pass. Function analyses need to be preserved across loop transformations 433 // but ORE cannot be preserved (see comment before the pass definition). 434 OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); 435 auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE); 436 437 // If we can prove the backedge isn't taken, just break it and be done. This 438 // leaves the loop structure in place which means it can handle dispatching 439 // to the right exit based on whatever loop invariant structure remains. 440 if (Result != LoopDeletionResult::Deleted) 441 Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI, 442 AR.MSSA, ORE)); 443 444 if (Result == LoopDeletionResult::Unmodified) 445 return PreservedAnalyses::all(); 446 447 if (Result == LoopDeletionResult::Deleted) 448 Updater.markLoopAsDeleted(L, LoopName); 449 450 auto PA = getLoopPassPreservedAnalyses(); 451 if (AR.MSSA) 452 PA.preserve<MemorySSAAnalysis>(); 453 return PA; 454 } 455 456 namespace { 457 class LoopDeletionLegacyPass : public LoopPass { 458 public: 459 static char ID; // Pass ID, replacement for typeid 460 LoopDeletionLegacyPass() : LoopPass(ID) { 461 initializeLoopDeletionLegacyPassPass(*PassRegistry::getPassRegistry()); 462 } 463 464 // Possibly eliminate loop L if it is dead. 465 bool runOnLoop(Loop *L, LPPassManager &) override; 466 467 void getAnalysisUsage(AnalysisUsage &AU) const override { 468 AU.addPreserved<MemorySSAWrapperPass>(); 469 getLoopAnalysisUsage(AU); 470 } 471 }; 472 } 473 474 char LoopDeletionLegacyPass::ID = 0; 475 INITIALIZE_PASS_BEGIN(LoopDeletionLegacyPass, "loop-deletion", 476 "Delete dead loops", false, false) 477 INITIALIZE_PASS_DEPENDENCY(LoopPass) 478 INITIALIZE_PASS_END(LoopDeletionLegacyPass, "loop-deletion", 479 "Delete dead loops", false, false) 480 481 Pass *llvm::createLoopDeletionPass() { return new LoopDeletionLegacyPass(); } 482 483 bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { 484 if (skipLoop(L)) 485 return false; 486 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 487 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 488 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 489 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 490 MemorySSA *MSSA = nullptr; 491 if (MSSAAnalysis) 492 MSSA = &MSSAAnalysis->getMSSA(); 493 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis 494 // pass. Function analyses need to be preserved across loop transformations 495 // but ORE cannot be preserved (see comment before the pass definition). 496 OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); 497 498 LLVM_DEBUG(dbgs() << "Analyzing Loop for deletion: "); 499 LLVM_DEBUG(L->dump()); 500 501 LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE); 502 503 // If we can prove the backedge isn't taken, just break it and be done. This 504 // leaves the loop structure in place which means it can handle dispatching 505 // to the right exit based on whatever loop invariant structure remains. 506 if (Result != LoopDeletionResult::Deleted) 507 Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE)); 508 509 if (Result == LoopDeletionResult::Deleted) 510 LPM.markLoopAsDeleted(*L); 511 512 return Result != LoopDeletionResult::Unmodified; 513 } 514