1 //===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Dead Loop Deletion Pass. This pass is responsible
10 // for eliminating loops with non-infinite computable trip counts that have no
11 // side effects or volatile instructions, and do not contribute to the
12 // computation of the function's return value.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Scalar/LoopDeletion.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/GlobalsModRef.h"
20 #include "llvm/Analysis/LoopIterator.h"
21 #include "llvm/Analysis/LoopPass.h"
22 #include "llvm/Analysis/MemorySSA.h"
23 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/InitializePasses.h"
27 #include "llvm/Transforms/Scalar.h"
28 #include "llvm/Transforms/Scalar/LoopPassManager.h"
29 #include "llvm/Transforms/Utils/LoopUtils.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "loop-delete"
34 
35 STATISTIC(NumDeleted, "Number of loops deleted");
36 
37 enum class LoopDeletionResult {
38   Unmodified,
39   Modified,
40   Deleted,
41 };
42 
43 static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
44   if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
45     return LoopDeletionResult::Deleted;
46   if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
47     return LoopDeletionResult::Modified;
48   return LoopDeletionResult::Unmodified;
49 }
50 
51 /// Determines if a loop is dead.
52 ///
53 /// This assumes that we've already checked for unique exit and exiting blocks,
54 /// and that the code is in LCSSA form.
55 static bool isLoopDead(Loop *L, ScalarEvolution &SE,
56                        SmallVectorImpl<BasicBlock *> &ExitingBlocks,
57                        BasicBlock *ExitBlock, bool &Changed,
58                        BasicBlock *Preheader) {
59   // Make sure that all PHI entries coming from the loop are loop invariant.
60   // Because the code is in LCSSA form, any values used outside of the loop
61   // must pass through a PHI in the exit block, meaning that this check is
62   // sufficient to guarantee that no loop-variant values are used outside
63   // of the loop.
64   bool AllEntriesInvariant = true;
65   bool AllOutgoingValuesSame = true;
66   if (!L->hasNoExitBlocks()) {
67     for (PHINode &P : ExitBlock->phis()) {
68       Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
69 
70       // Make sure all exiting blocks produce the same incoming value for the
71       // block. If there are different incoming values for different exiting
72       // blocks, then it is impossible to statically determine which value
73       // should be used.
74       AllOutgoingValuesSame =
75           all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
76             return incoming == P.getIncomingValueForBlock(BB);
77           });
78 
79       if (!AllOutgoingValuesSame)
80         break;
81 
82       if (Instruction *I = dyn_cast<Instruction>(incoming))
83         if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) {
84           AllEntriesInvariant = false;
85           break;
86         }
87     }
88   }
89 
90   if (Changed)
91     SE.forgetLoopDispositions(L);
92 
93   if (!AllEntriesInvariant || !AllOutgoingValuesSame)
94     return false;
95 
96   // Make sure that no instructions in the block have potential side-effects.
97   // This includes instructions that could write to memory, and loads that are
98   // marked volatile.
99   for (auto &I : L->blocks())
100     if (any_of(*I, [](Instruction &I) {
101           return I.mayHaveSideEffects() && !I.isDroppable();
102         }))
103       return false;
104   return true;
105 }
106 
107 /// This function returns true if there is no viable path from the
108 /// entry block to the header of \p L. Right now, it only does
109 /// a local search to save compile time.
110 static bool isLoopNeverExecuted(Loop *L) {
111   using namespace PatternMatch;
112 
113   auto *Preheader = L->getLoopPreheader();
114   // TODO: We can relax this constraint, since we just need a loop
115   // predecessor.
116   assert(Preheader && "Needs preheader!");
117 
118   if (Preheader->isEntryBlock())
119     return false;
120   // All predecessors of the preheader should have a constant conditional
121   // branch, with the loop's preheader as not-taken.
122   for (auto *Pred: predecessors(Preheader)) {
123     BasicBlock *Taken, *NotTaken;
124     ConstantInt *Cond;
125     if (!match(Pred->getTerminator(),
126                m_Br(m_ConstantInt(Cond), Taken, NotTaken)))
127       return false;
128     if (!Cond->getZExtValue())
129       std::swap(Taken, NotTaken);
130     if (Taken == Preheader)
131       return false;
132   }
133   assert(!pred_empty(Preheader) &&
134          "Preheader should have predecessors at this point!");
135   // All the predecessors have the loop preheader as not-taken target.
136   return true;
137 }
138 
139 static const SCEV *
140 getSCEVOnFirstIteration(Value *V, Loop *L, ScalarEvolution &SE,
141                         DenseMap<Value *, const SCEV *> &FirstIterSCEV) {
142   // Fist, check in cache.
143   auto Existing = FirstIterSCEV.find(V);
144   if (Existing != FirstIterSCEV.end())
145     return Existing->second;
146   const SCEV *S = nullptr;
147   // TODO: Once ScalarEvolution supports getValueOnNthIteration for anything
148   // else but AddRecs, it's a good use case for it. So far, just consider some
149   // simple cases, like arithmetic operations.
150   Value *LHS, *RHS;
151   using namespace PatternMatch;
152   if (match(V, m_Add(m_Value(LHS), m_Value(RHS)))) {
153     const SCEV *LHSS = getSCEVOnFirstIteration(LHS, L, SE, FirstIterSCEV);
154     const SCEV *RHSS = getSCEVOnFirstIteration(RHS, L, SE, FirstIterSCEV);
155     S = SE.getAddExpr(LHSS, RHSS);
156   } else if (match(V, m_Sub(m_Value(LHS), m_Value(RHS)))) {
157     const SCEV *LHSS = getSCEVOnFirstIteration(LHS, L, SE, FirstIterSCEV);
158     const SCEV *RHSS = getSCEVOnFirstIteration(RHS, L, SE, FirstIterSCEV);
159     S = SE.getMinusSCEV(LHSS, RHSS);
160   } else if (match(V, m_Mul(m_Value(LHS), m_Value(RHS)))) {
161     const SCEV *LHSS = getSCEVOnFirstIteration(LHS, L, SE, FirstIterSCEV);
162     const SCEV *RHSS = getSCEVOnFirstIteration(RHS, L, SE, FirstIterSCEV);
163     S = SE.getMulExpr(LHSS, RHSS);
164   } else
165     S = SE.getSCEV(V);
166   assert(S && "Case not handled?");
167   FirstIterSCEV[V] = S;
168   return S;
169 }
170 
171 // Try to prove that one of conditions that dominates the latch must exit on 1st
172 // iteration.
173 static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
174                                          ScalarEvolution &SE, LoopInfo &LI) {
175   BasicBlock *Latch = L->getLoopLatch();
176 
177   if (!Latch)
178     return false;
179 
180   LoopBlocksRPO RPOT(L);
181   RPOT.perform(&LI);
182 
183   BasicBlock *Header = L->getHeader();
184   // Blocks that are reachable on the 1st iteration.
185   SmallPtrSet<BasicBlock *, 4> LiveBlocks;
186   // Edges that are reachable on the 1st iteration.
187   DenseSet<BasicBlockEdge> LiveEdges;
188   LiveBlocks.insert(L->getHeader());
189 
190   auto MarkLiveEdge = [&](BasicBlock *From, BasicBlock *To) {
191     assert(LiveBlocks.count(From) && "Must be live!");
192     LiveBlocks.insert(To);
193     LiveEdges.insert({ From, To });
194   };
195 
196   auto MarkAllSuccessorsLive = [&](BasicBlock *BB) {
197     for (auto *Succ : successors(BB))
198       MarkLiveEdge(BB, Succ);
199   };
200 
201   // Check if there is only one predecessor on 1st iteration. Note that because
202   // we iterate in RPOT, we have already visited all its (non-latch)
203   // predecessors.
204   auto GetSolePredecessorOnFirstIteration = [&](BasicBlock * BB)->BasicBlock * {
205     if (BB == Header)
206       return L->getLoopPredecessor();
207     BasicBlock *OnlyPred = nullptr;
208     for (auto *Pred : predecessors(BB))
209       if (OnlyPred != Pred && LiveEdges.count({ Pred, BB })) {
210         // 2 live preds.
211         if (OnlyPred)
212           return nullptr;
213         OnlyPred = Pred;
214       }
215 
216     assert(OnlyPred && "No live predecessors?");
217     return OnlyPred;
218   };
219   DenseMap<Value *, const SCEV *> FirstIterSCEV;
220   SmallPtrSet<BasicBlock *, 4> Visited;
221 
222   // Use the following algorithm to prove we never take the latch on the 1st
223   // iteration:
224   // 1. Traverse in topological order, so that whenever we visit a block, all
225   //    its predecessors are already visited.
226   // 2. If we can prove that the block may have only 1 predecessor on the 1st
227   //    iteration, map all its phis onto input from this predecessor.
228   // 3a. If we can prove which successor of out block is taken on the 1st
229   //     iteration, mark this successor live.
230   // 3b. If we cannot prove it, conservatively assume that all successors are
231   //     live.
232   for (auto *BB : RPOT) {
233     Visited.insert(BB);
234 
235     // This block is not reachable on the 1st iterations.
236     if (!LiveBlocks.count(BB))
237       continue;
238 
239     // Skip inner loops.
240     if (LI.getLoopFor(BB) != L) {
241       MarkAllSuccessorsLive(BB);
242       continue;
243     }
244 
245     // If RPOT exists, we should never visit a block before all of its
246     // predecessors are visited. The only situation when this can be broken is
247     // irreducible CFG. Do not deal with such cases.
248     if (BB != Header)
249       for (auto *Pred : predecessors(BB))
250         if (!Visited.count(Pred))
251           return false;
252 
253     // If this block has only one live pred, map its phis onto their SCEVs.
254     if (auto *OnlyPred = GetSolePredecessorOnFirstIteration(BB))
255       for (auto &PN : BB->phis()) {
256         if (!SE.isSCEVable(PN.getType()))
257           continue;
258         auto *Incoming = PN.getIncomingValueForBlock(OnlyPred);
259         if (DT.dominates(Incoming, BB->getTerminator())) {
260           const SCEV *IncSCEV =
261               getSCEVOnFirstIteration(Incoming, L, SE, FirstIterSCEV);
262           FirstIterSCEV[&PN] = IncSCEV;
263         }
264       }
265 
266     using namespace PatternMatch;
267     ICmpInst::Predicate Pred;
268     Value *LHS, *RHS;
269     BasicBlock *IfTrue, *IfFalse;
270     auto *Term = BB->getTerminator();
271     // TODO: Handle switches.
272     if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
273                           m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) {
274       MarkAllSuccessorsLive(BB);
275       continue;
276     }
277 
278     if (!SE.isSCEVable(LHS->getType())) {
279       MarkAllSuccessorsLive(BB);
280       continue;
281     }
282 
283     // Can we prove constant true or false for this condition?
284     const SCEV *LHSS = getSCEVOnFirstIteration(LHS, L, SE, FirstIterSCEV);
285     const SCEV *RHSS = getSCEVOnFirstIteration(RHS, L, SE, FirstIterSCEV);
286     // Only query for liveness of in-loop edge if another successor is also
287     // in-loop.
288     // TODO: isKnownPredicateAt is more powerful, but it's too compile time
289     // consuming. So we avoid using it here.
290     if (L->contains(IfFalse) && SE.isKnownPredicate(Pred, LHSS, RHSS))
291       MarkLiveEdge(BB, IfTrue);
292     else if (L->contains(IfTrue) &&
293              SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LHSS,
294                                  RHSS))
295       MarkLiveEdge(BB, IfFalse);
296     else
297       MarkAllSuccessorsLive(BB);
298   }
299 
300   // We can break the latch if it wasn't live.
301   return !LiveEdges.count({ Latch, Header });
302 }
303 
304 /// If we can prove the backedge is untaken, remove it.  This destroys the
305 /// loop, but leaves the (now trivially loop invariant) control flow and
306 /// side effects (if any) in place.
307 static LoopDeletionResult
308 breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
309                         LoopInfo &LI, MemorySSA *MSSA,
310                         OptimizationRemarkEmitter &ORE) {
311   assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
312 
313   if (!L->getLoopLatch())
314     return LoopDeletionResult::Unmodified;
315 
316   auto *BTC = SE.getBackedgeTakenCount(L);
317   if (!isa<SCEVCouldNotCompute>(BTC) && SE.isKnownNonZero(BTC))
318     return LoopDeletionResult::Unmodified;
319   if (!BTC->isZero() && !canProveExitOnFirstIteration(L, DT, SE, LI))
320     return LoopDeletionResult::Unmodified;
321 
322   breakLoopBackedge(L, DT, SE, LI, MSSA);
323   return LoopDeletionResult::Deleted;
324 }
325 
326 /// Remove a loop if it is dead.
327 ///
328 /// A loop is considered dead either if it does not impact the observable
329 /// behavior of the program other than finite running time, or if it is
330 /// required to make progress by an attribute such as 'mustprogress' or
331 /// 'llvm.loop.mustprogress' and does not make any. This may remove
332 /// infinite loops that have been required to make progress.
333 ///
334 /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in
335 /// order to make various safety checks work.
336 ///
337 /// \returns true if any changes were made. This may mutate the loop even if it
338 /// is unable to delete it due to hoisting trivially loop invariant
339 /// instructions out of the loop.
340 static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
341                                            ScalarEvolution &SE, LoopInfo &LI,
342                                            MemorySSA *MSSA,
343                                            OptimizationRemarkEmitter &ORE) {
344   assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
345 
346   // We can only remove the loop if there is a preheader that we can branch from
347   // after removing it. Also, if LoopSimplify form is not available, stay out
348   // of trouble.
349   BasicBlock *Preheader = L->getLoopPreheader();
350   if (!Preheader || !L->hasDedicatedExits()) {
351     LLVM_DEBUG(
352         dbgs()
353         << "Deletion requires Loop with preheader and dedicated exits.\n");
354     return LoopDeletionResult::Unmodified;
355   }
356 
357   BasicBlock *ExitBlock = L->getUniqueExitBlock();
358 
359   if (ExitBlock && isLoopNeverExecuted(L)) {
360     LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!");
361     // We need to forget the loop before setting the incoming values of the exit
362     // phis to undef, so we properly invalidate the SCEV expressions for those
363     // phis.
364     SE.forgetLoop(L);
365     // Set incoming value to undef for phi nodes in the exit block.
366     for (PHINode &P : ExitBlock->phis()) {
367       std::fill(P.incoming_values().begin(), P.incoming_values().end(),
368                 UndefValue::get(P.getType()));
369     }
370     ORE.emit([&]() {
371       return OptimizationRemark(DEBUG_TYPE, "NeverExecutes", L->getStartLoc(),
372                                 L->getHeader())
373              << "Loop deleted because it never executes";
374     });
375     deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
376     ++NumDeleted;
377     return LoopDeletionResult::Deleted;
378   }
379 
380   // The remaining checks below are for a loop being dead because all statements
381   // in the loop are invariant.
382   SmallVector<BasicBlock *, 4> ExitingBlocks;
383   L->getExitingBlocks(ExitingBlocks);
384 
385   // We require that the loop has at most one exit block. Otherwise, we'd be in
386   // the situation of needing to be able to solve statically which exit block
387   // will be branched to, or trying to preserve the branching logic in a loop
388   // invariant manner.
389   if (!ExitBlock && !L->hasNoExitBlocks()) {
390     LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n");
391     return LoopDeletionResult::Unmodified;
392   }
393   // Finally, we have to check that the loop really is dead.
394   bool Changed = false;
395   if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) {
396     LLVM_DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n");
397     return Changed ? LoopDeletionResult::Modified
398                    : LoopDeletionResult::Unmodified;
399   }
400 
401   // Don't remove loops for which we can't solve the trip count unless the loop
402   // was required to make progress but has been determined to be dead.
403   const SCEV *S = SE.getConstantMaxBackedgeTakenCount(L);
404   if (isa<SCEVCouldNotCompute>(S) &&
405       !L->getHeader()->getParent()->mustProgress() && !hasMustProgress(L)) {
406     LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was "
407                          "not required to make progress.\n");
408     return Changed ? LoopDeletionResult::Modified
409                    : LoopDeletionResult::Unmodified;
410   }
411 
412   LLVM_DEBUG(dbgs() << "Loop is invariant, delete it!");
413   ORE.emit([&]() {
414     return OptimizationRemark(DEBUG_TYPE, "Invariant", L->getStartLoc(),
415                               L->getHeader())
416            << "Loop deleted because it is invariant";
417   });
418   deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
419   ++NumDeleted;
420 
421   return LoopDeletionResult::Deleted;
422 }
423 
424 PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
425                                         LoopStandardAnalysisResults &AR,
426                                         LPMUpdater &Updater) {
427 
428   LLVM_DEBUG(dbgs() << "Analyzing Loop for deletion: ");
429   LLVM_DEBUG(L.dump());
430   std::string LoopName = std::string(L.getName());
431   // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
432   // pass. Function analyses need to be preserved across loop transformations
433   // but ORE cannot be preserved (see comment before the pass definition).
434   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
435   auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
436 
437   // If we can prove the backedge isn't taken, just break it and be done.  This
438   // leaves the loop structure in place which means it can handle dispatching
439   // to the right exit based on whatever loop invariant structure remains.
440   if (Result != LoopDeletionResult::Deleted)
441     Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
442                                                    AR.MSSA, ORE));
443 
444   if (Result == LoopDeletionResult::Unmodified)
445     return PreservedAnalyses::all();
446 
447   if (Result == LoopDeletionResult::Deleted)
448     Updater.markLoopAsDeleted(L, LoopName);
449 
450   auto PA = getLoopPassPreservedAnalyses();
451   if (AR.MSSA)
452     PA.preserve<MemorySSAAnalysis>();
453   return PA;
454 }
455 
456 namespace {
457 class LoopDeletionLegacyPass : public LoopPass {
458 public:
459   static char ID; // Pass ID, replacement for typeid
460   LoopDeletionLegacyPass() : LoopPass(ID) {
461     initializeLoopDeletionLegacyPassPass(*PassRegistry::getPassRegistry());
462   }
463 
464   // Possibly eliminate loop L if it is dead.
465   bool runOnLoop(Loop *L, LPPassManager &) override;
466 
467   void getAnalysisUsage(AnalysisUsage &AU) const override {
468     AU.addPreserved<MemorySSAWrapperPass>();
469     getLoopAnalysisUsage(AU);
470   }
471 };
472 }
473 
474 char LoopDeletionLegacyPass::ID = 0;
475 INITIALIZE_PASS_BEGIN(LoopDeletionLegacyPass, "loop-deletion",
476                       "Delete dead loops", false, false)
477 INITIALIZE_PASS_DEPENDENCY(LoopPass)
478 INITIALIZE_PASS_END(LoopDeletionLegacyPass, "loop-deletion",
479                     "Delete dead loops", false, false)
480 
481 Pass *llvm::createLoopDeletionPass() { return new LoopDeletionLegacyPass(); }
482 
483 bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
484   if (skipLoop(L))
485     return false;
486   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
487   ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
488   LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
489   auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
490   MemorySSA *MSSA = nullptr;
491   if (MSSAAnalysis)
492     MSSA = &MSSAAnalysis->getMSSA();
493   // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
494   // pass.  Function analyses need to be preserved across loop transformations
495   // but ORE cannot be preserved (see comment before the pass definition).
496   OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
497 
498   LLVM_DEBUG(dbgs() << "Analyzing Loop for deletion: ");
499   LLVM_DEBUG(L->dump());
500 
501   LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE);
502 
503   // If we can prove the backedge isn't taken, just break it and be done.  This
504   // leaves the loop structure in place which means it can handle dispatching
505   // to the right exit based on whatever loop invariant structure remains.
506   if (Result != LoopDeletionResult::Deleted)
507     Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE));
508 
509   if (Result == LoopDeletionResult::Deleted)
510     LPM.markLoopAsDeleted(*L);
511 
512   return Result != LoopDeletionResult::Unmodified;
513 }
514