1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 using namespace PatternMatch;
93 
94 #define DEBUG_TYPE "indvars"
95 
// Pass statistics, reported under -stats.
STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

// Policy knob for exit-value replacement. NOTE(review): the ReplaceExitVal
// enum is declared outside this file; confirm semantics against its users.
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

// Passed through to IV widening (see simplifyAndExtend).
static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

// Escape hatch to turn off linear function test replacement entirely.
static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

// Enables predication of exit conditions in read-only loops.
static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

// Enables IV widening; the per-run decision is also gated by the
// WidenIndVars member of IndVarSimplify.
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));
138 
139 namespace {
140 
141 struct RewritePhi;
142 
class IndVarSimplify {
  // Analyses consumed (and kept up to date) by the transformation.
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  // Non-null only when a MemorySSA analysis was supplied to the constructor;
  // used to keep MemorySSA consistent while instructions are deleted.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions made trivially dead during the run; tracked with weak
  // handles so entries null out if deleted by someone else first.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  // Per-run switch controlling whether IV widening is attempted.
  bool WidenIndVars;

  /// Replace a floating-point IV rooted at \p PH with an equivalent integer
  /// IV when the init/step/exit values are exactly representable as i32.
  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  /// Run handleFloatingPointIV over all header phis of \p L.
  bool rewriteNonIntegerIVs(Loop *L);

  /// Iteratively simplify IV users and widen IVs (see definition below).
  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// See if we can convert an exit condition from signed to unsigned.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  /// Rewrite exit-block phi operands whose value is known because the
  /// corresponding exit can only be taken on the first iteration.
  bool rewriteFirstIterationLoopExitValues(Loop *L);

  /// Canonicalize the test of \p ExitingBB into a comparison of the IV
  /// against its exit value (linear function test replacement).
  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  // Sink unused loop-invariant instructions; body not visible in this chunk.
  bool sinkUnusedInvariants(Loop *L);

public:
  /// \p MSSA may be null; when non-null a MemorySSAUpdater is created so
  /// deletions performed by the pass keep MemorySSA valid.
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Run the transformation on \p L; returns true if the IR changed.
  bool run(Loop *L);
};
188 
189 } // end anonymous namespace
190 
191 //===----------------------------------------------------------------------===//
192 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
193 //===----------------------------------------------------------------------===//
194 
195 /// Convert APF to an integer, if possible.
196 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
197   bool isExact = false;
198   // See if we can convert this to an int64_t
199   uint64_t UIntVal;
200   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
201                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
202       !isExact)
203     return false;
204   IntVal = UIntVal;
205   return true;
206 }
207 
208 /// If the loop has floating induction variable then insert corresponding
209 /// integer induction variable if possible.
210 /// For example,
211 /// for(double i = 0; i < 10000; ++i)
212 ///   bar(i)
213 /// is converted into
214 /// for(int i = 0; i < 10000; ++i)
215 ///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // contains() yields a bool (0/1) used directly as the index of the
  // preheader incoming edge; the other of the two indices is the backedge.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.  Ordered and unordered FP
  // predicates map to the same signed integer predicate: once the IV is an
  // integer, NaN operands are no longer possible.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      // Inclusive bounds cover one extra value; if that extra value
      // overflows the 32-bit range, the integer loop would never terminate.
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      // Same inclusive-bound adjustment as the positive-stride case.
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.  PN (and possibly others) still
  // use Incr; those uses are replaced with undef here, and PN itself is
  // rewritten in terms of the new integer IV below if it survives.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
409 
410 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
411   // First step.  Check to see if there are any floating-point recurrences.
412   // If there are, change them into integer recurrences, permitting analysis by
413   // the SCEV routines.
414   BasicBlock *Header = L->getHeader();
415 
416   SmallVector<WeakTrackingVH, 8> PHIs;
417   for (PHINode &PN : Header->phis())
418     PHIs.push_back(&PN);
419 
420   bool Changed = false;
421   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
422     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
423       Changed |= handleFloatingPointIV(L, PN);
424 
425   // If the loop previously had floating-point IV, ScalarEvolution
426   // may not have been able to compute a trip count. Now that we've done some
427   // re-writing, the trip count may be computable.
428   if (Changed)
429     SE->forgetLoop(L);
430   return Changed;
431 }
432 
433 //===---------------------------------------------------------------------===//
434 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
435 // they will exit at the first iteration.
436 //===---------------------------------------------------------------------===//
437 
438 /// Check to see if this loop has loop invariant conditions which lead to loop
439 /// exits. If so, we know that if the exit path is taken, it is at the first
440 /// loop iteration. This lets us predict exit values of PHI nodes that live in
441 /// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  // Returns true if any exit phi operand was rewritten.
  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        // A loop-invariant condition cannot flip between iterations, so if
        // this exit is ever taken it is taken on the first iteration.
        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          // On the first iteration the header phi still holds its preheader
          // value, so substitute that value directly.
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          // Invalidate SCEV's cached information about the rewritten phi.
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}
508 
509 //===----------------------------------------------------------------------===//
510 //  IV Widening - Extend the width of an IV to cover its widest uses.
511 //===----------------------------------------------------------------------===//
512 
513 /// Update information about the induction variable that is extended by this
514 /// sign or zero extend operation. This is used to determine the final width of
515 /// the IV before actually widening it.
516 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
517                         ScalarEvolution *SE,
518                         const TargetTransformInfo *TTI) {
519   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
520   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
521     return;
522 
523   Type *Ty = Cast->getType();
524   uint64_t Width = SE->getTypeSizeInBits(Ty);
525   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
526     return;
527 
528   // Check that `Cast` actually extends the induction variable (we rely on this
529   // later).  This takes care of cases where `Cast` is extending a truncation of
530   // the narrow induction variable, and thus can end up being narrower than the
531   // "narrow" induction variable.
532   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
533   if (NarrowIVWidth >= Width)
534     return;
535 
536   // Cast is either an sext or zext up to this point.
537   // We should not widen an indvar if arithmetics on the wider indvar are more
538   // expensive than those on the narrower indvar. We check only the cost of ADD
539   // because at least an ADD is required to increment the induction variable. We
540   // could compute more comprehensively the cost of all instructions on the
541   // induction variable when necessary.
542   if (TTI &&
543       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
544           TTI->getArithmeticInstrCost(Instruction::Add,
545                                       Cast->getOperand(0)->getType())) {
546     return;
547   }
548 
549   if (!WI.WidestNativeType) {
550     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
551     WI.IsSigned = IsSigned;
552     return;
553   }
554 
555   // We extend the IV to satisfy the sign of its first user, arbitrarily.
556   if (WI.IsSigned != IsSigned)
557     return;
558 
559   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
560     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
561 }
562 
563 //===----------------------------------------------------------------------===//
564 //  Live IV Reduction - Minimize IVs live across the loop.
565 //===----------------------------------------------------------------------===//
566 
567 //===----------------------------------------------------------------------===//
568 //  Simplification of IV users based on SCEV evaluation.
569 //===----------------------------------------------------------------------===//
570 
571 namespace {
572 
// Visitor passed to simplifyUsersOfIV; records sign/zero extensions of the
// IV phi so the widest profitable extension can be chosen later.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  // The narrow induction variable whose casts are being observed.
  PHINode *IVPhi;

public:
  // Accumulated widening information; filled in by visitCast.
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    // DT is inherited from the IVVisitor base class.
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
592 
593 } // end anonymous namespace
594 
595 /// Iteratively perform simplification on a worklist of IV users. Each
596 /// successive simplification may push more users which may themselves be
597 /// candidates for simplification.
598 ///
599 /// Sign/Zero extend elimination is interleaved with IV simplification.
600 bool IndVarSimplify::simplifyAndExtend(Loop *L,
601                                        SCEVExpander &Rewriter,
602                                        LoopInfo *LI) {
603   SmallVector<WideIVInfo, 8> WideIVs;
604 
605   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
606           Intrinsic::getName(Intrinsic::experimental_guard));
607   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
608 
609   SmallVector<PHINode*, 8> LoopPhis;
610   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
611     LoopPhis.push_back(cast<PHINode>(I));
612   }
613   // Each round of simplification iterates through the SimplifyIVUsers worklist
614   // for all current phis, then determines whether any IVs can be
615   // widened. Widening adds new phis to LoopPhis, inducing another round of
616   // simplification on the wide IVs.
617   bool Changed = false;
618   while (!LoopPhis.empty()) {
619     // Evaluate as many IV expressions as possible before widening any IVs. This
620     // forces SCEV to set no-wrap flags before evaluating sign/zero
621     // extension. The first time SCEV attempts to normalize sign/zero extension,
622     // the result becomes final. So for the most predictable results, we delay
623     // evaluation of sign/zero extend evaluation until needed, and avoid running
624     // other SCEV based analysis prior to simplifyAndExtend.
625     do {
626       PHINode *CurrIV = LoopPhis.pop_back_val();
627 
628       // Information about sign/zero extensions of CurrIV.
629       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
630 
631       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
632                                    &Visitor);
633 
634       if (Visitor.WI.WidestNativeType) {
635         WideIVs.push_back(Visitor.WI);
636       }
637     } while(!LoopPhis.empty());
638 
639     // Continue if we disallowed widening.
640     if (!WidenIndVars)
641       continue;
642 
643     for (; !WideIVs.empty(); WideIVs.pop_back()) {
644       unsigned ElimExt;
645       unsigned Widened;
646       if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
647                                           DT, DeadInsts, ElimExt, Widened,
648                                           HasGuards, UsePostIncrementRanges)) {
649         NumElimExt += ElimExt;
650         NumWidened += Widened;
651         Changed = true;
652         LoopPhis.push_back(WidePhi);
653       }
654     }
655   }
656   return Changed;
657 }
658 
659 //===----------------------------------------------------------------------===//
660 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
661 //===----------------------------------------------------------------------===//
662 
/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
/// that this is less general than SCEV's AddRec checking.
666 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
667   Instruction *IncI = dyn_cast<Instruction>(IncV);
668   if (!IncI)
669     return nullptr;
670 
671   switch (IncI->getOpcode()) {
672   case Instruction::Add:
673   case Instruction::Sub:
674     break;
675   case Instruction::GetElementPtr:
676     // An IV counter must preserve its type.
677     if (IncI->getNumOperands() == 2)
678       break;
679     LLVM_FALLTHROUGH;
680   default:
681     return nullptr;
682   }
683 
684   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
685   if (Phi && Phi->getParent() == L->getHeader()) {
686     if (L->isLoopInvariant(IncI->getOperand(1)))
687       return Phi;
688     return nullptr;
689   }
690   if (IncI->getOpcode() == Instruction::GetElementPtr)
691     return nullptr;
692 
693   // Allow add/sub to be commuted.
694   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
695   if (Phi && Phi->getParent() == L->getHeader()) {
696     if (L->isLoopInvariant(IncI->getOperand(0)))
697       return Phi;
698   }
699   return nullptr;
700 }
701 
702 /// Whether the current loop exit test is based on this value.  Currently this
703 /// is limited to a direct use in the loop condition.
704 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
705   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
706   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
707   // TODO: Allow non-icmp loop test.
708   if (!ICmp)
709     return false;
710 
711   // TODO: Allow indirect use.
712   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
713 }
714 
715 /// linearFunctionTestReplace policy. Return true unless we can show that the
716 /// current exit test is already sufficiently canonical.
717 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
718   assert(L->getLoopLatch() && "Must be in simplified form");
719 
720   // Avoid converting a constant or loop invariant test back to a runtime
721   // test.  This is critical for when SCEV's cached ExitCount is less precise
722   // than the current IR (such as after we've proven a particular exit is
723   // actually dead and thus the BE count never reaches our ExitCount.)
724   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
725   if (L->isLoopInvariant(BI->getCondition()))
726     return false;
727 
728   // Do LFTR to simplify the exit condition to an ICMP.
729   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
730   if (!Cond)
731     return true;
732 
733   // Do LFTR to simplify the exit ICMP to EQ/NE
734   ICmpInst::Predicate Pred = Cond->getPredicate();
735   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
736     return true;
737 
738   // Look for a loop invariant RHS
739   Value *LHS = Cond->getOperand(0);
740   Value *RHS = Cond->getOperand(1);
741   if (!L->isLoopInvariant(RHS)) {
742     if (!L->isLoopInvariant(LHS))
743       return true;
744     std::swap(LHS, RHS);
745   }
746   // Look for a simple IV counter LHS
747   PHINode *Phi = dyn_cast<PHINode>(LHS);
748   if (!Phi)
749     Phi = getLoopPhiForCounter(LHS, L);
750 
751   if (!Phi)
752     return true;
753 
754   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
755   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
756   if (Idx < 0)
757     return true;
758 
759   // Do LFTR if the exit condition's IV is *not* a simple counter.
760   Value *IncV = Phi->getIncomingValue(Idx);
761   return Phi != getLoopPhiForCounter(IncV, L);
762 }
763 
/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    // I is poison (Root by assumption, everything else because it propagates
    // poison from an already-poison operand); queue its users, once.
    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}
805 
806 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
807 /// down to checking that all operands are constant and listing instructions
808 /// that may hide undef.
809 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
810                                unsigned Depth) {
811   if (isa<Constant>(V))
812     return !isa<UndefValue>(V);
813 
814   if (Depth >= 6)
815     return false;
816 
817   // Conservatively handle non-constant non-instructions. For example, Arguments
818   // may be undef.
819   Instruction *I = dyn_cast<Instruction>(V);
820   if (!I)
821     return false;
822 
823   // Load and return values may be undef.
824   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
825     return false;
826 
827   // Optimistically handle other instructions.
828   for (Value *Op : I->operands()) {
829     if (!Visited.insert(Op).second)
830       continue;
831     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
832       return false;
833   }
834   return true;
835 }
836 
837 /// Return true if the given value is concrete. We must prove that undef can
838 /// never reach it.
839 ///
840 /// TODO: If we decide that this is a good approach to checking for undef, we
841 /// may factor it into a common location.
842 static bool hasConcreteDef(Value *V) {
843   SmallPtrSet<Value*, 8> Visited;
844   Visited.insert(V);
845   return hasConcreteDefImpl(V, Visited, 0);
846 }
847 
848 /// Return true if this IV has any uses other than the (soon to be rewritten)
849 /// loop exit test.
850 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
851   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
852   Value *IncV = Phi->getIncomingValue(LatchIdx);
853 
854   for (User *U : Phi->users())
855     if (U != Cond && U != IncV) return false;
856 
857   for (User *U : IncV->users())
858     if (U != Cond && U != Phi) return false;
859   return true;
860 }
861 
/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  // SCEV must be able to analyze the phi's type at all.
  if (!SE->isSCEVable(Phi->getType()))
    return false;

  // The phi must be an affine add recurrence over this loop.
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  // Require a constant step of exactly one.
  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  // The value coming from the latch must be the simple increment form that
  // feeds back to this same phi, and must itself be SCEV-analyzable as an
  // add recurrence.
  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}
886 
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
974 
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know apriori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
1062 
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// Returns true (the exit test is always rewritten once we get here).
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Materialize the loop-invariant limit value the (possibly post-inc) IV
  // will be compared against.
  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    // If zext(trunc(IV)) == IV, the truncation is lossless for unsigned
    // semantics and a zext of the limit gives an equivalent comparison.
    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1205 
1206 //===----------------------------------------------------------------------===//
1207 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1208 //===----------------------------------------------------------------------===//
1209 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
///
/// Returns true if any instruction was moved.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from its terminator so that later-defined
  // values are sunk first; each sunk instruction becomes the next insert
  // point, preserving the original relative order in the exit block.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A use feeding a phi is "in" the incoming block, not the phi's block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Advance the iterator past ToMove *before* moving it, so the walk
    // stays valid; stop if only debug/pseudo instructions remain.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1298 
1299 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1300                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1301   auto *OldCond = BI->getCondition();
1302   BI->setCondition(NewCond);
1303   if (OldCond->use_empty())
1304     DeadInsts.emplace_back(OldCond);
1305 }
1306 
1307 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1308                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1309   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1310   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1311   auto *OldCond = BI->getCondition();
1312   auto *NewCond =
1313       ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1314   replaceExitCond(BI, NewCond, DeadInsts);
1315 }
1316 
1317 static void replaceLoopPHINodesWithPreheaderValues(
1318     Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1319   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1320   auto *LoopPreheader = L->getLoopPreheader();
1321   auto *LoopHeader = L->getHeader();
1322   for (auto &PN : LoopHeader->phis()) {
1323     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1324     PN.replaceAllUsesWith(PreheaderIncoming);
1325     DeadInsts.emplace_back(&PN);
1326   }
1327 }
1328 
1329 static void replaceWithInvariantCond(
1330     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1331     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1332     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1333   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1334   Rewriter.setInsertPoint(BI);
1335   auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1336   auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1337   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1338   if (ExitIfTrue)
1339     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1340   IRBuilder<> Builder(BI);
1341   auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1342                                      BI->getCondition()->getName());
1343   replaceExitCond(BI, NewCond, DeadInsts);
1344 }
1345 
/// Try to simplify an exit test whose exact exit count SCEV could not
/// compute.  If the test can be proven always-true within the first MaxIter
/// iterations it is folded to a constant; otherwise, if an equivalent
/// loop-invariant condition exists for those iterations, the test is
/// replaced by it.  Returns true if the exit was rewritten.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  BasicBlock *TrueSucc, *FalseSucc;
  // Only handle a conditional branch on an icmp.
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types so MaxIter matches the addrec's type: widen
  // MaxIter with a zext, or truncate it when that is provably lossless.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}
1412 
/// Rewrite signed exit comparisons of the form "icmp s-pred zext(X), RHS"
/// into their unsigned equivalents when RHS is loop invariant and provably
/// fits in the zero-extended range of X's narrow type (both operands are
/// then non-negative in the wide type, so sign is immaterial).  Returns
/// true if any exit condition was changed.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers.  The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp)
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // Avoid computing SCEVs in the loop to avoid poisoning cache with
    // sub-optimal results.
    if (!L->isLoopInvariant(RHS))
      continue;

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    // RHS's unsigned range must fit within zext's of the narrow type, i.e.
    // within [0, 2^InnerBitWidth), so that RHS is provably non-negative in
    // the wide type (as zext(X) always is).
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    if (!FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS))))
      continue;

    // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
    // replace the signed condition with the unsigned version.
    ICmp->setPredicate(ICmp->getUnsignedPredicate());
    Changed = true;
  }
  return Changed;
}
1463 
1464 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1465   SmallVector<BasicBlock*, 16> ExitingBlocks;
1466   L->getExitingBlocks(ExitingBlocks);
1467 
1468   // Remove all exits which aren't both rewriteable and execute on every
1469   // iteration.
1470   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1471     // If our exitting block exits multiple loops, we can only rewrite the
1472     // innermost one.  Otherwise, we're changing how many times the innermost
1473     // loop runs before it exits.
1474     if (LI->getLoopFor(ExitingBB) != L)
1475       return true;
1476 
1477     // Can't rewrite non-branch yet.
1478     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1479     if (!BI)
1480       return true;
1481 
1482     // If already constant, nothing to do.
1483     if (isa<Constant>(BI->getCondition()))
1484       return true;
1485 
1486     // Likewise, the loop latch must be dominated by the exiting BB.
1487     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1488       return true;
1489 
1490     return false;
1491   });
1492 
1493   if (ExitingBlocks.empty())
1494     return false;
1495 
1496   // Get a symbolic upper bound on the loop backedge taken count.
1497   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1498   if (isa<SCEVCouldNotCompute>(MaxExitCount))
1499     return false;
1500 
1501   // Visit our exit blocks in order of dominance. We know from the fact that
1502   // all exits must dominate the latch, so there is a total dominance order
1503   // between them.
1504   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1505                // std::sort sorts in ascending order, so we want the inverse of
1506                // the normal dominance relation.
1507                if (A == B) return false;
1508                if (DT->properlyDominates(A, B))
1509                  return true;
1510                else {
1511                  assert(DT->properlyDominates(B, A) &&
1512                         "expected total dominance order!");
1513                  return false;
1514                }
1515   });
1516 #ifdef ASSERT
1517   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1518     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1519   }
1520 #endif
1521 
1522   bool Changed = false;
1523   bool SkipLastIter = false;
1524   SmallSet<const SCEV*, 8> DominatingExitCounts;
1525   for (BasicBlock *ExitingBB : ExitingBlocks) {
1526     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1527     if (isa<SCEVCouldNotCompute>(ExitCount)) {
1528       // Okay, we do not know the exit count here. Can we at least prove that it
1529       // will remain the same within iteration space?
1530       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1531       auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1532         return optimizeLoopExitWithUnknownExitCount(
1533             L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1534             Rewriter, DeadInsts);
1535       };
1536 
1537       // TODO: We might have proved that we can skip the last iteration for
1538       // this check. In this case, we only want to check the condition on the
1539       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1540       // corner case:
1541       //
1542       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1543       //
1544       // If we could not prove that len != 0, then we also could not prove that
1545       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1546       // OptimizeCond will likely not prove anything for it, even if it could
1547       // prove the same fact for len.
1548       //
1549       // As a temporary solution, we query both last and pre-last iterations in
1550       // hope that we will be able to prove triviality for at least one of
1551       // them. We can stop querying MaxExitCount for this case once SCEV
1552       // understands that (MaxExitCount - 1) will not overflow here.
1553       if (OptimizeCond(false, false) || OptimizeCond(true, false))
1554         Changed = true;
1555       else if (SkipLastIter)
1556         if (OptimizeCond(false, true) || OptimizeCond(true, true))
1557           Changed = true;
1558       continue;
1559     }
1560 
1561     if (MaxExitCount == ExitCount)
1562       // If the loop has more than 1 iteration, all further checks will be
1563       // executed 1 iteration less.
1564       SkipLastIter = true;
1565 
1566     // If we know we'd exit on the first iteration, rewrite the exit to
1567     // reflect this.  This does not imply the loop must exit through this
1568     // exit; there may be an earlier one taken on the first iteration.
1569     // We know that the backedge can't be taken, so we replace all
1570     // the header PHIs with values coming from the preheader.
1571     if (ExitCount->isZero()) {
1572       foldExit(L, ExitingBB, true, DeadInsts);
1573       replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
1574       Changed = true;
1575       continue;
1576     }
1577 
1578     assert(ExitCount->getType()->isIntegerTy() &&
1579            MaxExitCount->getType()->isIntegerTy() &&
1580            "Exit counts must be integers");
1581 
1582     Type *WiderType =
1583       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1584     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1585     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1586     assert(MaxExitCount->getType() == ExitCount->getType());
1587 
1588     // Can we prove that some other exit must be taken strictly before this
1589     // one?
1590     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1591                                      MaxExitCount, ExitCount)) {
1592       foldExit(L, ExitingBB, false, DeadInsts);
1593       Changed = true;
1594       continue;
1595     }
1596 
1597     // As we run, keep track of which exit counts we've encountered.  If we
1598     // find a duplicate, we've found an exit which would have exited on the
1599     // exiting iteration, but (from the visit order) strictly follows another
1600     // which does the same and is thus dead.
1601     if (!DominatingExitCounts.insert(ExitCount).second) {
1602       foldExit(L, ExitingBB, false, DeadInsts);
1603       Changed = true;
1604       continue;
1605     }
1606 
1607     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1608     // here.  If we kept track of the min of dominanting exits so far, we could
1609     // discharge exits with EC >= MDEC. This is less powerful than the existing
1610     // transform (since later exits aren't considered), but potentially more
1611     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1612     // or a >u b.  Such a case is not currently known.
1613   }
1614   return Changed;
1615 }
1616 
/// For a read-only loop with a computable backedge taken count, rewrite each
/// predicatable exit's condition into a loop-invariant comparison between its
/// exit count and the exact BTC, inserted in the preheader.  This trades
/// loop-varying exit tests for invariant ones without changing which values
/// the program observes.  Returns true if any IR was changed.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true when this exit cannot be predicated (wrong loop level,
  // non-branch terminator, already-constant condition, exit phis, or an
  // uncomputable/unexpandable exit count).
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, than we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known unequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit fires on the final iteration; its branch becomes constant.
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Exit iff the exact BTC equals this exit's count, i.e. the loop runs
      // exactly long enough for this exit (and no earlier one) to trigger.
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        // Widen both operands with zext so the compare types match.
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}
1778 
1779 //===----------------------------------------------------------------------===//
1780 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1781 //===----------------------------------------------------------------------===//
1782 
/// Top-level driver: run each induction-variable simplification subpass over
/// \p L in turn (float IV rewriting, simplify-and-extend, exit value
/// rewriting, congruent IV elimination, exit optimization/predication, LFTR,
/// and cleanup), and report whether any of them changed the IR.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  if (canonicalizeExitCondition(L))
    // We've changed the predicate, but have not changed exit counts, or the
    // values which can flow through any SCEV.  i.e, no invalidation needed.
    Changed = true;

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();
    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeaderBR))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other change have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}
1984 
1985 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
1986                                           LoopStandardAnalysisResults &AR,
1987                                           LPMUpdater &) {
1988   Function *F = L.getHeader()->getParent();
1989   const DataLayout &DL = F->getParent()->getDataLayout();
1990 
1991   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
1992                      WidenIndVars && AllowIVWidening);
1993   if (!IVS.run(&L))
1994     return PreservedAnalyses::all();
1995 
1996   auto PA = getLoopPassPreservedAnalyses();
1997   PA.preserveSet<CFGAnalyses>();
1998   if (AR.MSSA)
1999     PA.preserve<MemorySSAAnalysis>();
2000   return PA;
2001 }
2002 
2003 namespace {
2004 
2005 struct IndVarSimplifyLegacyPass : public LoopPass {
2006   static char ID; // Pass identification, replacement for typeid
2007 
2008   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2009     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2010   }
2011 
2012   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2013     if (skipLoop(L))
2014       return false;
2015 
2016     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2017     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2018     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2019     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2020     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2021     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2022     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2023     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2024     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2025     MemorySSA *MSSA = nullptr;
2026     if (MSSAAnalysis)
2027       MSSA = &MSSAAnalysis->getMSSA();
2028 
2029     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2030     return IVS.run(L);
2031   }
2032 
2033   void getAnalysisUsage(AnalysisUsage &AU) const override {
2034     AU.setPreservesCFG();
2035     AU.addPreserved<MemorySSAWrapperPass>();
2036     getLoopAnalysisUsage(AU);
2037   }
2038 };
2039 
2040 } // end anonymous namespace
2041 
// Storage for the legacy pass's unique ID; its address (not value) identifies
// the pass.
char IndVarSimplifyLegacyPass::ID = 0;

// Register the legacy pass under the "indvars" command-line name with its
// LoopPass dependency.
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

// Factory used by the legacy pass manager to construct this pass.
Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
2053