1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "indvars"
94 
95 STATISTIC(NumWidened     , "Number of indvars widened");
96 STATISTIC(NumReplaced    , "Number of exit values replaced");
97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
100 
101 // Trip count verification can be enabled by default under NDEBUG if we
102 // implement a strong expression equivalence checker in SCEV. Until then, we
103 // use the verify-indvars flag, which may assert in some cases.
104 static cl::opt<bool> VerifyIndvars(
105     "verify-indvars", cl::Hidden,
106     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
107              "effect in release builds. (Note: this adds additional SCEV "
108              "queries potentially changing the analysis result)"));
109 
110 static cl::opt<ReplaceExitVal> ReplaceExitValue(
111     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
112     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
113     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
114                clEnumValN(OnlyCheapRepl, "cheap",
115                           "only replace exit value when the cost is cheap"),
116                clEnumValN(NoHardUse, "noharduse",
117                           "only replace exit values when loop def likely dead"),
118                clEnumValN(AlwaysRepl, "always",
119                           "always replace exit value whenever possible")));
120 
121 static cl::opt<bool> UsePostIncrementRanges(
122   "indvars-post-increment-ranges", cl::Hidden,
123   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124   cl::init(true));
125 
126 static cl::opt<bool>
127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128             cl::desc("Disable Linear Function Test Replace optimization"));
129 
130 static cl::opt<bool>
131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132                 cl::desc("Predicate conditions in read only loops"));
133 
134 static cl::opt<bool>
135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
136                 cl::desc("Allow widening of indvars to eliminate s/zext"));
137 
138 namespace {
139 
140 struct RewritePhi;
141 
142 class IndVarSimplify {
143   LoopInfo *LI;
144   ScalarEvolution *SE;
145   DominatorTree *DT;
146   const DataLayout &DL;
147   TargetLibraryInfo *TLI;
148   const TargetTransformInfo *TTI;
149   std::unique_ptr<MemorySSAUpdater> MSSAU;
150 
151   SmallVector<WeakTrackingVH, 16> DeadInsts;
152 
153   bool handleFloatingPointIV(Loop *L, PHINode *PH);
154   bool rewriteNonIntegerIVs(Loop *L);
155 
156   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
157   /// Try to eliminate loop exits based on analyzeable exit counts
158   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
159   /// Try to form loop invariant tests for loop exits by changing how many
160   /// iterations of the loop run when that is unobservable.
161   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
162 
163   bool rewriteFirstIterationLoopExitValues(Loop *L);
164 
165   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
166                                  const SCEV *ExitCount,
167                                  PHINode *IndVar, SCEVExpander &Rewriter);
168 
169   bool sinkUnusedInvariants(Loop *L);
170 
171 public:
172   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
173                  const DataLayout &DL, TargetLibraryInfo *TLI,
174                  TargetTransformInfo *TTI, MemorySSA *MSSA)
175       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
176     if (MSSA)
177       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
178   }
179 
180   bool run(Loop *L);
181 };
182 
183 } // end anonymous namespace
184 
185 /// Determine the insertion point for this user. By default, insert immediately
186 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
187 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
188 /// common dominator for the incoming blocks. A nullptr can be returned if no
189 /// viable location is found: it may happen if User is a PHI and Def only comes
190 /// to this PHI from unreachable blocks.
191 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
192                                           DominatorTree *DT, LoopInfo *LI) {
193   PHINode *PHI = dyn_cast<PHINode>(User);
194   if (!PHI)
195     return User;
196 
197   Instruction *InsertPt = nullptr;
198   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
199     if (PHI->getIncomingValue(i) != Def)
200       continue;
201 
202     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
203 
204     if (!DT->isReachableFromEntry(InsertBB))
205       continue;
206 
207     if (!InsertPt) {
208       InsertPt = InsertBB->getTerminator();
209       continue;
210     }
211     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
212     InsertPt = InsertBB->getTerminator();
213   }
214 
215   // If we have skipped all inputs, it means that Def only comes to Phi from
216   // unreachable blocks.
217   if (!InsertPt)
218     return nullptr;
219 
220   auto *DefI = dyn_cast<Instruction>(Def);
221   if (!DefI)
222     return InsertPt;
223 
224   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
225 
226   auto *L = LI->getLoopFor(DefI->getParent());
227   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
228 
229   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
230     if (LI->getLoopFor(DTN->getBlock()) == L)
231       return DTN->getBlock()->getTerminator();
232 
233   llvm_unreachable("DefI dominates InsertPt!");
234 }
235 
236 //===----------------------------------------------------------------------===//
237 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
238 //===----------------------------------------------------------------------===//
239 
240 /// Convert APF to an integer, if possible.
241 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
242   bool isExact = false;
243   // See if we can convert this to an int64_t
244   uint64_t UIntVal;
245   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
246                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
247       !isExact)
248     return false;
249   IntVal = UIntVal;
250   return true;
251 }
252 
253 /// If the loop has floating induction variable then insert corresponding
254 /// integer induction variable if possible.
255 /// For example,
256 /// for(double i = 0; i < 10000; ++i)
257 ///   bar(i)
258 /// is converted into
259 /// for(int i = 0; i < 10000; ++i)
260 ///   bar((double)i);
261 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
262   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
263   unsigned BackEdge     = IncomingEdge^1;
264 
265   // Check incoming value.
266   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
267 
268   int64_t InitValue;
269   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
270     return false;
271 
272   // Check IV increment. Reject this PN if increment operation is not
273   // an add or increment value can not be represented by an integer.
274   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
275   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
276 
277   // If this is not an add of the PHI with a constantfp, or if the constant fp
278   // is not an integer, bail out.
279   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
280   int64_t IncValue;
281   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
282       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
283     return false;
284 
285   // Check Incr uses. One user is PN and the other user is an exit condition
286   // used by the conditional terminator.
287   Value::user_iterator IncrUse = Incr->user_begin();
288   Instruction *U1 = cast<Instruction>(*IncrUse++);
289   if (IncrUse == Incr->user_end()) return false;
290   Instruction *U2 = cast<Instruction>(*IncrUse++);
291   if (IncrUse != Incr->user_end()) return false;
292 
293   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
294   // only used by a branch, we can't transform it.
295   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
296   if (!Compare)
297     Compare = dyn_cast<FCmpInst>(U2);
298   if (!Compare || !Compare->hasOneUse() ||
299       !isa<BranchInst>(Compare->user_back()))
300     return false;
301 
302   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
303 
304   // We need to verify that the branch actually controls the iteration count
305   // of the loop.  If not, the new IV can overflow and no one will notice.
306   // The branch block must be in the loop and one of the successors must be out
307   // of the loop.
308   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
309   if (!L->contains(TheBr->getParent()) ||
310       (L->contains(TheBr->getSuccessor(0)) &&
311        L->contains(TheBr->getSuccessor(1))))
312     return false;
313 
314   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
315   // transform it.
316   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
317   int64_t ExitValue;
318   if (ExitValueVal == nullptr ||
319       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
320     return false;
321 
322   // Find new predicate for integer comparison.
323   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
324   switch (Compare->getPredicate()) {
325   default: return false;  // Unknown comparison.
326   case CmpInst::FCMP_OEQ:
327   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
328   case CmpInst::FCMP_ONE:
329   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
330   case CmpInst::FCMP_OGT:
331   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
332   case CmpInst::FCMP_OGE:
333   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
334   case CmpInst::FCMP_OLT:
335   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
336   case CmpInst::FCMP_OLE:
337   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
338   }
339 
340   // We convert the floating point induction variable to a signed i32 value if
341   // we can.  This is only safe if the comparison will not overflow in a way
342   // that won't be trapped by the integer equivalent operations.  Check for this
343   // now.
344   // TODO: We could use i64 if it is native and the range requires it.
345 
346   // The start/stride/exit values must all fit in signed i32.
347   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
348     return false;
349 
350   // If not actually striding (add x, 0.0), avoid touching the code.
351   if (IncValue == 0)
352     return false;
353 
354   // Positive and negative strides have different safety conditions.
355   if (IncValue > 0) {
356     // If we have a positive stride, we require the init to be less than the
357     // exit value.
358     if (InitValue >= ExitValue)
359       return false;
360 
361     uint32_t Range = uint32_t(ExitValue-InitValue);
362     // Check for infinite loop, either:
363     // while (i <= Exit) or until (i > Exit)
364     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
365       if (++Range == 0) return false;  // Range overflows.
366     }
367 
368     unsigned Leftover = Range % uint32_t(IncValue);
369 
370     // If this is an equality comparison, we require that the strided value
371     // exactly land on the exit value, otherwise the IV condition will wrap
372     // around and do things the fp IV wouldn't.
373     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
374         Leftover != 0)
375       return false;
376 
377     // If the stride would wrap around the i32 before exiting, we can't
378     // transform the IV.
379     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
380       return false;
381   } else {
382     // If we have a negative stride, we require the init to be greater than the
383     // exit value.
384     if (InitValue <= ExitValue)
385       return false;
386 
387     uint32_t Range = uint32_t(InitValue-ExitValue);
388     // Check for infinite loop, either:
389     // while (i >= Exit) or until (i < Exit)
390     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
391       if (++Range == 0) return false;  // Range overflows.
392     }
393 
394     unsigned Leftover = Range % uint32_t(-IncValue);
395 
396     // If this is an equality comparison, we require that the strided value
397     // exactly land on the exit value, otherwise the IV condition will wrap
398     // around and do things the fp IV wouldn't.
399     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
400         Leftover != 0)
401       return false;
402 
403     // If the stride would wrap around the i32 before exiting, we can't
404     // transform the IV.
405     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
406       return false;
407   }
408 
409   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
410 
411   // Insert new integer induction variable.
412   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
413   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
414                       PN->getIncomingBlock(IncomingEdge));
415 
416   Value *NewAdd =
417     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
418                               Incr->getName()+".int", Incr);
419   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
420 
421   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
422                                       ConstantInt::get(Int32Ty, ExitValue),
423                                       Compare->getName());
424 
425   // In the following deletions, PN may become dead and may be deleted.
426   // Use a WeakTrackingVH to observe whether this happens.
427   WeakTrackingVH WeakPH = PN;
428 
429   // Delete the old floating point exit comparison.  The branch starts using the
430   // new comparison.
431   NewCompare->takeName(Compare);
432   Compare->replaceAllUsesWith(NewCompare);
433   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
434 
435   // Delete the old floating point increment.
436   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
437   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
438 
439   // If the FP induction variable still has uses, this is because something else
440   // in the loop uses its value.  In order to canonicalize the induction
441   // variable, we chose to eliminate the IV and rewrite it in terms of an
442   // int->fp cast.
443   //
444   // We give preference to sitofp over uitofp because it is faster on most
445   // platforms.
446   if (WeakPH) {
447     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
448                                  &*PN->getParent()->getFirstInsertionPt());
449     PN->replaceAllUsesWith(Conv);
450     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
451   }
452   return true;
453 }
454 
455 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
456   // First step.  Check to see if there are any floating-point recurrences.
457   // If there are, change them into integer recurrences, permitting analysis by
458   // the SCEV routines.
459   BasicBlock *Header = L->getHeader();
460 
461   SmallVector<WeakTrackingVH, 8> PHIs;
462   for (PHINode &PN : Header->phis())
463     PHIs.push_back(&PN);
464 
465   bool Changed = false;
466   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
467     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
468       Changed |= handleFloatingPointIV(L, PN);
469 
470   // If the loop previously had floating-point IV, ScalarEvolution
471   // may not have been able to compute a trip count. Now that we've done some
472   // re-writing, the trip count may be computable.
473   if (Changed)
474     SE->forgetLoop(L);
475   return Changed;
476 }
477 
478 //===---------------------------------------------------------------------===//
479 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
480 // they will exit at the first iteration.
481 //===---------------------------------------------------------------------===//
482 
483 /// Check to see if this loop has loop invariant conditions which lead to loop
484 /// exits. If so, we know that if the exit path is taken, it is at the first
485 /// loop iteration. This lets us predict exit values of PHI nodes that live in
486 /// loop header.
487 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
488   // Verify the input to the pass is already in LCSSA form.
489   assert(L->isLCSSAForm(*DT));
490 
491   SmallVector<BasicBlock *, 8> ExitBlocks;
492   L->getUniqueExitBlocks(ExitBlocks);
493 
494   bool MadeAnyChanges = false;
495   for (auto *ExitBB : ExitBlocks) {
496     // If there are no more PHI nodes in this exit block, then no more
497     // values defined inside the loop are used on this path.
498     for (PHINode &PN : ExitBB->phis()) {
499       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
500            IncomingValIdx != E; ++IncomingValIdx) {
501         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
502 
503         // Can we prove that the exit must run on the first iteration if it
504         // runs at all?  (i.e. early exits are fine for our purposes, but
505         // traces which lead to this exit being taken on the 2nd iteration
506         // aren't.)  Note that this is about whether the exit branch is
507         // executed, not about whether it is taken.
508         if (!L->getLoopLatch() ||
509             !DT->dominates(IncomingBB, L->getLoopLatch()))
510           continue;
511 
512         // Get condition that leads to the exit path.
513         auto *TermInst = IncomingBB->getTerminator();
514 
515         Value *Cond = nullptr;
516         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
517           // Must be a conditional branch, otherwise the block
518           // should not be in the loop.
519           Cond = BI->getCondition();
520         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
521           Cond = SI->getCondition();
522         else
523           continue;
524 
525         if (!L->isLoopInvariant(Cond))
526           continue;
527 
528         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
529 
530         // Only deal with PHIs in the loop header.
531         if (!ExitVal || ExitVal->getParent() != L->getHeader())
532           continue;
533 
534         // If ExitVal is a PHI on the loop header, then we know its
535         // value along this exit because the exit can only be taken
536         // on the first iteration.
537         auto *LoopPreheader = L->getLoopPreheader();
538         assert(LoopPreheader && "Invalid loop");
539         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
540         if (PreheaderIdx != -1) {
541           assert(ExitVal->getParent() == L->getHeader() &&
542                  "ExitVal must be in loop header");
543           MadeAnyChanges = true;
544           PN.setIncomingValue(IncomingValIdx,
545                               ExitVal->getIncomingValue(PreheaderIdx));
546         }
547       }
548     }
549   }
550   return MadeAnyChanges;
551 }
552 
553 //===----------------------------------------------------------------------===//
554 //  IV Widening - Extend the width of an IV to cover its widest uses.
555 //===----------------------------------------------------------------------===//
556 
557 namespace {
558 
559 // Collect information about induction variables that are used by sign/zero
560 // extend operations. This information is recorded by CollectExtend and provides
561 // the input to WidenIV.
562 struct WideIVInfo {
563   PHINode *NarrowIV = nullptr;
564 
565   // Widest integer type created [sz]ext
566   Type *WidestNativeType = nullptr;
567 
568   // Was a sext user seen before a zext?
569   bool IsSigned = false;
570 };
571 
572 } // end anonymous namespace
573 
574 /// Update information about the induction variable that is extended by this
575 /// sign or zero extend operation. This is used to determine the final width of
576 /// the IV before actually widening it.
577 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
578                         const TargetTransformInfo *TTI) {
579   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
580   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
581     return;
582 
583   Type *Ty = Cast->getType();
584   uint64_t Width = SE->getTypeSizeInBits(Ty);
585   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
586     return;
587 
588   // Check that `Cast` actually extends the induction variable (we rely on this
589   // later).  This takes care of cases where `Cast` is extending a truncation of
590   // the narrow induction variable, and thus can end up being narrower than the
591   // "narrow" induction variable.
592   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
593   if (NarrowIVWidth >= Width)
594     return;
595 
596   // Cast is either an sext or zext up to this point.
597   // We should not widen an indvar if arithmetics on the wider indvar are more
598   // expensive than those on the narrower indvar. We check only the cost of ADD
599   // because at least an ADD is required to increment the induction variable. We
600   // could compute more comprehensively the cost of all instructions on the
601   // induction variable when necessary.
602   if (TTI &&
603       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
604           TTI->getArithmeticInstrCost(Instruction::Add,
605                                       Cast->getOperand(0)->getType())) {
606     return;
607   }
608 
609   if (!WI.WidestNativeType) {
610     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
611     WI.IsSigned = IsSigned;
612     return;
613   }
614 
615   // We extend the IV to satisfy the sign of its first user, arbitrarily.
616   if (WI.IsSigned != IsSigned)
617     return;
618 
619   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
620     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
621 }
622 
623 namespace {
624 
625 /// Record a link in the Narrow IV def-use chain along with the WideIV that
626 /// computes the same value as the Narrow IV def.  This avoids caching Use*
627 /// pointers.
628 struct NarrowIVDefUse {
629   Instruction *NarrowDef = nullptr;
630   Instruction *NarrowUse = nullptr;
631   Instruction *WideDef = nullptr;
632 
633   // True if the narrow def is never negative.  Tracking this information lets
634   // us use a sign extension instead of a zero extension or vice versa, when
635   // profitable and legal.
636   bool NeverNegative = false;
637 
638   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
639                  bool NeverNegative)
640       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
641         NeverNegative(NeverNegative) {}
642 };
643 
644 /// The goal of this transform is to remove sign and zero extends without
645 /// creating any new induction variables. To do this, it creates a new phi of
646 /// the wider type and redirects all users, either removing extends or inserting
647 /// truncs whenever we stop propagating the type.
648 class WidenIV {
649   // Parameters
650   PHINode *OrigPhi;
651   Type *WideType;
652 
653   // Context
654   LoopInfo        *LI;
655   Loop            *L;
656   ScalarEvolution *SE;
657   DominatorTree   *DT;
658 
659   // Does the module have any calls to the llvm.experimental.guard intrinsic
660   // at all? If not we can avoid scanning instructions looking for guards.
661   bool HasGuards;
662 
663   // Result
664   PHINode *WidePhi = nullptr;
665   Instruction *WideInc = nullptr;
666   const SCEV *WideIncExpr = nullptr;
667   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
668 
669   SmallPtrSet<Instruction *,16> Widened;
670   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
671 
672   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
673 
674   // A map tracking the kind of extension used to widen each narrow IV
675   // and narrow IV user.
676   // Key: pointer to a narrow IV or IV user.
677   // Value: the kind of extension used to widen this Instruction.
678   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
679 
680   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
681 
682   // A map with control-dependent ranges for post increment IV uses. The key is
683   // a pair of IV def and a use of this def denoting the context. The value is
684   // a ConstantRange representing possible values of the def at the given
685   // context.
686   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
687 
688   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
689                                               Instruction *UseI) {
690     DefUserPair Key(Def, UseI);
691     auto It = PostIncRangeInfos.find(Key);
692     return It == PostIncRangeInfos.end()
693                ? Optional<ConstantRange>(None)
694                : Optional<ConstantRange>(It->second);
695   }
696 
697   void calculatePostIncRanges(PHINode *OrigPhi);
698   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
699 
700   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
701     DefUserPair Key(Def, UseI);
702     auto It = PostIncRangeInfos.find(Key);
703     if (It == PostIncRangeInfos.end())
704       PostIncRangeInfos.insert({Key, R});
705     else
706       It->second = R.intersectWith(It->second);
707   }
708 
709 public:
710   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
711           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
712           bool HasGuards)
713       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
714         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
715         HasGuards(HasGuards), DeadInsts(DI) {
716     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
717     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
718   }
719 
720   PHINode *createWideIV(SCEVExpander &Rewriter);
721 
722 protected:
723   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
724                           Instruction *Use);
725 
726   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
727   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
728                                      const SCEVAddRecExpr *WideAR);
729   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
730 
731   ExtendKind getExtendKind(Instruction *I);
732 
733   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
734 
735   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
736 
737   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
738 
739   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
740                               unsigned OpCode) const;
741 
742   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
743 
744   bool widenLoopCompare(NarrowIVDefUse DU);
745   bool widenWithVariantUse(NarrowIVDefUse DU);
746   void widenWithVariantUseCodegen(NarrowIVDefUse DU);
747 
748   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
749 };
750 
751 } // end anonymous namespace
752 
753 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
754                                  bool IsSigned, Instruction *Use) {
755   // Set the debug location and conservative insertion point.
756   IRBuilder<> Builder(Use);
757   // Hoist the insertion point into loop preheaders as far as possible.
758   for (const Loop *L = LI->getLoopFor(Use->getParent());
759        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
760        L = L->getParentLoop())
761     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
762 
763   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
764                     Builder.CreateZExt(NarrowOper, WideType);
765 }
766 
767 /// Instantiate a wide operation to replace a narrow operation. This only needs
768 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
769 /// 0 for any operation we decide not to clone.
770 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
771                                   const SCEVAddRecExpr *WideAR) {
772   unsigned Opcode = DU.NarrowUse->getOpcode();
773   switch (Opcode) {
774   default:
775     return nullptr;
776   case Instruction::Add:
777   case Instruction::Mul:
778   case Instruction::UDiv:
779   case Instruction::Sub:
780     return cloneArithmeticIVUser(DU, WideAR);
781 
782   case Instruction::And:
783   case Instruction::Or:
784   case Instruction::Xor:
785   case Instruction::Shl:
786   case Instruction::LShr:
787   case Instruction::AShr:
788     return cloneBitwiseIVUser(DU);
789   }
790 }
791 
792 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
793   Instruction *NarrowUse = DU.NarrowUse;
794   Instruction *NarrowDef = DU.NarrowDef;
795   Instruction *WideDef = DU.WideDef;
796 
797   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
798 
799   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
800   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
801   // invariant and will be folded or hoisted. If it actually comes from a
802   // widened IV, it should be removed during a future call to widenIVUse.
803   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
804   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
805                    ? WideDef
806                    : createExtendInst(NarrowUse->getOperand(0), WideType,
807                                       IsSigned, NarrowUse);
808   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
809                    ? WideDef
810                    : createExtendInst(NarrowUse->getOperand(1), WideType,
811                                       IsSigned, NarrowUse);
812 
813   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
814   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
815                                         NarrowBO->getName());
816   IRBuilder<> Builder(NarrowUse);
817   Builder.Insert(WideBO);
818   WideBO->copyIRFlags(NarrowBO);
819   return WideBO;
820 }
821 
822 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
823                                             const SCEVAddRecExpr *WideAR) {
824   Instruction *NarrowUse = DU.NarrowUse;
825   Instruction *NarrowDef = DU.NarrowDef;
826   Instruction *WideDef = DU.WideDef;
827 
828   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
829 
830   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
831 
832   // We're trying to find X such that
833   //
834   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
835   //
836   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
837   // and check using SCEV if any of them are correct.
838 
839   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
840   // correct solution to X.
841   auto GuessNonIVOperand = [&](bool SignExt) {
842     const SCEV *WideLHS;
843     const SCEV *WideRHS;
844 
845     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
846       if (SignExt)
847         return SE->getSignExtendExpr(S, Ty);
848       return SE->getZeroExtendExpr(S, Ty);
849     };
850 
851     if (IVOpIdx == 0) {
852       WideLHS = SE->getSCEV(WideDef);
853       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
854       WideRHS = GetExtend(NarrowRHS, WideType);
855     } else {
856       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
857       WideLHS = GetExtend(NarrowLHS, WideType);
858       WideRHS = SE->getSCEV(WideDef);
859     }
860 
861     // WideUse is "WideDef `op.wide` X" as described in the comment.
862     const SCEV *WideUse = nullptr;
863 
864     switch (NarrowUse->getOpcode()) {
865     default:
866       llvm_unreachable("No other possibility!");
867 
868     case Instruction::Add:
869       WideUse = SE->getAddExpr(WideLHS, WideRHS);
870       break;
871 
872     case Instruction::Mul:
873       WideUse = SE->getMulExpr(WideLHS, WideRHS);
874       break;
875 
876     case Instruction::UDiv:
877       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
878       break;
879 
880     case Instruction::Sub:
881       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
882       break;
883     }
884 
885     return WideUse == WideAR;
886   };
887 
888   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
889   if (!GuessNonIVOperand(SignExtend)) {
890     SignExtend = !SignExtend;
891     if (!GuessNonIVOperand(SignExtend))
892       return nullptr;
893   }
894 
895   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
896                    ? WideDef
897                    : createExtendInst(NarrowUse->getOperand(0), WideType,
898                                       SignExtend, NarrowUse);
899   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
900                    ? WideDef
901                    : createExtendInst(NarrowUse->getOperand(1), WideType,
902                                       SignExtend, NarrowUse);
903 
904   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
905   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
906                                         NarrowBO->getName());
907 
908   IRBuilder<> Builder(NarrowUse);
909   Builder.Insert(WideBO);
910   WideBO->copyIRFlags(NarrowBO);
911   return WideBO;
912 }
913 
914 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
915   auto It = ExtendKindMap.find(I);
916   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
917   return It->second;
918 }
919 
920 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
921                                      unsigned OpCode) const {
922   if (OpCode == Instruction::Add)
923     return SE->getAddExpr(LHS, RHS);
924   if (OpCode == Instruction::Sub)
925     return SE->getMinusSCEV(LHS, RHS);
926   if (OpCode == Instruction::Mul)
927     return SE->getMulExpr(LHS, RHS);
928 
929   llvm_unreachable("Unsupported opcode.");
930 }
931 
932 /// No-wrap operations can transfer sign extension of their result to their
933 /// operands. Generate the SCEV value for the widened operation without
934 /// actually modifying the IR yet. If the expression after extending the
935 /// operands is an AddRec for this loop, return the AddRec and the kind of
936 /// extension used.
937 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
938   // Handle the common case of add<nsw/nuw>
939   const unsigned OpCode = DU.NarrowUse->getOpcode();
940   // Only Add/Sub/Mul instructions supported yet.
941   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
942       OpCode != Instruction::Mul)
943     return {nullptr, Unknown};
944 
945   // One operand (NarrowDef) has already been extended to WideDef. Now determine
946   // if extending the other will lead to a recurrence.
947   const unsigned ExtendOperIdx =
948       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
949   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
950 
951   const SCEV *ExtendOperExpr = nullptr;
952   const OverflowingBinaryOperator *OBO =
953     cast<OverflowingBinaryOperator>(DU.NarrowUse);
954   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
955   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
956     ExtendOperExpr = SE->getSignExtendExpr(
957       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
958   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
959     ExtendOperExpr = SE->getZeroExtendExpr(
960       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
961   else
962     return {nullptr, Unknown};
963 
964   // When creating this SCEV expr, don't apply the current operations NSW or NUW
965   // flags. This instruction may be guarded by control flow that the no-wrap
966   // behavior depends on. Non-control-equivalent instructions can be mapped to
967   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
968   // semantics to those operations.
969   const SCEV *lhs = SE->getSCEV(DU.WideDef);
970   const SCEV *rhs = ExtendOperExpr;
971 
972   // Let's swap operands to the initial order for the case of non-commutative
973   // operations, like SUB. See PR21014.
974   if (ExtendOperIdx == 0)
975     std::swap(lhs, rhs);
976   const SCEVAddRecExpr *AddRec =
977       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
978 
979   if (!AddRec || AddRec->getLoop() != L)
980     return {nullptr, Unknown};
981 
982   return {AddRec, ExtKind};
983 }
984 
985 /// Is this instruction potentially interesting for further simplification after
986 /// widening it's type? In other words, can the extend be safely hoisted out of
987 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
988 /// so, return the extended recurrence and the kind of extension used. Otherwise
989 /// return {nullptr, Unknown}.
990 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
991   if (!SE->isSCEVable(DU.NarrowUse->getType()))
992     return {nullptr, Unknown};
993 
994   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
995   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
996       SE->getTypeSizeInBits(WideType)) {
997     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
998     // index. So don't follow this use.
999     return {nullptr, Unknown};
1000   }
1001 
1002   const SCEV *WideExpr;
1003   ExtendKind ExtKind;
1004   if (DU.NeverNegative) {
1005     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1006     if (isa<SCEVAddRecExpr>(WideExpr))
1007       ExtKind = SignExtended;
1008     else {
1009       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1010       ExtKind = ZeroExtended;
1011     }
1012   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1013     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1014     ExtKind = SignExtended;
1015   } else {
1016     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1017     ExtKind = ZeroExtended;
1018   }
1019   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1020   if (!AddRec || AddRec->getLoop() != L)
1021     return {nullptr, Unknown};
1022   return {AddRec, ExtKind};
1023 }
1024 
1025 /// This IV user cannot be widened. Replace this use of the original narrow IV
1026 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1027 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1028   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1029   if (!InsertPt)
1030     return;
1031   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1032                     << *DU.NarrowUse << "\n");
1033   IRBuilder<> Builder(InsertPt);
1034   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1035   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1036 }
1037 
1038 /// If the narrow use is a compare instruction, then widen the compare
1039 //  (and possibly the other operand).  The extend operation is hoisted into the
1040 // loop preheader as far as possible.
1041 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1042   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1043   if (!Cmp)
1044     return false;
1045 
1046   // We can legally widen the comparison in the following two cases:
1047   //
1048   //  - The signedness of the IV extension and comparison match
1049   //
1050   //  - The narrow IV is always positive (and thus its sign extension is equal
1051   //    to its zero extension).  For instance, let's say we're zero extending
1052   //    %narrow for the following use
1053   //
1054   //      icmp slt i32 %narrow, %val   ... (A)
1055   //
1056   //    and %narrow is always positive.  Then
1057   //
1058   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1059   //          == icmp slt i32 zext(%narrow), sext(%val)
1060   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1061   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1062     return false;
1063 
1064   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1065   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1066   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1067   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1068 
1069   // Widen the compare instruction.
1070   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1071   if (!InsertPt)
1072     return false;
1073   IRBuilder<> Builder(InsertPt);
1074   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1075 
1076   // Widen the other operand of the compare, if necessary.
1077   if (CastWidth < IVWidth) {
1078     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1079     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1080   }
1081   return true;
1082 }
1083 
1084 // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
1085 // will not work when:
1086 //    1) SCEV traces back to an instruction inside the loop that SCEV can not
1087 // expand, eg. add %indvar, (load %addr)
1088 //    2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
1089 // While SCEV fails to avoid trunc, we can still try to use instruction
1090 // combining approach to prove trunc is not required. This can be further
1091 // extended with other instruction combining checks, but for now we handle the
1092 // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
1093 //
1094 // Src:
1095 //   %c = sub nsw %b, %indvar
1096 //   %d = sext %c to i64
1097 // Dst:
1098 //   %indvar.ext1 = sext %indvar to i64
1099 //   %m = sext %b to i64
1100 //   %d = sub nsw i64 %m, %indvar.ext1
1101 // Therefore, as long as the result of add/sub/mul is extended to wide type, no
1102 // trunc is required regardless of how %b is generated. This pattern is common
1103 // when calculating address in 64 bit architecture
1104 bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
1105   Instruction *NarrowUse = DU.NarrowUse;
1106   Instruction *NarrowDef = DU.NarrowDef;
1107   Instruction *WideDef = DU.WideDef;
1108 
1109   // Handle the common case of add<nsw/nuw>
1110   const unsigned OpCode = NarrowUse->getOpcode();
1111   // Only Add/Sub/Mul instructions are supported.
1112   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1113       OpCode != Instruction::Mul)
1114     return false;
1115 
1116   // The operand that is not defined by NarrowDef of DU. Let's call it the
1117   // other operand.
1118   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1119   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1120          "bad DU");
1121 
1122   const SCEV *ExtendOperExpr = nullptr;
1123   const OverflowingBinaryOperator *OBO =
1124     cast<OverflowingBinaryOperator>(NarrowUse);
1125   ExtendKind ExtKind = getExtendKind(NarrowDef);
1126   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1127     ExtendOperExpr = SE->getSignExtendExpr(
1128       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1129   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1130     ExtendOperExpr = SE->getZeroExtendExpr(
1131       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1132   else
1133     return false;
1134 
1135   // Verifying that Defining operand is an AddRec
1136   const SCEV *Op1 = SE->getSCEV(WideDef);
1137   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1138   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1139     return false;
1140   // Verifying that other operand is an Extend.
1141   if (ExtKind == SignExtended) {
1142     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1143       return false;
1144   } else {
1145     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1146       return false;
1147   }
1148 
1149   if (ExtKind == SignExtended) {
1150     for (Use &U : NarrowUse->uses()) {
1151       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1152       if (!User || User->getType() != WideType)
1153         return false;
1154     }
1155   } else { // ExtKind == ZeroExtended
1156     for (Use &U : NarrowUse->uses()) {
1157       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1158       if (!User || User->getType() != WideType)
1159         return false;
1160     }
1161   }
1162 
1163   return true;
1164 }
1165 
1166 /// Special Case for widening with loop variant (see
1167 /// WidenIV::widenWithVariant). This is the code generation part.
1168 void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) {
1169   Instruction *NarrowUse = DU.NarrowUse;
1170   Instruction *NarrowDef = DU.NarrowDef;
1171   Instruction *WideDef = DU.WideDef;
1172 
1173   ExtendKind ExtKind = getExtendKind(NarrowDef);
1174 
1175   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1176 
1177   // Generating a widening use instruction.
1178   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1179                    ? WideDef
1180                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1181                                       ExtKind, NarrowUse);
1182   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1183                    ? WideDef
1184                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1185                                       ExtKind, NarrowUse);
1186 
1187   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1188   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1189                                         NarrowBO->getName());
1190   IRBuilder<> Builder(NarrowUse);
1191   Builder.Insert(WideBO);
1192   WideBO->copyIRFlags(NarrowBO);
1193 
1194   assert(ExtKind != Unknown && "Unknown ExtKind not handled");
1195 
1196   ExtendKindMap[NarrowUse] = ExtKind;
1197 
1198   for (Use &U : NarrowUse->uses()) {
1199     Instruction *User = nullptr;
1200     if (ExtKind == SignExtended)
1201       User = dyn_cast<SExtInst>(U.getUser());
1202     else
1203       User = dyn_cast<ZExtInst>(U.getUser());
1204     if (User && User->getType() == WideType) {
1205       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1206                         << *WideBO << "\n");
1207       ++NumElimExt;
1208       User->replaceAllUsesWith(WideBO);
1209       DeadInsts.emplace_back(User);
1210     }
1211   }
1212 }
1213 
1214 /// Determine whether an individual user of the narrow IV can be widened. If so,
1215 /// return the wide clone of the user.
1216 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1217   assert(ExtendKindMap.count(DU.NarrowDef) &&
1218          "Should already know the kind of extension used to widen NarrowDef");
1219 
1220   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1221   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1222     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1223       // For LCSSA phis, sink the truncate outside the loop.
1224       // After SimplifyCFG most loop exit targets have a single predecessor.
1225       // Otherwise fall back to a truncate within the loop.
1226       if (UsePhi->getNumOperands() != 1)
1227         truncateIVUse(DU, DT, LI);
1228       else {
1229         // Widening the PHI requires us to insert a trunc.  The logical place
1230         // for this trunc is in the same BB as the PHI.  This is not possible if
1231         // the BB is terminated by a catchswitch.
1232         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1233           return nullptr;
1234 
1235         PHINode *WidePhi =
1236           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1237                           UsePhi);
1238         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1239         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1240         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1241         UsePhi->replaceAllUsesWith(Trunc);
1242         DeadInsts.emplace_back(UsePhi);
1243         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1244                           << *WidePhi << "\n");
1245       }
1246       return nullptr;
1247     }
1248   }
1249 
1250   // This narrow use can be widened by a sext if it's non-negative or its narrow
1251   // def was widended by a sext. Same for zext.
1252   auto canWidenBySExt = [&]() {
1253     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1254   };
1255   auto canWidenByZExt = [&]() {
1256     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1257   };
1258 
1259   // Our raison d'etre! Eliminate sign and zero extension.
1260   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1261       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1262     Value *NewDef = DU.WideDef;
1263     if (DU.NarrowUse->getType() != WideType) {
1264       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1265       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1266       if (CastWidth < IVWidth) {
1267         // The cast isn't as wide as the IV, so insert a Trunc.
1268         IRBuilder<> Builder(DU.NarrowUse);
1269         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1270       }
1271       else {
1272         // A wider extend was hidden behind a narrower one. This may induce
1273         // another round of IV widening in which the intermediate IV becomes
1274         // dead. It should be very rare.
1275         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1276                           << " not wide enough to subsume " << *DU.NarrowUse
1277                           << "\n");
1278         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1279         NewDef = DU.NarrowUse;
1280       }
1281     }
1282     if (NewDef != DU.NarrowUse) {
1283       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1284                         << " replaced by " << *DU.WideDef << "\n");
1285       ++NumElimExt;
1286       DU.NarrowUse->replaceAllUsesWith(NewDef);
1287       DeadInsts.emplace_back(DU.NarrowUse);
1288     }
1289     // Now that the extend is gone, we want to expose it's uses for potential
1290     // further simplification. We don't need to directly inform SimplifyIVUsers
1291     // of the new users, because their parent IV will be processed later as a
1292     // new loop phi. If we preserved IVUsers analysis, we would also want to
1293     // push the uses of WideDef here.
1294 
1295     // No further widening is needed. The deceased [sz]ext had done it for us.
1296     return nullptr;
1297   }
1298 
1299   // Does this user itself evaluate to a recurrence after widening?
1300   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1301   if (!WideAddRec.first)
1302     WideAddRec = getWideRecurrence(DU);
1303 
1304   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1305   if (!WideAddRec.first) {
1306     // If use is a loop condition, try to promote the condition instead of
1307     // truncating the IV first.
1308     if (widenLoopCompare(DU))
1309       return nullptr;
1310 
1311     // We are here about to generate a truncate instruction that may hurt
1312     // performance because the scalar evolution expression computed earlier
1313     // in WideAddRec.first does not indicate a polynomial induction expression.
1314     // In that case, look at the operands of the use instruction to determine
1315     // if we can still widen the use instead of truncating its operand.
1316     if (widenWithVariantUse(DU)) {
1317       widenWithVariantUseCodegen(DU);
1318       return nullptr;
1319     }
1320 
1321     // This user does not evaluate to a recurrence after widening, so don't
1322     // follow it. Instead insert a Trunc to kill off the original use,
1323     // eventually isolating the original narrow IV so it can be removed.
1324     truncateIVUse(DU, DT, LI);
1325     return nullptr;
1326   }
1327   // Assume block terminators cannot evaluate to a recurrence. We can't to
1328   // insert a Trunc after a terminator if there happens to be a critical edge.
1329   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1330          "SCEV is not expected to evaluate a block terminator");
1331 
1332   // Reuse the IV increment that SCEVExpander created as long as it dominates
1333   // NarrowUse.
1334   Instruction *WideUse = nullptr;
1335   if (WideAddRec.first == WideIncExpr &&
1336       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1337     WideUse = WideInc;
1338   else {
1339     WideUse = cloneIVUser(DU, WideAddRec.first);
1340     if (!WideUse)
1341       return nullptr;
1342   }
1343   // Evaluation of WideAddRec ensured that the narrow expression could be
1344   // extended outside the loop without overflow. This suggests that the wide use
1345   // evaluates to the same expression as the extended narrow use, but doesn't
1346   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1347   // where it fails, we simply throw away the newly created wide use.
1348   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1349     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1350                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1351                       << "\n");
1352     DeadInsts.emplace_back(WideUse);
1353     return nullptr;
1354   }
1355 
1356   // if we reached this point then we are going to replace
1357   // DU.NarrowUse with WideUse. Reattach DbgValue then.
1358   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1359 
1360   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1361   // Returning WideUse pushes it on the worklist.
1362   return WideUse;
1363 }
1364 
1365 /// Add eligible users of NarrowDef to NarrowIVUsers.
1366 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1367   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1368   bool NonNegativeDef =
1369       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1370                            SE->getZero(NarrowSCEV->getType()));
1371   for (User *U : NarrowDef->users()) {
1372     Instruction *NarrowUser = cast<Instruction>(U);
1373 
1374     // Handle data flow merges and bizarre phi cycles.
1375     if (!Widened.insert(NarrowUser).second)
1376       continue;
1377 
1378     bool NonNegativeUse = false;
1379     if (!NonNegativeDef) {
1380       // We might have a control-dependent range information for this context.
1381       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1382         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1383     }
1384 
1385     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1386                                NonNegativeDef || NonNegativeUse);
1387   }
1388 }
1389 
1390 /// Process a single induction variable. First use the SCEVExpander to create a
1391 /// wide induction variable that evaluates to the same recurrence as the
1392 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1393 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1394 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1395 ///
1396 /// It would be simpler to delete uses as they are processed, but we must avoid
1397 /// invalidating SCEV expressions.
1398 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1399   // Bail if we disallowed widening.
1400   if(!AllowIVWidening)
1401     return nullptr;
1402 
1403   // Is this phi an induction variable?
1404   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1405   if (!AddRec)
1406     return nullptr;
1407 
1408   // Widen the induction variable expression.
1409   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1410                                ? SE->getSignExtendExpr(AddRec, WideType)
1411                                : SE->getZeroExtendExpr(AddRec, WideType);
1412 
1413   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1414          "Expect the new IV expression to preserve its type");
1415 
1416   // Can the IV be extended outside the loop without overflow?
1417   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1418   if (!AddRec || AddRec->getLoop() != L)
1419     return nullptr;
1420 
1421   // An AddRec must have loop-invariant operands. Since this AddRec is
1422   // materialized by a loop header phi, the expression cannot have any post-loop
1423   // operands, so they must dominate the loop header.
1424   assert(
1425       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1426       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1427       "Loop header phi recurrence inputs do not dominate the loop");
1428 
1429   // Iterate over IV uses (including transitive ones) looking for IV increments
1430   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1431   // the increment calculate control-dependent range information basing on
1432   // dominating conditions inside of the loop (e.g. a range check inside of the
1433   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1434   //
1435   // Control-dependent range information is later used to prove that a narrow
1436   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1437   // this on demand because when pushNarrowIVUsers needs this information some
1438   // of the dominating conditions might be already widened.
1439   if (UsePostIncrementRanges)
1440     calculatePostIncRanges(OrigPhi);
1441 
1442   // The rewriter provides a value for the desired IV expression. This may
1443   // either find an existing phi or materialize a new one. Either way, we
1444   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1445   // of the phi-SCC dominates the loop entry.
1446   Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
1447   Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
1448   // If the wide phi is not a phi node, for example a cast node, like bitcast,
1449   // inttoptr, ptrtoint, just skip for now.
1450   if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
1451     // if the cast node is an inserted instruction without any user, we should
1452     // remove it to make sure the pass don't touch the function as we can not
1453     // wide the phi.
1454     if (ExpandInst->hasNUses(0) &&
1455         Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
1456       DeadInsts.emplace_back(ExpandInst);
1457     return nullptr;
1458   }
1459 
1460   // Remembering the WideIV increment generated by SCEVExpander allows
1461   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1462   // employ a general reuse mechanism because the call above is the only call to
1463   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1464   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1465     WideInc =
1466       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1467     WideIncExpr = SE->getSCEV(WideInc);
1468     // Propagate the debug location associated with the original loop increment
1469     // to the new (widened) increment.
1470     auto *OrigInc =
1471       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1472     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1473   }
1474 
1475   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1476   ++NumWidened;
1477 
1478   // Traverse the def-use chain using a worklist starting at the original IV.
1479   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1480 
1481   Widened.insert(OrigPhi);
1482   pushNarrowIVUsers(OrigPhi, WidePhi);
1483 
1484   while (!NarrowIVUsers.empty()) {
1485     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1486 
1487     // Process a def-use edge. This may replace the use, so don't hold a
1488     // use_iterator across it.
1489     Instruction *WideUse = widenIVUse(DU, Rewriter);
1490 
1491     // Follow all def-use edges from the previous narrow use.
1492     if (WideUse)
1493       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1494 
1495     // widenIVUse may have removed the def-use edge.
1496     if (DU.NarrowDef->use_empty())
1497       DeadInsts.emplace_back(DU.NarrowDef);
1498   }
1499 
1500   // Attach any debug information to the new PHI.
1501   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1502 
1503   return WidePhi;
1504 }
1505 
1506 /// Calculates control-dependent range for the given def at the given context
1507 /// by looking at dominating conditions inside of the loop
1508 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1509                                     Instruction *NarrowUser) {
1510   using namespace llvm::PatternMatch;
1511 
1512   Value *NarrowDefLHS;
1513   const APInt *NarrowDefRHS;
1514   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1515                                  m_APInt(NarrowDefRHS))) ||
1516       !NarrowDefRHS->isNonNegative())
1517     return;
1518 
1519   auto UpdateRangeFromCondition = [&] (Value *Condition,
1520                                        bool TrueDest) {
1521     CmpInst::Predicate Pred;
1522     Value *CmpRHS;
1523     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1524                                  m_Value(CmpRHS))))
1525       return;
1526 
1527     CmpInst::Predicate P =
1528             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1529 
1530     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1531     auto CmpConstrainedLHSRange =
1532             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1533     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1534         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1535 
1536     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1537   };
1538 
1539   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1540     if (!HasGuards)
1541       return;
1542 
1543     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1544                                      Ctx->getParent()->rend())) {
1545       Value *C = nullptr;
1546       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1547         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1548     }
1549   };
1550 
1551   UpdateRangeFromGuards(NarrowUser);
1552 
1553   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1554   // If NarrowUserBB is statically unreachable asking dominator queries may
1555   // yield surprising results. (e.g. the block may not have a dom tree node)
1556   if (!DT->isReachableFromEntry(NarrowUserBB))
1557     return;
1558 
1559   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1560        L->contains(DTB->getBlock());
1561        DTB = DTB->getIDom()) {
1562     auto *BB = DTB->getBlock();
1563     auto *TI = BB->getTerminator();
1564     UpdateRangeFromGuards(TI);
1565 
1566     auto *BI = dyn_cast<BranchInst>(TI);
1567     if (!BI || !BI->isConditional())
1568       continue;
1569 
1570     auto *TrueSuccessor = BI->getSuccessor(0);
1571     auto *FalseSuccessor = BI->getSuccessor(1);
1572 
1573     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1574       return BBE.isSingleEdge() &&
1575              DT->dominates(BBE, NarrowUser->getParent());
1576     };
1577 
1578     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1579       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1580 
1581     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1582       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1583   }
1584 }
1585 
1586 /// Calculates PostIncRangeInfos map for the given IV
1587 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1588   SmallPtrSet<Instruction *, 16> Visited;
1589   SmallVector<Instruction *, 6> Worklist;
1590   Worklist.push_back(OrigPhi);
1591   Visited.insert(OrigPhi);
1592 
1593   while (!Worklist.empty()) {
1594     Instruction *NarrowDef = Worklist.pop_back_val();
1595 
1596     for (Use &U : NarrowDef->uses()) {
1597       auto *NarrowUser = cast<Instruction>(U.getUser());
1598 
1599       // Don't go looking outside the current loop.
1600       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1601       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1602         continue;
1603 
1604       if (!Visited.insert(NarrowUser).second)
1605         continue;
1606 
1607       Worklist.push_back(NarrowUser);
1608 
1609       calculatePostIncRange(NarrowDef, NarrowUser);
1610     }
1611   }
1612 }
1613 
1614 //===----------------------------------------------------------------------===//
1615 //  Live IV Reduction - Minimize IVs live across the loop.
1616 //===----------------------------------------------------------------------===//
1617 
1618 //===----------------------------------------------------------------------===//
1619 //  Simplification of IV users based on SCEV evaluation.
1620 //===----------------------------------------------------------------------===//
1621 
1622 namespace {
1623 
1624 class IndVarSimplifyVisitor : public IVVisitor {
1625   ScalarEvolution *SE;
1626   const TargetTransformInfo *TTI;
1627   PHINode *IVPhi;
1628 
1629 public:
1630   WideIVInfo WI;
1631 
1632   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1633                         const TargetTransformInfo *TTI,
1634                         const DominatorTree *DTree)
1635     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1636     DT = DTree;
1637     WI.NarrowIV = IVPhi;
1638   }
1639 
1640   // Implement the interface used by simplifyUsersOfIV.
1641   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1642 };
1643 
1644 } // end anonymous namespace
1645 
1646 /// Iteratively perform simplification on a worklist of IV users. Each
1647 /// successive simplification may push more users which may themselves be
1648 /// candidates for simplification.
1649 ///
1650 /// Sign/Zero extend elimination is interleaved with IV simplification.
1651 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1652                                        SCEVExpander &Rewriter,
1653                                        LoopInfo *LI) {
1654   SmallVector<WideIVInfo, 8> WideIVs;
1655 
1656   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1657           Intrinsic::getName(Intrinsic::experimental_guard));
1658   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1659 
1660   SmallVector<PHINode*, 8> LoopPhis;
1661   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1662     LoopPhis.push_back(cast<PHINode>(I));
1663   }
1664   // Each round of simplification iterates through the SimplifyIVUsers worklist
1665   // for all current phis, then determines whether any IVs can be
1666   // widened. Widening adds new phis to LoopPhis, inducing another round of
1667   // simplification on the wide IVs.
1668   bool Changed = false;
1669   while (!LoopPhis.empty()) {
1670     // Evaluate as many IV expressions as possible before widening any IVs. This
1671     // forces SCEV to set no-wrap flags before evaluating sign/zero
1672     // extension. The first time SCEV attempts to normalize sign/zero extension,
1673     // the result becomes final. So for the most predictable results, we delay
1674     // evaluation of sign/zero extend evaluation until needed, and avoid running
1675     // other SCEV based analysis prior to simplifyAndExtend.
1676     do {
1677       PHINode *CurrIV = LoopPhis.pop_back_val();
1678 
1679       // Information about sign/zero extensions of CurrIV.
1680       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1681 
1682       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
1683                                    &Visitor);
1684 
1685       if (Visitor.WI.WidestNativeType) {
1686         WideIVs.push_back(Visitor.WI);
1687       }
1688     } while(!LoopPhis.empty());
1689 
1690     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1691       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1692       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1693         Changed = true;
1694         LoopPhis.push_back(WidePhi);
1695       }
1696     }
1697   }
1698   return Changed;
1699 }
1700 
1701 //===----------------------------------------------------------------------===//
1702 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1703 //===----------------------------------------------------------------------===//
1704 
1705 /// Given an Value which is hoped to be part of an add recurance in the given
1706 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1707 /// that this is less general than SCEVs AddRec checking.
1708 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1709   Instruction *IncI = dyn_cast<Instruction>(IncV);
1710   if (!IncI)
1711     return nullptr;
1712 
1713   switch (IncI->getOpcode()) {
1714   case Instruction::Add:
1715   case Instruction::Sub:
1716     break;
1717   case Instruction::GetElementPtr:
1718     // An IV counter must preserve its type.
1719     if (IncI->getNumOperands() == 2)
1720       break;
1721     LLVM_FALLTHROUGH;
1722   default:
1723     return nullptr;
1724   }
1725 
1726   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1727   if (Phi && Phi->getParent() == L->getHeader()) {
1728     if (L->isLoopInvariant(IncI->getOperand(1)))
1729       return Phi;
1730     return nullptr;
1731   }
1732   if (IncI->getOpcode() == Instruction::GetElementPtr)
1733     return nullptr;
1734 
1735   // Allow add/sub to be commuted.
1736   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1737   if (Phi && Phi->getParent() == L->getHeader()) {
1738     if (L->isLoopInvariant(IncI->getOperand(0)))
1739       return Phi;
1740   }
1741   return nullptr;
1742 }
1743 
1744 /// Whether the current loop exit test is based on this value.  Currently this
1745 /// is limited to a direct use in the loop condition.
1746 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1747   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1748   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1749   // TODO: Allow non-icmp loop test.
1750   if (!ICmp)
1751     return false;
1752 
1753   // TODO: Allow indirect use.
1754   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1755 }
1756 
1757 /// linearFunctionTestReplace policy. Return true unless we can show that the
1758 /// current exit test is already sufficiently canonical.
1759 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1760   assert(L->getLoopLatch() && "Must be in simplified form");
1761 
1762   // Avoid converting a constant or loop invariant test back to a runtime
1763   // test.  This is critical for when SCEV's cached ExitCount is less precise
1764   // than the current IR (such as after we've proven a particular exit is
1765   // actually dead and thus the BE count never reaches our ExitCount.)
1766   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1767   if (L->isLoopInvariant(BI->getCondition()))
1768     return false;
1769 
1770   // Do LFTR to simplify the exit condition to an ICMP.
1771   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1772   if (!Cond)
1773     return true;
1774 
1775   // Do LFTR to simplify the exit ICMP to EQ/NE
1776   ICmpInst::Predicate Pred = Cond->getPredicate();
1777   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1778     return true;
1779 
1780   // Look for a loop invariant RHS
1781   Value *LHS = Cond->getOperand(0);
1782   Value *RHS = Cond->getOperand(1);
1783   if (!L->isLoopInvariant(RHS)) {
1784     if (!L->isLoopInvariant(LHS))
1785       return true;
1786     std::swap(LHS, RHS);
1787   }
1788   // Look for a simple IV counter LHS
1789   PHINode *Phi = dyn_cast<PHINode>(LHS);
1790   if (!Phi)
1791     Phi = getLoopPhiForCounter(LHS, L);
1792 
1793   if (!Phi)
1794     return true;
1795 
1796   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1797   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1798   if (Idx < 0)
1799     return true;
1800 
1801   // Do LFTR if the exit condition's IV is *not* a simple counter.
1802   Value *IncV = Phi->getIncomingValue(Idx);
1803   return Phi != getLoopPhiForCounter(IncV, L);
1804 }
1805 
1806 /// Return true if undefined behavior would provable be executed on the path to
1807 /// OnPathTo if Root produced a posion result.  Note that this doesn't say
1808 /// anything about whether OnPathTo is actually executed or whether Root is
1809 /// actually poison.  This can be used to assess whether a new use of Root can
1810 /// be added at a location which is control equivalent with OnPathTo (such as
1811 /// immediately before it) without introducing UB which didn't previously
1812 /// exist.  Note that a false result conveys no information.
1813 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1814                                           Instruction *OnPathTo,
1815                                           DominatorTree *DT) {
1816   // Basic approach is to assume Root is poison, propagate poison forward
1817   // through all users we can easily track, and then check whether any of those
1818   // users are provable UB and must execute before out exiting block might
1819   // exit.
1820 
1821   // The set of all recursive users we've visited (which are assumed to all be
1822   // poison because of said visit)
1823   SmallSet<const Value *, 16> KnownPoison;
1824   SmallVector<const Instruction*, 16> Worklist;
1825   Worklist.push_back(Root);
1826   while (!Worklist.empty()) {
1827     const Instruction *I = Worklist.pop_back_val();
1828 
1829     // If we know this must trigger UB on a path leading our target.
1830     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1831       return true;
1832 
1833     // If we can't analyze propagation through this instruction, just skip it
1834     // and transitive users.  Safe as false is a conservative result.
1835     if (!propagatesPoison(cast<Operator>(I)) && I != Root)
1836       continue;
1837 
1838     if (KnownPoison.insert(I).second)
1839       for (const User *User : I->users())
1840         Worklist.push_back(cast<Instruction>(User));
1841   }
1842 
1843   // Might be non-UB, or might have a path we couldn't prove must execute on
1844   // way to exiting bb.
1845   return false;
1846 }
1847 
1848 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1849 /// down to checking that all operands are constant and listing instructions
1850 /// that may hide undef.
1851 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1852                                unsigned Depth) {
1853   if (isa<Constant>(V))
1854     return !isa<UndefValue>(V);
1855 
1856   if (Depth >= 6)
1857     return false;
1858 
1859   // Conservatively handle non-constant non-instructions. For example, Arguments
1860   // may be undef.
1861   Instruction *I = dyn_cast<Instruction>(V);
1862   if (!I)
1863     return false;
1864 
1865   // Load and return values may be undef.
1866   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1867     return false;
1868 
1869   // Optimistically handle other instructions.
1870   for (Value *Op : I->operands()) {
1871     if (!Visited.insert(Op).second)
1872       continue;
1873     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1874       return false;
1875   }
1876   return true;
1877 }
1878 
1879 /// Return true if the given value is concrete. We must prove that undef can
1880 /// never reach it.
1881 ///
1882 /// TODO: If we decide that this is a good approach to checking for undef, we
1883 /// may factor it into a common location.
1884 static bool hasConcreteDef(Value *V) {
1885   SmallPtrSet<Value*, 8> Visited;
1886   Visited.insert(V);
1887   return hasConcreteDefImpl(V, Visited, 0);
1888 }
1889 
1890 /// Return true if this IV has any uses other than the (soon to be rewritten)
1891 /// loop exit test.
1892 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1893   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1894   Value *IncV = Phi->getIncomingValue(LatchIdx);
1895 
1896   for (User *U : Phi->users())
1897     if (U != Cond && U != IncV) return false;
1898 
1899   for (User *U : IncV->users())
1900     if (U != Cond && U != Phi) return false;
1901   return true;
1902 }
1903 
1904 /// Return true if the given phi is a "counter" in L.  A counter is an
1905 /// add recurance (of integer or pointer type) with an arbitrary start, and a
1906 /// step of 1.  Note that L must have exactly one latch.
1907 static bool isLoopCounter(PHINode* Phi, Loop *L,
1908                           ScalarEvolution *SE) {
1909   assert(Phi->getParent() == L->getHeader());
1910   assert(L->getLoopLatch());
1911 
1912   if (!SE->isSCEVable(Phi->getType()))
1913     return false;
1914 
1915   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1916   if (!AR || AR->getLoop() != L || !AR->isAffine())
1917     return false;
1918 
1919   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1920   if (!Step || !Step->isOne())
1921     return false;
1922 
1923   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1924   Value *IncV = Phi->getIncomingValue(LatchIdx);
1925   return (getLoopPhiForCounter(IncV, L) == Phi);
1926 }
1927 
1928 /// Search the loop header for a loop counter (anadd rec w/step of one)
1929 /// suitable for use by LFTR.  If multiple counters are available, select the
1930 /// "best" one based profitable heuristics.
1931 ///
1932 /// BECount may be an i8* pointer type. The pointer difference is already
1933 /// valid count without scaling the address stride, so it remains a pointer
1934 /// expression as far as SCEV is concerned.
1935 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1936                                 const SCEV *BECount,
1937                                 ScalarEvolution *SE, DominatorTree *DT) {
1938   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1939 
1940   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1941 
1942   // Loop over all of the PHI nodes, looking for a simple counter.
1943   PHINode *BestPhi = nullptr;
1944   const SCEV *BestInit = nullptr;
1945   BasicBlock *LatchBlock = L->getLoopLatch();
1946   assert(LatchBlock && "Must be in simplified form");
1947   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1948 
1949   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1950     PHINode *Phi = cast<PHINode>(I);
1951     if (!isLoopCounter(Phi, L, SE))
1952       continue;
1953 
1954     // Avoid comparing an integer IV against a pointer Limit.
1955     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1956       continue;
1957 
1958     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1959 
1960     // AR may be a pointer type, while BECount is an integer type.
1961     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1962     // AR may not be a narrower type, or we may never exit.
1963     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1964     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1965       continue;
1966 
1967     // Avoid reusing a potentially undef value to compute other values that may
1968     // have originally had a concrete definition.
1969     if (!hasConcreteDef(Phi)) {
1970       // We explicitly allow unknown phis as long as they are already used by
1971       // the loop exit test.  This is legal since performing LFTR could not
1972       // increase the number of undef users.
1973       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1974       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1975           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1976         continue;
1977     }
1978 
1979     // Avoid introducing undefined behavior due to poison which didn't exist in
1980     // the original program.  (Annoyingly, the rules for poison and undef
1981     // propagation are distinct, so this does NOT cover the undef case above.)
1982     // We have to ensure that we don't introduce UB by introducing a use on an
1983     // iteration where said IV produces poison.  Our strategy here differs for
1984     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1985     // see code in linearFunctionTestReplace.  For pointers, we restrict
1986     // transforms as there is no good way to reinfer inbounds once lost.
1987     if (!Phi->getType()->isIntegerTy() &&
1988         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1989       continue;
1990 
1991     const SCEV *Init = AR->getStart();
1992 
1993     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1994       // Don't force a live loop counter if another IV can be used.
1995       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1996         continue;
1997 
1998       // Prefer to count-from-zero. This is a more "canonical" counter form. It
1999       // also prefers integer to pointer IVs.
2000       if (BestInit->isZero() != Init->isZero()) {
2001         if (BestInit->isZero())
2002           continue;
2003       }
2004       // If two IVs both count from zero or both count from nonzero then the
2005       // narrower is likely a dead phi that has been widened. Use the wider phi
2006       // to allow the other to be eliminated.
2007       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2008         continue;
2009     }
2010     BestPhi = Phi;
2011     BestInit = Init;
2012   }
2013   return BestPhi;
2014 }
2015 
2016 /// Insert an IR expression which computes the value held by the IV IndVar
2017 /// (which must be an loop counter w/unit stride) after the backedge of loop L
2018 /// is taken ExitCount times.
2019 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2020                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2021                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2022   assert(isLoopCounter(IndVar, L, SE));
2023   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2024   const SCEV *IVInit = AR->getStart();
2025 
2026   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2027   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2028   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2029   // the existing GEPs whenever possible.
2030   if (IndVar->getType()->isPointerTy() &&
2031       !ExitCount->getType()->isPointerTy()) {
2032     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2033     // signed value. ExitCount on the other hand represents the loop trip count,
2034     // which is an unsigned value. FindLoopCounter only allows induction
2035     // variables that have a positive unit stride of one. This means we don't
2036     // have to handle the case of negative offsets (yet) and just need to zero
2037     // extend ExitCount.
2038     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2039     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2040     if (UsePostInc)
2041       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2042 
2043     // Expand the code for the iteration count.
2044     assert(SE->isLoopInvariant(IVOffset, L) &&
2045            "Computed iteration count is not loop invariant!");
2046 
2047     // We could handle pointer IVs other than i8*, but we need to compensate for
2048     // gep index scaling.
2049     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2050                              cast<PointerType>(IndVar->getType())
2051                                  ->getElementType())->isOne() &&
2052            "unit stride pointer IV must be i8*");
2053 
2054     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2055     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2056     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2057   } else {
2058     // In any other case, convert both IVInit and ExitCount to integers before
2059     // comparing. This may result in SCEV expansion of pointers, but in practice
2060     // SCEV will fold the pointer arithmetic away as such:
2061     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2062     //
2063     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2064     // for simple memset-style loops.
2065     //
2066     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2067     // were generated on top of case #2, which is not expected.
2068 
2069     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2070     // For unit stride, IVCount = Start + ExitCount with 2's complement
2071     // overflow.
2072 
2073     // For integer IVs, truncate the IV before computing IVInit + BECount,
2074     // unless we know apriori that the limit must be a constant when evaluated
2075     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2076     // of the IV in the loop over a (potentially) expensive expansion of the
2077     // widened exit count add(zext(add)) expression.
2078     if (SE->getTypeSizeInBits(IVInit->getType())
2079         > SE->getTypeSizeInBits(ExitCount->getType())) {
2080       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2081         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2082       else
2083         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2084     }
2085 
2086     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2087 
2088     if (UsePostInc)
2089       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2090 
2091     // Expand the code for the iteration count.
2092     assert(SE->isLoopInvariant(IVLimit, L) &&
2093            "Computed iteration count is not loop invariant!");
2094     // Ensure that we generate the same type as IndVar, or a smaller integer
2095     // type. In the presence of null pointer values, we have an integer type
2096     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2097     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2098       IndVar->getType() : ExitCount->getType();
2099     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2100     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2101   }
2102 }
2103 
2104 /// This method rewrites the exit condition of the loop to be a canonical !=
2105 /// comparison against the incremented loop induction variable.  This pass is
2106 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2107 /// determine a loop-invariant trip count of the loop, which is actually a much
2108 /// broader range than just linear tests.
2109 bool IndVarSimplify::
2110 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2111                           const SCEV *ExitCount,
2112                           PHINode *IndVar, SCEVExpander &Rewriter) {
2113   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2114   assert(isLoopCounter(IndVar, L, SE));
2115   Instruction * const IncVar =
2116     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2117 
2118   // Initialize CmpIndVar to the preincremented IV.
2119   Value *CmpIndVar = IndVar;
2120   bool UsePostInc = false;
2121 
2122   // If the exiting block is the same as the backedge block, we prefer to
2123   // compare against the post-incremented value, otherwise we must compare
2124   // against the preincremented value.
2125   if (ExitingBB == L->getLoopLatch()) {
2126     // For pointer IVs, we chose to not strip inbounds which requires us not
2127     // to add a potentially UB introducing use.  We need to either a) show
2128     // the loop test we're modifying is already in post-inc form, or b) show
2129     // that adding a use must not introduce UB.
2130     bool SafeToPostInc =
2131         IndVar->getType()->isIntegerTy() ||
2132         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2133         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2134     if (SafeToPostInc) {
2135       UsePostInc = true;
2136       CmpIndVar = IncVar;
2137     }
2138   }
2139 
2140   // It may be necessary to drop nowrap flags on the incrementing instruction
2141   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2142   // case the increment might have previously been poison on the last iteration
2143   // only) or if LFTR switches to a different IV that was previously dynamically
2144   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2145   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2146   // check), because the pre-inc addrec flags may be adopted from the original
2147   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2148   // TODO: This handling is inaccurate for one case: If we switch to a
2149   // dynamically dead IV that wraps on the first loop iteration only, which is
2150   // not covered by the post-inc addrec. (If the new IV was not dynamically
2151   // dead, it could not be poison on the first iteration in the first place.)
2152   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2153     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2154     if (BO->hasNoUnsignedWrap())
2155       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2156     if (BO->hasNoSignedWrap())
2157       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2158   }
2159 
2160   Value *ExitCnt = genLoopLimit(
2161       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2162   assert(ExitCnt->getType()->isPointerTy() ==
2163              IndVar->getType()->isPointerTy() &&
2164          "genLoopLimit missed a cast");
2165 
2166   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2167   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2168   ICmpInst::Predicate P;
2169   if (L->contains(BI->getSuccessor(0)))
2170     P = ICmpInst::ICMP_NE;
2171   else
2172     P = ICmpInst::ICMP_EQ;
2173 
2174   IRBuilder<> Builder(BI);
2175 
2176   // The new loop exit condition should reuse the debug location of the
2177   // original loop exit condition.
2178   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2179     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2180 
2181   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2182   // avoid the expensive expansion of the limit expression in the wider type,
2183   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2184   // since we know (from the exit count bitwidth), that we can't self-wrap in
2185   // the narrower type.
2186   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2187   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2188   if (CmpIndVarSize > ExitCntSize) {
2189     assert(!CmpIndVar->getType()->isPointerTy() &&
2190            !ExitCnt->getType()->isPointerTy());
2191 
2192     // Before resorting to actually inserting the truncate, use the same
2193     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2194     // the other side of the comparison instead.  We still evaluate the limit
2195     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2196     // a truncate within in.
2197     bool Extended = false;
2198     const SCEV *IV = SE->getSCEV(CmpIndVar);
2199     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2200                                                   ExitCnt->getType());
2201     const SCEV *ZExtTrunc =
2202       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2203 
2204     if (ZExtTrunc == IV) {
2205       Extended = true;
2206       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2207                                    "wide.trip.count");
2208     } else {
2209       const SCEV *SExtTrunc =
2210         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2211       if (SExtTrunc == IV) {
2212         Extended = true;
2213         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2214                                      "wide.trip.count");
2215       }
2216     }
2217 
2218     if (Extended) {
2219       bool Discard;
2220       L->makeLoopInvariant(ExitCnt, Discard);
2221     } else
2222       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2223                                       "lftr.wideiv");
2224   }
2225   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2226                     << "      LHS:" << *CmpIndVar << '\n'
2227                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2228                     << "\n"
2229                     << "      RHS:\t" << *ExitCnt << "\n"
2230                     << "ExitCount:\t" << *ExitCount << "\n"
2231                     << "  was: " << *BI->getCondition() << "\n");
2232 
2233   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2234   Value *OrigCond = BI->getCondition();
2235   // It's tempting to use replaceAllUsesWith here to fully replace the old
2236   // comparison, but that's not immediately safe, since users of the old
2237   // comparison may not be dominated by the new comparison. Instead, just
2238   // update the branch to use the new comparison; in the common case this
2239   // will make old comparison dead.
2240   BI->setCondition(Cond);
2241   DeadInsts.emplace_back(OrigCond);
2242 
2243   ++NumLFTR;
2244   return true;
2245 }
2246 
2247 //===----------------------------------------------------------------------===//
2248 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2249 //===----------------------------------------------------------------------===//
2250 
2251 /// If there's a single exit block, sink any loop-invariant values that
2252 /// were defined in the preheader but not used inside the loop into the
2253 /// exit block to reduce register pressure in the loop.
2254 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2255   BasicBlock *ExitBlock = L->getExitBlock();
2256   if (!ExitBlock) return false;
2257 
2258   BasicBlock *Preheader = L->getLoopPreheader();
2259   if (!Preheader) return false;
2260 
2261   bool MadeAnyChanges = false;
2262   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2263   BasicBlock::iterator I(Preheader->getTerminator());
2264   while (I != Preheader->begin()) {
2265     --I;
2266     // New instructions were inserted at the end of the preheader.
2267     if (isa<PHINode>(I))
2268       break;
2269 
2270     // Don't move instructions which might have side effects, since the side
2271     // effects need to complete before instructions inside the loop.  Also don't
2272     // move instructions which might read memory, since the loop may modify
2273     // memory. Note that it's okay if the instruction might have undefined
2274     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2275     // block.
2276     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2277       continue;
2278 
2279     // Skip debug info intrinsics.
2280     if (isa<DbgInfoIntrinsic>(I))
2281       continue;
2282 
2283     // Skip eh pad instructions.
2284     if (I->isEHPad())
2285       continue;
2286 
2287     // Don't sink alloca: we never want to sink static alloca's out of the
2288     // entry block, and correctly sinking dynamic alloca's requires
2289     // checks for stacksave/stackrestore intrinsics.
2290     // FIXME: Refactor this check somehow?
2291     if (isa<AllocaInst>(I))
2292       continue;
2293 
2294     // Determine if there is a use in or before the loop (direct or
2295     // otherwise).
2296     bool UsedInLoop = false;
2297     for (Use &U : I->uses()) {
2298       Instruction *User = cast<Instruction>(U.getUser());
2299       BasicBlock *UseBB = User->getParent();
2300       if (PHINode *P = dyn_cast<PHINode>(User)) {
2301         unsigned i =
2302           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2303         UseBB = P->getIncomingBlock(i);
2304       }
2305       if (UseBB == Preheader || L->contains(UseBB)) {
2306         UsedInLoop = true;
2307         break;
2308       }
2309     }
2310 
2311     // If there is, the def must remain in the preheader.
2312     if (UsedInLoop)
2313       continue;
2314 
2315     // Otherwise, sink it to the exit block.
2316     Instruction *ToMove = &*I;
2317     bool Done = false;
2318 
2319     if (I != Preheader->begin()) {
2320       // Skip debug info intrinsics.
2321       do {
2322         --I;
2323       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2324 
2325       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2326         Done = true;
2327     } else {
2328       Done = true;
2329     }
2330 
2331     MadeAnyChanges = true;
2332     ToMove->moveBefore(*ExitBlock, InsertPt);
2333     if (Done) break;
2334     InsertPt = ToMove->getIterator();
2335   }
2336 
2337   return MadeAnyChanges;
2338 }
2339 
2340 // Returns true if the condition of \p BI being checked is invariant and can be
2341 // proved to be trivially true.
2342 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
2343                           bool ProvingLoopExit) {
2344   ICmpInst::Predicate Pred;
2345   Value *LHS, *RHS;
2346   using namespace PatternMatch;
2347   BasicBlock *TrueSucc, *FalseSucc;
2348   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
2349                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
2350     return false;
2351 
2352   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
2353          "Not a loop exit!");
2354 
2355   // 'LHS pred RHS' should now mean that we stay in loop.
2356   if (L->contains(FalseSucc))
2357     Pred = CmpInst::getInversePredicate(Pred);
2358 
2359   // If we are proving loop exit, invert the predicate.
2360   if (ProvingLoopExit)
2361     Pred = CmpInst::getInversePredicate(Pred);
2362 
2363   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
2364   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
2365   // Can we prove it to be trivially true?
2366   if (SE->isKnownPredicate(Pred, LHSS, RHSS))
2367     return true;
2368 
2369   return false;
2370 }
2371 
2372 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2373   SmallVector<BasicBlock*, 16> ExitingBlocks;
2374   L->getExitingBlocks(ExitingBlocks);
2375 
2376   // Remove all exits which aren't both rewriteable and execute on every
2377   // iteration.
2378   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2379     // If our exitting block exits multiple loops, we can only rewrite the
2380     // innermost one.  Otherwise, we're changing how many times the innermost
2381     // loop runs before it exits.
2382     if (LI->getLoopFor(ExitingBB) != L)
2383       return true;
2384 
2385     // Can't rewrite non-branch yet.
2386     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2387     if (!BI)
2388       return true;
2389 
2390     // If already constant, nothing to do.
2391     if (isa<Constant>(BI->getCondition()))
2392       return true;
2393 
2394     // Likewise, the loop latch must be dominated by the exiting BB.
2395     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
2396       return true;
2397 
2398     return false;
2399   });
2400   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2401 
2402   if (ExitingBlocks.empty())
2403     return false;
2404 
2405   // Get a symbolic upper bound on the loop backedge taken count.
2406   const SCEV *MaxExitCount = SE->computeMaxBackedgeTakenCount(L);
2407   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2408     return false;
2409 
2410   // Visit our exit blocks in order of dominance. We know from the fact that
2411   // all exits must dominate the latch, so there is a total dominance order
2412   // between them.
2413   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
2414                // std::sort sorts in ascending order, so we want the inverse of
2415                // the normal dominance relation.
2416                if (A == B) return false;
2417                if (DT->properlyDominates(A, B))
2418                  return true;
2419                else {
2420                  assert(DT->properlyDominates(B, A) &&
2421                         "expected total dominance order!");
2422                  return false;
2423                }
2424   });
2425 #ifdef ASSERT
2426   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2427     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2428   }
2429 #endif
2430 
2431   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2432     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2433     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2434     auto *OldCond = BI->getCondition();
2435     auto *NewCond = ConstantInt::get(OldCond->getType(),
2436                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2437     BI->setCondition(NewCond);
2438     if (OldCond->use_empty())
2439       DeadInsts.emplace_back(OldCond);
2440   };
2441 
2442   bool Changed = false;
2443   SmallSet<const SCEV*, 8> DominatingExitCounts;
2444   for (BasicBlock *ExitingBB : ExitingBlocks) {
2445     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2446     if (isa<SCEVCouldNotCompute>(ExitCount)) {
2447       // Okay, we do not know the exit count here. Can we at least prove that it
2448       // will remain the same within iteration space?
2449       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2450       if (isTrivialCond(L, BI, SE, false)) {
2451         FoldExit(ExitingBB, false);
2452         Changed = true;
2453       }
2454       if (isTrivialCond(L, BI, SE, true)) {
2455         FoldExit(ExitingBB, true);
2456         Changed = true;
2457       }
2458       continue;
2459     }
2460 
2461     // If we know we'd exit on the first iteration, rewrite the exit to
2462     // reflect this.  This does not imply the loop must exit through this
2463     // exit; there may be an earlier one taken on the first iteration.
2464     // TODO: Given we know the backedge can't be taken, we should go ahead
2465     // and break it.  Or at least, kill all the header phis and simplify.
2466     if (ExitCount->isZero()) {
2467       FoldExit(ExitingBB, true);
2468       Changed = true;
2469       continue;
2470     }
2471 
2472     // If we end up with a pointer exit count, bail.  Note that we can end up
2473     // with a pointer exit count for one exiting block, and not for another in
2474     // the same loop.
2475     if (!ExitCount->getType()->isIntegerTy() ||
2476         !MaxExitCount->getType()->isIntegerTy())
2477       continue;
2478 
2479     Type *WiderType =
2480       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2481     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2482     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2483     assert(MaxExitCount->getType() == ExitCount->getType());
2484 
2485     // Can we prove that some other exit must be taken strictly before this
2486     // one?
2487     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2488                                      MaxExitCount, ExitCount)) {
2489       FoldExit(ExitingBB, false);
2490       Changed = true;
2491       continue;
2492     }
2493 
2494     // As we run, keep track of which exit counts we've encountered.  If we
2495     // find a duplicate, we've found an exit which would have exited on the
2496     // exiting iteration, but (from the visit order) strictly follows another
2497     // which does the same and is thus dead.
2498     if (!DominatingExitCounts.insert(ExitCount).second) {
2499       FoldExit(ExitingBB, false);
2500       Changed = true;
2501       continue;
2502     }
2503 
2504     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
2505     // here.  If we kept track of the min of dominanting exits so far, we could
2506     // discharge exits with EC >= MDEC. This is less powerful than the existing
2507     // transform (since later exits aren't considered), but potentially more
2508     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2509     // or a >u b.  Such a case is not currently known.
2510   }
2511   return Changed;
2512 }
2513 
2514 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2515   SmallVector<BasicBlock*, 16> ExitingBlocks;
2516   L->getExitingBlocks(ExitingBlocks);
2517 
2518   // Finally, see if we can rewrite our exit conditions into a loop invariant
2519   // form. If we have a read-only loop, and we can tell that we must exit down
2520   // a path which does not need any of the values computed within the loop, we
2521   // can rewrite the loop to exit on the first iteration.  Note that this
2522   // doesn't either a) tell us the loop exits on the first iteration (unless
2523   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2524   // This transformation looks a lot like a restricted form of dead loop
2525   // elimination, but restricted to read-only loops and without neccesssarily
2526   // needing to kill the loop entirely.
2527   if (!LoopPredication)
2528     return false;
2529 
2530   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2531     return false;
2532 
2533   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2534   // through *explicit* control flow.  We have to eliminate the possibility of
2535   // implicit exits (see below) before we know it's truly exact.
2536   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2537   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2538       !SE->isLoopInvariant(ExactBTC, L) ||
2539       !isSafeToExpand(ExactBTC, *SE))
2540     return false;
2541 
2542   // If we end up with a pointer exit count, bail.  It may be unsized.
2543   if (!ExactBTC->getType()->isIntegerTy())
2544     return false;
2545 
2546   auto BadExit = [&](BasicBlock *ExitingBB) {
2547     // If our exiting block exits multiple loops, we can only rewrite the
2548     // innermost one.  Otherwise, we're changing how many times the innermost
2549     // loop runs before it exits.
2550     if (LI->getLoopFor(ExitingBB) != L)
2551       return true;
2552 
2553     // Can't rewrite non-branch yet.
2554     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2555     if (!BI)
2556       return true;
2557 
2558     // If already constant, nothing to do.
2559     if (isa<Constant>(BI->getCondition()))
2560       return true;
2561 
2562     // If the exit block has phis, we need to be able to compute the values
2563     // within the loop which contains them.  This assumes trivially lcssa phis
2564     // have already been removed; TODO: generalize
2565     BasicBlock *ExitBlock =
2566     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2567     if (!ExitBlock->phis().empty())
2568       return true;
2569 
2570     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2571     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2572     if (!SE->isLoopInvariant(ExitCount, L) ||
2573         !isSafeToExpand(ExitCount, *SE))
2574       return true;
2575 
2576     // If we end up with a pointer exit count, bail.  It may be unsized.
2577     if (!ExitCount->getType()->isIntegerTy())
2578       return true;
2579 
2580     return false;
2581   };
2582 
2583   // If we have any exits which can't be predicated themselves, than we can't
2584   // predicate any exit which isn't guaranteed to execute before it.  Consider
2585   // two exits (a) and (b) which would both exit on the same iteration.  If we
2586   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
2587   // we could convert a loop from exiting through (a) to one exiting through
2588   // (b).  Note that this problem exists only for exits with the same exit
2589   // count, and we could be more aggressive when exit counts are known inequal.
2590   llvm::sort(ExitingBlocks,
2591             [&](BasicBlock *A, BasicBlock *B) {
2592               // std::sort sorts in ascending order, so we want the inverse of
2593               // the normal dominance relation, plus a tie breaker for blocks
2594               // unordered by dominance.
2595               if (DT->properlyDominates(A, B)) return true;
2596               if (DT->properlyDominates(B, A)) return false;
2597               return A->getName() < B->getName();
2598             });
2599   // Check to see if our exit blocks are a total order (i.e. a linear chain of
2600   // exits before the backedge).  If they aren't, reasoning about reachability
2601   // is complicated and we choose not to for now.
2602   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2603     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2604       return false;
2605 
2606   // Given our sorted total order, we know that exit[j] must be evaluated
2607   // after all exit[i] such j > i.
2608   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2609     if (BadExit(ExitingBlocks[i])) {
2610       ExitingBlocks.resize(i);
2611       break;
2612     }
2613 
2614   if (ExitingBlocks.empty())
2615     return false;
2616 
2617   // We rely on not being able to reach an exiting block on a later iteration
2618   // then it's statically compute exit count.  The implementaton of
2619   // getExitCount currently has this invariant, but assert it here so that
2620   // breakage is obvious if this ever changes..
2621   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2622         return DT->dominates(ExitingBB, L->getLoopLatch());
2623       }));
2624 
2625   // At this point, ExitingBlocks consists of only those blocks which are
2626   // predicatable.  Given that, we know we have at least one exit we can
2627   // predicate if the loop is doesn't have side effects and doesn't have any
2628   // implicit exits (because then our exact BTC isn't actually exact).
2629   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2630   // suggestions on how to improve this?  I can obviously bail out for outer
2631   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
2632   // is that enough for *all* side effects?
2633   for (BasicBlock *BB : L->blocks())
2634     for (auto &I : *BB)
2635       // TODO:isGuaranteedToTransfer
2636       if (I.mayHaveSideEffects() || I.mayThrow())
2637         return false;
2638 
2639   bool Changed = false;
2640   // Finally, do the actual predication for all predicatable blocks.  A couple
2641   // of notes here:
2642   // 1) We don't bother to constant fold dominated exits with identical exit
2643   //    counts; that's simply a form of CSE/equality propagation and we leave
2644   //    it for dedicated passes.
2645   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2646   //    legality constraints and we leave that to dedicated logic.  We want to
2647   //    predicate even if we can't insert a loop invariant expression as
2648   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2649   //    varying check.
2650   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2651   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2652   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2653   for (BasicBlock *ExitingBB : ExitingBlocks) {
2654     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2655 
2656     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2657     Value *NewCond;
2658     if (ExitCount == ExactBTC) {
2659       NewCond = L->contains(BI->getSuccessor(0)) ?
2660         B.getFalse() : B.getTrue();
2661     } else {
2662       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2663       if (!ExactBTCV)
2664         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2665       Value *RHS = ExactBTCV;
2666       if (ECV->getType() != RHS->getType()) {
2667         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2668         ECV = B.CreateZExt(ECV, WiderTy);
2669         RHS = B.CreateZExt(RHS, WiderTy);
2670       }
2671       auto Pred = L->contains(BI->getSuccessor(0)) ?
2672         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2673       NewCond = B.CreateICmp(Pred, ECV, RHS);
2674     }
2675     Value *OldCond = BI->getCondition();
2676     BI->setCondition(NewCond);
2677     if (OldCond->use_empty())
2678       DeadInsts.emplace_back(OldCond);
2679     Changed = true;
2680   }
2681 
2682   return Changed;
2683 }
2684 
2685 //===----------------------------------------------------------------------===//
2686 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2687 //===----------------------------------------------------------------------===//
2688 
2689 bool IndVarSimplify::run(Loop *L) {
2690   // We need (and expect!) the incoming loop to be in LCSSA.
2691   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2692          "LCSSA required to run indvars!");
2693 
2694   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2695   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2696   //    canonicalization can be a pessimization without LSR to "clean up"
2697   //    afterwards.
2698   //  - We depend on having a preheader; in particular,
2699   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2700   //    and we're in trouble if we can't find the induction variable even when
2701   //    we've manually inserted one.
2702   //  - LFTR relies on having a single backedge.
2703   if (!L->isLoopSimplifyForm())
2704     return false;
2705 
2706 #ifndef NDEBUG
2707   // Used below for a consistency check only
2708   // Note: Since the result returned by ScalarEvolution may depend on the order
2709   // in which previous results are added to its cache, the call to
2710   // getBackedgeTakenCount() may change following SCEV queries.
2711   const SCEV *BackedgeTakenCount;
2712   if (VerifyIndvars)
2713     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2714 #endif
2715 
2716   bool Changed = false;
2717   // If there are any floating-point recurrences, attempt to
2718   // transform them to use integer recurrences.
2719   Changed |= rewriteNonIntegerIVs(L);
2720 
2721   // Create a rewriter object which we'll use to transform the code with.
2722   SCEVExpander Rewriter(*SE, DL, "indvars");
2723 #ifndef NDEBUG
2724   Rewriter.setDebugType(DEBUG_TYPE);
2725 #endif
2726 
2727   // Eliminate redundant IV users.
2728   //
2729   // Simplification works best when run before other consumers of SCEV. We
2730   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2731   // other expressions involving loop IVs have been evaluated. This helps SCEV
2732   // set no-wrap flags before normalizing sign/zero extension.
2733   Rewriter.disableCanonicalMode();
2734   Changed |= simplifyAndExtend(L, Rewriter, LI);
2735 
2736   // Check to see if we can compute the final value of any expressions
2737   // that are recurrent in the loop, and substitute the exit values from the
2738   // loop into any instructions outside of the loop that use the final values
2739   // of the current expressions.
2740   if (ReplaceExitValue != NeverRepl) {
2741     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2742                                              ReplaceExitValue, DeadInsts)) {
2743       NumReplaced += Rewrites;
2744       Changed = true;
2745     }
2746   }
2747 
2748   // Eliminate redundant IV cycles.
2749   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2750 
2751   // Try to eliminate loop exits based on analyzeable exit counts
2752   if (optimizeLoopExits(L, Rewriter))  {
2753     Changed = true;
2754     // Given we've changed exit counts, notify SCEV
2755     SE->forgetLoop(L);
2756   }
2757 
2758   // Try to form loop invariant tests for loop exits by changing how many
2759   // iterations of the loop run when that is unobservable.
2760   if (predicateLoopExits(L, Rewriter)) {
2761     Changed = true;
2762     // Given we've changed exit counts, notify SCEV
2763     SE->forgetLoop(L);
2764   }
2765 
2766   // If we have a trip count expression, rewrite the loop's exit condition
2767   // using it.
2768   if (!DisableLFTR) {
2769     BasicBlock *PreHeader = L->getLoopPreheader();
2770     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
2771 
2772     SmallVector<BasicBlock*, 16> ExitingBlocks;
2773     L->getExitingBlocks(ExitingBlocks);
2774     for (BasicBlock *ExitingBB : ExitingBlocks) {
2775       // Can't rewrite non-branch yet.
2776       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2777         continue;
2778 
2779       // If our exitting block exits multiple loops, we can only rewrite the
2780       // innermost one.  Otherwise, we're changing how many times the innermost
2781       // loop runs before it exits.
2782       if (LI->getLoopFor(ExitingBB) != L)
2783         continue;
2784 
2785       if (!needsLFTR(L, ExitingBB))
2786         continue;
2787 
2788       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2789       if (isa<SCEVCouldNotCompute>(ExitCount))
2790         continue;
2791 
2792       // This was handled above, but as we form SCEVs, we can sometimes refine
2793       // existing ones; this allows exit counts to be folded to zero which
2794       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2795       // until stable to handle cases like this better.
2796       if (ExitCount->isZero())
2797         continue;
2798 
2799       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2800       if (!IndVar)
2801         continue;
2802 
2803       // Avoid high cost expansions.  Note: This heuristic is questionable in
2804       // that our definition of "high cost" is not exactly principled.
2805       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2806                                        TTI, PreHeaderBR))
2807         continue;
2808 
2809       // Check preconditions for proper SCEVExpander operation. SCEV does not
2810       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2811       // any pass that uses the SCEVExpander must do it. This does not work
2812       // well for loop passes because SCEVExpander makes assumptions about
2813       // all loops, while LoopPassManager only forces the current loop to be
2814       // simplified.
2815       //
2816       // FIXME: SCEV expansion has no way to bail out, so the caller must
2817       // explicitly check any assumptions made by SCEV. Brittle.
2818       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2819       if (!AR || AR->getLoop()->getLoopPreheader())
2820         Changed |= linearFunctionTestReplace(L, ExitingBB,
2821                                              ExitCount, IndVar,
2822                                              Rewriter);
2823     }
2824   }
2825   // Clear the rewriter cache, because values that are in the rewriter's cache
2826   // can be deleted in the loop below, causing the AssertingVH in the cache to
2827   // trigger.
2828   Rewriter.clear();
2829 
2830   // Now that we're done iterating through lists, clean up any instructions
2831   // which are now dead.
2832   while (!DeadInsts.empty())
2833     if (Instruction *Inst =
2834             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2835       Changed |=
2836           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2837 
2838   // The Rewriter may not be used from this point on.
2839 
2840   // Loop-invariant instructions in the preheader that aren't used in the
2841   // loop may be sunk below the loop to reduce register pressure.
2842   Changed |= sinkUnusedInvariants(L);
2843 
2844   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2845   // trip count and therefore can further simplify exit values in addition to
2846   // rewriteLoopExitValues.
2847   Changed |= rewriteFirstIterationLoopExitValues(L);
2848 
2849   // Clean up dead instructions.
2850   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2851 
2852   // Check a post-condition.
2853   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2854          "Indvars did not preserve LCSSA!");
2855 
2856   // Verify that LFTR, and any other change have not interfered with SCEV's
2857   // ability to compute trip count.  We may have *changed* the exit count, but
2858   // only by reducing it.
2859 #ifndef NDEBUG
2860   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2861     SE->forgetLoop(L);
2862     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2863     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2864         SE->getTypeSizeInBits(NewBECount->getType()))
2865       NewBECount = SE->getTruncateOrNoop(NewBECount,
2866                                          BackedgeTakenCount->getType());
2867     else
2868       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2869                                                  NewBECount->getType());
2870     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2871                                  NewBECount) && "indvars must preserve SCEV");
2872   }
2873   if (VerifyMemorySSA && MSSAU)
2874     MSSAU->getMemorySSA()->verifyMemorySSA();
2875 #endif
2876 
2877   return Changed;
2878 }
2879 
2880 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2881                                           LoopStandardAnalysisResults &AR,
2882                                           LPMUpdater &) {
2883   Function *F = L.getHeader()->getParent();
2884   const DataLayout &DL = F->getParent()->getDataLayout();
2885 
2886   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2887   if (!IVS.run(&L))
2888     return PreservedAnalyses::all();
2889 
2890   auto PA = getLoopPassPreservedAnalyses();
2891   PA.preserveSet<CFGAnalyses>();
2892   if (AR.MSSA)
2893     PA.preserve<MemorySSAAnalysis>();
2894   return PA;
2895 }
2896 
2897 namespace {
2898 
2899 struct IndVarSimplifyLegacyPass : public LoopPass {
2900   static char ID; // Pass identification, replacement for typeid
2901 
2902   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2903     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2904   }
2905 
2906   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2907     if (skipLoop(L))
2908       return false;
2909 
2910     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2911     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2912     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2913     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2914     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2915     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2916     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2917     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2918     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2919     MemorySSA *MSSA = nullptr;
2920     if (MSSAAnalysis)
2921       MSSA = &MSSAAnalysis->getMSSA();
2922 
2923     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2924     return IVS.run(L);
2925   }
2926 
2927   void getAnalysisUsage(AnalysisUsage &AU) const override {
2928     AU.setPreservesCFG();
2929     AU.addPreserved<MemorySSAWrapperPass>();
2930     getLoopAnalysisUsage(AU);
2931   }
2932 };
2933 
2934 } // end anonymous namespace
2935 
2936 char IndVarSimplifyLegacyPass::ID = 0;
2937 
2938 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2939                       "Induction Variable Simplification", false, false)
2940 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2941 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2942                     "Induction Variable Simplification", false, false)
2943 
2944 Pass *llvm::createIndVarSimplifyPass() {
2945   return new IndVarSimplifyLegacyPass();
2946 }
2947