1 //===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop reroller.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/BitVector.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/MapVector.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/Analysis/AliasSetTracker.h"
25 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/LoopPass.h"
27 #include "llvm/Analysis/ScalarEvolution.h"
28 #include "llvm/Analysis/ScalarEvolutionExpander.h"
29 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
30 #include "llvm/Analysis/TargetLibraryInfo.h"
31 #include "llvm/Analysis/Utils/Local.h"
32 #include "llvm/Analysis/ValueTracking.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/IRBuilder.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/Type.h"
46 #include "llvm/IR/Use.h"
47 #include "llvm/IR/User.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/CommandLine.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include "llvm/Transforms/Scalar.h"
55 #include "llvm/Transforms/Utils.h"
56 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
57 #include "llvm/Transforms/Utils/LoopUtils.h"
58 #include <cassert>
59 #include <cstddef>
60 #include <cstdint>
61 #include <cstdlib>
62 #include <iterator>
63 #include <map>
64 #include <utility>
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "loop-reroll"
69 
70 STATISTIC(NumRerolledLoops, "Number of rerolled loops");
71 
72 static cl::opt<unsigned>
73 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
74   cl::desc("The maximum increment for loop rerolling"));
75 
76 static cl::opt<unsigned>
77 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
78                           cl::Hidden,
79                           cl::desc("The maximum number of failures to tolerate"
80                                    " during fuzzy matching. (default: 400)"));
81 
82 // This loop re-rolling transformation aims to transform loops like this:
83 //
84 // int foo(int a);
85 // void bar(int *x) {
86 //   for (int i = 0; i < 500; i += 3) {
87 //     foo(i);
88 //     foo(i+1);
89 //     foo(i+2);
90 //   }
91 // }
92 //
93 // into a loop like this:
94 //
95 // void bar(int *x) {
96 //   for (int i = 0; i < 500; ++i)
97 //     foo(i);
98 // }
99 //
100 // It does this by looking for loops that, besides the latch code, are composed
101 // of isomorphic DAGs of instructions, with each DAG rooted at some increment
102 // to the induction variable, and where each DAG is isomorphic to the DAG
103 // rooted at the induction variable (excepting the sub-DAGs which root the
104 // other induction-variable increments). In other words, we're looking for loop
105 // bodies of the form:
106 //
107 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
108 // f(%iv)
109 // %iv.1 = add %iv, 1                <-- a root increment
110 // f(%iv.1)
111 // %iv.2 = add %iv, 2                <-- a root increment
112 // f(%iv.2)
113 // %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
114 // f(%iv.scale_m_1)
115 // ...
116 // %iv.next = add %iv, scale
117 // %cmp = icmp(%iv, ...)
118 // br %cmp, header, exit
119 //
120 // where each f(i) is a set of instructions that, collectively, are a function
121 // only of i (and other loop-invariant values).
122 //
123 // As a special case, we can also reroll loops like this:
124 //
125 // int foo(int);
126 // void bar(int *x) {
127 //   for (int i = 0; i < 500; ++i) {
128 //     x[3*i] = foo(0);
129 //     x[3*i+1] = foo(0);
130 //     x[3*i+2] = foo(0);
131 //   }
132 // }
133 //
134 // into this:
135 //
136 // void bar(int *x) {
137 //   for (int i = 0; i < 1500; ++i)
138 //     x[i] = foo(0);
139 // }
140 //
141 // in which case, we're looking for inputs like this:
142 //
143 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
144 // %scaled.iv = mul %iv, scale
145 // f(%scaled.iv)
146 // %scaled.iv.1 = add %scaled.iv, 1
147 // f(%scaled.iv.1)
148 // %scaled.iv.2 = add %scaled.iv, 2
149 // f(%scaled.iv.2)
150 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
151 // f(%scaled.iv.scale_m_1)
152 // ...
153 // %iv.next = add %iv, 1
154 // %cmp = icmp(%iv, ...)
155 // br %cmp, header, exit
156 
157 namespace {
158 
159   enum IterationLimits {
160     /// The maximum number of iterations that we'll try and reroll.
161     IL_MaxRerollIterations = 32,
162     /// The bitvector index used by loop induction variables and other
163     /// instructions that belong to all iterations.
164     IL_All,
165     IL_End
166   };
167 
168   class LoopReroll : public LoopPass {
169   public:
170     static char ID; // Pass ID, replacement for typeid
171 
172     LoopReroll() : LoopPass(ID) {
173       initializeLoopRerollPass(*PassRegistry::getPassRegistry());
174     }
175 
176     bool runOnLoop(Loop *L, LPPassManager &LPM) override;
177 
178     void getAnalysisUsage(AnalysisUsage &AU) const override {
179       AU.addRequired<TargetLibraryInfoWrapperPass>();
180       getLoopAnalysisUsage(AU);
181     }
182 
183   protected:
184     AliasAnalysis *AA;
185     LoopInfo *LI;
186     ScalarEvolution *SE;
187     TargetLibraryInfo *TLI;
188     DominatorTree *DT;
189     bool PreserveLCSSA;
190 
191     using SmallInstructionVector = SmallVector<Instruction *, 16>;
192     using SmallInstructionSet = SmallSet<Instruction *, 16>;
193 
194     // Map between induction variable and its increment
195     DenseMap<Instruction *, int64_t> IVToIncMap;
196 
197     // For loop with multiple induction variable, remember the one used only to
198     // control the loop.
199     Instruction *LoopControlIV;
200 
201     // A chain of isomorphic instructions, identified by a single-use PHI
202     // representing a reduction. Only the last value may be used outside the
203     // loop.
204     struct SimpleLoopReduction {
205       SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
206         assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
207         add(L);
208       }
209 
210       bool valid() const {
211         return Valid;
212       }
213 
214       Instruction *getPHI() const {
215         assert(Valid && "Using invalid reduction");
216         return Instructions.front();
217       }
218 
219       Instruction *getReducedValue() const {
220         assert(Valid && "Using invalid reduction");
221         return Instructions.back();
222       }
223 
224       Instruction *get(size_t i) const {
225         assert(Valid && "Using invalid reduction");
226         return Instructions[i+1];
227       }
228 
229       Instruction *operator [] (size_t i) const { return get(i); }
230 
231       // The size, ignoring the initial PHI.
232       size_t size() const {
233         assert(Valid && "Using invalid reduction");
234         return Instructions.size()-1;
235       }
236 
237       using iterator = SmallInstructionVector::iterator;
238       using const_iterator = SmallInstructionVector::const_iterator;
239 
240       iterator begin() {
241         assert(Valid && "Using invalid reduction");
242         return std::next(Instructions.begin());
243       }
244 
245       const_iterator begin() const {
246         assert(Valid && "Using invalid reduction");
247         return std::next(Instructions.begin());
248       }
249 
250       iterator end() { return Instructions.end(); }
251       const_iterator end() const { return Instructions.end(); }
252 
253     protected:
254       bool Valid = false;
255       SmallInstructionVector Instructions;
256 
257       void add(Loop *L);
258     };
259 
260     // The set of all reductions, and state tracking of possible reductions
261     // during loop instruction processing.
262     struct ReductionTracker {
263       using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
264 
265       // Add a new possible reduction.
266       void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
267 
268       // Setup to track possible reductions corresponding to the provided
269       // rerolling scale. Only reductions with a number of non-PHI instructions
270       // that is divisible by the scale are considered. Three instructions sets
271       // are filled in:
272       //   - A set of all possible instructions in eligible reductions.
273       //   - A set of all PHIs in eligible reductions
274       //   - A set of all reduced values (last instructions) in eligible
275       //     reductions.
276       void restrictToScale(uint64_t Scale,
277                            SmallInstructionSet &PossibleRedSet,
278                            SmallInstructionSet &PossibleRedPHISet,
279                            SmallInstructionSet &PossibleRedLastSet) {
280         PossibleRedIdx.clear();
281         PossibleRedIter.clear();
282         Reds.clear();
283 
284         for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
285           if (PossibleReds[i].size() % Scale == 0) {
286             PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
287             PossibleRedPHISet.insert(PossibleReds[i].getPHI());
288 
289             PossibleRedSet.insert(PossibleReds[i].getPHI());
290             PossibleRedIdx[PossibleReds[i].getPHI()] = i;
291             for (Instruction *J : PossibleReds[i]) {
292               PossibleRedSet.insert(J);
293               PossibleRedIdx[J] = i;
294             }
295           }
296       }
297 
298       // The functions below are used while processing the loop instructions.
299 
300       // Are the two instructions both from reductions, and furthermore, from
301       // the same reduction?
302       bool isPairInSame(Instruction *J1, Instruction *J2) {
303         DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
304         if (J1I != PossibleRedIdx.end()) {
305           DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
306           if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
307             return true;
308         }
309 
310         return false;
311       }
312 
313       // The two provided instructions, the first from the base iteration, and
314       // the second from iteration i, form a matched pair. If these are part of
315       // a reduction, record that fact.
316       void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
317         if (PossibleRedIdx.count(J1)) {
318           assert(PossibleRedIdx.count(J2) &&
319                  "Recording reduction vs. non-reduction instruction?");
320 
321           PossibleRedIter[J1] = 0;
322           PossibleRedIter[J2] = i;
323 
324           int Idx = PossibleRedIdx[J1];
325           assert(Idx == PossibleRedIdx[J2] &&
326                  "Recording pair from different reductions?");
327           Reds.insert(Idx);
328         }
329       }
330 
331       // The functions below can be called after we've finished processing all
332       // instructions in the loop, and we know which reductions were selected.
333 
334       bool validateSelected();
335       void replaceSelected();
336 
337     protected:
338       // The vector of all possible reductions (for any scale).
339       SmallReductionVector PossibleReds;
340 
341       DenseMap<Instruction *, int> PossibleRedIdx;
342       DenseMap<Instruction *, int> PossibleRedIter;
343       DenseSet<int> Reds;
344     };
345 
346     // A DAGRootSet models an induction variable being used in a rerollable
347     // loop. For example,
348     //
349     //   x[i*3+0] = y1
350     //   x[i*3+1] = y2
351     //   x[i*3+2] = y3
352     //
353     //   Base instruction -> i*3
354     //                    +---+----+
355     //                   /    |     \
356     //               ST[y1]  +1     +2  <-- Roots
357     //                        |      |
358     //                      ST[y2] ST[y3]
359     //
360     // There may be multiple DAGRoots, for example:
361     //
362     //   x[i*2+0] = ...   (1)
363     //   x[i*2+1] = ...   (1)
364     //   x[i*2+4] = ...   (2)
365     //   x[i*2+5] = ...   (2)
366     //   x[(i+1234)*2+5678] = ... (3)
367     //   x[(i+1234)*2+5679] = ... (3)
368     //
369     // The loop will be rerolled by adding a new loop induction variable,
370     // one for the Base instruction in each DAGRootSet.
371     //
372     struct DAGRootSet {
373       Instruction *BaseInst;
374       SmallInstructionVector Roots;
375 
376       // The instructions between IV and BaseInst (but not including BaseInst).
377       SmallInstructionSet SubsumedInsts;
378     };
379 
380     // The set of all DAG roots, and state tracking of all roots
381     // for a particular induction variable.
382     struct DAGRootTracker {
383       DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
384                      ScalarEvolution *SE, AliasAnalysis *AA,
385                      TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
386                      bool PreserveLCSSA,
387                      DenseMap<Instruction *, int64_t> &IncrMap,
388                      Instruction *LoopCtrlIV)
389           : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
390             PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
391             LoopControlIV(LoopCtrlIV) {}
392 
393       /// Stage 1: Find all the DAG roots for the induction variable.
394       bool findRoots();
395 
396       /// Stage 2: Validate if the found roots are valid.
397       bool validate(ReductionTracker &Reductions);
398 
399       /// Stage 3: Assuming validate() returned true, perform the
400       /// replacement.
401       /// @param IterCount The maximum iteration count of L.
402       void replace(const SCEV *IterCount);
403 
404     protected:
405       using UsesTy = MapVector<Instruction *, BitVector>;
406 
407       void findRootsRecursive(Instruction *IVU,
408                               SmallInstructionSet SubsumedInsts);
409       bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
410       bool collectPossibleRoots(Instruction *Base,
411                                 std::map<int64_t,Instruction*> &Roots);
412       bool validateRootSet(DAGRootSet &DRS);
413 
414       bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
415       void collectInLoopUserSet(const SmallInstructionVector &Roots,
416                                 const SmallInstructionSet &Exclude,
417                                 const SmallInstructionSet &Final,
418                                 DenseSet<Instruction *> &Users);
419       void collectInLoopUserSet(Instruction *Root,
420                                 const SmallInstructionSet &Exclude,
421                                 const SmallInstructionSet &Final,
422                                 DenseSet<Instruction *> &Users);
423 
424       UsesTy::iterator nextInstr(int Val, UsesTy &In,
425                                  const SmallInstructionSet &Exclude,
426                                  UsesTy::iterator *StartI=nullptr);
427       bool isBaseInst(Instruction *I);
428       bool isRootInst(Instruction *I);
429       bool instrDependsOn(Instruction *I,
430                           UsesTy::iterator Start,
431                           UsesTy::iterator End);
432       void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
433       void updateNonLoopCtrlIncr();
434 
435       LoopReroll *Parent;
436 
437       // Members of Parent, replicated here for brevity.
438       Loop *L;
439       ScalarEvolution *SE;
440       AliasAnalysis *AA;
441       TargetLibraryInfo *TLI;
442       DominatorTree *DT;
443       LoopInfo *LI;
444       bool PreserveLCSSA;
445 
446       // The loop induction variable.
447       Instruction *IV;
448 
449       // Loop step amount.
450       int64_t Inc;
451 
452       // Loop reroll count; if Inc == 1, this records the scaling applied
453       // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
454       // If Inc is not 1, Scale = Inc.
455       uint64_t Scale;
456 
457       // The roots themselves.
458       SmallVector<DAGRootSet,16> RootSets;
459 
460       // All increment instructions for IV.
461       SmallInstructionVector LoopIncs;
462 
463       // Map of all instructions in the loop (in order) to the iterations
464       // they are used in (or specially, IL_All for instructions
465       // used in the loop increment mechanism).
466       UsesTy Uses;
467 
468       // Map between induction variable and its increment
469       DenseMap<Instruction *, int64_t> &IVToIncMap;
470 
471       Instruction *LoopControlIV;
472     };
473 
474     // Check if it is a compare-like instruction whose user is a branch
475     bool isCompareUsedByBranch(Instruction *I) {
476       auto *TI = I->getParent()->getTerminator();
477       if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
478         return false;
479       return I->hasOneUse() && TI->getOperand(0) == I;
480     };
481 
482     bool isLoopControlIV(Loop *L, Instruction *IV);
483     void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
484     void collectPossibleReductions(Loop *L,
485            ReductionTracker &Reductions);
486     bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
487                 ReductionTracker &Reductions);
488   };
489 
490 } // end anonymous namespace
491 
492 char LoopReroll::ID = 0;
493 
494 INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
495 INITIALIZE_PASS_DEPENDENCY(LoopPass)
496 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
497 INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
498 
499 Pass *llvm::createLoopRerollPass() {
500   return new LoopReroll;
501 }
502 
503 // Returns true if the provided instruction is used outside the given loop.
504 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
505 // non-loop blocks to be outside the loop.
506 static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
507   for (User *U : I->users()) {
508     if (!L->contains(cast<Instruction>(U)))
509       return true;
510   }
511   return false;
512 }
513 
514 static const SCEVConstant *getIncrmentFactorSCEV(ScalarEvolution *SE,
515                                                  const SCEV *SCEVExpr,
516                                                  Instruction &IV) {
517   const SCEVMulExpr *MulSCEV = dyn_cast<SCEVMulExpr>(SCEVExpr);
518 
519   // If StepRecurrence of a SCEVExpr is a constant (c1 * c2, c2 = sizeof(ptr)),
520   // Return c1.
521   if (!MulSCEV && IV.getType()->isPointerTy())
522     if (const SCEVConstant *IncSCEV = dyn_cast<SCEVConstant>(SCEVExpr)) {
523       const PointerType *PTy = cast<PointerType>(IV.getType());
524       Type *ElTy = PTy->getElementType();
525       const SCEV *SizeOfExpr =
526           SE->getSizeOfExpr(SE->getEffectiveSCEVType(IV.getType()), ElTy);
527       if (IncSCEV->getValue()->getValue().isNegative()) {
528         const SCEV *NewSCEV =
529             SE->getUDivExpr(SE->getNegativeSCEV(SCEVExpr), SizeOfExpr);
530         return dyn_cast<SCEVConstant>(SE->getNegativeSCEV(NewSCEV));
531       } else {
532         return dyn_cast<SCEVConstant>(SE->getUDivExpr(SCEVExpr, SizeOfExpr));
533       }
534     }
535 
536   if (!MulSCEV)
537     return nullptr;
538 
539   // If StepRecurrence of a SCEVExpr is a c * sizeof(x), where c is constant,
540   // Return c.
541   const SCEVConstant *CIncSCEV = nullptr;
542   for (const SCEV *Operand : MulSCEV->operands()) {
543     if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Operand)) {
544       CIncSCEV = Constant;
545     } else if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Operand)) {
546       Type *AllocTy;
547       if (!Unknown->isSizeOf(AllocTy))
548         break;
549     } else {
550       return nullptr;
551     }
552   }
553   return CIncSCEV;
554 }
555 
556 // Check if an IV is only used to control the loop. There are two cases:
557 // 1. It only has one use which is loop increment, and the increment is only
558 // used by comparison and the PHI (could has sext with nsw in between), and the
559 // comparison is only used by branch.
560 // 2. It is used by loop increment and the comparison, the loop increment is
561 // only used by the PHI, and the comparison is used only by the branch.
562 bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
563   unsigned IVUses = IV->getNumUses();
564   if (IVUses != 2 && IVUses != 1)
565     return false;
566 
567   for (auto *User : IV->users()) {
568     int32_t IncOrCmpUses = User->getNumUses();
569     bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
570 
571     // User can only have one or two uses.
572     if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
573       return false;
574 
575     // Case 1
576     if (IVUses == 1) {
577       // The only user must be the loop increment.
578       // The loop increment must have two uses.
579       if (IsCompInst || IncOrCmpUses != 2)
580         return false;
581     }
582 
583     // Case 2
584     if (IVUses == 2 && IncOrCmpUses != 1)
585       return false;
586 
587     // The users of the IV must be a binary operation or a comparison
588     if (auto *BO = dyn_cast<BinaryOperator>(User)) {
589       if (BO->getOpcode() == Instruction::Add) {
590         // Loop Increment
591         // User of Loop Increment should be either PHI or CMP
592         for (auto *UU : User->users()) {
593           if (PHINode *PN = dyn_cast<PHINode>(UU)) {
594             if (PN != IV)
595               return false;
596           }
597           // Must be a CMP or an ext (of a value with nsw) then CMP
598           else {
599             Instruction *UUser = dyn_cast<Instruction>(UU);
600             // Skip SExt if we are extending an nsw value
601             // TODO: Allow ZExt too
602             if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
603                 isa<SExtInst>(UUser))
604               UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
605             if (!isCompareUsedByBranch(UUser))
606               return false;
607           }
608         }
609       } else
610         return false;
611       // Compare : can only have one use, and must be branch
612     } else if (!IsCompInst)
613       return false;
614   }
615   return true;
616 }
617 
618 // Collect the list of loop induction variables with respect to which it might
619 // be possible to reroll the loop.
620 void LoopReroll::collectPossibleIVs(Loop *L,
621                                     SmallInstructionVector &PossibleIVs) {
622   BasicBlock *Header = L->getHeader();
623   for (BasicBlock::iterator I = Header->begin(),
624        IE = Header->getFirstInsertionPt(); I != IE; ++I) {
625     if (!isa<PHINode>(I))
626       continue;
627     if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy())
628       continue;
629 
630     if (const SCEVAddRecExpr *PHISCEV =
631             dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
632       if (PHISCEV->getLoop() != L)
633         continue;
634       if (!PHISCEV->isAffine())
635         continue;
636       const SCEVConstant *IncSCEV = nullptr;
637       if (I->getType()->isPointerTy())
638         IncSCEV =
639             getIncrmentFactorSCEV(SE, PHISCEV->getStepRecurrence(*SE), *I);
640       else
641         IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
642       if (IncSCEV) {
643         const APInt &AInt = IncSCEV->getValue()->getValue().abs();
644         if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
645           continue;
646         IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
647         DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
648                      << "\n");
649 
650         if (isLoopControlIV(L, &*I)) {
651           assert(!LoopControlIV && "Found two loop control only IV");
652           LoopControlIV = &(*I);
653           DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
654                        << *PHISCEV << "\n");
655         } else
656           PossibleIVs.push_back(&*I);
657       }
658     }
659   }
660 }
661 
662 // Add the remainder of the reduction-variable chain to the instruction vector
663 // (the initial PHINode has already been added). If successful, the object is
664 // marked as valid.
665 void LoopReroll::SimpleLoopReduction::add(Loop *L) {
666   assert(!Valid && "Cannot add to an already-valid chain");
667 
668   // The reduction variable must be a chain of single-use instructions
669   // (including the PHI), except for the last value (which is used by the PHI
670   // and also outside the loop).
671   Instruction *C = Instructions.front();
672   if (C->user_empty())
673     return;
674 
675   do {
676     C = cast<Instruction>(*C->user_begin());
677     if (C->hasOneUse()) {
678       if (!C->isBinaryOp())
679         return;
680 
681       if (!(isa<PHINode>(Instructions.back()) ||
682             C->isSameOperationAs(Instructions.back())))
683         return;
684 
685       Instructions.push_back(C);
686     }
687   } while (C->hasOneUse());
688 
689   if (Instructions.size() < 2 ||
690       !C->isSameOperationAs(Instructions.back()) ||
691       C->use_empty())
692     return;
693 
694   // C is now the (potential) last instruction in the reduction chain.
695   for (User *U : C->users()) {
696     // The only in-loop user can be the initial PHI.
697     if (L->contains(cast<Instruction>(U)))
698       if (cast<Instruction>(U) != Instructions.front())
699         return;
700   }
701 
702   Instructions.push_back(C);
703   Valid = true;
704 }
705 
706 // Collect the vector of possible reduction variables.
707 void LoopReroll::collectPossibleReductions(Loop *L,
708   ReductionTracker &Reductions) {
709   BasicBlock *Header = L->getHeader();
710   for (BasicBlock::iterator I = Header->begin(),
711        IE = Header->getFirstInsertionPt(); I != IE; ++I) {
712     if (!isa<PHINode>(I))
713       continue;
714     if (!I->getType()->isSingleValueType())
715       continue;
716 
717     SimpleLoopReduction SLR(&*I, L);
718     if (!SLR.valid())
719       continue;
720 
721     DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " <<
722           SLR.size() << " chained instructions)\n");
723     Reductions.addSLR(SLR);
724   }
725 }
726 
727 // Collect the set of all users of the provided root instruction. This set of
728 // users contains not only the direct users of the root instruction, but also
729 // all users of those users, and so on. There are two exceptions:
730 //
731 //   1. Instructions in the set of excluded instructions are never added to the
732 //   use set (even if they are users). This is used, for example, to exclude
733 //   including root increments in the use set of the primary IV.
734 //
735 //   2. Instructions in the set of final instructions are added to the use set
736 //   if they are users, but their users are not added. This is used, for
737 //   example, to prevent a reduction update from forcing all later reduction
738 //   updates into the use set.
739 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
740   Instruction *Root, const SmallInstructionSet &Exclude,
741   const SmallInstructionSet &Final,
742   DenseSet<Instruction *> &Users) {
743   SmallInstructionVector Queue(1, Root);
744   while (!Queue.empty()) {
745     Instruction *I = Queue.pop_back_val();
746     if (!Users.insert(I).second)
747       continue;
748 
749     if (!Final.count(I))
750       for (Use &U : I->uses()) {
751         Instruction *User = cast<Instruction>(U.getUser());
752         if (PHINode *PN = dyn_cast<PHINode>(User)) {
753           // Ignore "wrap-around" uses to PHIs of this loop's header.
754           if (PN->getIncomingBlock(U) == L->getHeader())
755             continue;
756         }
757 
758         if (L->contains(User) && !Exclude.count(User)) {
759           Queue.push_back(User);
760         }
761       }
762 
763     // We also want to collect single-user "feeder" values.
764     for (User::op_iterator OI = I->op_begin(),
765          OIE = I->op_end(); OI != OIE; ++OI) {
766       if (Instruction *Op = dyn_cast<Instruction>(*OI))
767         if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
768             !Final.count(Op))
769           Queue.push_back(Op);
770     }
771   }
772 }
773 
774 // Collect all of the users of all of the provided root instructions (combined
775 // into a single set).
776 void LoopReroll::DAGRootTracker::collectInLoopUserSet(
777   const SmallInstructionVector &Roots,
778   const SmallInstructionSet &Exclude,
779   const SmallInstructionSet &Final,
780   DenseSet<Instruction *> &Users) {
781   for (Instruction *Root : Roots)
782     collectInLoopUserSet(Root, Exclude, Final, Users);
783 }
784 
785 static bool isUnorderedLoadStore(Instruction *I) {
786   if (LoadInst *LI = dyn_cast<LoadInst>(I))
787     return LI->isUnordered();
788   if (StoreInst *SI = dyn_cast<StoreInst>(I))
789     return SI->isUnordered();
790   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
791     return !MI->isVolatile();
792   return false;
793 }
794 
795 /// Return true if IVU is a "simple" arithmetic operation.
796 /// This is used for narrowing the search space for DAGRoots; only arithmetic
797 /// and GEPs can be part of a DAGRoot.
798 static bool isSimpleArithmeticOp(User *IVU) {
799   if (Instruction *I = dyn_cast<Instruction>(IVU)) {
800     switch (I->getOpcode()) {
801     default: return false;
802     case Instruction::Add:
803     case Instruction::Sub:
804     case Instruction::Mul:
805     case Instruction::Shl:
806     case Instruction::AShr:
807     case Instruction::LShr:
808     case Instruction::GetElementPtr:
809     case Instruction::Trunc:
810     case Instruction::ZExt:
811     case Instruction::SExt:
812       return true;
813     }
814   }
815   return false;
816 }
817 
818 static bool isLoopIncrement(User *U, Instruction *IV) {
819   BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
820 
821   if ((BO && BO->getOpcode() != Instruction::Add) ||
822       (!BO && !isa<GetElementPtrInst>(U)))
823     return false;
824 
825   for (auto *UU : U->users()) {
826     PHINode *PN = dyn_cast<PHINode>(UU);
827     if (PN && PN == IV)
828       return true;
829   }
830   return false;
831 }
832 
833 bool LoopReroll::DAGRootTracker::
834 collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
835   SmallInstructionVector BaseUsers;
836 
837   for (auto *I : Base->users()) {
838     ConstantInt *CI = nullptr;
839 
840     if (isLoopIncrement(I, IV)) {
841       LoopIncs.push_back(cast<Instruction>(I));
842       continue;
843     }
844 
845     // The root nodes must be either GEPs, ORs or ADDs.
846     if (auto *BO = dyn_cast<BinaryOperator>(I)) {
847       if (BO->getOpcode() == Instruction::Add ||
848           BO->getOpcode() == Instruction::Or)
849         CI = dyn_cast<ConstantInt>(BO->getOperand(1));
850     } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
851       Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
852       CI = dyn_cast<ConstantInt>(LastOperand);
853     }
854 
855     if (!CI) {
856       if (Instruction *II = dyn_cast<Instruction>(I)) {
857         BaseUsers.push_back(II);
858         continue;
859       } else {
860         DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I << "\n");
861         return false;
862       }
863     }
864 
865     int64_t V = std::abs(CI->getValue().getSExtValue());
866     if (Roots.find(V) != Roots.end())
867       // No duplicates, please.
868       return false;
869 
870     Roots[V] = cast<Instruction>(I);
871   }
872 
873   // Make sure we have at least two roots.
874   if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
875     return false;
876 
877   // If we found non-loop-inc, non-root users of Base, assume they are
878   // for the zeroth root index. This is because "add %a, 0" gets optimized
879   // away.
880   if (BaseUsers.size()) {
881     if (Roots.find(0) != Roots.end()) {
882       DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
883       return false;
884     }
885     Roots[0] = Base;
886   }
887 
888   // Calculate the number of users of the base, or lowest indexed, iteration.
889   unsigned NumBaseUses = BaseUsers.size();
890   if (NumBaseUses == 0)
891     NumBaseUses = Roots.begin()->second->getNumUses();
892 
893   // Check that every node has the same number of users.
894   for (auto &KV : Roots) {
895     if (KV.first == 0)
896       continue;
897     if (!KV.second->hasNUses(NumBaseUses)) {
898       DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
899             << "#Base=" << NumBaseUses << ", #Root=" <<
900             KV.second->getNumUses() << "\n");
901       return false;
902     }
903   }
904 
905   return true;
906 }
907 
908 void LoopReroll::DAGRootTracker::
909 findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
910   // Does the user look like it could be part of a root set?
911   // All its users must be simple arithmetic ops.
912   if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
913     return;
914 
915   if (I != IV && findRootsBase(I, SubsumedInsts))
916     return;
917 
918   SubsumedInsts.insert(I);
919 
920   for (User *V : I->users()) {
921     Instruction *I = cast<Instruction>(V);
922     if (is_contained(LoopIncs, I))
923       continue;
924 
925     if (!isSimpleArithmeticOp(I))
926       continue;
927 
928     // The recursive call makes a copy of SubsumedInsts.
929     findRootsRecursive(I, SubsumedInsts);
930   }
931 }
932 
933 bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
934   if (DRS.Roots.empty())
935     return false;
936 
937   // Consider a DAGRootSet with N-1 roots (so N different values including
938   //   BaseInst).
939   // Define d = Roots[0] - BaseInst, which should be the same as
940   //   Roots[I] - Roots[I-1] for all I in [1..N).
941   // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
942   //   loop iteration J.
943   //
944   // Now, For the loop iterations to be consecutive:
945   //   D = d * N
946   const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
947   if (!ADR)
948     return false;
949   unsigned N = DRS.Roots.size() + 1;
950   const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
951   const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
952   if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
953     return false;
954 
955   return true;
956 }
957 
958 bool LoopReroll::DAGRootTracker::
959 findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
960   // The base of a RootSet must be an AddRec, so it can be erased.
961   const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
962   if (!IVU_ADR || IVU_ADR->getLoop() != L)
963     return false;
964 
965   std::map<int64_t, Instruction*> V;
966   if (!collectPossibleRoots(IVU, V))
967     return false;
968 
969   // If we didn't get a root for index zero, then IVU must be
970   // subsumed.
971   if (V.find(0) == V.end())
972     SubsumedInsts.insert(IVU);
973 
974   // Partition the vector into monotonically increasing indexes.
975   DAGRootSet DRS;
976   DRS.BaseInst = nullptr;
977 
978   SmallVector<DAGRootSet, 16> PotentialRootSets;
979 
980   for (auto &KV : V) {
981     if (!DRS.BaseInst) {
982       DRS.BaseInst = KV.second;
983       DRS.SubsumedInsts = SubsumedInsts;
984     } else if (DRS.Roots.empty()) {
985       DRS.Roots.push_back(KV.second);
986     } else if (V.find(KV.first - 1) != V.end()) {
987       DRS.Roots.push_back(KV.second);
988     } else {
989       // Linear sequence terminated.
990       if (!validateRootSet(DRS))
991         return false;
992 
993       // Construct a new DAGRootSet with the next sequence.
994       PotentialRootSets.push_back(DRS);
995       DRS.BaseInst = KV.second;
996       DRS.Roots.clear();
997     }
998   }
999 
1000   if (!validateRootSet(DRS))
1001     return false;
1002 
1003   PotentialRootSets.push_back(DRS);
1004 
1005   RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
1006 
1007   return true;
1008 }
1009 
1010 bool LoopReroll::DAGRootTracker::findRoots() {
1011   Inc = IVToIncMap[IV];
1012 
1013   assert(RootSets.empty() && "Unclean state!");
1014   if (std::abs(Inc) == 1) {
1015     for (auto *IVU : IV->users()) {
1016       if (isLoopIncrement(IVU, IV))
1017         LoopIncs.push_back(cast<Instruction>(IVU));
1018     }
1019     findRootsRecursive(IV, SmallInstructionSet());
1020     LoopIncs.push_back(IV);
1021   } else {
1022     if (!findRootsBase(IV, SmallInstructionSet()))
1023       return false;
1024   }
1025 
1026   // Ensure all sets have the same size.
1027   if (RootSets.empty()) {
1028     DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
1029     return false;
1030   }
1031   for (auto &V : RootSets) {
1032     if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
1033       DEBUG(dbgs()
1034             << "LRR: Aborting because not all root sets have the same size\n");
1035       return false;
1036     }
1037   }
1038 
1039   Scale = RootSets[0].Roots.size() + 1;
1040 
1041   if (Scale > IL_MaxRerollIterations) {
1042     DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
1043           << "#Found=" << Scale << ", #Max=" << IL_MaxRerollIterations
1044           << "\n");
1045     return false;
1046   }
1047 
1048   DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale << "\n");
1049 
1050   return true;
1051 }
1052 
1053 bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
1054   // Populate the MapVector with all instructions in the block, in order first,
1055   // so we can iterate over the contents later in perfect order.
1056   for (auto &I : *L->getHeader()) {
1057     Uses[&I].resize(IL_End);
1058   }
1059 
1060   SmallInstructionSet Exclude;
1061   for (auto &DRS : RootSets) {
1062     Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1063     Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1064     Exclude.insert(DRS.BaseInst);
1065   }
1066   Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1067 
1068   for (auto &DRS : RootSets) {
1069     DenseSet<Instruction*> VBase;
1070     collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1071     for (auto *I : VBase) {
1072       Uses[I].set(0);
1073     }
1074 
1075     unsigned Idx = 1;
1076     for (auto *Root : DRS.Roots) {
1077       DenseSet<Instruction*> V;
1078       collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1079 
1080       // While we're here, check the use sets are the same size.
1081       if (V.size() != VBase.size()) {
1082         DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
1083         return false;
1084       }
1085 
1086       for (auto *I : V) {
1087         Uses[I].set(Idx);
1088       }
1089       ++Idx;
1090     }
1091 
1092     // Make sure our subsumed instructions are remembered too.
1093     for (auto *I : DRS.SubsumedInsts) {
1094       Uses[I].set(IL_All);
1095     }
1096   }
1097 
1098   // Make sure the loop increments are also accounted for.
1099 
1100   Exclude.clear();
1101   for (auto &DRS : RootSets) {
1102     Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1103     Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1104     Exclude.insert(DRS.BaseInst);
1105   }
1106 
1107   DenseSet<Instruction*> V;
1108   collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1109   for (auto *I : V) {
1110     Uses[I].set(IL_All);
1111   }
1112 
1113   return true;
1114 }
1115 
1116 /// Get the next instruction in "In" that is a member of set Val.
1117 /// Start searching from StartI, and do not return anything in Exclude.
1118 /// If StartI is not given, start from In.begin().
1119 LoopReroll::DAGRootTracker::UsesTy::iterator
1120 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
1121                                       const SmallInstructionSet &Exclude,
1122                                       UsesTy::iterator *StartI) {
1123   UsesTy::iterator I = StartI ? *StartI : In.begin();
1124   while (I != In.end() && (I->second.test(Val) == 0 ||
1125                            Exclude.count(I->first) != 0))
1126     ++I;
1127   return I;
1128 }
1129 
1130 bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
1131   for (auto &DRS : RootSets) {
1132     if (DRS.BaseInst == I)
1133       return true;
1134   }
1135   return false;
1136 }
1137 
1138 bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
1139   for (auto &DRS : RootSets) {
1140     if (is_contained(DRS.Roots, I))
1141       return true;
1142   }
1143   return false;
1144 }
1145 
1146 /// Return true if instruction I depends on any instruction between
1147 /// Start and End.
1148 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
1149                                                 UsesTy::iterator Start,
1150                                                 UsesTy::iterator End) {
1151   for (auto *U : I->users()) {
1152     for (auto It = Start; It != End; ++It)
1153       if (U == It->first)
1154         return true;
1155   }
1156   return false;
1157 }
1158 
1159 static bool isIgnorableInst(const Instruction *I) {
1160   if (isa<DbgInfoIntrinsic>(I))
1161     return true;
1162   const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
1163   if (!II)
1164     return false;
1165   switch (II->getIntrinsicID()) {
1166     default:
1167       return false;
1168     case Intrinsic::annotation:
1169     case Intrinsic::ptr_annotation:
1170     case Intrinsic::var_annotation:
1171     // TODO: the following intrinsics may also be whitelisted:
1172     //   lifetime_start, lifetime_end, invariant_start, invariant_end
1173       return true;
1174   }
1175   return false;
1176 }
1177 
1178 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1179   // We now need to check for equivalence of the use graph of each root with
1180   // that of the primary induction variable (excluding the roots). Our goal
1181   // here is not to solve the full graph isomorphism problem, but rather to
1182   // catch common cases without a lot of work. As a result, we will assume
1183   // that the relative order of the instructions in each unrolled iteration
1184   // is the same (although we will not make an assumption about how the
1185   // different iterations are intermixed). Note that while the order must be
1186   // the same, the instructions may not be in the same basic block.
1187 
1188   // An array of just the possible reductions for this scale factor. When we
1189   // collect the set of all users of some root instructions, these reduction
1190   // instructions are treated as 'final' (their uses are not considered).
1191   // This is important because we don't want the root use set to search down
1192   // the reduction chain.
1193   SmallInstructionSet PossibleRedSet;
1194   SmallInstructionSet PossibleRedLastSet;
1195   SmallInstructionSet PossibleRedPHISet;
1196   Reductions.restrictToScale(Scale, PossibleRedSet,
1197                              PossibleRedPHISet, PossibleRedLastSet);
1198 
1199   // Populate "Uses" with where each instruction is used.
1200   if (!collectUsedInstructions(PossibleRedSet))
1201     return false;
1202 
1203   // Make sure we mark the reduction PHIs as used in all iterations.
1204   for (auto *I : PossibleRedPHISet) {
1205     Uses[I].set(IL_All);
1206   }
1207 
1208   // Make sure we mark loop-control-only PHIs as used in all iterations. See
1209   // comment above LoopReroll::isLoopControlIV for more information.
1210   BasicBlock *Header = L->getHeader();
1211   if (LoopControlIV && LoopControlIV != IV) {
1212     for (auto *U : LoopControlIV->users()) {
1213       Instruction *IVUser = dyn_cast<Instruction>(U);
1214       // IVUser could be loop increment or compare
1215       Uses[IVUser].set(IL_All);
1216       for (auto *UU : IVUser->users()) {
1217         Instruction *UUser = dyn_cast<Instruction>(UU);
1218         // UUser could be compare, PHI or branch
1219         Uses[UUser].set(IL_All);
1220         // Skip SExt
1221         if (isa<SExtInst>(UUser)) {
1222           UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
1223           Uses[UUser].set(IL_All);
1224         }
1225         // Is UUser a compare instruction?
1226         if (UU->hasOneUse()) {
1227           Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1228           if (BI == cast<BranchInst>(Header->getTerminator()))
1229             Uses[BI].set(IL_All);
1230         }
1231       }
1232     }
1233   }
1234 
1235   // Make sure all instructions in the loop are in one and only one
1236   // set.
1237   for (auto &KV : Uses) {
1238     if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
1239       DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1240             << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1241       return false;
1242     }
1243   }
1244 
1245   DEBUG(
1246     for (auto &KV : Uses) {
1247       dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1248     }
1249     );
1250 
1251   for (unsigned Iter = 1; Iter < Scale; ++Iter) {
1252     // In addition to regular aliasing information, we need to look for
1253     // instructions from later (future) iterations that have side effects
1254     // preventing us from reordering them past other instructions with side
1255     // effects.
1256     bool FutureSideEffects = false;
1257     AliasSetTracker AST(*AA);
1258     // The map between instructions in f(%iv.(i+1)) and f(%iv).
1259     DenseMap<Value *, Value *> BaseMap;
1260 
1261     // Compare iteration Iter to the base.
1262     SmallInstructionSet Visited;
1263     auto BaseIt = nextInstr(0, Uses, Visited);
1264     auto RootIt = nextInstr(Iter, Uses, Visited);
1265     auto LastRootIt = Uses.begin();
1266 
1267     while (BaseIt != Uses.end() && RootIt != Uses.end()) {
1268       Instruction *BaseInst = BaseIt->first;
1269       Instruction *RootInst = RootIt->first;
1270 
1271       // Skip over the IV or root instructions; only match their users.
1272       bool Continue = false;
1273       if (isBaseInst(BaseInst)) {
1274         Visited.insert(BaseInst);
1275         BaseIt = nextInstr(0, Uses, Visited);
1276         Continue = true;
1277       }
1278       if (isRootInst(RootInst)) {
1279         LastRootIt = RootIt;
1280         Visited.insert(RootInst);
1281         RootIt = nextInstr(Iter, Uses, Visited);
1282         Continue = true;
1283       }
1284       if (Continue) continue;
1285 
1286       if (!BaseInst->isSameOperationAs(RootInst)) {
1287         // Last chance saloon. We don't try and solve the full isomorphism
1288         // problem, but try and at least catch the case where two instructions
1289         // *of different types* are round the wrong way. We won't be able to
1290         // efficiently tell, given two ADD instructions, which way around we
1291         // should match them, but given an ADD and a SUB, we can at least infer
1292         // which one is which.
1293         //
1294         // This should allow us to deal with a greater subset of the isomorphism
1295         // problem. It does however change a linear algorithm into a quadratic
1296         // one, so limit the number of probes we do.
1297         auto TryIt = RootIt;
1298         unsigned N = NumToleratedFailedMatches;
1299         while (TryIt != Uses.end() &&
1300                !BaseInst->isSameOperationAs(TryIt->first) &&
1301                N--) {
1302           ++TryIt;
1303           TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1304         }
1305 
1306         if (TryIt == Uses.end() || TryIt == RootIt ||
1307             instrDependsOn(TryIt->first, RootIt, TryIt)) {
1308           DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1309                 " vs. " << *RootInst << "\n");
1310           return false;
1311         }
1312 
1313         RootIt = TryIt;
1314         RootInst = TryIt->first;
1315       }
1316 
1317       // All instructions between the last root and this root
1318       // may belong to some other iteration. If they belong to a
1319       // future iteration, then they're dangerous to alias with.
1320       //
1321       // Note that because we allow a limited amount of flexibility in the order
1322       // that we visit nodes, LastRootIt might be *before* RootIt, in which
1323       // case we've already checked this set of instructions so we shouldn't
1324       // do anything.
1325       for (; LastRootIt < RootIt; ++LastRootIt) {
1326         Instruction *I = LastRootIt->first;
1327         if (LastRootIt->second.find_first() < (int)Iter)
1328           continue;
1329         if (I->mayWriteToMemory())
1330           AST.add(I);
1331         // Note: This is specifically guarded by a check on isa<PHINode>,
1332         // which while a valid (somewhat arbitrary) micro-optimization, is
1333         // needed because otherwise isSafeToSpeculativelyExecute returns
1334         // false on PHI nodes.
1335         if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
1336             !isSafeToSpeculativelyExecute(I))
1337           // Intervening instructions cause side effects.
1338           FutureSideEffects = true;
1339       }
1340 
1341       // Make sure that this instruction, which is in the use set of this
1342       // root instruction, does not also belong to the base set or the set of
1343       // some other root instruction.
1344       if (RootIt->second.count() > 1) {
1345         DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1346                         " vs. " << *RootInst << " (prev. case overlap)\n");
1347         return false;
1348       }
1349 
1350       // Make sure that we don't alias with any instruction in the alias set
1351       // tracker. If we do, then we depend on a future iteration, and we
1352       // can't reroll.
1353       if (RootInst->mayReadFromMemory())
1354         for (auto &K : AST) {
1355           if (K.aliasesUnknownInst(RootInst, *AA)) {
1356             DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1357                             " vs. " << *RootInst << " (depends on future store)\n");
1358             return false;
1359           }
1360         }
1361 
1362       // If we've past an instruction from a future iteration that may have
1363       // side effects, and this instruction might also, then we can't reorder
1364       // them, and this matching fails. As an exception, we allow the alias
1365       // set tracker to handle regular (unordered) load/store dependencies.
1366       if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
1367                                  !isSafeToSpeculativelyExecute(BaseInst)) ||
1368                                 (!isUnorderedLoadStore(RootInst) &&
1369                                  !isSafeToSpeculativelyExecute(RootInst)))) {
1370         DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1371                         " vs. " << *RootInst <<
1372                         " (side effects prevent reordering)\n");
1373         return false;
1374       }
1375 
1376       // For instructions that are part of a reduction, if the operation is
1377       // associative, then don't bother matching the operands (because we
1378       // already know that the instructions are isomorphic, and the order
1379       // within the iteration does not matter). For non-associative reductions,
1380       // we do need to match the operands, because we need to reject
1381       // out-of-order instructions within an iteration!
1382       // For example (assume floating-point addition), we need to reject this:
1383       //   x += a[i]; x += b[i];
1384       //   x += a[i+1]; x += b[i+1];
1385       //   x += b[i+2]; x += a[i+2];
1386       bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1387 
1388       if (!(InReduction && BaseInst->isAssociative())) {
1389         bool Swapped = false, SomeOpMatched = false;
1390         for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
1391           Value *Op2 = RootInst->getOperand(j);
1392 
1393           // If this is part of a reduction (and the operation is not
1394           // associatve), then we match all operands, but not those that are
1395           // part of the reduction.
1396           if (InReduction)
1397             if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1398               if (Reductions.isPairInSame(RootInst, Op2I))
1399                 continue;
1400 
1401           DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1402           if (BMI != BaseMap.end()) {
1403             Op2 = BMI->second;
1404           } else {
1405             for (auto &DRS : RootSets) {
1406               if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1407                 Op2 = DRS.BaseInst;
1408                 break;
1409               }
1410             }
1411           }
1412 
1413           if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
1414             // If we've not already decided to swap the matched operands, and
1415             // we've not already matched our first operand (note that we could
1416             // have skipped matching the first operand because it is part of a
1417             // reduction above), and the instruction is commutative, then try
1418             // the swapped match.
1419             if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1420                 BaseInst->getOperand(!j) == Op2) {
1421               Swapped = true;
1422             } else {
1423               DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1424                     << " vs. " << *RootInst << " (operand " << j << ")\n");
1425               return false;
1426             }
1427           }
1428 
1429           SomeOpMatched = true;
1430         }
1431       }
1432 
1433       if ((!PossibleRedLastSet.count(BaseInst) &&
1434            hasUsesOutsideLoop(BaseInst, L)) ||
1435           (!PossibleRedLastSet.count(RootInst) &&
1436            hasUsesOutsideLoop(RootInst, L))) {
1437         DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1438                         " vs. " << *RootInst << " (uses outside loop)\n");
1439         return false;
1440       }
1441 
1442       Reductions.recordPair(BaseInst, RootInst, Iter);
1443       BaseMap.insert(std::make_pair(RootInst, BaseInst));
1444 
1445       LastRootIt = RootIt;
1446       Visited.insert(BaseInst);
1447       Visited.insert(RootInst);
1448       BaseIt = nextInstr(0, Uses, Visited);
1449       RootIt = nextInstr(Iter, Uses, Visited);
1450     }
1451     assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
1452            "Mismatched set sizes!");
1453   }
1454 
1455   DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
1456                   *IV << "\n");
1457 
1458   return true;
1459 }
1460 
1461 void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
1462   BasicBlock *Header = L->getHeader();
1463   // Remove instructions associated with non-base iterations.
1464   for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend();
1465        J != JE;) {
1466     unsigned I = Uses[&*J].find_first();
1467     if (I > 0 && I < IL_All) {
1468       DEBUG(dbgs() << "LRR: removing: " << *J << "\n");
1469       J++->eraseFromParent();
1470       continue;
1471     }
1472 
1473     ++J;
1474   }
1475 
1476   bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
1477 
1478   if (HasTwoIVs) {
1479     updateNonLoopCtrlIncr();
1480     replaceIV(LoopControlIV, LoopControlIV, IterCount);
1481   } else
1482     // We need to create a new induction variable for each different BaseInst.
1483     for (auto &DRS : RootSets)
1484       // Insert the new induction variable.
1485       replaceIV(DRS.BaseInst, IV, IterCount);
1486 
1487   SimplifyInstructionsInBlock(Header, TLI);
1488   DeleteDeadPHIs(Header, TLI);
1489 }
1490 
1491 // For non-loop-control IVs, we only need to update the last increment
1492 // with right amount, then we are done.
1493 void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
1494   const SCEV *NewInc = nullptr;
1495   for (auto *LoopInc : LoopIncs) {
1496     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LoopInc);
1497     const SCEVConstant *COp = nullptr;
1498     if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
1499       COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1500     } else {
1501       COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
1502       if (!COp)
1503         COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
1504     }
1505 
1506     assert(COp && "Didn't find constant operand of LoopInc!\n");
1507 
1508     const APInt &AInt = COp->getValue()->getValue();
1509     const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
1510     if (AInt.isNegative()) {
1511       NewInc = SE->getNegativeSCEV(COp);
1512       NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
1513       NewInc = SE->getNegativeSCEV(NewInc);
1514     } else
1515       NewInc = SE->getUDivExpr(COp, ScaleSCEV);
1516 
1517     LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
1518   }
1519 }
1520 
1521 void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
1522                                            Instruction *InstIV,
1523                                            const SCEV *IterCount) {
1524   BasicBlock *Header = L->getHeader();
1525   int64_t Inc = IVToIncMap[InstIV];
1526   bool NeedNewIV = InstIV == LoopControlIV;
1527   bool Negative = !NeedNewIV && Inc < 0;
1528 
1529   const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
1530   const SCEV *Start = RealIVSCEV->getStart();
1531 
1532   if (NeedNewIV)
1533     Start = SE->getConstant(Start->getType(), 0);
1534 
1535   const SCEV *SizeOfExpr = nullptr;
1536   const SCEV *IncrExpr =
1537       SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
1538   if (auto *PTy = dyn_cast<PointerType>(Inst->getType())) {
1539     Type *ElTy = PTy->getElementType();
1540     SizeOfExpr =
1541         SE->getSizeOfExpr(SE->getEffectiveSCEVType(Inst->getType()), ElTy);
1542     IncrExpr = SE->getMulExpr(IncrExpr, SizeOfExpr);
1543   }
1544   const SCEV *NewIVSCEV =
1545       SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1546 
1547   { // Limit the lifetime of SCEVExpander.
1548     const DataLayout &DL = Header->getModule()->getDataLayout();
1549     SCEVExpander Expander(*SE, DL, "reroll");
1550     Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1551                                           Header->getFirstNonPHIOrDbg());
1552 
1553     for (auto &KV : Uses)
1554       if (KV.second.find_first() == 0)
1555         KV.first->replaceUsesOfWith(Inst, NewIV);
1556 
1557     if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
1558       // FIXME: Why do we need this check?
1559       if (Uses[BI].find_first() == IL_All) {
1560         const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
1561 
1562         if (NeedNewIV)
1563           ICSCEV = SE->getMulExpr(IterCount,
1564                                   SE->getConstant(IterCount->getType(), Scale));
1565 
1566         // Iteration count SCEV minus or plus 1
1567         const SCEV *MinusPlus1SCEV =
1568             SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
1569         if (Inst->getType()->isPointerTy()) {
1570           assert(SizeOfExpr && "SizeOfExpr is not initialized");
1571           MinusPlus1SCEV = SE->getMulExpr(MinusPlus1SCEV, SizeOfExpr);
1572         }
1573 
1574         const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV);
1575         // Iteration count minus 1
1576         Instruction *InsertPtr = nullptr;
1577         if (isa<SCEVConstant>(ICMinusPlus1SCEV)) {
1578           InsertPtr = BI;
1579         } else {
1580           BasicBlock *Preheader = L->getLoopPreheader();
1581           if (!Preheader)
1582             Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
1583           InsertPtr = Preheader->getTerminator();
1584         }
1585 
1586         if (!isa<PointerType>(NewIV->getType()) && NeedNewIV &&
1587             (SE->getTypeSizeInBits(NewIV->getType()) <
1588              SE->getTypeSizeInBits(ICMinusPlus1SCEV->getType()))) {
1589           IRBuilder<> Builder(BI);
1590           Builder.SetCurrentDebugLocation(BI->getDebugLoc());
1591           NewIV = Builder.CreateSExt(NewIV, ICMinusPlus1SCEV->getType());
1592         }
1593         Value *ICMinusPlus1 = Expander.expandCodeFor(
1594             ICMinusPlus1SCEV, NewIV->getType(), InsertPtr);
1595 
1596         Value *Cond =
1597             new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond");
1598         BI->setCondition(Cond);
1599 
1600         if (BI->getSuccessor(1) != Header)
1601           BI->swapSuccessors();
1602       }
1603     }
1604   }
1605 }
1606 
1607 // Validate the selected reductions. All iterations must have an isomorphic
1608 // part of the reduction chain and, for non-associative reductions, the chain
1609 // entries must appear in order.
1610 bool LoopReroll::ReductionTracker::validateSelected() {
1611   // For a non-associative reduction, the chain entries must appear in order.
1612   for (int i : Reds) {
1613     int PrevIter = 0, BaseCount = 0, Count = 0;
1614     for (Instruction *J : PossibleReds[i]) {
1615       // Note that all instructions in the chain must have been found because
1616       // all instructions in the function must have been assigned to some
1617       // iteration.
1618       int Iter = PossibleRedIter[J];
1619       if (Iter != PrevIter && Iter != PrevIter + 1 &&
1620           !PossibleReds[i].getReducedValue()->isAssociative()) {
1621         DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
1622                         J << "\n");
1623         return false;
1624       }
1625 
1626       if (Iter != PrevIter) {
1627         if (Count != BaseCount) {
1628           DEBUG(dbgs() << "LRR: Iteration " << PrevIter <<
1629                 " reduction use count " << Count <<
1630                 " is not equal to the base use count " <<
1631                 BaseCount << "\n");
1632           return false;
1633         }
1634 
1635         Count = 0;
1636       }
1637 
1638       ++Count;
1639       if (Iter == 0)
1640         ++BaseCount;
1641 
1642       PrevIter = Iter;
1643     }
1644   }
1645 
1646   return true;
1647 }
1648 
1649 // For all selected reductions, remove all parts except those in the first
1650 // iteration (and the PHI). Replace outside uses of the reduced value with uses
1651 // of the first-iteration reduced value (in other words, reroll the selected
1652 // reductions).
1653 void LoopReroll::ReductionTracker::replaceSelected() {
1654   // Fixup reductions to refer to the last instruction associated with the
1655   // first iteration (not the last).
1656   for (int i : Reds) {
1657     int j = 0;
1658     for (int e = PossibleReds[i].size(); j != e; ++j)
1659       if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1660         --j;
1661         break;
1662       }
1663 
1664     // Replace users with the new end-of-chain value.
1665     SmallInstructionVector Users;
1666     for (User *U : PossibleReds[i].getReducedValue()->users()) {
1667       Users.push_back(cast<Instruction>(U));
1668     }
1669 
1670     for (Instruction *User : Users)
1671       User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1672                               PossibleReds[i][j]);
1673   }
1674 }
1675 
1676 // Reroll the provided loop with respect to the provided induction variable.
1677 // Generally, we're looking for a loop like this:
1678 //
1679 // %iv = phi [ (preheader, ...), (body, %iv.next) ]
1680 // f(%iv)
1681 // %iv.1 = add %iv, 1                <-- a root increment
1682 // f(%iv.1)
1683 // %iv.2 = add %iv, 2                <-- a root increment
1684 // f(%iv.2)
1685 // %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
1686 // f(%iv.scale_m_1)
1687 // ...
1688 // %iv.next = add %iv, scale
1689 // %cmp = icmp(%iv, ...)
1690 // br %cmp, header, exit
1691 //
1692 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1693 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1694 // be intermixed with eachother. The restriction imposed by this algorithm is
1695 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1696 // etc. be the same.
1697 //
1698 // First, we collect the use set of %iv, excluding the other increment roots.
1699 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1700 // times, having collected the use set of f(%iv.(i+1)), during which we:
1701 //   - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1702 //     the next unmatched instruction in f(%iv.(i+1)).
1703 //   - Ensure that both matched instructions don't have any external users
1704 //     (with the exception of last-in-chain reduction instructions).
1705 //   - Track the (aliasing) write set, and other side effects, of all
1706 //     instructions that belong to future iterations that come before the matched
1707 //     instructions. If the matched instructions read from that write set, then
1708 //     f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1709 //     f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1710 //     if any of these future instructions had side effects (could not be
1711 //     speculatively executed), and so do the matched instructions, when we
1712 //     cannot reorder those side-effect-producing instructions, and rerolling
1713 //     fails.
1714 //
1715 // Finally, we make sure that all loop instructions are either loop increment
1716 // roots, belong to simple latch code, parts of validated reductions, part of
1717 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1718 // have been validated), then we reroll the loop.
1719 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1720                         const SCEV *IterCount,
1721                         ReductionTracker &Reductions) {
1722   DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1723                           IVToIncMap, LoopControlIV);
1724 
1725   if (!DAGRoots.findRoots())
1726     return false;
1727   DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
1728                   *IV << "\n");
1729 
1730   if (!DAGRoots.validate(Reductions))
1731     return false;
1732   if (!Reductions.validateSelected())
1733     return false;
1734   // At this point, we've validated the rerolling, and we're committed to
1735   // making changes!
1736 
1737   Reductions.replaceSelected();
1738   DAGRoots.replace(IterCount);
1739 
1740   ++NumRerolledLoops;
1741   return true;
1742 }
1743 
1744 bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
1745   if (skipLoop(L))
1746     return false;
1747 
1748   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1749   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1750   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1751   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1752   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1753   PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1754 
1755   BasicBlock *Header = L->getHeader();
1756   DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
1757         "] Loop %" << Header->getName() << " (" <<
1758         L->getNumBlocks() << " block(s))\n");
1759 
1760   // For now, we'll handle only single BB loops.
1761   if (L->getNumBlocks() > 1)
1762     return false;
1763 
1764   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
1765     return false;
1766 
1767   const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
1768   const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
1769   DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
1770   DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
1771 
1772   // First, we need to find the induction variable with respect to which we can
1773   // reroll (there may be several possible options).
1774   SmallInstructionVector PossibleIVs;
1775   IVToIncMap.clear();
1776   LoopControlIV = nullptr;
1777   collectPossibleIVs(L, PossibleIVs);
1778 
1779   if (PossibleIVs.empty()) {
1780     DEBUG(dbgs() << "LRR: No possible IVs found\n");
1781     return false;
1782   }
1783 
1784   ReductionTracker Reductions;
1785   collectPossibleReductions(L, Reductions);
1786   bool Changed = false;
1787 
1788   // For each possible IV, collect the associated possible set of 'root' nodes
1789   // (i+1, i+2, etc.).
1790   for (Instruction *PossibleIV : PossibleIVs)
1791     if (reroll(PossibleIV, L, Header, IterCount, Reductions)) {
1792       Changed = true;
1793       break;
1794     }
1795   DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
1796 
1797   // Trip count of L has changed so SE must be re-evaluated.
1798   if (Changed)
1799     SE->forgetLoop(L);
1800 
1801   return Changed;
1802 }
1803