1 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines common loop utility functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Utils/LoopUtils.h"
15 #include "llvm/ADT/ScopeExit.h"
16 #include "llvm/Analysis/AliasAnalysis.h"
17 #include "llvm/Analysis/BasicAliasAnalysis.h"
18 #include "llvm/Analysis/GlobalsModRef.h"
19 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/LoopPass.h"
22 #include "llvm/Analysis/ScalarEvolution.h"
23 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
24 #include "llvm/Analysis/ScalarEvolutionExpander.h"
25 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/ValueHandle.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/KnownBits.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 
38 using namespace llvm;
39 using namespace llvm::PatternMatch;
40 
41 #define DEBUG_TYPE "loop-utils"
42 
43 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
44                                         SmallPtrSetImpl<Instruction *> &Set) {
45   for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
46     if (!Set.count(dyn_cast<Instruction>(*Use)))
47       return false;
48   return true;
49 }
50 
51 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
52   switch (Kind) {
53   default:
54     break;
55   case RK_IntegerAdd:
56   case RK_IntegerMult:
57   case RK_IntegerOr:
58   case RK_IntegerAnd:
59   case RK_IntegerXor:
60   case RK_IntegerMinMax:
61     return true;
62   }
63   return false;
64 }
65 
66 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
67   return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
68 }
69 
70 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
71   switch (Kind) {
72   default:
73     break;
74   case RK_IntegerAdd:
75   case RK_IntegerMult:
76   case RK_FloatAdd:
77   case RK_FloatMult:
78     return true;
79   }
80   return false;
81 }
82 
83 /// Determines if Phi may have been type-promoted. If Phi has a single user
84 /// that ANDs the Phi with a type mask, return the user. RT is updated to
85 /// account for the narrower bit width represented by the mask, and the AND
86 /// instruction is added to CI.
87 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
88                                    SmallPtrSetImpl<Instruction *> &Visited,
89                                    SmallPtrSetImpl<Instruction *> &CI) {
90   if (!Phi->hasOneUse())
91     return Phi;
92 
93   const APInt *M = nullptr;
94   Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
95 
96   // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
97   // with a new integer type of the corresponding bit width.
98   if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
99     int32_t Bits = (*M + 1).exactLogBase2();
100     if (Bits > 0) {
101       RT = IntegerType::get(Phi->getContext(), Bits);
102       Visited.insert(Phi);
103       CI.insert(J);
104       return J;
105     }
106   }
107   return Phi;
108 }
109 
110 /// Compute the minimal bit width needed to represent a reduction whose exit
111 /// instruction is given by Exit.
/// Compute the minimal bit width needed to represent a reduction whose exit
/// instruction is given by Exit.
///
/// Returns the narrowest integer type that can hold the reduction, together
/// with a flag that is true when sign extension (sext) rather than zero
/// extension (zext) must be used to widen the narrow value back to the
/// original type. DB, AC and DT may each be null; each non-null analysis
/// enables an additional refinement of the width.
static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
                                                     DemandedBits *DB,
                                                     AssumptionCache *AC,
                                                     DominatorTree *DT) {
  bool IsSigned = false;
  const DataLayout &DL = Exit->getModule()->getDataLayout();
  // Default to the full width of Exit's type; analyses below may shrink it.
  uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());

  if (DB) {
    // Use the demanded bits analysis to determine the bits that are live out
    // of the exit instruction, rounding up to the nearest power of two. If the
    // use of demanded bits results in a smaller bit width, we know the value
    // must be positive (i.e., IsSigned = false), because if this were not the
    // case, the sign bit would have been demanded.
    auto Mask = DB->getDemandedBits(Exit);
    MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
  }

  if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
    // If demanded bits wasn't able to limit the bit width, we can try to use
    // value tracking instead. This can be the case, for example, if the value
    // may be negative.
    auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
    auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
    MaxBitWidth = NumTypeBits - NumSignBits;
    KnownBits Bits = computeKnownBits(Exit, DL);
    if (!Bits.isNonNegative()) {
      // If the value is not known to be non-negative, we set IsSigned to true,
      // meaning that we will use sext instructions instead of zext
      // instructions to restore the original type.
      IsSigned = true;
      if (!Bits.isNegative())
        // If the value is not known to be negative, we don't known what the
        // upper bit is, and therefore, we don't know what kind of extend we
        // will need. In this case, just increase the bit width by one bit and
        // use sext.
        ++MaxBitWidth;
    }
  }
  // Integer types used for the narrowed recurrence are kept power-of-two
  // sized so they map onto legal-ish vector element widths.
  if (!isPowerOf2_64(MaxBitWidth))
    MaxBitWidth = NextPowerOf2(MaxBitWidth);

  return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
                        IsSigned);
}
157 
158 /// Collect cast instructions that can be ignored in the vectorizer's cost
159 /// model, given a reduction exit value and the minimal type in which the
160 /// reduction can be represented.
161 static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
162                                  Type *RecurrenceType,
163                                  SmallPtrSetImpl<Instruction *> &Casts) {
164 
165   SmallVector<Instruction *, 8> Worklist;
166   SmallPtrSet<Instruction *, 8> Visited;
167   Worklist.push_back(Exit);
168 
169   while (!Worklist.empty()) {
170     Instruction *Val = Worklist.pop_back_val();
171     Visited.insert(Val);
172     if (auto *Cast = dyn_cast<CastInst>(Val))
173       if (Cast->getSrcTy() == RecurrenceType) {
174         // If the source type of a cast instruction is equal to the recurrence
175         // type, it will be eliminated, and should be ignored in the vectorizer
176         // cost model.
177         Casts.insert(Cast);
178         continue;
179       }
180 
181     // Add all operands to the work list if they are loop-varying values that
182     // we haven't yet visited.
183     for (Value *O : cast<User>(Val)->operands())
184       if (auto *I = dyn_cast<Instruction>(O))
185         if (TheLoop->contains(I) && !Visited.count(I))
186           Worklist.push_back(I);
187   }
188 }
189 
/// Returns true if \p Phi is the header phi of a reduction of kind \p Kind
/// over \p TheLoop and, if so, fills in \p RedDes with its description.
///
/// The analysis walks the def-use cycle rooted at the phi, checking that
/// every in-loop user is a legal reduction operation for \p Kind and that at
/// most one value in the cycle is used outside the loop. \p DB, \p AC and
/// \p DT (each may be null) are used to compute a possibly narrower type in
/// which the reduction can be evaluated.
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
                                           RecurrenceDescriptor &RedDes,
                                           DemandedBits *DB,
                                           AssumptionCache *AC,
                                           DominatorTree *DT) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction, variables (which form a cycle
  // which ends in the phi node).
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The cycle
  // must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by a icmp select sequence, we store
  // the number of instruction we saw from the recognized min/max pattern,
  //  to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine.  Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (isArithmeticRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of reduction value (safe).
  //        - Multiple use of reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By instructions outside of the loop (safe).
  //      * One value may have several outside users, but all outside
  //        uses must be of the same value.
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.back();
    Worklist.pop_back();

    // No Users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div, and Sub is only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
      if (!ReduxDesc.isRecurrence())
        return false;
    }

    // A reduction operation must only have one use of the reduction value.
    // Min/max is exempt because the value legitimately feeds both the cmp
    // and the select of the pattern.
    if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
        hasMultipleUsesOf(Cur, VisitedInsts))
      return false;

    // All inputs to a PHI node must be a reduction value.
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if (Kind == RK_IntegerMinMax &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check  whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of current instruction. Push non-PHI nodes after PHI nodes
    // onto the stack. This way we are going to have seen all inputs to PHI
    // nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // If we already know this instruction is used externally, move on to
        // the next user.
        if (ExitInstruction == Cur)
          continue;

        // Exit if you find multiple values used outside or if the header phi
        // node is being used. In this case the user uses the value of the
        // previous iteration, in which case we would loose "VF-1" iterations of
        // the reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we loose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp.
  if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
      NumCmpSelectPatternInst != 2)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  if (Start != Phi) {
    // If the starting value is not the same as the phi node, we speculatively
    // looked through an 'and' instruction when evaluating a potential
    // arithmetic reduction to determine if it may have been type-promoted.
    //
    // We now compute the minimal bit width that is required to represent the
    // reduction. If this is the same width that was indicated by the 'and', we
    // can represent the reduction in the smaller type. The 'and' instruction
    // will be eliminated since it will essentially be a cast instruction that
    // can be ignore in the cost model. If we compute a different type than we
    // did when evaluating the 'and', the 'and' will not be eliminated, and we
    // will end up with different kinds of operations in the recurrence
    // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
    // the case.
    //
    // The vectorizer relies on InstCombine to perform the actual
    // type-shrinking. It does this by inserting instructions to truncate the
    // exit value of the reduction to the width indicated by RecurrenceType and
    // then extend this value back to the original width. If IsSigned is false,
    // a 'zext' instruction will be generated; otherwise, a 'sext' will be
    // used.
    //
    // TODO: We should not rely on InstCombine to rewrite the reduction in the
    //       smaller type. We should just generate a correctly typed expression
    //       to begin with.
    Type *ComputedType;
    std::tie(ComputedType, IsSigned) =
        computeRecurrenceType(ExitInstruction, DB, AC, DT);
    if (ComputedType != RecurrenceType)
      return false;

    // The recurrence expression will be represented in a narrower type. If
    // there are any cast instructions that will be unnecessary, collect them
    // in CastInsts. Note that the 'and' instruction was already included in
    // this list.
    //
    // TODO: A better way to represent this may be to tag in some way all the
    //       instructions that are a part of the reduction. The vectorizer cost
    //       model could then apply the recurrence type to these instructions,
    //       without needing a white list of instructions to ignore.
    collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
  }

  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

  // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
  // is saved as part of the RecurrenceDescriptor.

  // Save the description of this reduction variable.
  RecurrenceDescriptor RD(
      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
  RedDes = RD;

  return true;
}
441 
442 /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
443 /// pattern corresponding to a min(X, Y) or max(X, Y).
444 RecurrenceDescriptor::InstDesc
445 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
446 
447   assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
448          "Expect a select instruction");
449   Instruction *Cmp = nullptr;
450   SelectInst *Select = nullptr;
451 
452   // We must handle the select(cmp()) as a single instruction. Advance to the
453   // select.
454   if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
455     if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
456       return InstDesc(false, I);
457     return InstDesc(Select, Prev.getMinMaxKind());
458   }
459 
460   // Only handle single use cases for now.
461   if (!(Select = dyn_cast<SelectInst>(I)))
462     return InstDesc(false, I);
463   if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
464       !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
465     return InstDesc(false, I);
466   if (!Cmp->hasOneUse())
467     return InstDesc(false, I);
468 
469   Value *CmpLeft;
470   Value *CmpRight;
471 
472   // Look for a min/max pattern.
473   if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
474     return InstDesc(Select, MRK_UIntMin);
475   else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
476     return InstDesc(Select, MRK_UIntMax);
477   else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
478     return InstDesc(Select, MRK_SIntMax);
479   else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
480     return InstDesc(Select, MRK_SIntMin);
481   else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
482     return InstDesc(Select, MRK_FloatMin);
483   else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
484     return InstDesc(Select, MRK_FloatMax);
485   else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
486     return InstDesc(Select, MRK_FloatMin);
487   else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
488     return InstDesc(Select, MRK_FloatMax);
489 
490   return InstDesc(false, I);
491 }
492 
493 RecurrenceDescriptor::InstDesc
494 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
495                                         InstDesc &Prev, bool HasFunNoNaNAttr) {
496   bool FP = I->getType()->isFloatingPointTy();
497   Instruction *UAI = Prev.getUnsafeAlgebraInst();
498   if (!UAI && FP && !I->isFast())
499     UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
500 
501   switch (I->getOpcode()) {
502   default:
503     return InstDesc(false, I);
504   case Instruction::PHI:
505     return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
506   case Instruction::Sub:
507   case Instruction::Add:
508     return InstDesc(Kind == RK_IntegerAdd, I);
509   case Instruction::Mul:
510     return InstDesc(Kind == RK_IntegerMult, I);
511   case Instruction::And:
512     return InstDesc(Kind == RK_IntegerAnd, I);
513   case Instruction::Or:
514     return InstDesc(Kind == RK_IntegerOr, I);
515   case Instruction::Xor:
516     return InstDesc(Kind == RK_IntegerXor, I);
517   case Instruction::FMul:
518     return InstDesc(Kind == RK_FloatMult, I, UAI);
519   case Instruction::FSub:
520   case Instruction::FAdd:
521     return InstDesc(Kind == RK_FloatAdd, I, UAI);
522   case Instruction::FCmp:
523   case Instruction::ICmp:
524   case Instruction::Select:
525     if (Kind != RK_IntegerMinMax &&
526         (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
527       return InstDesc(false, I);
528     return isMinMaxSelectCmpPattern(I, Prev);
529   }
530 }
531 
532 bool RecurrenceDescriptor::hasMultipleUsesOf(
533     Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
534   unsigned NumUses = 0;
535   for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
536        ++Use) {
537     if (Insts.count(dyn_cast<Instruction>(*Use)))
538       ++NumUses;
539     if (NumUses > 1)
540       return true;
541   }
542 
543   return false;
544 }
545 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
546                                           RecurrenceDescriptor &RedDes,
547                                           DemandedBits *DB, AssumptionCache *AC,
548                                           DominatorTree *DT) {
549 
550   BasicBlock *Header = TheLoop->getHeader();
551   Function &F = *Header->getParent();
552   bool HasFunNoNaNAttr =
553       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
554 
555   if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
556                       AC, DT)) {
557     DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
558     return true;
559   }
560   if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
561                       AC, DT)) {
562     DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
563     return true;
564   }
565   if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
566                       AC, DT)) {
567     DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
568     return true;
569   }
570   if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
571                       AC, DT)) {
572     DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
573     return true;
574   }
575   if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
576                       AC, DT)) {
577     DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
578     return true;
579   }
580   if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
581                       DB, AC, DT)) {
582     DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
583     return true;
584   }
585   if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
586                       AC, DT)) {
587     DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
588     return true;
589   }
590   if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
591                       AC, DT)) {
592     DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
593     return true;
594   }
595   if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
596                       AC, DT)) {
597     DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
598     return true;
599   }
600   // Not a reduction of known type.
601   return false;
602 }
603 
/// Returns true if \p Phi is a first-order recurrence: a header phi whose
/// value in each iteration is the value computed in the previous iteration
/// (the "previous" value comes in through the latch). \p SinkAfter collects
/// cast instructions that must be sunk after the previous value to satisfy
/// the dominance requirement; \p DT is used for the dominance queries.
bool RecurrenceDescriptor::isFirstOrderRecurrence(
    PHINode *Phi, Loop *TheLoop,
    DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {

  // Ensure the phi node is in the loop header and has two incoming values.
  if (Phi->getParent() != TheLoop->getHeader() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Ensure the loop has a preheader and a single latch block. The loop
  // vectorizer will need the latch to set up the next iteration of the loop.
  auto *Preheader = TheLoop->getLoopPreheader();
  auto *Latch = TheLoop->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  // Ensure the phi node's incoming blocks are the loop preheader and latch.
  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
      Phi->getBasicBlockIndex(Latch) < 0)
    return false;

  // Get the previous value. The previous value comes from the latch edge while
  // the initial value comes form the preheader edge.
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
    return false;

  // Ensure every user of the phi node is dominated by the previous value.
  // The dominance requirement ensures the loop vectorizer will not need to
  // vectorize the initial value prior to the first iteration of the loop.
  // TODO: Consider extending this sinking to handle other kinds of instructions
  // and expressions, beyond sinking a single cast past Previous.
  if (Phi->hasOneUse()) {
    auto *I = Phi->user_back();
    // A single-use cast in the header can be made legal by sinking it after
    // Previous; record that intent in SinkAfter when dominance doesn't
    // already hold.
    if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
        DT->dominates(Previous, I->user_back())) {
      if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
        SinkAfter[I] = Previous;
      return true;
    }
  }

  // General case: every in-loop user of the phi must already be dominated by
  // the previous value.
  for (User *U : Phi->users())
    if (auto *I = dyn_cast<Instruction>(U)) {
      if (!DT->dominates(Previous, I))
        return false;
    }

  return true;
}
655 
656 /// This function returns the identity element (or neutral element) for
657 /// the operation K.
658 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
659                                                       Type *Tp) {
660   switch (K) {
661   case RK_IntegerXor:
662   case RK_IntegerAdd:
663   case RK_IntegerOr:
664     // Adding, Xoring, Oring zero to a number does not change it.
665     return ConstantInt::get(Tp, 0);
666   case RK_IntegerMult:
667     // Multiplying a number by 1 does not change it.
668     return ConstantInt::get(Tp, 1);
669   case RK_IntegerAnd:
670     // AND-ing a number with an all-1 value does not change it.
671     return ConstantInt::get(Tp, -1, true);
672   case RK_FloatMult:
673     // Multiplying a number by 1 does not change it.
674     return ConstantFP::get(Tp, 1.0L);
675   case RK_FloatAdd:
676     // Adding zero to a number does not change it.
677     return ConstantFP::get(Tp, 0.0L);
678   default:
679     llvm_unreachable("Unknown recurrence kind");
680   }
681 }
682 
683 /// This function translates the recurrence kind to an LLVM binary operator.
684 unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
685   switch (Kind) {
686   case RK_IntegerAdd:
687     return Instruction::Add;
688   case RK_IntegerMult:
689     return Instruction::Mul;
690   case RK_IntegerOr:
691     return Instruction::Or;
692   case RK_IntegerAnd:
693     return Instruction::And;
694   case RK_IntegerXor:
695     return Instruction::Xor;
696   case RK_FloatMult:
697     return Instruction::FMul;
698   case RK_FloatAdd:
699     return Instruction::FAdd;
700   case RK_IntegerMinMax:
701     return Instruction::ICmp;
702   case RK_FloatMinMax:
703     return Instruction::FCmp;
704   default:
705     llvm_unreachable("Unknown recurrence operation");
706   }
707 }
708 
709 Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
710                                             MinMaxRecurrenceKind RK,
711                                             Value *Left, Value *Right) {
712   CmpInst::Predicate P = CmpInst::ICMP_NE;
713   switch (RK) {
714   default:
715     llvm_unreachable("Unknown min/max recurrence kind");
716   case MRK_UIntMin:
717     P = CmpInst::ICMP_ULT;
718     break;
719   case MRK_UIntMax:
720     P = CmpInst::ICMP_UGT;
721     break;
722   case MRK_SIntMin:
723     P = CmpInst::ICMP_SLT;
724     break;
725   case MRK_SIntMax:
726     P = CmpInst::ICMP_SGT;
727     break;
728   case MRK_FloatMin:
729     P = CmpInst::FCMP_OLT;
730     break;
731   case MRK_FloatMax:
732     P = CmpInst::FCMP_OGT;
733     break;
734   }
735 
736   // We only match FP sequences that are 'fast', so we can unconditionally
737   // set it on any generated instructions.
738   IRBuilder<>::FastMathFlagGuard FMFG(Builder);
739   FastMathFlags FMF;
740   FMF.setFast();
741   Builder.setFastMathFlags(FMF);
742 
743   Value *Cmp;
744   if (RK == MRK_FloatMin || RK == MRK_FloatMax)
745     Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
746   else
747     Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
748 
749   Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
750   return Select;
751 }
752 
753 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
754                                          const SCEV *Step, BinaryOperator *BOp,
755                                          SmallVectorImpl<Instruction *> *Casts)
756   : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
757   assert(IK != IK_NoInduction && "Not an induction");
758 
759   // Start value type should match the induction kind and the value
760   // itself should not be null.
761   assert(StartValue && "StartValue is null");
762   assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
763          "StartValue is not a pointer for pointer induction");
764   assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
765          "StartValue is not an integer for integer induction");
766 
767   // Check the Step Value. It should be non-zero integer value.
768   assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
769          "Step value is zero");
770 
771   assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
772          "Step value should be constant for pointer induction");
773   assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
774          "StepValue is not an integer");
775 
776   assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
777          "StepValue is not FP for FpInduction");
778   assert((IK != IK_FpInduction || (InductionBinOp &&
779           (InductionBinOp->getOpcode() == Instruction::FAdd ||
780            InductionBinOp->getOpcode() == Instruction::FSub))) &&
781          "Binary opcode should be specified for FP induction");
782 
783   if (Casts) {
784     for (auto &Inst : *Casts) {
785       RedundantCasts.push_back(Inst);
786     }
787   }
788 }
789 
790 int InductionDescriptor::getConsecutiveDirection() const {
791   ConstantInt *ConstStep = getConstIntStepValue();
792   if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
793     return ConstStep->getSExtValue();
794   return 0;
795 }
796 
797 ConstantInt *InductionDescriptor::getConstIntStepValue() const {
798   if (isa<SCEVConstant>(Step))
799     return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
800   return nullptr;
801 }
802 
803 Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
804                                       ScalarEvolution *SE,
805                                       const DataLayout& DL) const {
806 
807   SCEVExpander Exp(*SE, DL, "induction");
808   assert(Index->getType() == Step->getType() &&
809          "Index type does not match StepValue type");
810   switch (IK) {
811   case IK_IntInduction: {
812     assert(Index->getType() == StartValue->getType() &&
813            "Index type does not match StartValue type");
814 
815     // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
816     // and calculate (Start + Index * Step) for all cases, without
817     // special handling for "isOne" and "isMinusOne".
818     // But in the real life the result code getting worse. We mix SCEV
819     // expressions and ADD/SUB operations and receive redundant
820     // intermediate values being calculated in different ways and
821     // Instcombine is unable to reduce them all.
822 
823     if (getConstIntStepValue() &&
824         getConstIntStepValue()->isMinusOne())
825       return B.CreateSub(StartValue, Index);
826     if (getConstIntStepValue() &&
827         getConstIntStepValue()->isOne())
828       return B.CreateAdd(StartValue, Index);
829     const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
830                                    SE->getMulExpr(Step, SE->getSCEV(Index)));
831     return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
832   }
833   case IK_PtrInduction: {
834     assert(isa<SCEVConstant>(Step) &&
835            "Expected constant step for pointer induction");
836     const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
837     Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
838     return B.CreateGEP(nullptr, StartValue, Index);
839   }
840   case IK_FpInduction: {
841     assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
842     assert(InductionBinOp &&
843            (InductionBinOp->getOpcode() == Instruction::FAdd ||
844             InductionBinOp->getOpcode() == Instruction::FSub) &&
845            "Original bin op should be defined for FP induction");
846 
847     Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
848 
849     // Floating point operations had to be 'fast' to enable the induction.
850     FastMathFlags Flags;
851     Flags.setFast();
852 
853     Value *MulExp = B.CreateFMul(StepValue, Index);
854     if (isa<Instruction>(MulExp))
855       // We have to check, the MulExp may be a constant.
856       cast<Instruction>(MulExp)->setFastMathFlags(Flags);
857 
858     Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue,
859                                MulExp, "induction");
860     if (isa<Instruction>(BOp))
861       cast<Instruction>(BOp)->setFastMathFlags(Flags);
862 
863     return BOp;
864   }
865   case IK_NoInduction:
866     return nullptr;
867   }
868   llvm_unreachable("invalid enum");
869 }
870 
871 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
872                                            ScalarEvolution *SE,
873                                            InductionDescriptor &D) {
874 
875   // Here we only handle FP induction variables.
876   assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
877 
878   if (TheLoop->getHeader() != Phi->getParent())
879     return false;
880 
881   // The loop may have multiple entrances or multiple exits; we can analyze
882   // this phi if it has a unique entry value and a unique backedge value.
883   if (Phi->getNumIncomingValues() != 2)
884     return false;
885   Value *BEValue = nullptr, *StartValue = nullptr;
886   if (TheLoop->contains(Phi->getIncomingBlock(0))) {
887     BEValue = Phi->getIncomingValue(0);
888     StartValue = Phi->getIncomingValue(1);
889   } else {
890     assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
891            "Unexpected Phi node in the loop");
892     BEValue = Phi->getIncomingValue(1);
893     StartValue = Phi->getIncomingValue(0);
894   }
895 
896   BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
897   if (!BOp)
898     return false;
899 
900   Value *Addend = nullptr;
901   if (BOp->getOpcode() == Instruction::FAdd) {
902     if (BOp->getOperand(0) == Phi)
903       Addend = BOp->getOperand(1);
904     else if (BOp->getOperand(1) == Phi)
905       Addend = BOp->getOperand(0);
906   } else if (BOp->getOpcode() == Instruction::FSub)
907     if (BOp->getOperand(0) == Phi)
908       Addend = BOp->getOperand(1);
909 
910   if (!Addend)
911     return false;
912 
913   // The addend should be loop invariant
914   if (auto *I = dyn_cast<Instruction>(Addend))
915     if (TheLoop->contains(I))
916       return false;
917 
918   // FP Step has unknown SCEV
919   const SCEV *Step = SE->getUnknown(Addend);
920   D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
921   return true;
922 }
923 
924 /// This function is called when we suspect that the update-chain of a phi node
/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts,
926 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
927 /// predicate P under which the SCEV expression for the phi can be the
928 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
929 /// cast instructions that are involved in the update-chain of this induction.
930 /// A caller that adds the required runtime predicate can be free to drop these
931 /// cast instructions, and compute the phi using \p AR (instead of some scev
932 /// expression with casts).
933 ///
934 /// For example, without a predicate the scev expression can take the following
935 /// form:
936 ///      (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
937 ///
938 /// It corresponds to the following IR sequence:
939 /// %for.body:
940 ///   %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
941 ///   %casted_phi = "ExtTrunc i64 %x"
942 ///   %add = add i64 %casted_phi, %step
943 ///
944 /// where %x is given in \p PN,
945 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
946 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
947 /// several forms, for example, such as:
948 ///   ExtTrunc1:    %casted_phi = and  %x, 2^n-1
949 /// or:
950 ///   ExtTrunc2:    %t = shl %x, m
951 ///                 %casted_phi = ashr %t, m
952 ///
953 /// If we are able to find such sequence, we return the instructions
954 /// we found, namely %casted_phi and the instructions on its use-def chain up
955 /// to the phi (not including the phi).
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
                                    const SCEVUnknown *PhiScev,
                                    const SCEVAddRecExpr *AR,
                                    SmallVectorImpl<Instruction *> &CastInsts) {

  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
  auto *PN = cast<PHINode>(PhiScev->getValue());
  assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
  const Loop *L = AR->getLoop();

  // Find any cast instructions that participate in the def-use chain of
  // PhiScev in the loop.
  // FORNOW/TODO: We currently expect the def-use chain to include only
  // two-operand instructions, where one of the operands is an invariant.
  // createAddRecFromPHIWithCasts() currently does not support anything more
  // involved than that, so we keep the search simple. This can be
  // extended/generalized as needed.

  // Given a value, return the non-loop-invariant operand of its defining
  // binary operator (the next link in the def-use chain), or null if the
  // value is not a two-operand instruction with an invariant operand.
  auto getDef = [&](const Value *Val) -> Value * {
    const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
    if (!BinOp)
      return nullptr;
    Value *Op0 = BinOp->getOperand(0);
    Value *Op1 = BinOp->getOperand(1);
    Value *Def = nullptr;
    if (L->isLoopInvariant(Op0))
      Def = Op1;
    else if (L->isLoopInvariant(Op1))
      Def = Op0;
    return Def;
  };

  // Look for the instruction that defines the induction via the
  // loop backedge.
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;
  Value *Val = PN->getIncomingValueForBlock(Latch);
  if (!Val)
    return false;

  // Follow the def-use chain until the induction phi is reached.
  // If on the way we encounter a Value that has the same SCEV Expr as the
  // phi node, we can consider the instructions we visit from that point
  // as part of the cast-sequence that can be ignored.
  bool InCastSequence = false;
  auto *Inst = dyn_cast<Instruction>(Val);
  while (Val != PN) {
    // If we encountered a phi node other than PN, or if we left the loop,
    // we bail out.
    if (!Inst || !L->contains(Inst)) {
      return false;
    }
    // Does this value's SCEV (under the accumulated predicates) match the
    // phi's AddRec? If so, everything from here down is cast noise.
    auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
    if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
      InCastSequence = true;
    if (InCastSequence) {
      // Only the last instruction in the cast sequence is expected to have
      // uses outside the induction def-use chain.
      if (!CastInsts.empty())
        if (!Inst->hasOneUse())
          return false;
      CastInsts.push_back(Inst);
    }
    Val = getDef(Val);
    if (!Val)
      return false;
    Inst = dyn_cast<Instruction>(Val);
  }

  return InCastSequence;
}
1028 
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         PredicatedScalarEvolution &PSE,
                                         InductionDescriptor &D,
                                         bool Assume) {
  Type *PhiTy = Phi->getType();

  // Handle integer and pointer induction variables.
  // Now we handle also FP induction but not trying to make a
  // recurrent expression from the PHI node in-place.

  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
      !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
    return false;

  // FP inductions are recognized by a separate pattern-matching analysis.
  if (PhiTy->isFloatingPointTy())
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);

  const SCEV *PhiScev = PSE.getSCEV(Phi);
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  // We need this expression to be an AddRecExpr. With \p Assume set, PSCEV
  // is allowed to add runtime predicates in order to produce one.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Phi);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  // Record any Cast instructions that participate in the induction update.
  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
  // If we started from an UnknownSCEV, and managed to build an addRecurrence
  // only after enabling Assume with PSCEV, this means we may have encountered
  // cast instructions that required adding a runtime check in order to
  // guarantee the correctness of the AddRecurrence representation of the
  // induction.
  if (PhiScev != AR && SymbolicPhi) {
    SmallVector<Instruction *, 2> Casts;
    if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
  }

  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}
1073 
bool InductionDescriptor::isInductionPHI(
    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
    InductionDescriptor &D, const SCEV *Expr,
    SmallVectorImpl<Instruction *> *CastsToIgnore) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  // Use the caller-provided SCEV expression if given (it may carry runtime
  // predicates); otherwise derive it from the phi.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handled uniform PHIs.
    DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
    return false;
  }

  Value *StartValue =
    Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
  const SCEV *Step = AR->getStepRecurrence(*SE);
  // Calculate the pointer stride and check if it is consecutive.
  // The stride may be a constant or a loop invariant integer value.
  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
    return false;

  // Integer inductions are done: record and return.
  if (PhiTy->isIntegerTy()) {
    D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
                            CastsToIgnore);
    return true;
  }

  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  // Pointer induction should be a constant.
  if (!ConstStep)
    return false;

  ConstantInt *CV = ConstStep->getValue();
  Type *PointerElementType = PhiTy->getPointerElementType();
  // The pointer stride cannot be determined if the pointer element type is not
  // sized.
  if (!PointerElementType->isSized())
    return false;

  const DataLayout &DL = Phi->getModule()->getDataLayout();
  int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
  if (!Size)
    return false;

  // The byte stride must be a whole multiple of the element size; the
  // recorded step is expressed in elements, not bytes.
  int64_t CVSize = CV->getSExtValue();
  if (CVSize % Size)
    return false;
  auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
                                    true /* signed */);
  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
  return true;
}
1139 
/// Ensure that every exit block of \p L has only in-loop predecessors
/// ("dedicated" exits), splitting exit blocks with SplitBlockPredecessors
/// where needed. Returns true if any block was changed. Exits reached from
/// an indirectbr inside the loop cannot be rewritten and are left alone.
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We re-use a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    // Clear the shared vector on every exit path from this lambda.
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);

    if (!NewExitBB)
      DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                   << *L << "\n");
    else
      DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                   << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure for
  // them, but only visit each one once.
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}
1202 
1203 /// \brief Returns the instructions that use values defined in the loop.
1204 SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
1205   SmallVector<Instruction *, 8> UsedOutside;
1206 
1207   for (auto *Block : L->getBlocks())
1208     // FIXME: I believe that this could use copy_if if the Inst reference could
1209     // be adapted into a pointer.
1210     for (auto &Inst : *Block) {
1211       auto Users = Inst.users();
1212       if (any_of(Users, [&](User *U) {
1213             auto *Use = cast<Instruction>(U);
1214             return !L->contains(Use->getParent());
1215           }))
1216         UsedOutside.push_back(&Inst);
1217     }
1218 
1219   return UsedOutside;
1220 }
1221 
/// Declare the analysis requirements shared by all loop passes: LoopInfo,
/// the dominator tree, loop-canonicalization passes, and the function
/// analyses the loop pass manager must keep alive across loop passes.
void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve lcssa form
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first loop
  // pass in the manager (so that it is computed before the loop pass manager
  // runs) and preserved by all loop passes in the manager. To make this
  // reasonably robust, the set needed for most loop passes is maintained here.
  // If your loop pass requires an analysis not listed here, you will need to
  // carefully audit the loop pass manager nesting structure that results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
}
1259 
1260 /// Manually defined generic "LoopPass" dependency initialization. This is used
1261 /// to initialize the exact set of passes from above in \c
1262 /// getLoopAnalysisUsage. It can be used within a loop pass's initialization
1263 /// with:
1264 ///
1265 ///   INITIALIZE_PASS_DEPENDENCY(LoopPass)
1266 ///
1267 /// As-if "LoopPass" were a pass.
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  // This list mirrors the passes referenced in getLoopAnalysisUsage() above;
  // keep the two in sync when adding or removing dependencies.
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
}
1279 
1280 /// \brief Find string metadata for loop
1281 ///
1282 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
1283 /// operand or null otherwise.  If the string metadata is not found return
1284 /// Optional's not-a-value.
1285 Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
1286                                                             StringRef Name) {
1287   MDNode *LoopID = TheLoop->getLoopID();
1288   // Return none if LoopID is false.
1289   if (!LoopID)
1290     return None;
1291 
1292   // First operand should refer to the loop id itself.
1293   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
1294   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
1295 
1296   // Iterate over LoopID operands and look for MDString Metadata
1297   for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
1298     MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
1299     if (!MD)
1300       continue;
1301     MDString *S = dyn_cast<MDString>(MD->getOperand(0));
1302     if (!S)
1303       continue;
1304     // Return true if MDString holds expected MetaData.
1305     if (Name.equals(S->getString()))
1306       switch (MD->getNumOperands()) {
1307       case 1:
1308         return nullptr;
1309       case 2:
1310         return &MD->getOperand(1);
1311       default:
1312         llvm_unreachable("loop metadata has 0 or 1 operand");
1313       }
1314   }
1315   return None;
1316 }
1317 
1318 /// Does a BFS from a given node to all of its children inside a given loop.
1319 /// The returned vector of nodes includes the starting point.
1320 SmallVector<DomTreeNode *, 16>
1321 llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
1322   SmallVector<DomTreeNode *, 16> Worklist;
1323   auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
1324     // Only include subregions in the top level loop.
1325     BasicBlock *BB = DTN->getBlock();
1326     if (CurLoop->contains(BB))
1327       Worklist.push_back(DTN);
1328   };
1329 
1330   AddRegionToWorklist(N);
1331 
1332   for (size_t I = 0; I < Worklist.size(); I++)
1333     for (DomTreeNode *Child : Worklist[I]->getChildren())
1334       AddRegionToWorklist(Child);
1335 
1336   return Worklist;
1337 }
1338 
1339 void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
1340                           ScalarEvolution *SE = nullptr,
1341                           LoopInfo *LI = nullptr) {
1342   assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
1343   auto *Preheader = L->getLoopPreheader();
1344   assert(Preheader && "Preheader should exist!");
1345 
1346   // Now that we know the removal is safe, remove the loop by changing the
1347   // branch from the preheader to go to the single exit block.
1348   //
1349   // Because we're deleting a large chunk of code at once, the sequence in which
1350   // we remove things is very important to avoid invalidation issues.
1351 
1352   // Tell ScalarEvolution that the loop is deleted. Do this before
1353   // deleting the loop so that ScalarEvolution can look at the loop
1354   // to determine what it needs to clean up.
1355   if (SE)
1356     SE->forgetLoop(L);
1357 
1358   auto *ExitBlock = L->getUniqueExitBlock();
1359   assert(ExitBlock && "Should have a unique exit block!");
1360   assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
1361 
1362   auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
1363   assert(OldBr && "Preheader must end with a branch");
1364   assert(OldBr->isUnconditional() && "Preheader must have a single successor");
1365   // Connect the preheader to the exit block. Keep the old edge to the header
1366   // around to perform the dominator tree update in two separate steps
1367   // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
1368   // preheader -> header.
1369   //
1370   //
1371   // 0.  Preheader          1.  Preheader           2.  Preheader
1372   //        |                    |   |                   |
1373   //        V                    |   V                   |
1374   //      Header <--\            | Header <--\           | Header <--\
1375   //       |  |     |            |  |  |     |           |  |  |     |
1376   //       |  V     |            |  |  V     |           |  |  V     |
1377   //       | Body --/            |  | Body --/           |  | Body --/
1378   //       V                     V  V                    V  V
1379   //      Exit                   Exit                    Exit
1380   //
1381   // By doing this is two separate steps we can perform the dominator tree
1382   // update without using the batch update API.
1383   //
1384   // Even when the loop is never executed, we cannot remove the edge from the
1385   // source block to the exit block. Consider the case where the unexecuted loop
1386   // branches back to an outer loop. If we deleted the loop and removed the edge
1387   // coming to this inner loop, this will break the outer loop structure (by
1388   // deleting the backedge of the outer loop). If the outer loop is indeed a
1389   // non-loop, it will be deleted in a future iteration of loop deletion pass.
1390   IRBuilder<> Builder(OldBr);
1391   Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
1392   // Remove the old branch. The conditional branch becomes a new terminator.
1393   OldBr->eraseFromParent();
1394 
1395   // Rewrite phis in the exit block to get their inputs from the Preheader
1396   // instead of the exiting block.
1397   for (PHINode &P : ExitBlock->phis()) {
1398     // Set the zero'th element of Phi to be from the preheader and remove all
1399     // other incoming values. Given the loop has dedicated exits, all other
1400     // incoming values must be from the exiting blocks.
1401     int PredIndex = 0;
1402     P.setIncomingBlock(PredIndex, Preheader);
1403     // Removes all incoming values from all other exiting blocks (including
1404     // duplicate values from an exiting block).
1405     // Nuke all entries except the zero'th entry which is the preheader entry.
1406     // NOTE! We need to remove Incoming Values in the reverse order as done
1407     // below, to keep the indices valid for deletion (removeIncomingValues
1408     // updates getNumIncomingValues and shifts all values down into the operand
1409     // being deleted).
1410     for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
1411       P.removeIncomingValue(e - i, false);
1412 
1413     assert((P.getNumIncomingValues() == 1 &&
1414             P.getIncomingBlock(PredIndex) == Preheader) &&
1415            "Should have exactly one value and that's from the preheader!");
1416   }
1417 
1418   // Disconnect the loop body by branching directly to its exit.
1419   Builder.SetInsertPoint(Preheader->getTerminator());
1420   Builder.CreateBr(ExitBlock);
1421   // Remove the old branch.
1422   Preheader->getTerminator()->eraseFromParent();
1423 
1424   if (DT) {
1425     // Update the dominator tree by informing it about the new edge from the
1426     // preheader to the exit.
1427     DT->insertEdge(Preheader, ExitBlock);
1428     // Inform the dominator tree about the removed edge.
1429     DT->deleteEdge(Preheader, L->getHeader());
1430   }
1431 
1432   // Given LCSSA form is satisfied, we should not have users of instructions
1433   // within the dead loop outside of the loop. However, LCSSA doesn't take
1434   // unreachable uses into account. We handle them here.
1435   // We could do it after drop all references (in this case all users in the
1436   // loop will be already eliminated and we have less work to do but according
1437   // to API doc of User::dropAllReferences only valid operation after dropping
1438   // references, is deletion. So let's substitute all usages of
1439   // instruction from the loop with undef value of corresponding type first.
1440   for (auto *Block : L->blocks())
1441     for (Instruction &I : *Block) {
1442       auto *Undef = UndefValue::get(I.getType());
1443       for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) {
1444         Use &U = *UI;
1445         ++UI;
1446         if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
1447           if (L->contains(Usr->getParent()))
1448             continue;
1449         // If we have a DT then we can check that uses outside a loop only in
1450         // unreachable block.
1451         if (DT)
1452           assert(!DT->isReachableFromEntry(U) &&
1453                  "Unexpected user in reachable block");
1454         U.set(Undef);
1455       }
1456     }
1457 
1458   // Remove the block from the reference counting scheme, so that we can
1459   // delete it freely later.
1460   for (auto *Block : L->blocks())
1461     Block->dropAllReferences();
1462 
1463   if (LI) {
1464     // Erase the instructions and the blocks without having to worry
1465     // about ordering because we already dropped the references.
1466     // NOTE: This iteration is safe because erasing the block does not remove
1467     // its entry from the loop's block list.  We do that in the next section.
1468     for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
1469          LpI != LpE; ++LpI)
1470       (*LpI)->eraseFromParent();
1471 
1472     // Finally, the blocks from loopinfo.  This has to happen late because
1473     // otherwise our loop iterators won't work.
1474 
1475     SmallPtrSet<BasicBlock *, 8> blocks;
1476     blocks.insert(L->block_begin(), L->block_end());
1477     for (BasicBlock *BB : blocks)
1478       LI->removeBlock(BB);
1479 
1480     // The last step is to update LoopInfo now that we've eliminated this loop.
1481     LI->erase(L);
1482   }
1483 }
1484 
1485 /// Computes loop safety information, checks loop body & header
1486 /// for the possibility of may throw exception.
1487 ///
1488 void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
1489   assert(CurLoop != nullptr && "CurLoop cant be null");
1490   BasicBlock *Header = CurLoop->getHeader();
1491   // Setting default safety values.
1492   SafetyInfo->MayThrow = false;
1493   SafetyInfo->HeaderMayThrow = false;
1494   // Iterate over header and compute safety info.
1495   SafetyInfo->HeaderMayThrow =
1496     !isGuaranteedToTransferExecutionToSuccessor(Header);
1497 
1498   SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
1499   // Iterate over loop instructions and compute safety info.
1500   // Skip header as it has been computed and stored in HeaderMayThrow.
1501   // The first block in loopinfo.Blocks is guaranteed to be the header.
1502   assert(Header == *CurLoop->getBlocks().begin() &&
1503          "First block must be header");
1504   for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
1505                             BBE = CurLoop->block_end();
1506        (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
1507     SafetyInfo->MayThrow |=
1508       !isGuaranteedToTransferExecutionToSuccessor(*BB);
1509 
1510   // Compute funclet colors if we might sink/hoist in a function with a funclet
1511   // personality routine.
1512   Function *Fn = CurLoop->getHeader()->getParent();
1513   if (Fn->hasPersonalityFn())
1514     if (Constant *PersonalityFn = Fn->getPersonalityFn())
1515       if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)))
1516         SafetyInfo->BlockColors = colorEHFunclets(*Fn);
1517 }
1518 
1519 /// Return true if we can prove that the given ExitBlock is not reached on the
1520 /// first iteration of the given loop.  That is, the backedge of the loop must
1521 /// be executed before the ExitBlock is executed in any dynamic execution trace.
1522 static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
1523                                            const DominatorTree *DT,
1524                                            const Loop *CurLoop) {
1525   auto *CondExitBlock = ExitBlock->getSinglePredecessor();
1526   if (!CondExitBlock)
1527     // expect unique exits
1528     return false;
1529   assert(CurLoop->contains(CondExitBlock) && "meaning of exit block");
1530   auto *BI = dyn_cast<BranchInst>(CondExitBlock->getTerminator());
1531   if (!BI || !BI->isConditional())
1532     return false;
1533   auto *Cond = dyn_cast<CmpInst>(BI->getCondition());
1534   if (!Cond)
1535     return false;
1536   // todo: this would be a lot more powerful if we used scev, but all the
1537   // plumbing is currently missing to pass a pointer in from the pass
1538   // Check for cmp (phi [x, preheader] ...), y where (pred x, y is known
1539   auto *LHS = dyn_cast<PHINode>(Cond->getOperand(0));
1540   auto *RHS = Cond->getOperand(1);
1541   if (!LHS || LHS->getParent() != CurLoop->getHeader())
1542     return false;
1543   auto DL = ExitBlock->getModule()->getDataLayout();
1544   auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1545   auto *SimpleValOrNull = SimplifyCmpInst(Cond->getPredicate(),
1546                                           IVStart, RHS,
1547                                           {DL, /*TLI*/ nullptr,
1548                                               DT, /*AC*/ nullptr, BI});
1549   auto *SimpleCst = dyn_cast_or_null<Constant>(SimpleValOrNull);
1550   if (!SimpleCst)
1551     return false;
1552   if (ExitBlock == BI->getSuccessor(0))
1553     return SimpleCst->isZeroValue();
1554   assert(ExitBlock == BI->getSuccessor(1) && "implied by above");
1555   return SimpleCst->isAllOnesValue();
1556 }
1557 
1558 /// Returns true if the instruction in a loop is guaranteed to execute at least
1559 /// once.
1560 bool llvm::isGuaranteedToExecute(const Instruction &Inst,
1561                                  const DominatorTree *DT, const Loop *CurLoop,
1562                                  const LoopSafetyInfo *SafetyInfo) {
1563   // We have to check to make sure that the instruction dominates all
1564   // of the exit blocks.  If it doesn't, then there is a path out of the loop
1565   // which does not execute this instruction, so we can't hoist it.
1566 
1567   // If the instruction is in the header block for the loop (which is very
1568   // common), it is always guaranteed to dominate the exit blocks.  Since this
1569   // is a common case, and can save some work, check it now.
1570   if (Inst.getParent() == CurLoop->getHeader())
1571     // If there's a throw in the header block, we can't guarantee we'll reach
1572     // Inst.
1573     return !SafetyInfo->HeaderMayThrow;
1574 
1575   // Somewhere in this loop there is an instruction which may throw and make us
1576   // exit the loop.
1577   if (SafetyInfo->MayThrow)
1578     return false;
1579 
1580   // Note: There are two styles of reasoning intermixed below for
1581   // implementation efficiency reasons.  They are:
1582   // 1) If we can prove that the instruction dominates all exit blocks, then we
1583   // know the instruction must have executed on *some* iteration before we
1584   // exit.  We do not prove *which* iteration the instruction must execute on.
1585   // 2) If we can prove that the instruction dominates the latch and all exits
1586   // which might be taken on the first iteration, we know the instruction must
1587   // execute on the first iteration.  This second style allows a conditional
1588   // exit before the instruction of interest which is provably not taken on the
1589   // first iteration.  This is a quite common case for range check like
1590   // patterns.  TODO: support loops with multiple latches.
1591 
1592   const bool InstDominatesLatch =
1593     CurLoop->getLoopLatch() != nullptr &&
1594     DT->dominates(Inst.getParent(), CurLoop->getLoopLatch());
1595 
1596   // Get the exit blocks for the current loop.
1597   SmallVector<BasicBlock *, 8> ExitBlocks;
1598   CurLoop->getExitBlocks(ExitBlocks);
1599 
1600   // Verify that the block dominates each of the exit blocks of the loop.
1601   for (BasicBlock *ExitBlock : ExitBlocks)
1602     if (!DT->dominates(Inst.getParent(), ExitBlock))
1603       if (!InstDominatesLatch ||
1604           !CanProveNotTakenFirstIteration(ExitBlock, DT, CurLoop))
1605         return false;
1606 
1607   // As a degenerate case, if the loop is statically infinite then we haven't
1608   // proven anything since there are no exit blocks.
1609   if (ExitBlocks.empty())
1610     return false;
1611 
1612   // FIXME: In general, we have to prove that the loop isn't an infinite loop.
1613   // See http::llvm.org/PR24078 .  (The "ExitBlocks.empty()" check above is
1614   // just a special case of this.)
1615   return true;
1616 }
1617 
1618 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
1619   // Only support loops with a unique exiting block, and a latch.
1620   if (!L->getExitingBlock())
1621     return None;
1622 
1623   // Get the branch weights for the loop's backedge.
1624   BranchInst *LatchBR =
1625       dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
1626   if (!LatchBR || LatchBR->getNumSuccessors() != 2)
1627     return None;
1628 
1629   assert((LatchBR->getSuccessor(0) == L->getHeader() ||
1630           LatchBR->getSuccessor(1) == L->getHeader()) &&
1631          "At least one edge out of the latch must go to the header");
1632 
1633   // To estimate the number of times the loop body was executed, we want to
1634   // know the number of times the backedge was taken, vs. the number of times
1635   // we exited the loop.
1636   uint64_t TrueVal, FalseVal;
1637   if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
1638     return None;
1639 
1640   if (!TrueVal || !FalseVal)
1641     return 0;
1642 
1643   // Divide the count of the backedge by the count of the edge exiting the loop,
1644   // rounding to nearest.
1645   if (LatchBR->getSuccessor(0) == L->getHeader())
1646     return (TrueVal + (FalseVal / 2)) / FalseVal;
1647   else
1648     return (FalseVal + (TrueVal / 2)) / TrueVal;
1649 }
1650 
1651 /// \brief Adds a 'fast' flag to floating point operations.
1652 static Value *addFastMathFlag(Value *V) {
1653   if (isa<FPMathOperator>(V)) {
1654     FastMathFlags Flags;
1655     Flags.setFast();
1656     cast<Instruction>(V)->setFastMathFlags(Flags);
1657   }
1658   return V;
1659 }
1660 
// Helper to generate a log2 shuffle reduction: halve the live lanes each
// round by combining the upper half of the vector with the lower half,
// until the reduced value sits in lane 0.
Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
                          ArrayRef<Value *> RedOps) {
  unsigned VF = Src->getType()->getVectorNumElements();
  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
  // and vector ops, reducing the set of values being computed by half each
  // round.
  assert(isPowerOf2_32(VF) &&
         "Reduction emission only supported for pow2 vectors!");
  Value *TmpVec = Src;
  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
  for (unsigned i = VF; i != 1; i >>= 1) {
    // Move the upper half of the vector to the lower half.
    for (unsigned j = 0; j != i / 2; ++j)
      ShuffleMask[j] = Builder.getInt32(i / 2 + j);

    // Fill the rest of the mask with undef: those lanes are dead after this
    // round and their contents do not matter.
    std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
              UndefValue::get(Builder.getInt32Ty()));

    Value *Shuf = Builder.CreateShuffleVector(
        TmpVec, UndefValue::get(TmpVec->getType()),
        ConstantVector::get(ShuffleMask), "rdx.shuf");

    if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
      // Floating point operations had to be 'fast' to enable the reduction.
      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
                                                   TmpVec, Shuf, "bin.rdx"));
    } else {
      // Compare opcodes denote a min/max recurrence; emit the select-based
      // min/max combine instead of a plain binary operator.
      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
             "Invalid min/max");
      TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
                                                    Shuf);
    }
    // Propagate IR flags (e.g. fast-math, nsw/nuw) from the scalar ops that
    // this reduction replaces, when the caller supplied them.
    if (!RedOps.empty())
      propagateIRFlags(TmpVec, RedOps);
  }
  // The result is in the first element of the vector.
  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
1703 
/// Create a simple vector reduction specified by an opcode and some
/// flags (if generating min/max reductions).
///
/// Each switch arm only *records* how to build the target reduction
/// intrinsic in BuildFunc; the decision whether to emit the intrinsic or a
/// generic shuffle reduction is deferred until after the TTI query at the
/// bottom.  The lambdas capture Builder/Src/Flags by reference, so they must
/// be invoked before those locals go out of scope (they are, at the end of
/// this function).
Value *llvm::createSimpleTargetReduction(
    IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
    Value *Src, TargetTransformInfo::ReductionFlags Flags,
    ArrayRef<Value *> RedOps) {
  assert(isa<VectorType>(Src->getType()) && "Type must be a vector");

  // Identity scalar operand for the ordered FP reduction intrinsics.
  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
  std::function<Value*()> BuildFunc;
  using RD = RecurrenceDescriptor;
  // For ICmp/FCmp opcodes this also records which min/max recurrence the
  // shuffle fallback should emit; MRK_Invalid otherwise.
  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
  // TODO: Support creating ordered reductions.
  FastMathFlags FMFFast;
  FMFFast.setFast();

  switch (Opcode) {
  case Instruction::Add:
    BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
    break;
  case Instruction::Mul:
    BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
    break;
  case Instruction::And:
    BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
    break;
  case Instruction::Or:
    BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
    break;
  case Instruction::Xor:
    BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
    break;
  case Instruction::FAdd:
    BuildFunc = [&]() {
      // Mark the reduction call 'fast' since ordered semantics are not
      // supported yet (see TODO above).
      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
      return Rdx;
    };
    break;
  case Instruction::FMul:
    BuildFunc = [&]() {
      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
      return Rdx;
    };
    break;
  case Instruction::ICmp:
    // Integer min/max: signedness and direction come from Flags.
    if (Flags.IsMaxOp) {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
      BuildFunc = [&]() {
        return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
      };
    } else {
      MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
      BuildFunc = [&]() {
        return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
      };
    }
    break;
  case Instruction::FCmp:
    // Floating-point min/max; Flags.NoNaN lets the target assume no NaNs.
    if (Flags.IsMaxOp) {
      MinMaxKind = RD::MRK_FloatMax;
      BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
    } else {
      MinMaxKind = RD::MRK_FloatMin;
      BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
    }
    break;
  default:
    llvm_unreachable("Unhandled opcode");
    break;
  }
  // Let the target choose between its reduction intrinsic and the generic
  // log2-shuffle expansion.
  if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
    return BuildFunc();
  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
}
1780 
1781 /// Create a vector reduction using a given recurrence descriptor.
1782 Value *llvm::createTargetReduction(IRBuilder<> &B,
1783                                    const TargetTransformInfo *TTI,
1784                                    RecurrenceDescriptor &Desc, Value *Src,
1785                                    bool NoNaN) {
1786   // TODO: Support in-order reductions based on the recurrence descriptor.
1787   using RD = RecurrenceDescriptor;
1788   RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
1789   TargetTransformInfo::ReductionFlags Flags;
1790   Flags.NoNaN = NoNaN;
1791   switch (RecKind) {
1792   case RD::RK_FloatAdd:
1793     return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
1794   case RD::RK_FloatMult:
1795     return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
1796   case RD::RK_IntegerAdd:
1797     return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
1798   case RD::RK_IntegerMult:
1799     return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
1800   case RD::RK_IntegerAnd:
1801     return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
1802   case RD::RK_IntegerOr:
1803     return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
1804   case RD::RK_IntegerXor:
1805     return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
1806   case RD::RK_IntegerMinMax: {
1807     RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
1808     Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
1809     Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
1810     return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
1811   }
1812   case RD::RK_FloatMinMax: {
1813     Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
1814     return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
1815   }
1816   default:
1817     llvm_unreachable("Unhandled RecKind");
1818   }
1819 }
1820 
1821 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
1822   auto *VecOp = dyn_cast<Instruction>(I);
1823   if (!VecOp)
1824     return;
1825   auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
1826                                             : dyn_cast<Instruction>(OpValue);
1827   if (!Intersection)
1828     return;
1829   const unsigned Opcode = Intersection->getOpcode();
1830   VecOp->copyIRFlags(Intersection);
1831   for (auto *V : VL) {
1832     auto *Instr = dyn_cast<Instruction>(V);
1833     if (!Instr)
1834       continue;
1835     if (OpValue == nullptr || Opcode == Instr->getOpcode())
1836       VecOp->andIRFlags(V);
1837   }
1838 }
1839