1 //===- ValueTracking.cpp - Walk computations to compute properties --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains routines that help analyze properties that chains of
11 // computations have.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Analysis/ValueTracking.h"
16 #include "llvm/ADT/Optional.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/Analysis/AssumptionCache.h"
19 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/Loads.h"
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/Analysis/MemoryBuiltins.h"
23 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
24 #include "llvm/Analysis/VectorUtils.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/ConstantRange.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/GetElementPtrTypeIterator.h"
32 #include "llvm/IR/GlobalAlias.h"
33 #include "llvm/IR/GlobalVariable.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/LLVMContext.h"
37 #include "llvm/IR/Metadata.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/IR/PatternMatch.h"
40 #include "llvm/IR/Statepoint.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/KnownBits.h"
43 #include "llvm/Support/MathExtras.h"
44 #include <algorithm>
45 #include <array>
46 #include <cstring>
47 using namespace llvm;
48 using namespace llvm::PatternMatch;
49 
50 const unsigned MaxDepth = 6;
51 
52 // Controls the number of uses of the value searched for possible
53 // dominating comparisons.
54 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
55                                               cl::Hidden, cl::init(20));
56 
57 /// Returns the bitwidth of the given scalar or pointer type. For vector types,
58 /// returns the element type's bitwidth.
59 static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
60   if (unsigned BitWidth = Ty->getScalarSizeInBits())
61     return BitWidth;
62 
63   return DL.getPointerTypeSizeInBits(Ty);
64 }
65 
66 namespace {
67 // Simplifying using an assume can only be done in a particular control-flow
68 // context (the context instruction provides that context). If an assume and
69 // the context instruction are not in the same block then the DT helps in
70 // figuring out if we can use it.
71 struct Query {
72   const DataLayout &DL;
73   AssumptionCache *AC;
74   const Instruction *CxtI;
75   const DominatorTree *DT;
76   // Unlike the other analyses, this may be a nullptr because not all clients
77   // provide it currently.
78   OptimizationRemarkEmitter *ORE;
79 
80   /// Set of assumptions that should be excluded from further queries.
81   /// This is because of the potential for mutual recursion to cause
82   /// computeKnownBits to repeatedly visit the same assume intrinsic. The
83   /// classic case of this is assume(x = y), which will attempt to determine
84   /// bits in x from bits in y, which will attempt to determine bits in y from
85   /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
86   /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
87   /// (all of which can call computeKnownBits), and so on.
88   std::array<const Value *, MaxDepth> Excluded;
89   unsigned NumExcluded;
90 
91   Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
92         const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
93       : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {}
94 
95   Query(const Query &Q, const Value *NewExcl)
96       : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
97         NumExcluded(Q.NumExcluded) {
98     Excluded = Q.Excluded;
99     Excluded[NumExcluded++] = NewExcl;
100     assert(NumExcluded <= Excluded.size());
101   }
102 
103   bool isExcluded(const Value *Value) const {
104     if (NumExcluded == 0)
105       return false;
106     auto End = Excluded.begin() + NumExcluded;
107     return std::find(Excluded.begin(), End, Value) != End;
108   }
109 };
110 } // end anonymous namespace
111 
112 // Given the provided Value and, potentially, a context instruction, return
113 // the preferred context instruction (if any).
114 static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
115   // If we've been provided with a context instruction, then use that (provided
116   // it has been inserted).
117   if (CxtI && CxtI->getParent())
118     return CxtI;
119 
120   // If the value is really an already-inserted instruction, then use that.
121   CxtI = dyn_cast<Instruction>(V);
122   if (CxtI && CxtI->getParent())
123     return CxtI;
124 
125   return nullptr;
126 }
127 
128 static void computeKnownBits(const Value *V, KnownBits &Known,
129                              unsigned Depth, const Query &Q);
130 
131 void llvm::computeKnownBits(const Value *V, KnownBits &Known,
132                             const DataLayout &DL, unsigned Depth,
133                             AssumptionCache *AC, const Instruction *CxtI,
134                             const DominatorTree *DT,
135                             OptimizationRemarkEmitter *ORE) {
136   ::computeKnownBits(V, Known, Depth,
137                      Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
138 }
139 
140 static KnownBits computeKnownBits(const Value *V, unsigned Depth,
141                                   const Query &Q);
142 
143 KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
144                                  unsigned Depth, AssumptionCache *AC,
145                                  const Instruction *CxtI,
146                                  const DominatorTree *DT,
147                                  OptimizationRemarkEmitter *ORE) {
148   return ::computeKnownBits(V, Depth,
149                             Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
150 }
151 
152 bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
153                                const DataLayout &DL,
154                                AssumptionCache *AC, const Instruction *CxtI,
155                                const DominatorTree *DT) {
156   assert(LHS->getType() == RHS->getType() &&
157          "LHS and RHS should have the same type");
158   assert(LHS->getType()->isIntOrIntVectorTy() &&
159          "LHS and RHS should be integers");
160   IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
161   KnownBits LHSKnown(IT->getBitWidth());
162   KnownBits RHSKnown(IT->getBitWidth());
163   computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT);
164   computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT);
165   return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
166 }
167 
168 
169 bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
170   for (const User *U : CxtI->users()) {
171     if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
172       if (IC->isEquality())
173         if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
174           if (C->isNullValue())
175             continue;
176     return false;
177   }
178   return true;
179 }
180 
181 static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
182                                    const Query &Q);
183 
184 bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
185                                   bool OrZero,
186                                   unsigned Depth, AssumptionCache *AC,
187                                   const Instruction *CxtI,
188                                   const DominatorTree *DT) {
189   return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
190                                   Query(DL, AC, safeCxtI(V, CxtI), DT));
191 }
192 
193 static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
194 
195 bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
196                           AssumptionCache *AC, const Instruction *CxtI,
197                           const DominatorTree *DT) {
198   return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
199 }
200 
201 bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
202                               unsigned Depth,
203                               AssumptionCache *AC, const Instruction *CxtI,
204                               const DominatorTree *DT) {
205   KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
206   return Known.isNonNegative();
207 }
208 
209 bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
210                            AssumptionCache *AC, const Instruction *CxtI,
211                            const DominatorTree *DT) {
212   if (auto *CI = dyn_cast<ConstantInt>(V))
213     return CI->getValue().isStrictlyPositive();
214 
  // TODO: We're doing two recursive queries here.  We should factor this such
  // that only a single query is needed.
217   return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) &&
218     isKnownNonZero(V, DL, Depth, AC, CxtI, DT);
219 }
220 
221 bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
222                            AssumptionCache *AC, const Instruction *CxtI,
223                            const DominatorTree *DT) {
224   KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
225   return Known.isNegative();
226 }
227 
228 static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
229 
230 bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
231                            const DataLayout &DL,
232                            AssumptionCache *AC, const Instruction *CxtI,
233                            const DominatorTree *DT) {
234   return ::isKnownNonEqual(V1, V2, Query(DL, AC,
235                                          safeCxtI(V1, safeCxtI(V2, CxtI)),
236                                          DT));
237 }
238 
239 static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
240                               const Query &Q);
241 
242 bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
243                              const DataLayout &DL,
244                              unsigned Depth, AssumptionCache *AC,
245                              const Instruction *CxtI, const DominatorTree *DT) {
246   return ::MaskedValueIsZero(V, Mask, Depth,
247                              Query(DL, AC, safeCxtI(V, CxtI), DT));
248 }
249 
250 static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
251                                    const Query &Q);
252 
253 unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
254                                   unsigned Depth, AssumptionCache *AC,
255                                   const Instruction *CxtI,
256                                   const DominatorTree *DT) {
257   return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
258 }
259 
260 static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
261                                    bool NSW,
262                                    KnownBits &KnownOut, KnownBits &Known2,
263                                    unsigned Depth, const Query &Q) {
264   unsigned BitWidth = KnownOut.getBitWidth();
265 
266   // If an initial sequence of bits in the result is not needed, the
267   // corresponding bits in the operands are not needed.
268   KnownBits LHSKnown(BitWidth);
269   computeKnownBits(Op0, LHSKnown, Depth + 1, Q);
270   computeKnownBits(Op1, Known2, Depth + 1, Q);
271 
272   KnownOut = KnownBits::computeForAddSub(Add, NSW, LHSKnown, Known2);
273 }
274 
275 static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
276                                 KnownBits &Known, KnownBits &Known2,
277                                 unsigned Depth, const Query &Q) {
278   unsigned BitWidth = Known.getBitWidth();
279   computeKnownBits(Op1, Known, Depth + 1, Q);
280   computeKnownBits(Op0, Known2, Depth + 1, Q);
281 
282   bool isKnownNegative = false;
283   bool isKnownNonNegative = false;
284   // If the multiplication is known not to overflow, compute the sign bit.
285   if (NSW) {
286     if (Op0 == Op1) {
287       // The product of a number with itself is non-negative.
288       isKnownNonNegative = true;
289     } else {
290       bool isKnownNonNegativeOp1 = Known.isNonNegative();
291       bool isKnownNonNegativeOp0 = Known2.isNonNegative();
292       bool isKnownNegativeOp1 = Known.isNegative();
293       bool isKnownNegativeOp0 = Known2.isNegative();
294       // The product of two numbers with the same sign is non-negative.
295       isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
296         (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
297       // The product of a negative number and a non-negative number is either
298       // negative or zero.
299       if (!isKnownNonNegative)
300         isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
301                            isKnownNonZero(Op0, Depth, Q)) ||
302                           (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
303                            isKnownNonZero(Op1, Depth, Q));
304     }
305   }
306 
307   // If low bits are zero in either operand, output low known-0 bits.
308   // Also compute a conservative estimate for high known-0 bits.
309   // More trickiness is possible, but this is sufficient for the
310   // interesting case of alignment computation.
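  // For example, if Op0 is known to be a multiple of 8 (at least 3 trailing
  // zeros) and Op1 a multiple of 4 (at least 2), the product is a multiple of
  // 32, so at least 5 low bits of the result are known to be zero.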
311   unsigned TrailZ = Known.countMinTrailingZeros() +
312                     Known2.countMinTrailingZeros();
313   unsigned LeadZ =  std::max(Known.countMinLeadingZeros() +
314                              Known2.countMinLeadingZeros(),
315                              BitWidth) - BitWidth;
316 
317   TrailZ = std::min(TrailZ, BitWidth);
318   LeadZ = std::min(LeadZ, BitWidth);
319   Known.resetAll();
320   Known.Zero.setLowBits(TrailZ);
321   Known.Zero.setHighBits(LeadZ);
322 
323   // Only make use of no-wrap flags if we failed to compute the sign bit
324   // directly.  This matters if the multiplication always overflows, in
325   // which case we prefer to follow the result of the direct computation,
326   // though as the program is invoking undefined behaviour we can choose
327   // whatever we like here.
328   if (isKnownNonNegative && !Known.isNegative())
329     Known.makeNonNegative();
330   else if (isKnownNegative && !Known.isNonNegative())
331     Known.makeNegative();
332 }
333 
334 void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
335                                              KnownBits &Known) {
336   unsigned BitWidth = Known.getBitWidth();
337   unsigned NumRanges = Ranges.getNumOperands() / 2;
338   assert(NumRanges >= 1);
339 
340   Known.Zero.setAllBits();
341   Known.One.setAllBits();
342 
343   for (unsigned i = 0; i < NumRanges; ++i) {
344     ConstantInt *Lower =
345         mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
346     ConstantInt *Upper =
347         mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
348     ConstantRange Range(Lower->getValue(), Upper->getValue());
349 
350     // The first CommonPrefixBits of all values in Range are equal.
351     unsigned CommonPrefixBits =
352         (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
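    // For example, for the 8-bit range [0x60, 0x70) the minimum is 0x60 and
    // the maximum is 0x6F, so min ^ max == 0x0F and the top four bits (0110)
    // are shared by every value in the range.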
353 
354     APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
355     Known.One &= Range.getUnsignedMax() & Mask;
356     Known.Zero &= ~Range.getUnsignedMax() & Mask;
357   }
358 }
359 
360 static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
361   SmallVector<const Value *, 16> WorkSet(1, I);
362   SmallPtrSet<const Value *, 32> Visited;
363   SmallPtrSet<const Value *, 16> EphValues;
364 
365   // The instruction defining an assumption's condition itself is always
366   // considered ephemeral to that assumption (even if it has other
367   // non-ephemeral users). See r246696's test case for an example.
368   if (is_contained(I->operands(), E))
369     return true;
370 
371   while (!WorkSet.empty()) {
372     const Value *V = WorkSet.pop_back_val();
373     if (!Visited.insert(V).second)
374       continue;
375 
376     // If all uses of this value are ephemeral, then so is this value.
377     if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) {
378       if (V == E)
379         return true;
380 
381       EphValues.insert(V);
382       if (const User *U = dyn_cast<User>(V))
383         for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
384              J != JE; ++J) {
385           if (isSafeToSpeculativelyExecute(*J))
386             WorkSet.push_back(*J);
387         }
388     }
389   }
390 
391   return false;
392 }
393 
394 // Is this an intrinsic that cannot be speculated but also cannot trap?
395 static bool isAssumeLikeIntrinsic(const Instruction *I) {
396   if (const CallInst *CI = dyn_cast<CallInst>(I))
397     if (Function *F = CI->getCalledFunction())
398       switch (F->getIntrinsicID()) {
399       default: break;
400       // FIXME: This list is repeated from NoTTI::getIntrinsicCost.
401       case Intrinsic::assume:
402       case Intrinsic::dbg_declare:
403       case Intrinsic::dbg_value:
404       case Intrinsic::invariant_start:
405       case Intrinsic::invariant_end:
406       case Intrinsic::lifetime_start:
407       case Intrinsic::lifetime_end:
408       case Intrinsic::objectsize:
409       case Intrinsic::ptr_annotation:
410       case Intrinsic::var_annotation:
411         return true;
412       }
413 
414   return false;
415 }
416 
417 bool llvm::isValidAssumeForContext(const Instruction *Inv,
418                                    const Instruction *CxtI,
419                                    const DominatorTree *DT) {
420 
421   // There are two restrictions on the use of an assume:
422   //  1. The assume must dominate the context (or the control flow must
423   //     reach the assume whenever it reaches the context).
424   //  2. The context must not be in the assume's set of ephemeral values
425   //     (otherwise we will use the assume to prove that the condition
426   //     feeding the assume is trivially true, thus causing the removal of
427   //     the assume).
428 
429   if (DT) {
430     if (DT->dominates(Inv, CxtI))
431       return true;
432   } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
433     // We don't have a DT, but this trivially dominates.
434     return true;
435   }
436 
437   // With or without a DT, the only remaining case we will check is if the
438   // instructions are in the same BB.  Give up if that is not the case.
439   if (Inv->getParent() != CxtI->getParent())
440     return false;
441 
  // If we have a dom tree, then we now know that the assume doesn't dominate
  // the other instruction.  If we don't have a dom tree then we can check if
  // the assume is first in the BB.
445   if (!DT) {
446     // Search forward from the assume until we reach the context (or the end
447     // of the block); the common case is that the assume will come first.
448     for (auto I = std::next(BasicBlock::const_iterator(Inv)),
449          IE = Inv->getParent()->end(); I != IE; ++I)
450       if (&*I == CxtI)
451         return true;
452   }
453 
454   // The context comes first, but they're both in the same block. Make sure
455   // there is nothing in between that might interrupt the control flow.
456   for (BasicBlock::const_iterator I =
457          std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
458        I != IE; ++I)
459     if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
460       return false;
461 
462   return !isEphemeralValueOf(Inv, CxtI);
463 }
464 
465 static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
466                                        unsigned Depth, const Query &Q) {
467   // Use of assumptions is context-sensitive. If we don't have a context, we
468   // cannot use them!
469   if (!Q.AC || !Q.CxtI)
470     return;
471 
472   unsigned BitWidth = Known.getBitWidth();
473 
474   // Note that the patterns below need to be kept in sync with the code
475   // in AssumptionCache::updateAffectedValues.
476 
477   for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
478     if (!AssumeVH)
479       continue;
480     CallInst *I = cast<CallInst>(AssumeVH);
481     assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
482            "Got assumption for the wrong function!");
483     if (Q.isExcluded(I))
484       continue;
485 
    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).
489 
490     assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
491            "must be an assume intrinsic");
492 
493     Value *Arg = I->getArgOperand(0);
494 
495     if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
496       assert(BitWidth == 1 && "assume operand is not i1?");
497       Known.setAllOnes();
498       return;
499     }
500     if (match(Arg, m_Not(m_Specific(V))) &&
501         isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
502       assert(BitWidth == 1 && "assume operand is not i1?");
503       Known.setAllZero();
504       return;
505     }
506 
507     // The remaining tests are all recursive, so bail out if we hit the limit.
508     if (Depth == MaxDepth)
509       continue;
510 
511     Value *A, *B;
512     auto m_V = m_CombineOr(m_Specific(V),
513                            m_CombineOr(m_PtrToInt(m_Specific(V)),
514                            m_BitCast(m_Specific(V))));
515 
516     CmpInst::Predicate Pred;
517     ConstantInt *C;
518     // assume(v = a)
519     if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
520         Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
521       KnownBits RHSKnown(BitWidth);
522       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
523       Known.Zero |= RHSKnown.Zero;
524       Known.One  |= RHSKnown.One;
525     // assume(v & b = a)
526     } else if (match(Arg,
527                      m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
528                Pred == ICmpInst::ICMP_EQ &&
529                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
530       KnownBits RHSKnown(BitWidth);
531       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
532       KnownBits MaskKnown(BitWidth);
533       computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
534 
535       // For those bits in the mask that are known to be one, we can propagate
536       // known bits from the RHS to V.
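      // For example, assume((v & 0xf0) == 0x30) tells us that bits 7..4 of v
      // are 0011; the low four bits stay unknown because the mask is zero
      // there.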
537       Known.Zero |= RHSKnown.Zero & MaskKnown.One;
538       Known.One  |= RHSKnown.One  & MaskKnown.One;
539     // assume(~(v & b) = a)
540     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
541                                    m_Value(A))) &&
542                Pred == ICmpInst::ICMP_EQ &&
543                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
544       KnownBits RHSKnown(BitWidth);
545       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
546       KnownBits MaskKnown(BitWidth);
547       computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
548 
549       // For those bits in the mask that are known to be one, we can propagate
550       // inverted known bits from the RHS to V.
551       Known.Zero |= RHSKnown.One  & MaskKnown.One;
552       Known.One  |= RHSKnown.Zero & MaskKnown.One;
553     // assume(v | b = a)
554     } else if (match(Arg,
555                      m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
556                Pred == ICmpInst::ICMP_EQ &&
557                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
558       KnownBits RHSKnown(BitWidth);
559       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
560       KnownBits BKnown(BitWidth);
561       computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
562 
563       // For those bits in B that are known to be zero, we can propagate known
564       // bits from the RHS to V.
565       Known.Zero |= RHSKnown.Zero & BKnown.Zero;
566       Known.One  |= RHSKnown.One  & BKnown.Zero;
567     // assume(~(v | b) = a)
568     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
569                                    m_Value(A))) &&
570                Pred == ICmpInst::ICMP_EQ &&
571                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
572       KnownBits RHSKnown(BitWidth);
573       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
574       KnownBits BKnown(BitWidth);
575       computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
576 
577       // For those bits in B that are known to be zero, we can propagate
578       // inverted known bits from the RHS to V.
579       Known.Zero |= RHSKnown.One  & BKnown.Zero;
580       Known.One  |= RHSKnown.Zero & BKnown.Zero;
581     // assume(v ^ b = a)
582     } else if (match(Arg,
583                      m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
584                Pred == ICmpInst::ICMP_EQ &&
585                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
586       KnownBits RHSKnown(BitWidth);
587       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
588       KnownBits BKnown(BitWidth);
589       computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
590 
591       // For those bits in B that are known to be zero, we can propagate known
592       // bits from the RHS to V. For those bits in B that are known to be one,
593       // we can propagate inverted known bits from the RHS to V.
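      // For example, assume((v ^ 0x01) == 0x05) pins v to 0x04: bit 0 of v is
      // the inverse of bit 0 of the RHS because bit 0 of B is one, and every
      // other bit carries over from the RHS unchanged.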
594       Known.Zero |= RHSKnown.Zero & BKnown.Zero;
595       Known.One  |= RHSKnown.One  & BKnown.Zero;
596       Known.Zero |= RHSKnown.One  & BKnown.One;
597       Known.One  |= RHSKnown.Zero & BKnown.One;
598     // assume(~(v ^ b) = a)
599     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
600                                    m_Value(A))) &&
601                Pred == ICmpInst::ICMP_EQ &&
602                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
603       KnownBits RHSKnown(BitWidth);
604       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
605       KnownBits BKnown(BitWidth);
606       computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
607 
608       // For those bits in B that are known to be zero, we can propagate
609       // inverted known bits from the RHS to V. For those bits in B that are
610       // known to be one, we can propagate known bits from the RHS to V.
611       Known.Zero |= RHSKnown.One  & BKnown.Zero;
612       Known.One  |= RHSKnown.Zero & BKnown.Zero;
613       Known.Zero |= RHSKnown.Zero & BKnown.One;
614       Known.One  |= RHSKnown.One  & BKnown.One;
615     // assume(v << c = a)
616     } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
617                                    m_Value(A))) &&
618                Pred == ICmpInst::ICMP_EQ &&
619                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
620       KnownBits RHSKnown(BitWidth);
621       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
622       // For those bits in RHS that are known, we can propagate them to known
623       // bits in V shifted to the right by C.
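      // For example, assume((v << 4) == 0x370) implies the low bits of v are
      // 0x37, while the four bits of v that were shifted out of the result
      // remain unknown.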
624       RHSKnown.Zero.lshrInPlace(C->getZExtValue());
625       Known.Zero |= RHSKnown.Zero;
626       RHSKnown.One.lshrInPlace(C->getZExtValue());
627       Known.One  |= RHSKnown.One;
628     // assume(~(v << c) = a)
629     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
630                                    m_Value(A))) &&
631                Pred == ICmpInst::ICMP_EQ &&
632                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
633       KnownBits RHSKnown(BitWidth);
634       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
635       // For those bits in RHS that are known, we can propagate them inverted
636       // to known bits in V shifted to the right by C.
637       RHSKnown.One.lshrInPlace(C->getZExtValue());
638       Known.Zero |= RHSKnown.One;
639       RHSKnown.Zero.lshrInPlace(C->getZExtValue());
640       Known.One  |= RHSKnown.Zero;
641     // assume(v >> c = a)
642     } else if (match(Arg,
643                      m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
644                               m_Value(A))) &&
645                Pred == ICmpInst::ICMP_EQ &&
646                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
647       KnownBits RHSKnown(BitWidth);
648       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      // For those bits in RHS that are known, the corresponding bits of V
      // (before the right shift by C) are known; shift them back left by C.
651       Known.Zero |= RHSKnown.Zero << C->getZExtValue();
652       Known.One  |= RHSKnown.One  << C->getZExtValue();
653     // assume(~(v >> c) = a)
654     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
655                                    m_Value(A))) &&
656                Pred == ICmpInst::ICMP_EQ &&
657                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
658       KnownBits RHSKnown(BitWidth);
659       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
      // For those bits in RHS that are known, the corresponding bits of V
      // (before the right shift by C) are known with inverted values; shift
      // them back left by C.
662       Known.Zero |= RHSKnown.One  << C->getZExtValue();
663       Known.One  |= RHSKnown.Zero << C->getZExtValue();
664     // assume(v >=_s c) where c is non-negative
665     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
666                Pred == ICmpInst::ICMP_SGE &&
667                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
668       KnownBits RHSKnown(BitWidth);
669       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
670 
671       if (RHSKnown.isNonNegative()) {
672         // We know that the sign bit is zero.
673         Known.makeNonNegative();
674       }
675     // assume(v >_s c) where c is at least -1.
676     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
677                Pred == ICmpInst::ICMP_SGT &&
678                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
679       KnownBits RHSKnown(BitWidth);
680       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
681 
682       if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
683         // We know that the sign bit is zero.
684         Known.makeNonNegative();
685       }
686     // assume(v <=_s c) where c is negative
687     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
688                Pred == ICmpInst::ICMP_SLE &&
689                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
690       KnownBits RHSKnown(BitWidth);
691       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
692 
693       if (RHSKnown.isNegative()) {
694         // We know that the sign bit is one.
695         Known.makeNegative();
696       }
697     // assume(v <_s c) where c is non-positive
698     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
699                Pred == ICmpInst::ICMP_SLT &&
700                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
701       KnownBits RHSKnown(BitWidth);
702       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
703 
704       if (RHSKnown.isZero() || RHSKnown.isNegative()) {
705         // We know that the sign bit is one.
706         Known.makeNegative();
707       }
708     // assume(v <=_u c)
709     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
710                Pred == ICmpInst::ICMP_ULE &&
711                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
712       KnownBits RHSKnown(BitWidth);
713       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
714 
715       // Whatever high bits in c are zero are known to be zero.
716       Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
    // assume(v <_u c)
718     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
719                Pred == ICmpInst::ICMP_ULT &&
720                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
721       KnownBits RHSKnown(BitWidth);
722       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
723 
724       // Whatever high bits in c are zero are known to be zero (if c is a power
725       // of 2, then one more).
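      // For example, with i8 values, assume(v <_u 16) means v <= 15, so the
      // top four bits of v are known zero: 16 contributes three leading zeros
      // plus one more because it is a power of two.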
726       if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
727         Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
728       else
729         Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
730     }
731   }
732 
733   // If assumptions conflict with each other or previous known bits, then we
734   // have a logical fallacy. It's possible that the assumption is not reachable,
735   // so this isn't a real bug. On the other hand, the program may have undefined
736   // behavior, or we might have a bug in the compiler. We can't assert/crash, so
737   // clear out the known bits, try to warn the user, and hope for the best.
738   if (Known.Zero.intersects(Known.One)) {
739     Known.resetAll();
740 
741     if (Q.ORE) {
742       auto *CxtI = const_cast<Instruction *>(Q.CxtI);
743       OptimizationRemarkAnalysis ORA("value-tracking", "BadAssumption", CxtI);
744       Q.ORE->emit(ORA << "Detected conflicting code assumptions. Program may "
745                          "have undefined behavior, or compiler may have "
746                          "internal error.");
747     }
748   }
749 }
750 
// Compute known bits from a shift operator, including those with a
// non-constant shift amount. Known is the output of this function. Known2 is a
// pre-allocated temporary with the same bit width as Known. KZF and KOF are
// operator-specific functors that, given the known-zero or known-one bits
// respectively, and a shift amount, compute the implied known-zero or
// known-one bits of the shift operator's result respectively for that shift
// amount. The results from calling KZF and KOF are conservatively combined for
// all permitted shift amounts.
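// For example, the shl handler passes a KZF that shifts the known-zero bits
// left by the shift amount and marks the vacated low bits as known zero; when
// the shift amount is only partially known, the loop below intersects the
// results over every shift amount that is still possible.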
759 static void computeKnownBitsFromShiftOperator(
760     const Operator *I, KnownBits &Known, KnownBits &Known2,
761     unsigned Depth, const Query &Q,
762     function_ref<APInt(const APInt &, unsigned)> KZF,
763     function_ref<APInt(const APInt &, unsigned)> KOF) {
764   unsigned BitWidth = Known.getBitWidth();
765 
766   if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
767     unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
768 
769     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
770     Known.Zero = KZF(Known.Zero, ShiftAmt);
771     Known.One  = KOF(Known.One, ShiftAmt);
772     // If there is conflict between Known.Zero and Known.One, this must be an
773     // overflowing left shift, so the shift result is undefined. Clear Known
774     // bits so that other code could propagate this undef.
775     if ((Known.Zero & Known.One) != 0)
776       Known.resetAll();
777 
778     return;
779   }
780 
781   computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
782 
  // If the shift amount could be greater than or equal to the bit-width of
  // the LHS, the value could be undef, so we don't know anything about it.
785   if ((~Known.Zero).uge(BitWidth)) {
786     Known.resetAll();
787     return;
788   }
789 
790   // Note: We cannot use Known.Zero.getLimitedValue() here, because if
791   // BitWidth > 64 and any upper bits are known, we'll end up returning the
792   // limit value (which implies all bits are known).
793   uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
794   uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
795 
  // It would be clearer to use two temporaries for this calculation, but we
  // reuse the APInts here to prevent unnecessary allocations.
798   Known.resetAll();
799 
800   // If we know the shifter operand is nonzero, we can sometimes infer more
801   // known bits. However this is expensive to compute, so be lazy about it and
802   // only compute it when absolutely necessary.
803   Optional<bool> ShifterOperandIsNonZero;
804 
805   // Early exit if we can't constrain any well-defined shift amount.
806   if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
807       !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
808     ShifterOperandIsNonZero =
809         isKnownNonZero(I->getOperand(1), Depth + 1, Q);
810     if (!*ShifterOperandIsNonZero)
811       return;
812   }
813 
814   computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
815 
816   Known.Zero.setAllBits();
817   Known.One.setAllBits();
818   for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
819     // Combine the shifted known input bits only for those shift amounts
820     // compatible with its known constraints.
821     if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
822       continue;
823     if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
824       continue;
825     // If we know the shifter is nonzero, we may be able to infer more known
826     // bits. This check is sunk down as far as possible to avoid the expensive
827     // call to isKnownNonZero if the cheaper checks above fail.
828     if (ShiftAmt == 0) {
829       if (!ShifterOperandIsNonZero.hasValue())
830         ShifterOperandIsNonZero =
831             isKnownNonZero(I->getOperand(1), Depth + 1, Q);
832       if (*ShifterOperandIsNonZero)
833         continue;
834     }
835 
836     Known.Zero &= KZF(Known2.Zero, ShiftAmt);
837     Known.One  &= KOF(Known2.One, ShiftAmt);
838   }
839 
840   // If there are no compatible shift amounts, then we've proven that the shift
841   // amount must be >= the BitWidth, and the result is undefined. We could
842   // return anything we'd like, but we need to make sure the sets of known bits
843   // stay disjoint (it should be better for some other code to actually
844   // propagate the undef than to pick a value here using known bits).
845   if (Known.Zero.intersects(Known.One))
846     Known.resetAll();
847 }
848 
849 static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
850                                          unsigned Depth, const Query &Q) {
851   unsigned BitWidth = Known.getBitWidth();
852 
853   KnownBits Known2(Known);
854   switch (I->getOpcode()) {
855   default: break;
856   case Instruction::Load:
857     if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
858       computeKnownBitsFromRangeMetadata(*MD, Known);
859     break;
860   case Instruction::And: {
    // If either the LHS or the RHS is zero, the result is zero.
862     computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
863     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
864 
865     // Output known-1 bits are only known if set in both the LHS & RHS.
866     Known.One &= Known2.One;
    // Output known-0 bits are known to be clear if zero in either the LHS
    // or the RHS.
868     Known.Zero |= Known2.Zero;
869 
    // and(x, add(x, -1)) is a common idiom that always clears the low bit;
    // here we handle the more general case of adding any odd number by
    // matching the form and(x, add(x, y)) where y is odd.
    // TODO: This could be generalized to clearing any bit set in y where the
    // following bit is known to be unset in y.
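    // For example, in and(x, add(x, 3)): 3 is odd, so x and x + 3 always
    // differ in bit 0, and bit 0 of the result is therefore known to be zero.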
875     Value *Y = nullptr;
876     if (!Known.Zero[0] && !Known.One[0] &&
877         (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
878                                        m_Value(Y))) ||
879          match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
880                                        m_Value(Y))))) {
881       Known2.resetAll();
882       computeKnownBits(Y, Known2, Depth + 1, Q);
883       if (Known2.countMinTrailingOnes() > 0)
884         Known.Zero.setBit(0);
885     }
886     break;
887   }
888   case Instruction::Or: {
889     computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
890     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
891 
892     // Output known-0 bits are only known if clear in both the LHS & RHS.
893     Known.Zero &= Known2.Zero;
    // Output known-1 bits are known to be set if set in either the LHS
    // or the RHS.
895     Known.One |= Known2.One;
896     break;
897   }
898   case Instruction::Xor: {
899     computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
900     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
901 
902     // Output known-0 bits are known if clear or set in both the LHS & RHS.
903     APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
    // Output known-1 bits are known to be set if set in exactly one of the
    // LHS and RHS.
905     Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
906     Known.Zero = std::move(KnownZeroOut);
907     break;
908   }
909   case Instruction::Mul: {
910     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
911     computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
912                         Known2, Depth, Q);
913     break;
914   }
915   case Instruction::UDiv: {
916     // For the purposes of computing leading zeros we can conservatively
917     // treat a udiv as a logical right shift by the power of 2 known to
918     // be less than the denominator.
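    // For example, with i32 operands, if the numerator has at least 4 known
    // leading zeros and the denominator has bit 3 known to be one (so it is
    // at least 8), the quotient gets at least 4 + 3 = 7 known leading zeros.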
919     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
920     unsigned LeadZ = Known2.countMinLeadingZeros();
921 
922     Known2.resetAll();
923     computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
924     unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
925     if (RHSMaxLeadingZeros != BitWidth)
926       LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
927 
928     Known.Zero.setHighBits(LeadZ);
929     break;
930   }
931   case Instruction::Select: {
932     const Value *LHS, *RHS;
933     SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
934     if (SelectPatternResult::isMinOrMax(SPF)) {
935       computeKnownBits(RHS, Known, Depth + 1, Q);
936       computeKnownBits(LHS, Known2, Depth + 1, Q);
937     } else {
938       computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
939       computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
940     }
941 
942     unsigned MaxHighOnes = 0;
943     unsigned MaxHighZeros = 0;
944     if (SPF == SPF_SMAX) {
945       // If both sides are negative, the result is negative.
946       if (Known.isNegative() && Known2.isNegative())
947         // We can derive a lower bound on the result by taking the max of the
948         // leading one bits.
949         MaxHighOnes =
950             std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
951       // If either side is non-negative, the result is non-negative.
952       else if (Known.isNonNegative() || Known2.isNonNegative())
953         MaxHighZeros = 1;
954     } else if (SPF == SPF_SMIN) {
955       // If both sides are non-negative, the result is non-negative.
956       if (Known.isNonNegative() && Known2.isNonNegative())
957         // We can derive an upper bound on the result by taking the max of the
958         // leading zero bits.
959         MaxHighZeros = std::max(Known.countMinLeadingZeros(),
960                                 Known2.countMinLeadingZeros());
961       // If either side is negative, the result is negative.
962       else if (Known.isNegative() || Known2.isNegative())
963         MaxHighOnes = 1;
964     } else if (SPF == SPF_UMAX) {
965       // We can derive a lower bound on the result by taking the max of the
966       // leading one bits.
967       MaxHighOnes =
968           std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
969     } else if (SPF == SPF_UMIN) {
970       // We can derive an upper bound on the result by taking the max of the
971       // leading zero bits.
972       MaxHighZeros =
973           std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
974     }
975 
976     // Only known if known in both the LHS and RHS.
977     Known.One &= Known2.One;
978     Known.Zero &= Known2.Zero;
979     if (MaxHighOnes > 0)
980       Known.One.setHighBits(MaxHighOnes);
981     if (MaxHighZeros > 0)
982       Known.Zero.setHighBits(MaxHighZeros);
983     break;
984   }
985   case Instruction::FPTrunc:
986   case Instruction::FPExt:
987   case Instruction::FPToUI:
988   case Instruction::FPToSI:
989   case Instruction::SIToFP:
990   case Instruction::UIToFP:
991     break; // Can't work with floating point.
992   case Instruction::PtrToInt:
993   case Instruction::IntToPtr:
994     // Fall through and handle them the same as zext/trunc.
995     LLVM_FALLTHROUGH;
996   case Instruction::ZExt:
997   case Instruction::Trunc: {
998     Type *SrcTy = I->getOperand(0)->getType();
999 
1000     unsigned SrcBitWidth;
1001     // Note that we handle pointer operands here because of inttoptr/ptrtoint
1002     // which fall through here.
1003     SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType());
1004 
1005     assert(SrcBitWidth && "SrcBitWidth can't be zero");
1006     Known = Known.zextOrTrunc(SrcBitWidth);
1007     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1008     Known = Known.zextOrTrunc(BitWidth);
1009     // Any top bits are known to be zero.
1010     if (BitWidth > SrcBitWidth)
1011       Known.Zero.setBitsFrom(SrcBitWidth);
1012     break;
1013   }
1014   case Instruction::BitCast: {
1015     Type *SrcTy = I->getOperand(0)->getType();
1016     if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
1017         // TODO: For now, not handling conversions like:
1018         // (bitcast i64 %x to <2 x i32>)
1019         !I->getType()->isVectorTy()) {
1020       computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1021       break;
1022     }
1023     break;
1024   }
1025   case Instruction::SExt: {
1026     // Compute the bits in the result that are not present in the input.
1027     unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1028 
1029     Known = Known.trunc(SrcBitWidth);
1030     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1031     // If the sign bit of the input is known set or clear, then we know the
1032     // top bits of the result.
1033     Known = Known.sext(BitWidth);
1034     break;
1035   }
1036   case Instruction::Shl: {
1037     // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
1038     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
1039     auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
1040       APInt KZResult = KnownZero << ShiftAmt;
1041       KZResult.setLowBits(ShiftAmt); // Low bits known 0.
1042       // If this shift has "nsw" keyword, then the result is either a poison
1043       // value or has the same sign bit as the first operand.
1044       if (NSW && KnownZero.isSignBitSet())
1045         KZResult.setSignBit();
1046       return KZResult;
1047     };
1048 
1049     auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
1050       APInt KOResult = KnownOne << ShiftAmt;
1051       if (NSW && KnownOne.isSignBitSet())
1052         KOResult.setSignBit();
1053       return KOResult;
1054     };
1055 
1056     computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
1057     break;
1058   }
1059   case Instruction::LShr: {
1060     // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
1061     auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
1062       APInt KZResult = KnownZero.lshr(ShiftAmt);
1063       // High bits known zero.
1064       KZResult.setHighBits(ShiftAmt);
1065       return KZResult;
1066     };
1067 
1068     auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
1069       return KnownOne.lshr(ShiftAmt);
1070     };
1071 
1072     computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
1073     break;
1074   }
1075   case Instruction::AShr: {
1076     // (ashr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
1077     auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
1078       return KnownZero.ashr(ShiftAmt);
1079     };
1080 
1081     auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
1082       return KnownOne.ashr(ShiftAmt);
1083     };
1084 
1085     computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
1086     break;
1087   }
1088   case Instruction::Sub: {
1089     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
1090     computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
1091                            Known, Known2, Depth, Q);
1092     break;
1093   }
1094   case Instruction::Add: {
1095     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
1096     computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
1097                            Known, Known2, Depth, Q);
1098     break;
1099   }
1100   case Instruction::SRem:
1101     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
1102       APInt RA = Rem->getValue().abs();
1103       if (RA.isPowerOf2()) {
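        // For example, srem x, 8 keeps the low 3 bits of x: a non-negative x
        // gives a result in [0, 7] (upper bits zero), while a negative x with
        // a nonzero low part gives a result in [-7, -1] (upper bits one).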
1104         APInt LowBits = RA - 1;
1105         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1106 
1107         // The low bits of the first operand are unchanged by the srem.
1108         Known.Zero = Known2.Zero & LowBits;
1109         Known.One = Known2.One & LowBits;
1110 
1111         // If the first operand is non-negative or has all low bits zero, then
1112         // the upper bits are all zero.
1113         if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
1114           Known.Zero |= ~LowBits;
1115 
1116         // If the first operand is negative and not all low bits are zero, then
1117         // the upper bits are all one.
1118         if (Known2.isNegative() && LowBits.intersects(Known2.One))
1119           Known.One |= ~LowBits;
1120 
1121         assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
1122         break;
1123       }
1124     }
1125 
1126     // The sign bit is the LHS's sign bit, except when the result of the
1127     // remainder is zero.
1128     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1129     // If it's known zero, our sign bit is also zero.
1130     if (Known2.isNonNegative())
1131       Known.makeNonNegative();
1132 
1133     break;
1134   case Instruction::URem: {
1135     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
1136       const APInt &RA = Rem->getValue();
1137       if (RA.isPowerOf2()) {
1138         APInt LowBits = (RA - 1);
1139         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1140         Known.Zero |= ~LowBits;
1141         Known.One &= LowBits;
1142         break;
1143       }
1144     }
1145 
1146     // Since the result is less than or equal to either operand, any leading
1147     // zero bits in either operand must also exist in the result.
1148     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1149     computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1150 
1151     unsigned Leaders =
1152         std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
1153     Known.resetAll();
1154     Known.Zero.setHighBits(Leaders);
1155     break;
1156   }
1157 
1158   case Instruction::Alloca: {
1159     const AllocaInst *AI = cast<AllocaInst>(I);
1160     unsigned Align = AI->getAlignment();
1161     if (Align == 0)
1162       Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());
1163 
1164     if (Align > 0)
1165       Known.Zero.setLowBits(countTrailingZeros(Align));
1166     break;
1167   }
1168   case Instruction::GetElementPtr: {
1169     // Analyze all of the subscripts of this getelementptr instruction
1170     // to determine if we can prove known low zero bits.
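    // For example, a GEP whose base pointer is known to be 16-byte aligned
    // and whose only variable index selects into an array of i64 (8-byte
    // elements) yields a result with at least 3 known trailing zero bits.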
1171     KnownBits LocalKnown(BitWidth);
1172     computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
1173     unsigned TrailZ = LocalKnown.countMinTrailingZeros();
1174 
1175     gep_type_iterator GTI = gep_type_begin(I);
1176     for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1177       Value *Index = I->getOperand(i);
1178       if (StructType *STy = GTI.getStructTypeOrNull()) {
1179         // Handle struct member offset arithmetic.
1180 
        // Handle the case when the index is a vector zeroinitializer.
1182         Constant *CIndex = cast<Constant>(Index);
1183         if (CIndex->isZeroValue())
1184           continue;
1185 
1186         if (CIndex->getType()->isVectorTy())
1187           Index = CIndex->getSplatValue();
1188 
1189         unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1190         const StructLayout *SL = Q.DL.getStructLayout(STy);
1191         uint64_t Offset = SL->getElementOffset(Idx);
1192         TrailZ = std::min<unsigned>(TrailZ,
1193                                     countTrailingZeros(Offset));
1194       } else {
1195         // Handle array index arithmetic.
1196         Type *IndexedTy = GTI.getIndexedType();
1197         if (!IndexedTy->isSized()) {
1198           TrailZ = 0;
1199           break;
1200         }
1201         unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
1202         uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy);
1203         LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
1204         computeKnownBits(Index, LocalKnown, Depth + 1, Q);
1205         TrailZ = std::min(TrailZ,
1206                           unsigned(countTrailingZeros(TypeSize) +
1207                                    LocalKnown.countMinTrailingZeros()));
1208       }
1209     }
1210 
1211     Known.Zero.setLowBits(TrailZ);
1212     break;
1213   }
1214   case Instruction::PHI: {
1215     const PHINode *P = cast<PHINode>(I);
1216     // Handle the case of a simple two-predecessor recurrence PHI.
1217     // There's a lot more that could theoretically be done here, but
1218     // this is sufficient to catch some interesting cases.
1219     if (P->getNumIncomingValues() == 2) {
1220       for (unsigned i = 0; i != 2; ++i) {
1221         Value *L = P->getIncomingValue(i);
1222         Value *R = P->getIncomingValue(!i);
1223         Operator *LU = dyn_cast<Operator>(L);
1224         if (!LU)
1225           continue;
1226         unsigned Opcode = LU->getOpcode();
1227         // Check for operations that have the property that if
1228         // both their operands have low zero bits, the result
1229         // will have low zero bits.
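        // For example, for i = phi [ 0, %entry ], [ i.next, %loop ] with
        // i.next = add i, 4, both the start value 0 and the step 4 are
        // multiples of 4, so i is known to have at least 2 trailing zero
        // bits.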
1230         if (Opcode == Instruction::Add ||
1231             Opcode == Instruction::Sub ||
1232             Opcode == Instruction::And ||
1233             Opcode == Instruction::Or ||
1234             Opcode == Instruction::Mul) {
1235           Value *LL = LU->getOperand(0);
1236           Value *LR = LU->getOperand(1);
1237           // Find a recurrence.
1238           if (LL == I)
1239             L = LR;
1240           else if (LR == I)
1241             L = LL;
1242           else
1243             break;
1244           // Ok, we have a PHI of the form L op= R. Check for low
1245           // zero bits.
1246           computeKnownBits(R, Known2, Depth + 1, Q);
1247 
1248           // We need to take the minimum number of known bits
1249           KnownBits Known3(Known);
1250           computeKnownBits(L, Known3, Depth + 1, Q);
1251 
1252           Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1253                                          Known3.countMinTrailingZeros()));
1254 
1255           auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
1256           if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
            // If the initial value of the recurrence is nonnegative and we
            // are adding a nonnegative number with nsw, the result can only
            // be nonnegative or a poison value, regardless of the number of
            // times we execute the add in the phi recurrence. If the initial
            // value is negative and we are adding a negative number with nsw,
            // the result can only be negative or a poison value. Similar
            // arguments apply to sub and mul.
1263             //
1264             // (add non-negative, non-negative) --> non-negative
1265             // (add negative, negative) --> negative
1266             if (Opcode == Instruction::Add) {
1267               if (Known2.isNonNegative() && Known3.isNonNegative())
1268                 Known.makeNonNegative();
1269               else if (Known2.isNegative() && Known3.isNegative())
1270                 Known.makeNegative();
1271             }
1272 
1273             // (sub nsw non-negative, negative) --> non-negative
1274             // (sub nsw negative, non-negative) --> negative
1275             else if (Opcode == Instruction::Sub && LL == I) {
1276               if (Known2.isNonNegative() && Known3.isNegative())
1277                 Known.makeNonNegative();
1278               else if (Known2.isNegative() && Known3.isNonNegative())
1279                 Known.makeNegative();
1280             }
1281 
1282             // (mul nsw non-negative, non-negative) --> non-negative
1283             else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
1284                      Known3.isNonNegative())
1285               Known.makeNonNegative();
1286           }
1287 
1288           break;
1289         }
1290       }
1291     }
1292 
1293     // Unreachable blocks may have zero-operand PHI nodes.
1294     if (P->getNumIncomingValues() == 0)
1295       break;
1296 
    // Otherwise combine the incoming values: the bits known for the PHI are
    // the intersection of the bits known for each incoming value (the set of
    // possible values is their union). Take conservative care to avoid
    // excessive recursion.
1299     if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) {
      // Skip if every incoming value refers back to the PHI itself.
1301       if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
1302         break;
1303 
1304       Known.Zero.setAllBits();
1305       Known.One.setAllBits();
1306       for (Value *IncValue : P->incoming_values()) {
1307         // Skip direct self references.
1308         if (IncValue == P) continue;
1309 
1310         Known2 = KnownBits(BitWidth);
1311         // Recurse, but cap the recursion to one level, because we don't
1312         // want to waste time spinning around in loops.
1313         computeKnownBits(IncValue, Known2, MaxDepth - 1, Q);
1314         Known.Zero &= Known2.Zero;
1315         Known.One &= Known2.One;
1316         // If all bits have been ruled out, there's no need to check
1317         // more operands.
1318         if (!Known.Zero && !Known.One)
1319           break;
1320       }
1321     }
1322     break;
1323   }
1324   case Instruction::Call:
1325   case Instruction::Invoke:
1326     // If range metadata is attached to this call, set known bits from that,
1327     // and then intersect with known bits based on other properties of the
1328     // function.
1329     if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
1330       computeKnownBitsFromRangeMetadata(*MD, Known);
1331     if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
1332       computeKnownBits(RV, Known2, Depth + 1, Q);
1333       Known.Zero |= Known2.Zero;
1334       Known.One |= Known2.One;
1335     }
1336     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1337       switch (II->getIntrinsicID()) {
1338       default: break;
1339       case Intrinsic::bitreverse:
1340         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1341         Known.Zero |= Known2.Zero.reverseBits();
1342         Known.One |= Known2.One.reverseBits();
1343         break;
1344       case Intrinsic::bswap:
1345         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1346         Known.Zero |= Known2.Zero.byteSwap();
1347         Known.One |= Known2.One.byteSwap();
1348         break;
1349       case Intrinsic::ctlz: {
1350         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1351         // If we have a known 1, its position is our upper bound.
1352         unsigned PossibleLZ = Known2.One.countLeadingZeros();
        // If this call is undefined for 0, the input must be non-zero, so the
        // result is at most BitWidth - 1.
1354         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1355           PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1356         unsigned LowBits = Log2_32(PossibleLZ)+1;
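        // E.g. if bit 11 is the highest bit known to be one in an i32
        // operand, PossibleLZ is 20, so the result fits in Log2_32(20)+1 = 5
        // bits and the bits above that can be marked known zero.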
1357         Known.Zero.setBitsFrom(LowBits);
1358         break;
1359       }
1360       case Intrinsic::cttz: {
1361         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1362         // If we have a known 1, its position is our upper bound.
1363         unsigned PossibleTZ = Known2.One.countTrailingZeros();
        // If this call is undefined for 0, the input must be non-zero, so the
        // result is at most BitWidth - 1.
1365         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1366           PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1367         unsigned LowBits = Log2_32(PossibleTZ)+1;
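        // E.g. if bit 3 is the lowest bit known to be one, PossibleTZ is 3,
        // so the result fits in Log2_32(3)+1 = 2 bits.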
1368         Known.Zero.setBitsFrom(LowBits);
1369         break;
1370       }
1371       case Intrinsic::ctpop: {
1372         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1373         // We can bound the space the count needs.  Also, bits known to be zero
1374         // can't contribute to the population.
1375         unsigned BitsPossiblySet = Known2.countMaxPopulation();
1376         unsigned LowBits = Log2_32(BitsPossiblySet)+1;
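        // E.g. if 22 bits of an i32 operand are known to be zero, at most 10
        // bits can be set, so the count fits in Log2_32(10)+1 = 4 bits.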
1377         Known.Zero.setBitsFrom(LowBits);
        // TODO: we could bound Known.One using the lower bound on the number
        // of bits that might be set, i.e. Known2.countMinPopulation().
1380         break;
1381       }
1382       case Intrinsic::x86_sse42_crc32_64_64:
1383         Known.Zero.setBitsFrom(32);
1384         break;
1385       }
1386     }
1387     break;
1388   case Instruction::ExtractElement:
    // Look through extract element. At the moment we keep this simple and skip
    // tracking the specific element. But at least we might find information
    // valid for all elements of the vector (for example if the vector is
    // sign-extended, shifted, etc).
1393     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1394     break;
1395   case Instruction::ExtractValue:
1396     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
1397       const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
1398       if (EVI->getNumIndices() != 1) break;
1399       if (EVI->getIndices()[0] == 0) {
1400         switch (II->getIntrinsicID()) {
1401         default: break;
1402         case Intrinsic::uadd_with_overflow:
1403         case Intrinsic::sadd_with_overflow:
1404           computeKnownBitsAddSub(true, II->getArgOperand(0),
1405                                  II->getArgOperand(1), false, Known, Known2,
1406                                  Depth, Q);
1407           break;
1408         case Intrinsic::usub_with_overflow:
1409         case Intrinsic::ssub_with_overflow:
1410           computeKnownBitsAddSub(false, II->getArgOperand(0),
1411                                  II->getArgOperand(1), false, Known, Known2,
1412                                  Depth, Q);
1413           break;
1414         case Intrinsic::umul_with_overflow:
1415         case Intrinsic::smul_with_overflow:
1416           computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
1417                               Known, Known2, Depth, Q);
1418           break;
1419         }
1420       }
1421     }
1422   }
1423 }
1424 
1425 /// Determine which bits of V are known to be either zero or one and return
1426 /// them.
1427 KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
1428   KnownBits Known(getBitWidth(V->getType(), Q.DL));
1429   computeKnownBits(V, Known, Depth, Q);
1430   return Known;
1431 }
1432 
1433 /// Determine which bits of V are known to be either zero or one and return
1434 /// them in the Known bit set.
1435 ///
1436 /// NOTE: we cannot consider 'undef' to be "IsZero" here.  The problem is that
1437 /// we cannot optimize based on the assumption that it is zero without changing
1438 /// it to be an explicit zero.  If we don't change it to zero, other code could
/// be optimized based on the contradictory assumption that it is non-zero.
1440 /// Because instcombine aggressively folds operations with undef args anyway,
1441 /// this won't lose us code quality.
1442 ///
/// This function is defined on values with integer type, values with pointer
/// type, and vectors of integers.  In the case where V is a vector, the known
/// zero and known one values are the same width as the vector element, and
/// the bit is set only if it is true for all of the elements in the vector.
1448 void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
1449                       const Query &Q) {
1450   assert(V && "No Value?");
1451   assert(Depth <= MaxDepth && "Limit Search Depth");
1452   unsigned BitWidth = Known.getBitWidth();
1453 
1454   assert((V->getType()->isIntOrIntVectorTy(BitWidth) ||
1455           V->getType()->isPtrOrPtrVectorTy()) &&
1456          "Not integer or pointer type!");
1457   assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth &&
1458          "V and Known should have same BitWidth");
1459   (void)BitWidth;
1460 
1461   const APInt *C;
1462   if (match(V, m_APInt(C))) {
1463     // We know all of the bits for a scalar constant or a splat vector constant!
1464     Known.One = *C;
1465     Known.Zero = ~Known.One;
1466     return;
1467   }
1468   // Null and aggregate-zero are all-zeros.
1469   if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
1470     Known.setAllZero();
1471     return;
1472   }
1473   // Handle a constant vector by taking the intersection of the known bits of
1474   // each element.
1475   if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
1476     // We know that CDS must be a vector of integers. Take the intersection of
1477     // each element.
1478     Known.Zero.setAllBits(); Known.One.setAllBits();
1479     APInt Elt(BitWidth, 0);
1480     for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1481       Elt = CDS->getElementAsInteger(i);
1482       Known.Zero &= ~Elt;
1483       Known.One &= Elt;
1484     }
1485     return;
1486   }
1487 
1488   if (const auto *CV = dyn_cast<ConstantVector>(V)) {
1489     // We know that CV must be a vector of integers. Take the intersection of
1490     // each element.
1491     Known.Zero.setAllBits(); Known.One.setAllBits();
1492     APInt Elt(BitWidth, 0);
1493     for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1494       Constant *Element = CV->getAggregateElement(i);
1495       auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
1496       if (!ElementCI) {
1497         Known.resetAll();
1498         return;
1499       }
1500       Elt = ElementCI->getValue();
1501       Known.Zero &= ~Elt;
1502       Known.One &= Elt;
1503     }
1504     return;
1505   }
1506 
1507   // Start out not knowing anything.
1508   Known.resetAll();
1509 
1510   // We can't imply anything about undefs.
1511   if (isa<UndefValue>(V))
1512     return;
1513 
1514   // There's no point in looking through other users of ConstantData for
1515   // assumptions.  Confirm that we've handled them all.
1516   assert(!isa<ConstantData>(V) && "Unhandled constant data!");
1517 
1518   // Limit search depth.
1519   // All recursive calls that increase depth must come after this.
1520   if (Depth == MaxDepth)
1521     return;
1522 
1523   // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
1524   // the bits of its aliasee.
1525   if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1526     if (!GA->isInterposable())
1527       computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
1528     return;
1529   }
1530 
1531   if (const Operator *I = dyn_cast<Operator>(V))
1532     computeKnownBitsFromOperator(I, Known, Depth, Q);
1533 
1534   // Aligned pointers have trailing zeros - refine Known.Zero set
1535   if (V->getType()->isPointerTy()) {
1536     unsigned Align = V->getPointerAlignment(Q.DL);
1537     if (Align)
1538       Known.Zero.setLowBits(countTrailingZeros(Align));
1539   }
1540 
1541   // computeKnownBitsFromAssume strictly refines Known.
1542   // Therefore, we run them after computeKnownBitsFromOperator.
1543 
1544   // Check whether a nearby assume intrinsic can determine some known bits.
1545   computeKnownBitsFromAssume(V, Known, Depth, Q);
1546 
1547   assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
1548 }
1549 
/// Return true if the given value is known to have exactly one bit set when
/// defined (or, when OrZero is set, to be a power of two or zero). For
/// vectors, return true if every element is known to be a power of two when
/// defined. Supports values with integer or pointer types and vectors of
/// integers.
1554 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
1555                             const Query &Q) {
1556   if (const Constant *C = dyn_cast<Constant>(V)) {
1557     if (C->isNullValue())
1558       return OrZero;
1559 
1560     const APInt *ConstIntOrConstSplatInt;
1561     if (match(C, m_APInt(ConstIntOrConstSplatInt)))
1562       return ConstIntOrConstSplatInt->isPowerOf2();
1563   }
1564 
1565   // 1 << X is clearly a power of two if the one is not shifted off the end.  If
1566   // it is shifted off the end then the result is undefined.
1567   if (match(V, m_Shl(m_One(), m_Value())))
1568     return true;
1569 
1570   // (signmask) >>l X is clearly a power of two if the one is not shifted off
1571   // the bottom.  If it is shifted off the bottom then the result is undefined.
1572   if (match(V, m_LShr(m_SignMask(), m_Value())))
1573     return true;
1574 
1575   // The remaining tests are all recursive, so bail out if we hit the limit.
1576   if (Depth++ == MaxDepth)
1577     return false;
1578 
1579   Value *X = nullptr, *Y = nullptr;
1580   // A shift left or a logical shift right of a power of two is a power of two
1581   // or zero.
1582   if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
1583                  match(V, m_LShr(m_Value(X), m_Value()))))
1584     return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);
1585 
1586   if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
1587     return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
1588 
1589   if (const SelectInst *SI = dyn_cast<SelectInst>(V))
1590     return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
1591            isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
1592 
1593   if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
1594     // A power of two and'd with anything is a power of two or zero.
1595     if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) ||
1596         isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q))
1597       return true;
1598     // X & (-X) is always a power of two or zero.
1599     if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
1600       return true;
1601     return false;
1602   }
1603 
1604   // Adding a power-of-two or zero to the same power-of-two or zero yields
1605   // either the original power-of-two, a larger power-of-two or zero.
1606   if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
1607     const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
1608     if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
1609       if (match(X, m_And(m_Specific(Y), m_Value())) ||
1610           match(X, m_And(m_Value(), m_Specific(Y))))
1611         if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
1612           return true;
1613       if (match(Y, m_And(m_Specific(X), m_Value())) ||
1614           match(Y, m_And(m_Value(), m_Specific(X))))
1615         if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
1616           return true;
1617 
1618       unsigned BitWidth = V->getType()->getScalarSizeInBits();
1619       KnownBits LHSBits(BitWidth);
1620       computeKnownBits(X, LHSBits, Depth, Q);
1621 
1622       KnownBits RHSBits(BitWidth);
1623       computeKnownBits(Y, RHSBits, Depth, Q);
1624       // If i8 V is a power of two or zero:
1625       //  ZeroBits: 1 1 1 0 1 1 1 1
1626       // ~ZeroBits: 0 0 0 1 0 0 0 0
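      // I.e. the only bit that may be set in either operand is the single
      // cleared position above, so each operand is either zero or that power
      // of two and their sum is zero, that power of two, or twice it.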
1627       if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
1628         // If OrZero isn't set, we cannot give back a zero result.
1629         // Make sure either the LHS or RHS has a bit set.
1630         if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
1631           return true;
1632     }
1633   }
1634 
1635   // An exact divide or right shift can only shift off zero bits, so the result
1636   // is a power of two only if the first operand is a power of two and not
1637   // copying a sign bit (sdiv int_min, 2).
1638   if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
1639       match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
1640     return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
1641                                   Depth, Q);
1642   }
1643 
1644   return false;
1645 }
1646 
1647 /// \brief Test whether a GEP's result is known to be non-null.
1648 ///
1649 /// Uses properties inherent in a GEP to try to determine whether it is known
1650 /// to be non-null.
1651 ///
1652 /// Currently this routine does not support vector GEPs.
1653 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
1654                               const Query &Q) {
1655   if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
1656     return false;
1657 
1658   // FIXME: Support vector-GEPs.
1659   assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
1660 
1661   // If the base pointer is non-null, we cannot walk to a null address with an
1662   // inbounds GEP in address space zero.
1663   if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q))
1664     return true;
1665 
1666   // Walk the GEP operands and see if any operand introduces a non-zero offset.
1667   // If so, then the GEP cannot produce a null pointer, as doing so would
1668   // inherently violate the inbounds contract within address space zero.
1669   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
1670        GTI != GTE; ++GTI) {
1671     // Struct types are easy -- they must always be indexed by a constant.
1672     if (StructType *STy = GTI.getStructTypeOrNull()) {
1673       ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
1674       unsigned ElementIdx = OpC->getZExtValue();
1675       const StructLayout *SL = Q.DL.getStructLayout(STy);
1676       uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
1677       if (ElementOffset > 0)
1678         return true;
1679       continue;
1680     }
1681 
1682     // If we have a zero-sized type, the index doesn't matter. Keep looping.
1683     if (Q.DL.getTypeAllocSize(GTI.getIndexedType()) == 0)
1684       continue;
1685 
1686     // Fast path the constant operand case both for efficiency and so we don't
1687     // increment Depth when just zipping down an all-constant GEP.
1688     if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
1689       if (!OpC->isZero())
1690         return true;
1691       continue;
1692     }
1693 
1694     // We post-increment Depth here because while isKnownNonZero increments it
1695     // as well, when we pop back up that increment won't persist. We don't want
1696     // to recurse 10k times just because we have 10k GEP operands. We don't
    // bail out completely because we want to handle constant GEPs regardless
1698     // of depth.
1699     if (Depth++ >= MaxDepth)
1700       continue;
1701 
1702     if (isKnownNonZero(GTI.getOperand(), Depth, Q))
1703       return true;
1704   }
1705 
1706   return false;
1707 }
1708 
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never equal to 'Value'?
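/// For example, range metadata describing the ranges [1, 8) and [12, 16)
/// excludes 0, so isKnownNonZero can rely on it below.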
1712 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
1713   const unsigned NumRanges = Ranges->getNumOperands() / 2;
1714   assert(NumRanges >= 1);
1715   for (unsigned i = 0; i < NumRanges; ++i) {
1716     ConstantInt *Lower =
1717         mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
1718     ConstantInt *Upper =
1719         mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
1720     ConstantRange Range(Lower->getValue(), Upper->getValue());
1721     if (Range.contains(Value))
1722       return false;
1723   }
1724   return true;
1725 }
1726 
1727 /// Return true if the given value is known to be non-zero when defined. For
1728 /// vectors, return true if every element is known to be non-zero when
1729 /// defined. For pointers, if the context instruction and dominator tree are
1730 /// specified, perform context-sensitive analysis and return true if the
1731 /// pointer couldn't possibly be null at the specified instruction.
1732 /// Supports values with integer or pointer type and vectors of integers.
1733 bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
1734   if (auto *C = dyn_cast<Constant>(V)) {
1735     if (C->isNullValue())
1736       return false;
1737     if (isa<ConstantInt>(C))
1738       // Must be non-zero due to null test above.
1739       return true;
1740 
1741     // For constant vectors, check that all elements are undefined or known
1742     // non-zero to determine that the whole vector is known non-zero.
1743     if (auto *VecTy = dyn_cast<VectorType>(C->getType())) {
1744       for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
1745         Constant *Elt = C->getAggregateElement(i);
1746         if (!Elt || Elt->isNullValue())
1747           return false;
1748         if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt))
1749           return false;
1750       }
1751       return true;
1752     }
1753 
1754     return false;
1755   }
1756 
1757   if (auto *I = dyn_cast<Instruction>(V)) {
1758     if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) {
1759       // If the possible ranges don't contain zero, then the value is
1760       // definitely non-zero.
1761       if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
1762         const APInt ZeroValue(Ty->getBitWidth(), 0);
1763         if (rangeMetadataExcludesValue(Ranges, ZeroValue))
1764           return true;
1765       }
1766     }
1767   }
1768 
1769   // The remaining tests are all recursive, so bail out if we hit the limit.
1770   if (Depth++ >= MaxDepth)
1771     return false;
1772 
1773   // Check for pointer simplifications.
1774   if (V->getType()->isPointerTy()) {
1775     if (isKnownNonNullAt(V, Q.CxtI, Q.DT))
1776       return true;
1777     if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
1778       if (isGEPKnownNonNull(GEP, Depth, Q))
1779         return true;
1780   }
1781 
1782   unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
1783 
1784   // X | Y != 0 if X != 0 or Y != 0.
1785   Value *X = nullptr, *Y = nullptr;
1786   if (match(V, m_Or(m_Value(X), m_Value(Y))))
1787     return isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q);
1788 
1789   // ext X != 0 if X != 0.
1790   if (isa<SExtInst>(V) || isa<ZExtInst>(V))
1791     return isKnownNonZero(cast<Instruction>(V)->getOperand(0), Depth, Q);
1792 
1793   // shl X, Y != 0 if X is odd.  Note that the value of the shift is undefined
1794   // if the lowest bit is shifted off the end.
1795   if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
1796     // shl nuw can't remove any non-zero bits.
1797     const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
1798     if (BO->hasNoUnsignedWrap())
1799       return isKnownNonZero(X, Depth, Q);
1800 
1801     KnownBits Known(BitWidth);
1802     computeKnownBits(X, Known, Depth, Q);
1803     if (Known.One[0])
1804       return true;
1805   }
1806   // shr X, Y != 0 if X is negative.  Note that the value of the shift is not
1807   // defined if the sign bit is shifted off the end.
1808   else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
1809     // shr exact can only shift out zero bits.
1810     const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
1811     if (BO->isExact())
1812       return isKnownNonZero(X, Depth, Q);
1813 
1814     KnownBits Known = computeKnownBits(X, Depth, Q);
1815     if (Known.isNegative())
1816       return true;
1817 
1818     // If the shifter operand is a constant, and all of the bits shifted
1819     // out are known to be zero, and X is known non-zero then at least one
1820     // non-zero bit must remain.
1821     if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
1822       auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
1823       // Is there a known one in the portion not shifted out?
1824       if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal)
1825         return true;
1826       // Are all the bits to be shifted out known zero?
1827       if (Known.countMinTrailingZeros() >= ShiftVal)
1828         return isKnownNonZero(X, Depth, Q);
1829     }
1830   }
1831   // div exact can only produce a zero if the dividend is zero.
1832   else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
1833     return isKnownNonZero(X, Depth, Q);
1834   }
1835   // X + Y.
1836   else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
1837     KnownBits XKnown = computeKnownBits(X, Depth, Q);
1838     KnownBits YKnown = computeKnownBits(Y, Depth, Q);
1839 
1840     // If X and Y are both non-negative (as signed values) then their sum is not
1841     // zero unless both X and Y are zero.
1842     if (XKnown.isNonNegative() && YKnown.isNonNegative())
1843       if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q))
1844         return true;
1845 
1846     // If X and Y are both negative (as signed values) then their sum is not
1847     // zero unless both X and Y equal INT_MIN.
1848     if (XKnown.isNegative() && YKnown.isNegative()) {
1849       APInt Mask = APInt::getSignedMaxValue(BitWidth);
1850       // The sign bit of X is set.  If some other bit is set then X is not equal
1851       // to INT_MIN.
1852       if (XKnown.One.intersects(Mask))
1853         return true;
1854       // The sign bit of Y is set.  If some other bit is set then Y is not equal
1855       // to INT_MIN.
1856       if (YKnown.One.intersects(Mask))
1857         return true;
1858     }
1859 
1860     // The sum of a non-negative number and a power of two is not zero.
1861     if (XKnown.isNonNegative() &&
1862         isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
1863       return true;
1864     if (YKnown.isNonNegative() &&
1865         isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
1866       return true;
1867   }
1868   // X * Y.
1869   else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
1870     const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
1871     // If X and Y are non-zero then so is X * Y as long as the multiplication
1872     // does not overflow.
1873     if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
1874         isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q))
1875       return true;
1876   }
1877   // (C ? X : Y) != 0 if X != 0 and Y != 0.
1878   else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
1879     if (isKnownNonZero(SI->getTrueValue(), Depth, Q) &&
1880         isKnownNonZero(SI->getFalseValue(), Depth, Q))
1881       return true;
1882   }
1883   // PHI
1884   else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
1885     // Try and detect a recurrence that monotonically increases from a
1886     // starting value, as these are common as induction variables.
1887     if (PN->getNumIncomingValues() == 2) {
1888       Value *Start = PN->getIncomingValue(0);
1889       Value *Induction = PN->getIncomingValue(1);
1890       if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start))
1891         std::swap(Start, Induction);
1892       if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
1893         if (!C->isZero() && !C->isNegative()) {
1894           ConstantInt *X;
1895           if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
1896                match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
1897               !X->isNegative())
1898             return true;
1899         }
1900       }
1901     }
1902     // Check if all incoming values are non-zero constant.
1903     bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) {
1904       return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero();
1905     });
1906     if (AllNonZeroConstants)
1907       return true;
1908   }
1909 
1910   KnownBits Known(BitWidth);
1911   computeKnownBits(V, Known, Depth, Q);
1912   return Known.One != 0;
1913 }
1914 
/// Return true if V1 == V2 + X, where X is known non-zero.
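/// For example, if V1 is 'add i32 %v2, 5' and V2 is '%v2', this returns true
/// because the two values always differ by the non-zero constant 5.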
1916 static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
1917   const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
1918   if (!BO || BO->getOpcode() != Instruction::Add)
1919     return false;
1920   Value *Op = nullptr;
1921   if (V2 == BO->getOperand(0))
1922     Op = BO->getOperand(1);
1923   else if (V2 == BO->getOperand(1))
1924     Op = BO->getOperand(0);
1925   else
1926     return false;
1927   return isKnownNonZero(Op, 0, Q);
1928 }
1929 
1930 /// Return true if it is known that V1 != V2.
1931 static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
1932   if (V1 == V2)
1933     return false;
1934   if (V1->getType() != V2->getType())
1935     // We can't look through casts yet.
1936     return false;
1937   if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q))
1938     return true;
1939 
1940   if (V1->getType()->isIntOrIntVectorTy()) {
1941     // Are any known bits in V1 contradictory to known bits in V2? If V1
1942     // has a known zero where V2 has a known one, they must not be equal.
1943     KnownBits Known1 = computeKnownBits(V1, 0, Q);
1944     KnownBits Known2 = computeKnownBits(V2, 0, Q);
1945 
1946     if (Known1.Zero.intersects(Known2.One) ||
1947         Known2.Zero.intersects(Known1.One))
1948       return true;
1949   }
1950   return false;
1951 }
1952 
1953 /// Return true if 'V & Mask' is known to be zero.  We use this predicate to
1954 /// simplify operations downstream. Mask is known to be zero for bits that V
1955 /// cannot have.
1956 ///
1957 /// This function is defined on values with integer type, values with pointer
1958 /// type, and vectors of integers.  In the case
1959 /// where V is a vector, the mask, known zero, and known one values are the
1960 /// same width as the vector element, and the bit is set only if it is true
1961 /// for all of the elements in the vector.
1962 bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
1963                        const Query &Q) {
1964   KnownBits Known(Mask.getBitWidth());
1965   computeKnownBits(V, Known, Depth, Q);
1966   return Mask.isSubsetOf(Known.Zero);
1967 }
1968 
1969 /// For vector constants, loop over the elements and find the constant with the
1970 /// minimum number of sign bits. Return 0 if the value is not a vector constant
1971 /// or if any element was not analyzed; otherwise, return the count for the
1972 /// element with the minimum number of sign bits.
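/// For example, <2 x i8> <i8 -2, i8 3> has elements with 7 and 6 sign bits
/// respectively, so this returns 6.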
1973 static unsigned computeNumSignBitsVectorConstant(const Value *V,
1974                                                  unsigned TyBits) {
1975   const auto *CV = dyn_cast<Constant>(V);
1976   if (!CV || !CV->getType()->isVectorTy())
1977     return 0;
1978 
1979   unsigned MinSignBits = TyBits;
1980   unsigned NumElts = CV->getType()->getVectorNumElements();
1981   for (unsigned i = 0; i != NumElts; ++i) {
1982     // If we find a non-ConstantInt, bail out.
1983     auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
1984     if (!Elt)
1985       return 0;
1986 
1987     // If the sign bit is 1, flip the bits, so we always count leading zeros.
1988     APInt EltVal = Elt->getValue();
1989     if (EltVal.isNegative())
1990       EltVal = ~EltVal;
1991     MinSignBits = std::min(MinSignBits, EltVal.countLeadingZeros());
1992   }
1993 
1994   return MinSignBits;
1995 }
1996 
1997 static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
1998                                        const Query &Q);
1999 
2000 static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
2001                                    const Query &Q) {
2002   unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q);
2003   assert(Result > 0 && "At least one sign bit needs to be present!");
2004   return Result;
2005 }
2006 
2007 /// Return the number of times the sign bit of the register is replicated into
2008 /// the other bits. We know that at least 1 bit is always equal to the sign bit
2009 /// (itself), but other cases can give us information. For example, immediately
2010 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each
2011 /// other, so we return 3. For vectors, return the number of sign bits for the
/// vector element with the minimum number of known sign bits.
2013 static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
2014                                        const Query &Q) {
2015 
2016   // We return the minimum number of sign bits that are guaranteed to be present
2017   // in V, so for undef we have to conservatively return 1.  We don't have the
2018   // same behavior for poison though -- that's a FIXME today.
2019 
2020   unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType());
2021   unsigned Tmp, Tmp2;
2022   unsigned FirstAnswer = 1;
2023 
2024   // Note that ConstantInt is handled by the general computeKnownBits case
2025   // below.
2026 
2027   if (Depth == MaxDepth)
2028     return 1;  // Limit search depth.
2029 
2030   const Operator *U = dyn_cast<Operator>(V);
2031   switch (Operator::getOpcode(V)) {
2032   default: break;
2033   case Instruction::SExt:
2034     Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
2035     return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
2036 
2037   case Instruction::SDiv: {
2038     const APInt *Denominator;
2039     // sdiv X, C -> adds log(C) sign bits.
2040     if (match(U->getOperand(1), m_APInt(Denominator))) {
2041 
2042       // Ignore non-positive denominator.
2043       if (!Denominator->isStrictlyPositive())
2044         break;
2045 
2046       // Calculate the incoming numerator bits.
2047       unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2048 
2049       // Add floor(log(C)) bits to the numerator bits.
2050       return std::min(TyBits, NumBits + Denominator->logBase2());
2051     }
2052     break;
2053   }
2054 
2055   case Instruction::SRem: {
2056     const APInt *Denominator;
2057     // srem X, C -> we know that the result is within [-C+1,C) when C is a
    // positive constant.  This lets us put a lower bound on the number of sign
2059     // bits.
2060     if (match(U->getOperand(1), m_APInt(Denominator))) {
2061 
2062       // Ignore non-positive denominator.
2063       if (!Denominator->isStrictlyPositive())
2064         break;
2065 
2066       // Calculate the incoming numerator bits. SRem by a positive constant
2067       // can't lower the number of sign bits.
2068       unsigned NumrBits =
2069           ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2070 
2071       // Calculate the leading sign bit constraints by examining the
2072       // denominator.  Given that the denominator is positive, there are two
2073       // cases:
2074       //
2075       //  1. the numerator is positive.  The result range is [0,C) and [0,C) u<
2076       //     (1 << ceilLogBase2(C)).
2077       //
2078       //  2. the numerator is negative.  Then the result range is (-C,0] and
2079       //     integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
2080       //
2081       // Thus a lower bound on the number of sign bits is `TyBits -
2082       // ceilLogBase2(C)`.
2083 
2084       unsigned ResBits = TyBits - Denominator->ceilLogBase2();
2085       return std::max(NumrBits, ResBits);
2086     }
2087     break;
2088   }
2089 
2090   case Instruction::AShr: {
2091     Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2092     // ashr X, C   -> adds C sign bits.  Vectors too.
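    // E.g. if X is known to have 3 sign bits and C is 4, the result has
    // min(3 + 4, TyBits) sign bits.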
2093     const APInt *ShAmt;
2094     if (match(U->getOperand(1), m_APInt(ShAmt))) {
2095       unsigned ShAmtLimited = ShAmt->getZExtValue();
2096       if (ShAmtLimited >= TyBits)
2097         break;  // Bad shift.
2098       Tmp += ShAmtLimited;
2099       if (Tmp > TyBits) Tmp = TyBits;
2100     }
2101     return Tmp;
2102   }
2103   case Instruction::Shl: {
2104     const APInt *ShAmt;
2105     if (match(U->getOperand(1), m_APInt(ShAmt))) {
2106       // shl destroys sign bits.
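      // E.g. shifting a value with 10 known sign bits left by 3 leaves
      // 10 - 3 = 7 of them.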
2107       Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2108       Tmp2 = ShAmt->getZExtValue();
2109       if (Tmp2 >= TyBits ||      // Bad shift.
2110           Tmp2 >= Tmp) break;    // Shifted all sign bits out.
2111       return Tmp - Tmp2;
2112     }
2113     break;
2114   }
2115   case Instruction::And:
2116   case Instruction::Or:
2117   case Instruction::Xor:    // NOT is handled here.
2118     // Logical binary ops preserve the number of sign bits at the worst.
2119     Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2120     if (Tmp != 1) {
2121       Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2122       FirstAnswer = std::min(Tmp, Tmp2);
2123       // We computed what we know about the sign bits as our first
2124       // answer. Now proceed to the generic code that uses
2125       // computeKnownBits, and pick whichever answer is better.
2126     }
2127     break;
2128 
2129   case Instruction::Select:
2130     Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2131     if (Tmp == 1) return 1;  // Early out.
2132     Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
2133     return std::min(Tmp, Tmp2);
2134 
2135   case Instruction::Add:
2136     // Add can have at most one carry bit.  Thus we know that the output
2137     // is, at worst, one more bit than the inputs.
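    // E.g. if both operands are known to have 8 sign bits, the sum has at
    // least min(8, 8) - 1 = 7 sign bits.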
2138     Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2139     if (Tmp == 1) return 1;  // Early out.
2140 
2141     // Special case decrementing a value (ADD X, -1):
2142     if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
2143       if (CRHS->isAllOnesValue()) {
2144         KnownBits Known(TyBits);
2145         computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
2146 
2147         // If the input is known to be 0 or 1, the output is 0/-1, which is all
2148         // sign bits set.
2149         if ((Known.Zero | 1).isAllOnesValue())
2150           return TyBits;
2151 
2152         // If we are subtracting one from a positive number, there is no carry
2153         // out of the result.
2154         if (Known.isNonNegative())
2155           return Tmp;
2156       }
2157 
2158     Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2159     if (Tmp2 == 1) return 1;
2160     return std::min(Tmp, Tmp2)-1;
2161 
2162   case Instruction::Sub:
2163     Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2164     if (Tmp2 == 1) return 1;
2165 
2166     // Handle NEG.
2167     if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
2168       if (CLHS->isNullValue()) {
2169         KnownBits Known(TyBits);
2170         computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
2171         // If the input is known to be 0 or 1, the output is 0/-1, which is all
2172         // sign bits set.
2173         if ((Known.Zero | 1).isAllOnesValue())
2174           return TyBits;
2175 
2176         // If the input is known to be positive (the sign bit is known clear),
2177         // the output of the NEG has the same number of sign bits as the input.
2178         if (Known.isNonNegative())
2179           return Tmp2;
2180 
2181         // Otherwise, we treat this like a SUB.
2182       }
2183 
2184     // Sub can have at most one carry bit.  Thus we know that the output
2185     // is, at worst, one more bit than the inputs.
2186     Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2187     if (Tmp == 1) return 1;  // Early out.
2188     return std::min(Tmp, Tmp2)-1;
2189 
2190   case Instruction::PHI: {
2191     const PHINode *PN = cast<PHINode>(U);
2192     unsigned NumIncomingValues = PN->getNumIncomingValues();
2193     // Don't analyze large in-degree PHIs.
2194     if (NumIncomingValues > 4) break;
2195     // Unreachable blocks may have zero-operand PHI nodes.
2196     if (NumIncomingValues == 0) break;
2197 
2198     // Take the minimum of all incoming values.  This can't infinitely loop
2199     // because of our depth threshold.
2200     Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q);
2201     for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
2202       if (Tmp == 1) return Tmp;
2203       Tmp = std::min(
2204           Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q));
2205     }
2206     return Tmp;
2207   }
2208 
2209   case Instruction::Trunc:
2210     // FIXME: it's tricky to do anything useful for this, but it is an important
2211     // case for targets like X86.
2212     break;
2213 
2214   case Instruction::ExtractElement:
    // Look through extract element. At the moment we keep this simple and skip
    // tracking the specific element. But at least we might find information
    // valid for all elements of the vector (for example if the vector is
    // sign-extended, shifted, etc).
2219     return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2220   }
2221 
2222   // Finally, if we can prove that the top bits of the result are 0's or 1's,
2223   // use this information.
2224 
2225   // If we can examine all elements of a vector constant successfully, we're
2226   // done (we can't do any better than that). If not, keep trying.
2227   if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits))
2228     return VecSignBits;
2229 
2230   KnownBits Known(TyBits);
2231   computeKnownBits(V, Known, Depth, Q);
2232 
2233   // If we know that the sign bit is either zero or one, determine the number of
2234   // identical bits in the top of the input value.
2235   return std::max(FirstAnswer, Known.countMinSignBits());
2236 }
2237 
/// This function computes the integer multiple of Base that equals V. If
/// successful, it returns true and stores the multiple in Multiple; otherwise
/// it returns false. It looks through SExt instructions only if
/// LookThroughSExt is true.
2242 bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
2243                            bool LookThroughSExt, unsigned Depth) {
2244   const unsigned MaxDepth = 6;
2245 
2246   assert(V && "No Value?");
2247   assert(Depth <= MaxDepth && "Limit Search Depth");
  assert(V->getType()->isIntegerTy() && "Not integer type!");
2249 
2250   Type *T = V->getType();
2251 
2252   ConstantInt *CI = dyn_cast<ConstantInt>(V);
2253 
2254   if (Base == 0)
2255     return false;
2256 
2257   if (Base == 1) {
2258     Multiple = V;
2259     return true;
2260   }
2261 
2262   ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
2263   Constant *BaseVal = ConstantInt::get(T, Base);
2264   if (CO && CO == BaseVal) {
2265     // Multiple is 1.
2266     Multiple = ConstantInt::get(T, 1);
2267     return true;
2268   }
2269 
2270   if (CI && CI->getZExtValue() % Base == 0) {
2271     Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
2272     return true;
2273   }
2274 
2275   if (Depth == MaxDepth) return false;  // Limit search depth.
2276 
2277   Operator *I = dyn_cast<Operator>(V);
2278   if (!I) return false;
2279 
2280   switch (I->getOpcode()) {
2281   default: break;
2282   case Instruction::SExt:
2283     if (!LookThroughSExt) return false;
2284     // otherwise fall through to ZExt
2285     LLVM_FALLTHROUGH;
2286   case Instruction::ZExt:
2287     return ComputeMultiple(I->getOperand(0), Base, Multiple,
2288                            LookThroughSExt, Depth+1);
2289   case Instruction::Shl:
2290   case Instruction::Mul: {
2291     Value *Op0 = I->getOperand(0);
2292     Value *Op1 = I->getOperand(1);
2293 
2294     if (I->getOpcode() == Instruction::Shl) {
2295       ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
2296       if (!Op1CI) return false;
2297       // Turn Op0 << Op1 into Op0 * 2^Op1
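      // E.g. for V = (X << 3) and Base == 8, Op1 becomes the constant 8;
      // since 8 is a multiple of Base, the code below can conclude that
      // Multiple is X.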
2298       APInt Op1Int = Op1CI->getValue();
2299       uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
2300       APInt API(Op1Int.getBitWidth(), 0);
2301       API.setBit(BitToSet);
2302       Op1 = ConstantInt::get(V->getContext(), API);
2303     }
2304 
2305     Value *Mul0 = nullptr;
2306     if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
2307       if (Constant *Op1C = dyn_cast<Constant>(Op1))
2308         if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
2309           if (Op1C->getType()->getPrimitiveSizeInBits() <
2310               MulC->getType()->getPrimitiveSizeInBits())
2311             Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
2312           if (Op1C->getType()->getPrimitiveSizeInBits() >
2313               MulC->getType()->getPrimitiveSizeInBits())
2314             MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
2315 
2316           // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
2317           Multiple = ConstantExpr::getMul(MulC, Op1C);
2318           return true;
2319         }
2320 
2321       if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
2322         if (Mul0CI->getValue() == 1) {
2323           // V == Base * Op1, so return Op1
2324           Multiple = Op1;
2325           return true;
2326         }
2327     }
2328 
2329     Value *Mul1 = nullptr;
2330     if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
2331       if (Constant *Op0C = dyn_cast<Constant>(Op0))
2332         if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
2333           if (Op0C->getType()->getPrimitiveSizeInBits() <
2334               MulC->getType()->getPrimitiveSizeInBits())
2335             Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
2336           if (Op0C->getType()->getPrimitiveSizeInBits() >
2337               MulC->getType()->getPrimitiveSizeInBits())
2338             MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
2339 
2340           // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
2341           Multiple = ConstantExpr::getMul(MulC, Op0C);
2342           return true;
2343         }
2344 
2345       if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
2346         if (Mul1CI->getValue() == 1) {
2347           // V == Base * Op0, so return Op0
2348           Multiple = Op0;
2349           return true;
2350         }
2351     }
2352   }
2353   }
2354 
2355   // We could not determine if V is a multiple of Base.
2356   return false;
2357 }
2358 
2359 Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
2360                                             const TargetLibraryInfo *TLI) {
2361   const Function *F = ICS.getCalledFunction();
2362   if (!F)
2363     return Intrinsic::not_intrinsic;
2364 
2365   if (F->isIntrinsic())
2366     return F->getIntrinsicID();
2367 
2368   if (!TLI)
2369     return Intrinsic::not_intrinsic;
2370 
2371   LibFunc Func;
  // We are going to make assumptions about the semantics of these functions,
  // so check that the target knows the function is available in this
  // environment and that it does not have local linkage.
2375   if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(*F, Func))
2376     return Intrinsic::not_intrinsic;
2377 
2378   if (!ICS.onlyReadsMemory())
2379     return Intrinsic::not_intrinsic;
2380 
2381   // Otherwise check if we have a call to a function that can be turned into a
2382   // vector intrinsic.
2383   switch (Func) {
2384   default:
2385     break;
2386   case LibFunc_sin:
2387   case LibFunc_sinf:
2388   case LibFunc_sinl:
2389     return Intrinsic::sin;
2390   case LibFunc_cos:
2391   case LibFunc_cosf:
2392   case LibFunc_cosl:
2393     return Intrinsic::cos;
2394   case LibFunc_exp:
2395   case LibFunc_expf:
2396   case LibFunc_expl:
2397     return Intrinsic::exp;
2398   case LibFunc_exp2:
2399   case LibFunc_exp2f:
2400   case LibFunc_exp2l:
2401     return Intrinsic::exp2;
2402   case LibFunc_log:
2403   case LibFunc_logf:
2404   case LibFunc_logl:
2405     return Intrinsic::log;
2406   case LibFunc_log10:
2407   case LibFunc_log10f:
2408   case LibFunc_log10l:
2409     return Intrinsic::log10;
2410   case LibFunc_log2:
2411   case LibFunc_log2f:
2412   case LibFunc_log2l:
2413     return Intrinsic::log2;
2414   case LibFunc_fabs:
2415   case LibFunc_fabsf:
2416   case LibFunc_fabsl:
2417     return Intrinsic::fabs;
2418   case LibFunc_fmin:
2419   case LibFunc_fminf:
2420   case LibFunc_fminl:
2421     return Intrinsic::minnum;
2422   case LibFunc_fmax:
2423   case LibFunc_fmaxf:
2424   case LibFunc_fmaxl:
2425     return Intrinsic::maxnum;
2426   case LibFunc_copysign:
2427   case LibFunc_copysignf:
2428   case LibFunc_copysignl:
2429     return Intrinsic::copysign;
2430   case LibFunc_floor:
2431   case LibFunc_floorf:
2432   case LibFunc_floorl:
2433     return Intrinsic::floor;
2434   case LibFunc_ceil:
2435   case LibFunc_ceilf:
2436   case LibFunc_ceill:
2437     return Intrinsic::ceil;
2438   case LibFunc_trunc:
2439   case LibFunc_truncf:
2440   case LibFunc_truncl:
2441     return Intrinsic::trunc;
2442   case LibFunc_rint:
2443   case LibFunc_rintf:
2444   case LibFunc_rintl:
2445     return Intrinsic::rint;
2446   case LibFunc_nearbyint:
2447   case LibFunc_nearbyintf:
2448   case LibFunc_nearbyintl:
2449     return Intrinsic::nearbyint;
2450   case LibFunc_round:
2451   case LibFunc_roundf:
2452   case LibFunc_roundl:
2453     return Intrinsic::round;
2454   case LibFunc_pow:
2455   case LibFunc_powf:
2456   case LibFunc_powl:
2457     return Intrinsic::pow;
2458   case LibFunc_sqrt:
2459   case LibFunc_sqrtf:
2460   case LibFunc_sqrtl:
2461     if (ICS->hasNoNaNs())
2462       return Intrinsic::sqrt;
2463     return Intrinsic::not_intrinsic;
2464   }
2465 
2466   return Intrinsic::not_intrinsic;
2467 }
2468 
2469 /// Return true if we can prove that the specified FP value is never equal to
2470 /// -0.0.
2471 ///
2472 /// NOTE: this function will need to be revisited when we support non-default
2473 /// rounding modes!
2474 ///
2475 bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
2476                                 unsigned Depth) {
2477   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
2478     return !CFP->getValueAPF().isNegZero();
2479 
2480   if (Depth == MaxDepth)
2481     return false;  // Limit search depth.
2482 
2483   const Operator *I = dyn_cast<Operator>(V);
2484   if (!I) return false;
2485 
2486   // Check if the nsz fast-math flag is set
2487   if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
2488     if (FPO->hasNoSignedZeros())
2489       return true;
2490 
2491   // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
2492   if (I->getOpcode() == Instruction::FAdd)
2493     if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1)))
2494       if (CFP->isNullValue())
2495         return true;
2496 
2497   // sitofp and uitofp turn into +0.0 for zero.
2498   if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
2499     return true;
2500 
2501   if (const CallInst *CI = dyn_cast<CallInst>(I)) {
2502     Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
2503     switch (IID) {
2504     default:
2505       break;
2506     // sqrt(-0.0) = -0.0, no other negative results are possible.
2507     case Intrinsic::sqrt:
2508       return CannotBeNegativeZero(CI->getArgOperand(0), TLI, Depth + 1);
2509     // fabs(x) != -0.0
2510     case Intrinsic::fabs:
2511       return true;
2512     }
2513   }
2514 
2515   return false;
2516 }
2517 
/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
/// standard ordered compare. E.g. it treats -0.0 as less than +0.0 because of
/// its sign bit, even though the two values compare equal.
2521 static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
2522                                             const TargetLibraryInfo *TLI,
2523                                             bool SignBitOnly,
2524                                             unsigned Depth) {
2525   // TODO: This function does not do the right thing when SignBitOnly is true
2526   // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
2527   // which flips the sign bits of NaNs.  See
2528   // https://llvm.org/bugs/show_bug.cgi?id=31702.
2529 
2530   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
2531     return !CFP->getValueAPF().isNegative() ||
2532            (!SignBitOnly && CFP->getValueAPF().isZero());
2533   }
2534 
2535   if (Depth == MaxDepth)
2536     return false; // Limit search depth.
2537 
2538   const Operator *I = dyn_cast<Operator>(V);
2539   if (!I)
2540     return false;
2541 
2542   switch (I->getOpcode()) {
2543   default:
2544     break;
2545   // Unsigned integers are always nonnegative.
2546   case Instruction::UIToFP:
2547     return true;
2548   case Instruction::FMul:
2549     // x*x is always non-negative or a NaN.
2550     if (I->getOperand(0) == I->getOperand(1) &&
2551         (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
2552       return true;
2553 
2554     LLVM_FALLTHROUGH;
2555   case Instruction::FAdd:
2556   case Instruction::FDiv:
2557   case Instruction::FRem:
2558     return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2559                                            Depth + 1) &&
2560            cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2561                                            Depth + 1);
2562   case Instruction::Select:
2563     return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2564                                            Depth + 1) &&
2565            cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
2566                                            Depth + 1);
2567   case Instruction::FPExt:
2568   case Instruction::FPTrunc:
2569     // Widening/narrowing never change sign.
2570     return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2571                                            Depth + 1);
2572   case Instruction::Call:
2573     const auto *CI = cast<CallInst>(I);
2574     Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
2575     switch (IID) {
2576     default:
2577       break;
2578     case Intrinsic::maxnum:
2579       return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2580                                              Depth + 1) ||
2581              cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2582                                              Depth + 1);
2583     case Intrinsic::minnum:
2584       return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2585                                              Depth + 1) &&
2586              cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2587                                              Depth + 1);
2588     case Intrinsic::exp:
2589     case Intrinsic::exp2:
2590     case Intrinsic::fabs:
2591       return true;
2592 
2593     case Intrinsic::sqrt:
2594       // sqrt(x) is always >= -0 or NaN.  Moreover, sqrt(x) == -0 iff x == -0.
2595       if (!SignBitOnly)
2596         return true;
2597       return CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
2598                                  CannotBeNegativeZero(CI->getOperand(0), TLI));
2599 
2600     case Intrinsic::powi:
2601       if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
2602         // powi(x,n) is non-negative if n is even.
2603         if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
2604           return true;
2605       }
2606       // TODO: This is not correct.  Given that exp is an integer, here are the
2607       // ways that pow can return a negative value:
2608       //
2609       //   pow(x, exp)    --> negative if exp is odd and x is negative.
2610       //   pow(-0, exp)   --> -inf if exp is negative odd.
2611       //   pow(-0, exp)   --> -0 if exp is positive odd.
2612       //   pow(-inf, exp) --> -0 if exp is negative odd.
2613       //   pow(-inf, exp) --> -inf if exp is positive odd.
2614       //
2615       // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
2616       // but we must return false if x == -0.  Unfortunately we do not currently
2617       // have a way of expressing this constraint.  See details in
2618       // https://llvm.org/bugs/show_bug.cgi?id=31702.
2619       return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2620                                              Depth + 1);
2621 
2622     case Intrinsic::fma:
2623     case Intrinsic::fmuladd:
2624       // x*x+y is non-negative if y is non-negative.
2625       return I->getOperand(0) == I->getOperand(1) &&
2626              (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
2627              cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
2628                                              Depth + 1);
2629     }
2630     break;
2631   }
2632   return false;
2633 }
2634 
2635 bool llvm::CannotBeOrderedLessThanZero(const Value *V,
2636                                        const TargetLibraryInfo *TLI) {
2637   return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
2638 }
2639 
2640 bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
2641   return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
2642 }
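
// As an illustration of the difference between the two entry points above
// (a sketch only): for IR such as
//   %s = fmul double %x, %x
// CannotBeOrderedLessThanZero(%s) is true, while SignBitMustBeZero(%s) also
// requires the fmul to carry the nnan flag, because a NaN result may have its
// sign bit set.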
2643 
/// If the specified value can be set by repeating the same byte in memory,
/// return the i8 value of that byte.  This is trivially true for all i8
/// values, but it also holds for i32 0, i32 -1, i16 0xF0F0, double 0.0, etc.
/// If the value can't be handled with a repeated byte store (e.g. i16 0x1234),
/// return null.
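///
/// For illustration, a typical caller (hypothetical names) might use this as:
///   if (Value *ByteVal = isBytewiseValue(StoredVal))
///     ... replace a wide store with a memset of ByteVal ...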
2649 Value *llvm::isBytewiseValue(Value *V) {
2650   // All byte-wide stores are splatable, even of arbitrary variables.
2651   if (V->getType()->isIntegerTy(8)) return V;
2652 
  // Handle 'null' values such as ConstantAggregateZero.
2654   if (Constant *C = dyn_cast<Constant>(V))
2655     if (C->isNullValue())
2656       return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
2657 
2658   // Constant float and double values can be handled as integer values if the
2659   // corresponding integer value is "byteable".  An important case is 0.0.
2660   if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
2661     if (CFP->getType()->isFloatTy())
2662       V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
2663     if (CFP->getType()->isDoubleTy())
2664       V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
2665     // Don't handle long double formats, which have strange constraints.
2666   }
2667 
  // We can handle constant integers whose width is a multiple of 8 bits.
2669   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
2670     if (CI->getBitWidth() % 8 == 0) {
2671       assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
2672 
2673       if (!CI->getValue().isSplat(8))
2674         return nullptr;
2675       return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
2676     }
2677   }
2678 
2679   // A ConstantDataArray/Vector is splatable if all its members are equal and
2680   // also splatable.
2681   if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
2682     Value *Elt = CA->getElementAsConstant(0);
2683     Value *Val = isBytewiseValue(Elt);
2684     if (!Val)
2685       return nullptr;
2686 
2687     for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
2688       if (CA->getElementAsConstant(I) != Elt)
2689         return nullptr;
2690 
2691     return Val;
2692   }
2693 
2694   // Conceptually, we could handle things like:
2695   //   %a = zext i8 %X to i16
2696   //   %b = shl i16 %a, 8
2697   //   %c = or i16 %a, %b
2698   // but until there is an example that actually needs this, it doesn't seem
2699   // worth worrying about.
2700   return nullptr;
2701 }
2702 
2703 
2704 // This is the recursive version of BuildSubAggregate. It takes a few different
2705 // arguments. Idxs is the index within the nested struct From that we are
2706 // looking at now (which is of type IndexedType). IdxSkip is the number of
2707 // indices from Idxs that should be left out when inserting into the resulting
2708 // struct. To is the result struct built so far, new insertvalue instructions
2709 // build on that.
2710 static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
2711                                 SmallVectorImpl<unsigned> &Idxs,
2712                                 unsigned IdxSkip,
2713                                 Instruction *InsertBefore) {
2714   llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
2715   if (STy) {
2716     // Save the original To argument so we can modify it
2717     Value *OrigTo = To;
2718     // General case, the type indexed by Idxs is a struct
2719     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
2720       // Process each struct element recursively
2721       Idxs.push_back(i);
2722       Value *PrevTo = To;
2723       To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
2724                              InsertBefore);
2725       Idxs.pop_back();
2726       if (!To) {
2727         // Couldn't find any inserted value for this index? Cleanup
2728         while (PrevTo != OrigTo) {
2729           InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
2730           PrevTo = Del->getAggregateOperand();
2731           Del->eraseFromParent();
2732         }
2733         // Stop processing elements
2734         break;
2735       }
2736     }
2737     // If we successfully found a value for each of our subaggregates
2738     if (To)
2739       return To;
2740   }
  // Base case: the type indexed by Idxs is not a struct, or not all of the
  // struct's elements had a value that was inserted directly. In the latter
  // case, perhaps we can't determine each of the subelements individually, but
  // we might be able to find the complete struct somewhere.
2745 
2746   // Find the value that is at that particular spot
2747   Value *V = FindInsertedValue(From, Idxs);
2748 
2749   if (!V)
2750     return nullptr;
2751 
  // Insert the value into the new (sub)aggregate.
2753   return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
2754                                        "tmp", InsertBefore);
2755 }
2756 
2757 // This helper takes a nested struct and extracts a part of it (which is again a
2758 // struct) into a new value. For example, given the struct:
2759 // { a, { b, { c, d }, e } }
2760 // and the indices "1, 1" this returns
2761 // { c, d }.
2762 //
// It does this by inserting an insertvalue for each element in the resulting
// struct, as opposed to just inserting a single struct. This will only work if
// each of the elements of the substruct is known (i.e., inserted into From by
// an insertvalue instruction somewhere).
2767 //
2768 // All inserted insertvalue instructions are inserted before InsertBefore
2769 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
2770                                 Instruction *InsertBefore) {
2771   assert(InsertBefore && "Must have someplace to insert!");
2772   Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
2773                                                              idx_range);
2774   Value *To = UndefValue::get(IndexedType);
2775   SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
2776   unsigned IdxSkip = Idxs.size();
2777 
2778   return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
2779 }
2780 
/// Given an aggregate and a sequence of indices, see if the scalar value
/// indexed is already around as a register, for example if it was inserted
/// directly into the aggregate.
2784 ///
2785 /// If InsertBefore is not null, this function will duplicate (modified)
2786 /// insertvalues when a part of a nested struct is extracted.
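///
/// For example (illustrative IR):
///   %agg = insertvalue {i32, i32} undef, i32 7, 0
/// FindInsertedValue(%agg, {0}) returns the inserted i32 7 directly.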
2787 Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
2788                                Instruction *InsertBefore) {
2789   // Nothing to index? Just return V then (this is useful at the end of our
2790   // recursion).
2791   if (idx_range.empty())
2792     return V;
2793   // We have indices, so V should have an indexable type.
2794   assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
2795          "Not looking at a struct or array?");
2796   assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
2797          "Invalid indices for type?");
2798 
2799   if (Constant *C = dyn_cast<Constant>(V)) {
2800     C = C->getAggregateElement(idx_range[0]);
2801     if (!C) return nullptr;
2802     return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
2803   }
2804 
2805   if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
    // Walk the indices of the insertvalue instruction in parallel with the
    // requested indices.
2808     const unsigned *req_idx = idx_range.begin();
2809     for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
2810          i != e; ++i, ++req_idx) {
2811       if (req_idx == idx_range.end()) {
2812         // We can't handle this without inserting insertvalues
2813         if (!InsertBefore)
2814           return nullptr;
2815 
2816         // The requested index identifies a part of a nested aggregate. Handle
2817         // this specially. For example,
2818         // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
2819         // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
2820         // %C = extractvalue {i32, { i32, i32 } } %B, 1
2821         // This can be changed into
2822         // %A = insertvalue {i32, i32 } undef, i32 10, 0
2823         // %C = insertvalue {i32, i32 } %A, i32 11, 1
2824         // which allows the unused 0,0 element from the nested struct to be
2825         // removed.
2826         return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
2827                                  InsertBefore);
2828       }
2829 
      // This insertvalue inserts something other than what we are looking for.
      // In that case, see if the aggregate it inserts into has the value we
      // are looking for.
2833       if (*req_idx != *i)
2834         return FindInsertedValue(I->getAggregateOperand(), idx_range,
2835                                  InsertBefore);
2836     }
2837     // If we end up here, the indices of the insertvalue match with those
2838     // requested (though possibly only partially). Now we recursively look at
2839     // the inserted value, passing any remaining indices.
2840     return FindInsertedValue(I->getInsertedValueOperand(),
2841                              makeArrayRef(req_idx, idx_range.end()),
2842                              InsertBefore);
2843   }
2844 
2845   if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
2846     // If we're extracting a value from an aggregate that was extracted from
2847     // something else, we can extract from that something else directly instead.
2848     // However, we will need to chain I's indices with the requested indices.
2849 
2850     // Calculate the number of indices required
2851     unsigned size = I->getNumIndices() + idx_range.size();
2852     // Allocate some space to put the new indices in
2853     SmallVector<unsigned, 5> Idxs;
2854     Idxs.reserve(size);
2855     // Add indices from the extract value instruction
2856     Idxs.append(I->idx_begin(), I->idx_end());
2857 
2858     // Add requested indices
2859     Idxs.append(idx_range.begin(), idx_range.end());
2860 
2861     assert(Idxs.size() == size
2862            && "Number of indices added not correct?");
2863 
2864     return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
2865   }
  // Otherwise, we don't know (e.g., when extracting from a function return
  // value or a load instruction).
2868   return nullptr;
2869 }
2870 
2871 /// Analyze the specified pointer to see if it can be expressed as a base
2872 /// pointer plus a constant offset. Return the base and offset to the caller.
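///
/// For example (illustrative IR), given
///   %p = getelementptr i8, i8* %base, i64 12
/// this returns %base and sets Offset to 12.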
2873 Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
2874                                               const DataLayout &DL) {
2875   unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
2876   APInt ByteOffset(BitWidth, 0);
2877 
2878   // We walk up the defs but use a visited set to handle unreachable code. In
2879   // that case, we stop after accumulating the cycle once (not that it
2880   // matters).
2881   SmallPtrSet<Value *, 16> Visited;
2882   while (Visited.insert(Ptr).second) {
2883     if (Ptr->getType()->isVectorTy())
2884       break;
2885 
2886     if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
2887       // If one of the values we have visited is an addrspacecast, then
2888       // the pointer type of this GEP may be different from the type
2889       // of the Ptr parameter which was passed to this function.  This
2890       // means when we construct GEPOffset, we need to use the size
2891       // of GEP's pointer type rather than the size of the original
2892       // pointer type.
2893       APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
2894       if (!GEP->accumulateConstantOffset(DL, GEPOffset))
2895         break;
2896 
2897       ByteOffset += GEPOffset.getSExtValue();
2898 
2899       Ptr = GEP->getPointerOperand();
2900     } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
2901                Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) {
2902       Ptr = cast<Operator>(Ptr)->getOperand(0);
2903     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
2904       if (GA->isInterposable())
2905         break;
2906       Ptr = GA->getAliasee();
2907     } else {
2908       break;
2909     }
2910   }
2911   Offset = ByteOffset.getSExtValue();
2912   return Ptr;
2913 }
2914 
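// For example (illustrative IR), with CharSize == 8 this matches a GEP such as
//   getelementptr [13 x i8], [13 x i8]* @str, i64 0, i64 %idx
// which indexes from element 0 of an i8 array.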
2915 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
2916                                        unsigned CharSize) {
2917   // Make sure the GEP has exactly three arguments.
2918   if (GEP->getNumOperands() != 3)
2919     return false;
2920 
  // Make sure the GEP is indexing into an array of \p CharSize-bit integers.
2923   ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
2924   if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
2925     return false;
2926 
2927   // Check to make sure that the first operand of the GEP is an integer and
2928   // has value 0 so that we are sure we're indexing into the initializer.
2929   const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
2930   if (!FirstIdx || !FirstIdx->isZero())
2931     return false;
2932 
2933   return true;
2934 }
2935 
2936 bool llvm::getConstantDataArrayInfo(const Value *V,
2937                                     ConstantDataArraySlice &Slice,
2938                                     unsigned ElementSize, uint64_t Offset) {
2939   assert(V);
2940 
2941   // Look through bitcast instructions and geps.
2942   V = V->stripPointerCasts();
2943 
2944   // If the value is a GEP instruction or constant expression, treat it as an
2945   // offset.
2946   if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2947     // The GEP operator should be based on a pointer to string constant, and is
2948     // indexing into the string constant.
2949     if (!isGEPBasedOnPointerToString(GEP, ElementSize))
2950       return false;
2951 
2952     // If the second index isn't a ConstantInt, then this is a variable index
2953     // into the array.  If this occurs, we can't say anything meaningful about
2954     // the string.
2955     uint64_t StartIdx = 0;
2956     if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
2957       StartIdx = CI->getZExtValue();
2958     else
2959       return false;
2960     return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize,
2961                                     StartIdx + Offset);
2962   }
2963 
  // At this point V, whether it was reached directly or through a GEP in an
  // outer call, must be a global variable that is a constant and has a
  // definitive initializer. The referenced constant initializer is the array
  // that we'll use for the optimization.
2967   const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
2968   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
2969     return false;
2970 
2971   const ConstantDataArray *Array;
2972   ArrayType *ArrayTy;
2973   if (GV->getInitializer()->isNullValue()) {
2974     Type *GVTy = GV->getValueType();
2975     if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) {
2976       // A zeroinitializer for the array; there is no ConstantDataArray.
2977       Array = nullptr;
2978     } else {
2979       const DataLayout &DL = GV->getParent()->getDataLayout();
2980       uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy);
2981       uint64_t Length = SizeInBytes / (ElementSize / 8);
2982       if (Length <= Offset)
2983         return false;
2984 
2985       Slice.Array = nullptr;
2986       Slice.Offset = 0;
2987       Slice.Length = Length - Offset;
2988       return true;
2989     }
2990   } else {
2991     // This must be a ConstantDataArray.
2992     Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
2993     if (!Array)
2994       return false;
2995     ArrayTy = Array->getType();
2996   }
2997   if (!ArrayTy->getElementType()->isIntegerTy(ElementSize))
2998     return false;
2999 
3000   uint64_t NumElts = ArrayTy->getArrayNumElements();
3001   if (Offset > NumElts)
3002     return false;
3003 
3004   Slice.Array = Array;
3005   Slice.Offset = Offset;
3006   Slice.Length = NumElts - Offset;
3007   return true;
3008 }
3009 
/// This function extracts the constant, null-terminated C string pointed to by
/// V. If successful, it returns true and returns the string in Str; otherwise
/// it returns false.
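///
/// For example (illustrative IR), given
///   @.str = private constant [6 x i8] c"hello\00"
/// a pointer to @.str yields Str == "hello" when TrimAtNul is true.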
3013 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
3014                                  uint64_t Offset, bool TrimAtNul) {
3015   ConstantDataArraySlice Slice;
3016   if (!getConstantDataArrayInfo(V, Slice, 8, Offset))
3017     return false;
3018 
3019   if (Slice.Array == nullptr) {
3020     if (TrimAtNul) {
3021       Str = StringRef();
3022       return true;
3023     }
3024     if (Slice.Length == 1) {
3025       Str = StringRef("", 1);
3026       return true;
3027     }
3028     // We cannot instantiate a StringRef as we do not have an appropriate string
3029     // of 0s at hand.
3030     return false;
3031   }
3032 
3033   // Start out with the entire array in the StringRef.
3034   Str = Slice.Array->getAsString();
3035   // Skip over 'offset' bytes.
3036   Str = Str.substr(Slice.Offset);
3037 
3038   if (TrimAtNul) {
    // Trim off the \0 and anything after it.  If the array is not nul
    // terminated, we just return the rest of the string.  The client may know
    // some other way that the string is length-bound.
3042     Str = Str.substr(0, Str.find('\0'));
3043   }
3044   return true;
3045 }
3046 
3047 // These next two are very similar to the above, but also look through PHI
3048 // nodes.
3049 // TODO: See if we can integrate these two together.
3050 
3051 /// If we can compute the length of the string pointed to by
3052 /// the specified pointer, return 'len+1'.  If we can't, return 0.
3053 static uint64_t GetStringLengthH(const Value *V,
3054                                  SmallPtrSetImpl<const PHINode*> &PHIs,
3055                                  unsigned CharSize) {
3056   // Look through noop bitcast instructions.
3057   V = V->stripPointerCasts();
3058 
3059   // If this is a PHI node, there are two cases: either we have already seen it
3060   // or we haven't.
3061   if (const PHINode *PN = dyn_cast<PHINode>(V)) {
3062     if (!PHIs.insert(PN).second)
3063       return ~0ULL;  // already in the set.
3064 
3065     // If it was new, see if all the input strings are the same length.
3066     uint64_t LenSoFar = ~0ULL;
3067     for (Value *IncValue : PN->incoming_values()) {
3068       uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
3069       if (Len == 0) return 0; // Unknown length -> unknown.
3070 
3071       if (Len == ~0ULL) continue;
3072 
3073       if (Len != LenSoFar && LenSoFar != ~0ULL)
3074         return 0;    // Disagree -> unknown.
3075       LenSoFar = Len;
3076     }
3077 
3078     // Success, all agree.
3079     return LenSoFar;
3080   }
3081 
  // strlen(select(c,x,y)) -> strlen(x) if strlen(x) == strlen(y), else unknown.
3083   if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
3084     uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
3085     if (Len1 == 0) return 0;
3086     uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
3087     if (Len2 == 0) return 0;
3088     if (Len1 == ~0ULL) return Len2;
3089     if (Len2 == ~0ULL) return Len1;
3090     if (Len1 != Len2) return 0;
3091     return Len1;
3092   }
3093 
3094   // Otherwise, see if we can read the string.
3095   ConstantDataArraySlice Slice;
3096   if (!getConstantDataArrayInfo(V, Slice, CharSize))
3097     return 0;
3098 
3099   if (Slice.Array == nullptr)
3100     return 1;
3101 
3102   // Search for nul characters
3103   unsigned NullIndex = 0;
3104   for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
3105     if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
3106       break;
3107   }
3108 
3109   return NullIndex + 1;
3110 }
3111 
3112 /// If we can compute the length of the string pointed to by
3113 /// the specified pointer, return 'len+1'.  If we can't, return 0.
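///
/// For example, for a pointer to the constant c"abc\00" this returns 4
/// (strlen plus the terminating nul); for an unknown pointer it returns 0.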
3114 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
3115   if (!V->getType()->isPointerTy()) return 0;
3116 
3117   SmallPtrSet<const PHINode*, 32> PHIs;
3118   uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
  // 1, the length of an empty string (just the terminating nul).
3121   return Len == ~0ULL ? 1 : Len;
3122 }
3123 
3124 /// \brief \p PN defines a loop-variant pointer to an object.  Check if the
3125 /// previous iteration of the loop was referring to the same object as \p PN.
3126 static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
3127                                          const LoopInfo *LI) {
3128   // Find the loop-defined value.
3129   Loop *L = LI->getLoopFor(PN->getParent());
3130   if (PN->getNumIncomingValues() != 2)
3131     return true;
3132 
3133   // Find the value from previous iteration.
3134   auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
3135   if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
3136     PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
3137   if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
3138     return true;
3139 
3140   // If a new pointer is loaded in the loop, the pointer references a different
3141   // object in every iteration.  E.g.:
3142   //    for (i)
3143   //       int *p = a[i];
3144   //       ...
3145   if (auto *Load = dyn_cast<LoadInst>(PrevValue))
3146     if (!L->isLoopInvariant(Load->getPointerOperand()))
3147       return false;
3148   return true;
3149 }
3150 
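// For example (illustrative IR), given
//   %a = alloca [4 x i32]
//   %p = getelementptr [4 x i32], [4 x i32]* %a, i64 0, i64 2
//   %c = bitcast i32* %p to i8*
// GetUnderlyingObject(%c, DL) looks through the bitcast and GEP and returns %a.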
3151 Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
3152                                  unsigned MaxLookup) {
3153   if (!V->getType()->isPointerTy())
3154     return V;
3155   for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
3156     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
3157       V = GEP->getPointerOperand();
3158     } else if (Operator::getOpcode(V) == Instruction::BitCast ||
3159                Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
3160       V = cast<Operator>(V)->getOperand(0);
3161     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
3162       if (GA->isInterposable())
3163         return V;
3164       V = GA->getAliasee();
3165     } else if (isa<AllocaInst>(V)) {
3166       // An alloca can't be further simplified.
3167       return V;
3168     } else {
3169       if (auto CS = CallSite(V))
3170         if (Value *RV = CS.getReturnedArgOperand()) {
3171           V = RV;
3172           continue;
3173         }
3174 
3175       // See if InstructionSimplify knows any relevant tricks.
3176       if (Instruction *I = dyn_cast<Instruction>(V))
3177         // TODO: Acquire a DominatorTree and AssumptionCache and use them.
3178         if (Value *Simplified = SimplifyInstruction(I, {DL, I})) {
3179           V = Simplified;
3180           continue;
3181         }
3182 
3183       return V;
3184     }
3185     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
3186   }
3187   return V;
3188 }
3189 
3190 void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
3191                                 const DataLayout &DL, LoopInfo *LI,
3192                                 unsigned MaxLookup) {
3193   SmallPtrSet<Value *, 4> Visited;
3194   SmallVector<Value *, 4> Worklist;
3195   Worklist.push_back(V);
3196   do {
3197     Value *P = Worklist.pop_back_val();
3198     P = GetUnderlyingObject(P, DL, MaxLookup);
3199 
3200     if (!Visited.insert(P).second)
3201       continue;
3202 
3203     if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
3204       Worklist.push_back(SI->getTrueValue());
3205       Worklist.push_back(SI->getFalseValue());
3206       continue;
3207     }
3208 
3209     if (PHINode *PN = dyn_cast<PHINode>(P)) {
3210       // If this PHI changes the underlying object in every iteration of the
3211       // loop, don't look through it.  Consider:
3212       //   int **A;
3213       //   for (i) {
3214       //     Prev = Curr;     // Prev = PHI (Prev_0, Curr)
3215       //     Curr = A[i];
3216       //     *Prev, *Curr;
3217       //
3218       // Prev is tracking Curr one iteration behind so they refer to different
3219       // underlying objects.
3220       if (!LI || !LI->isLoopHeader(PN->getParent()) ||
3221           isSameUnderlyingObjectInLoop(PN, LI))
3222         for (Value *IncValue : PN->incoming_values())
3223           Worklist.push_back(IncValue);
3224       continue;
3225     }
3226 
3227     Objects.push_back(P);
3228   } while (!Worklist.empty());
3229 }
3230 
3231 /// This is the function that does the work of looking through basic
3232 /// ptrtoint+arithmetic+inttoptr sequences.
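///
/// For example (illustrative IR), given
///   %i = ptrtoint i8* %obj to i64
///   %j = add i64 %i, 16
/// starting from %j this walks the add and then the ptrtoint back to %obj.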
3233 static const Value *getUnderlyingObjectFromInt(const Value *V) {
3234   do {
3235     if (const Operator *U = dyn_cast<Operator>(V)) {
3236       // If we find a ptrtoint, we can transfer control back to the
3237       // regular getUnderlyingObjectFromInt.
3238       if (U->getOpcode() == Instruction::PtrToInt)
3239         return U->getOperand(0);
3240       // If we find an add of a constant, a multiplied value, or a phi, it's
3241       // likely that the other operand will lead us to the base
3242       // object. We don't have to worry about the case where the
3243       // object address is somehow being computed by the multiply,
3244       // because our callers only care when the result is an
3245       // identifiable object.
3246       if (U->getOpcode() != Instruction::Add ||
3247           (!isa<ConstantInt>(U->getOperand(1)) &&
3248            Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
3249            !isa<PHINode>(U->getOperand(1))))
3250         return V;
3251       V = U->getOperand(0);
3252     } else {
3253       return V;
3254     }
3255     assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
3256   } while (true);
3257 }
3258 
3259 /// This is a wrapper around GetUnderlyingObjects and adds support for basic
3260 /// ptrtoint+arithmetic+inttoptr sequences.
3261 void llvm::getUnderlyingObjectsForCodeGen(const Value *V,
3262                           SmallVectorImpl<Value *> &Objects,
3263                           const DataLayout &DL) {
3264   SmallPtrSet<const Value *, 16> Visited;
3265   SmallVector<const Value *, 4> Working(1, V);
3266   do {
3267     V = Working.pop_back_val();
3268 
3269     SmallVector<Value *, 4> Objs;
3270     GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
3271 
3272     for (Value *V : Objs) {
3273       if (!Visited.insert(V).second)
3274         continue;
3275       if (Operator::getOpcode(V) == Instruction::IntToPtr) {
3276         const Value *O =
3277           getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
3278         if (O->getType()->isPointerTy()) {
3279           Working.push_back(O);
3280           continue;
3281         }
3282       }
3283       // If GetUnderlyingObjects fails to find an identifiable object,
3284       // getUnderlyingObjectsForCodeGen also fails for safety.
3285       if (!isIdentifiedObject(V)) {
3286         Objects.clear();
3287         return;
3288       }
3289       Objects.push_back(const_cast<Value *>(V));
3290     }
3291   } while (!Working.empty());
3292 }
3293 
3294 /// Return true if the only users of this pointer are lifetime markers.
3295 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
3296   for (const User *U : V->users()) {
3297     const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
3298     if (!II) return false;
3299 
3300     if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
3301         II->getIntrinsicID() != Intrinsic::lifetime_end)
3302       return false;
3303   }
3304   return true;
3305 }
3306 
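// For example (illustrative IR), 'sdiv i32 %x, 7' is safe to speculate: the
// denominator is a nonzero constant other than -1. 'sdiv i32 %x, %y' is not,
// since %y may be 0 (or -1 with %x == INT_MIN).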
3307 bool llvm::isSafeToSpeculativelyExecute(const Value *V,
3308                                         const Instruction *CtxI,
3309                                         const DominatorTree *DT) {
3310   const Operator *Inst = dyn_cast<Operator>(V);
3311   if (!Inst)
3312     return false;
3313 
3314   for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
3315     if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
3316       if (C->canTrap())
3317         return false;
3318 
3319   switch (Inst->getOpcode()) {
3320   default:
3321     return true;
3322   case Instruction::UDiv:
3323   case Instruction::URem: {
3324     // x / y is undefined if y == 0.
3325     const APInt *V;
3326     if (match(Inst->getOperand(1), m_APInt(V)))
3327       return *V != 0;
3328     return false;
3329   }
3330   case Instruction::SDiv:
3331   case Instruction::SRem: {
    // x / y is undefined if y == 0, or if x == INT_MIN and y == -1.
3333     const APInt *Numerator, *Denominator;
3334     if (!match(Inst->getOperand(1), m_APInt(Denominator)))
3335       return false;
3336     // We cannot hoist this division if the denominator is 0.
3337     if (*Denominator == 0)
3338       return false;
3339     // It's safe to hoist if the denominator is not 0 or -1.
3340     if (*Denominator != -1)
3341       return true;
3342     // At this point we know that the denominator is -1.  It is safe to hoist as
3343     // long we know that the numerator is not INT_MIN.
3344     if (match(Inst->getOperand(0), m_APInt(Numerator)))
3345       return !Numerator->isMinSignedValue();
3346     // The numerator *might* be MinSignedValue.
3347     return false;
3348   }
3349   case Instruction::Load: {
3350     const LoadInst *LI = cast<LoadInst>(Inst);
3351     if (!LI->isUnordered() ||
3352         // Speculative load may create a race that did not exist in the source.
3353         LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
3354         // Speculative load may load data from dirty regions.
3355         LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
3356       return false;
3357     const DataLayout &DL = LI->getModule()->getDataLayout();
3358     return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
3359                                               LI->getAlignment(), DL, CtxI, DT);
3360   }
3361   case Instruction::Call: {
3362     auto *CI = cast<const CallInst>(Inst);
3363     const Function *Callee = CI->getCalledFunction();
3364 
3365     // The called function could have undefined behavior or side-effects, even
3366     // if marked readnone nounwind.
3367     return Callee && Callee->isSpeculatable();
3368   }
3369   case Instruction::VAArg:
3370   case Instruction::Alloca:
3371   case Instruction::Invoke:
3372   case Instruction::PHI:
3373   case Instruction::Store:
3374   case Instruction::Ret:
3375   case Instruction::Br:
3376   case Instruction::IndirectBr:
3377   case Instruction::Switch:
3378   case Instruction::Unreachable:
3379   case Instruction::Fence:
3380   case Instruction::AtomicRMW:
3381   case Instruction::AtomicCmpXchg:
3382   case Instruction::LandingPad:
3383   case Instruction::Resume:
3384   case Instruction::CatchSwitch:
3385   case Instruction::CatchPad:
3386   case Instruction::CatchRet:
3387   case Instruction::CleanupPad:
3388   case Instruction::CleanupRet:
3389     return false; // Misc instructions which have effects
3390   }
3391 }
3392 
3393 bool llvm::mayBeMemoryDependent(const Instruction &I) {
3394   return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
3395 }
3396 
3397 /// Return true if we know that the specified value is never null.
3398 bool llvm::isKnownNonNull(const Value *V) {
3399   assert(V->getType()->isPointerTy() && "V must be pointer type");
3400 
3401   // Alloca never returns null, malloc might.
3402   if (isa<AllocaInst>(V)) return true;
3403 
3404   // A byval, inalloca, or nonnull argument is never null.
3405   if (const Argument *A = dyn_cast<Argument>(V))
3406     return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
3407 
3408   // A global variable in address space 0 is non null unless extern weak
3409   // or an absolute symbol reference. Other address spaces may have null as a
3410   // valid address for a global, so we can't assume anything.
3411   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
3412     return !GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3413            GV->getType()->getAddressSpace() == 0;
3414 
3415   // A Load tagged with nonnull metadata is never null.
3416   if (const LoadInst *LI = dyn_cast<LoadInst>(V))
3417     return LI->getMetadata(LLVMContext::MD_nonnull);
3418 
3419   if (auto CS = ImmutableCallSite(V))
3420     if (CS.isReturnNonNull())
3421       return true;
3422 
3423   return false;
3424 }
3425 
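// For example (illustrative IR), given
//   %c = icmp eq i8* %p, null
//   br i1 %c, label %is_null, label %not_null
// %p is known non-null at any context instruction dominated by the edge into
// %not_null.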
3426 static bool isKnownNonNullFromDominatingCondition(const Value *V,
3427                                                   const Instruction *CtxI,
3428                                                   const DominatorTree *DT) {
3429   assert(V->getType()->isPointerTy() && "V must be pointer type");
3430   assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");
3431   assert(CtxI && "Context instruction required for analysis");
3432   assert(DT && "Dominator tree required for analysis");
3433 
3434   unsigned NumUsesExplored = 0;
3435   for (auto *U : V->users()) {
3436     // Avoid massive lists
3437     if (NumUsesExplored >= DomConditionsMaxUses)
3438       break;
3439     NumUsesExplored++;
3440 
3441     // If the value is used as an argument to a call or invoke, then argument
3442     // attributes may provide an answer about null-ness.
3443     if (auto CS = ImmutableCallSite(U))
3444       if (auto *CalledFunc = CS.getCalledFunction())
3445         for (const Argument &Arg : CalledFunc->args())
3446           if (CS.getArgOperand(Arg.getArgNo()) == V &&
3447               Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI))
3448             return true;
3449 
3450     // Consider only compare instructions uniquely controlling a branch
3451     CmpInst::Predicate Pred;
3452     if (!match(const_cast<User *>(U),
3453                m_c_ICmp(Pred, m_Specific(V), m_Zero())) ||
3454         (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
3455       continue;
3456 
3457     for (auto *CmpU : U->users()) {
3458       if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) {
3459         assert(BI->isConditional() && "uses a comparison!");
3460 
3461         BasicBlock *NonNullSuccessor =
3462             BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
3463         BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
3464         if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
3465           return true;
3466       } else if (Pred == ICmpInst::ICMP_NE &&
3467                  match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) &&
3468                  DT->dominates(cast<Instruction>(CmpU), CtxI)) {
3469         return true;
3470       }
3471     }
3472   }
3473 
3474   return false;
3475 }
3476 
3477 bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
3478                             const DominatorTree *DT) {
3479   if (isa<ConstantPointerNull>(V) || isa<UndefValue>(V))
3480     return false;
3481 
3482   if (isKnownNonNull(V))
3483     return true;
3484 
3485   if (!CtxI || !DT)
3486     return false;
3487 
3488   return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT);
3489 }
3490 
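// For example, multiplying two i32 operands that each have at least 16 known
// leading zero bits can never overflow: each operand has at most 16
// significant bits, and 16 + 16 <= 32.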
3491 OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
3492                                                    const Value *RHS,
3493                                                    const DataLayout &DL,
3494                                                    AssumptionCache *AC,
3495                                                    const Instruction *CxtI,
3496                                                    const DominatorTree *DT) {
3497   // Multiplying n * m significant bits yields a result of n + m significant
3498   // bits. If the total number of significant bits does not exceed the
3499   // result bit width (minus 1), there is no overflow.
3500   // This means if we have enough leading zero bits in the operands
3501   // we can guarantee that the result does not overflow.
3502   // Ref: "Hacker's Delight" by Henry Warren
3503   unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
3504   KnownBits LHSKnown(BitWidth);
3505   KnownBits RHSKnown(BitWidth);
3506   computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
3507   computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
3508   // Note that underestimating the number of zero bits gives a more
3509   // conservative answer.
3510   unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
3511                       RHSKnown.countMinLeadingZeros();
3512   // First handle the easy case: if we have enough zero bits there's
3513   // definitely no overflow.
3514   if (ZeroBits >= BitWidth)
3515     return OverflowResult::NeverOverflows;
3516 
3517   // Get the largest possible values for each operand.
3518   APInt LHSMax = ~LHSKnown.Zero;
3519   APInt RHSMax = ~RHSKnown.Zero;
3520 
3521   // We know the multiply operation doesn't overflow if the maximum values for
3522   // each operand will not overflow after we multiply them together.
3523   bool MaxOverflow;
3524   (void)LHSMax.umul_ov(RHSMax, MaxOverflow);
3525   if (!MaxOverflow)
3526     return OverflowResult::NeverOverflows;
3527 
3528   // We know it always overflows if multiplying the smallest possible values for
3529   // the operands also results in overflow.
3530   bool MinOverflow;
3531   (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow);
3532   if (MinOverflow)
3533     return OverflowResult::AlwaysOverflows;
3534 
3535   return OverflowResult::MayOverflow;
3536 }
3537 
3538 OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
3539                                                    const Value *RHS,
3540                                                    const DataLayout &DL,
3541                                                    AssumptionCache *AC,
3542                                                    const Instruction *CxtI,
3543                                                    const DominatorTree *DT) {
3544   KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
3545   if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
3546     KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
3547 
3548     if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
      // The sign bit is set in both operands, so the addition MUST overflow.
3551       return OverflowResult::AlwaysOverflows;
3552     }
3553 
3554     if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
      // The sign bit is clear in both operands, so the addition CANNOT overflow.
3557       return OverflowResult::NeverOverflows;
3558     }
3559   }
3560 
3561   return OverflowResult::MayOverflow;
3562 }
3563 
3564 /// \brief Return true if we can prove that adding the two values of the
3565 /// knownbits will not overflow.
3566 /// Otherwise return false.
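///
/// For example, adding a value known to be non-negative (sign bit zero) to a
/// value known to be negative (sign bit one) can never overflow, which is the
/// first case handled below.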
3567 static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
3568                                     const KnownBits &RHSKnown) {
3569   // Addition of two 2's complement numbers having opposite signs will never
3570   // overflow.
3571   if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) ||
3572       (LHSKnown.isNonNegative() && RHSKnown.isNegative()))
3573     return true;
3574 
  // If either of the values is known to be non-negative, adding them can only
  // overflow if the other is also non-negative, so we can assume that.
  // Two non-negative numbers will only overflow if there is a carry to the
  // sign bit, so we can check whether, even when the values are as big as
  // possible, there is no carry into the sign bit.
3580   if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
3581     APInt MaxLHS = ~LHSKnown.Zero;
3582     MaxLHS.clearSignBit();
3583     APInt MaxRHS = ~RHSKnown.Zero;
3584     MaxRHS.clearSignBit();
3585     APInt Result = std::move(MaxLHS) + std::move(MaxRHS);
3586     return Result.isSignBitClear();
3587   }
3588 
  // If either of the values is known to be negative, adding them can only
  // overflow if the other is also negative, so we can assume that.
  // Two negative numbers will only overflow if there is no carry into the sign
  // bit, so we can check whether, even when the values are as small as
  // possible, there is a carry into the sign bit.
3594   if (LHSKnown.isNegative() || RHSKnown.isNegative()) {
3595     APInt MinLHS = LHSKnown.One;
3596     MinLHS.clearSignBit();
3597     APInt MinRHS = RHSKnown.One;
3598     MinRHS.clearSignBit();
3599     APInt Result = std::move(MinLHS) + std::move(MinRHS);
3600     return Result.isSignBitSet();
3601   }
3602 
3603   // If we reached here it means that we know nothing about the sign bits.
3604   // In this case we can't know if there will be an overflow, since by
3605   // changing the sign bits any two values can be made to overflow.
3606   return false;
3607 }
3608 
3609 static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
3610                                                   const Value *RHS,
3611                                                   const AddOperator *Add,
3612                                                   const DataLayout &DL,
3613                                                   AssumptionCache *AC,
3614                                                   const Instruction *CxtI,
3615                                                   const DominatorTree *DT) {
3616   if (Add && Add->hasNoSignedWrap()) {
3617     return OverflowResult::NeverOverflows;
3618   }
3619 
3620   // If LHS and RHS each have at least two sign bits, the addition will look
3621   // like
3622   //
3623   // XX..... +
3624   // YY.....
3625   //
3626   // If the carry into the most significant position is 0, X and Y can't both
3627   // be 1 and therefore the carry out of the addition is also 0.
3628   //
3629   // If the carry into the most significant position is 1, X and Y can't both
3630   // be 0 and therefore the carry out of the addition is also 1.
3631   //
3632   // Since the carry into the most significant position is always equal to
3633   // the carry out of the addition, there is no signed overflow.
3634   if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
3635       ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
3636     return OverflowResult::NeverOverflows;
3637 
3638   KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
3639   KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
3640 
3641   if (checkRippleForSignedAdd(LHSKnown, RHSKnown))
3642     return OverflowResult::NeverOverflows;
3643 
  // The remaining code needs Add to be available. Return early if it is not.
3645   if (!Add)
3646     return OverflowResult::MayOverflow;
3647 
3648   // If the sign of Add is the same as at least one of the operands, this add
3649   // CANNOT overflow. This is particularly useful when the sum is
3650   // @llvm.assume'ed non-negative rather than proved so from analyzing its
3651   // operands.
3652   bool LHSOrRHSKnownNonNegative =
3653       (LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
3654   bool LHSOrRHSKnownNegative =
3655       (LHSKnown.isNegative() || RHSKnown.isNegative());
3656   if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
3657     KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
3658     if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
3659         (AddKnown.isNegative() && LHSOrRHSKnownNegative)) {
3660       return OverflowResult::NeverOverflows;
3661     }
3662   }
3663 
3664   return OverflowResult::MayOverflow;
3665 }
3666 
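// For example (illustrative IR), given
//   %agg = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum = extractvalue { i32, i1 } %agg, 0
//   %ovf = extractvalue { i32, i1 } %agg, 1
//   br i1 %ovf, label %trap, label %cont
// if every use of %sum is dominated by the edge into %cont, the add is known
// not to have wrapped and this function returns true.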
3667 bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
3668                                      const DominatorTree &DT) {
3669 #ifndef NDEBUG
3670   auto IID = II->getIntrinsicID();
3671   assert((IID == Intrinsic::sadd_with_overflow ||
3672           IID == Intrinsic::uadd_with_overflow ||
3673           IID == Intrinsic::ssub_with_overflow ||
3674           IID == Intrinsic::usub_with_overflow ||
3675           IID == Intrinsic::smul_with_overflow ||
3676           IID == Intrinsic::umul_with_overflow) &&
3677          "Not an overflow intrinsic!");
3678 #endif
3679 
3680   SmallVector<const BranchInst *, 2> GuardingBranches;
3681   SmallVector<const ExtractValueInst *, 2> Results;
3682 
3683   for (const User *U : II->users()) {
3684     if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
3685       assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
3686 
3687       if (EVI->getIndices()[0] == 0)
3688         Results.push_back(EVI);
3689       else {
3690         assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
3691 
3692         for (const auto *U : EVI->users())
3693           if (const auto *B = dyn_cast<BranchInst>(U)) {
3694             assert(B->isConditional() && "How else is it using an i1?");
3695             GuardingBranches.push_back(B);
3696           }
3697       }
3698     } else {
3699       // We are using the aggregate directly in a way we don't want to analyze
3700       // here (storing it to a global, say).
3701       return false;
3702     }
3703   }
3704 
3705   auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
3706     BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
3707     if (!NoWrapEdge.isSingleEdge())
3708       return false;
3709 
3710     // Check if all users of the add are provably no-wrap.
3711     for (const auto *Result : Results) {
      // If the extractvalue itself is not executed on overflow, then we don't
3713       // need to check each use separately, since domination is transitive.
3714       if (DT.dominates(NoWrapEdge, Result->getParent()))
3715         continue;
3716 
3717       for (auto &RU : Result->uses())
3718         if (!DT.dominates(NoWrapEdge, RU))
3719           return false;
3720     }
3721 
3722     return true;
3723   };
3724 
3725   return any_of(GuardingBranches, AllUsesGuardedByBranch);
3726 }
3727 
3728 
3729 OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
3730                                                  const DataLayout &DL,
3731                                                  AssumptionCache *AC,
3732                                                  const Instruction *CxtI,
3733                                                  const DominatorTree *DT) {
3734   return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
3735                                        Add, DL, AC, CxtI, DT);
3736 }
3737 
3738 OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
3739                                                  const Value *RHS,
3740                                                  const DataLayout &DL,
3741                                                  AssumptionCache *AC,
3742                                                  const Instruction *CxtI,
3743                                                  const DominatorTree *DT) {
3744   return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
3745 }
3746 
3747 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
3748   // A memory operation returns normally if it isn't volatile. A volatile
3749   // operation is allowed to trap.
3750   //
3751   // An atomic operation isn't guaranteed to return in a reasonable amount of
3752   // time because it's possible for another thread to interfere with it for an
3753   // arbitrary length of time, but programs aren't allowed to rely on that.
3754   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
3755     return !LI->isVolatile();
3756   if (const StoreInst *SI = dyn_cast<StoreInst>(I))
3757     return !SI->isVolatile();
3758   if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
3759     return !CXI->isVolatile();
3760   if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
3761     return !RMWI->isVolatile();
3762   if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I))
3763     return !MII->isVolatile();
3764 
3765   // If there is no successor, then execution can't transfer to it.
3766   if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
3767     return !CRI->unwindsToCaller();
3768   if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
3769     return !CatchSwitch->unwindsToCaller();
3770   if (isa<ResumeInst>(I))
3771     return false;
3772   if (isa<ReturnInst>(I))
3773     return false;
3774   if (isa<UnreachableInst>(I))
3775     return false;
3776 
3777   // Calls can throw, or contain an infinite loop, or kill the process.
3778   if (auto CS = ImmutableCallSite(I)) {
3779     // Call sites that throw have implicit non-local control flow.
3780     if (!CS.doesNotThrow())
3781       return false;
3782 
3783     // Non-throwing call sites can loop infinitely, call exit/pthread_exit
3784     // etc. and thus not return.  However, LLVM already assumes that
3785     //
3786     //  - Thread exiting actions are modeled as writes to memory invisible to
3787     //    the program.
3788     //
3789     //  - Loops that don't have side effects (side effects are volatile/atomic
3790     //    stores and IO) always terminate (see http://llvm.org/PR965).
3791     //    Furthermore IO itself is also modeled as writes to memory invisible to
3792     //    the program.
3793     //
3794     // We rely on those assumptions here, and use the memory effects of the call
3795     // target as a proxy for checking that it always returns.
3796 
3797     // FIXME: This isn't aggressive enough; a call which only writes to a global
3798     // is guaranteed to return.
3799     return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
3800            match(I, m_Intrinsic<Intrinsic::assume>());
3801   }
3802 
3803   // Other instructions return normally.
3804   return true;
3805 }
3806 
3807 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
3808                                                   const Loop *L) {
3809   // The loop header is guaranteed to be executed for every iteration.
3810   //
3811   // FIXME: Relax this constraint to cover all basic blocks that are
3812   // guaranteed to be executed at every iteration.
3813   if (I->getParent() != L->getHeader()) return false;
3814 
3815   for (const Instruction &LI : *L->getHeader()) {
3816     if (&LI == I) return true;
3817     if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
3818   }
3819   llvm_unreachable("Instruction not contained in its own parent basic block.");
3820 }
3821 
3822 bool llvm::propagatesFullPoison(const Instruction *I) {
3823   switch (I->getOpcode()) {
3824   case Instruction::Add:
3825   case Instruction::Sub:
3826   case Instruction::Xor:
3827   case Instruction::Trunc:
3828   case Instruction::BitCast:
3829   case Instruction::AddrSpaceCast:
3830   case Instruction::Mul:
3831   case Instruction::Shl:
3832   case Instruction::GetElementPtr:
3833     // These operations all propagate poison unconditionally. Note that poison
3834     // is not any particular value, so xor or subtraction of poison with
3835     // itself still yields poison, not zero.
3836     return true;
3837 
3838   case Instruction::AShr:
3839   case Instruction::SExt:
3840     // For these operations, one bit of the input is replicated across
3841     // multiple output bits. A replicated poison bit is still poison.
3842     return true;
3843 
3844   case Instruction::ICmp:
3845     // Comparing poison with any value yields poison.  This is why, for
3846     // instance, x s< (x +nsw 1) can be folded to true.
3847     return true;
3848 
3849   default:
3850     return false;
3851   }
3852 }
3853 
3854 const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
3855   switch (I->getOpcode()) {
3856     case Instruction::Store:
3857       return cast<StoreInst>(I)->getPointerOperand();
3858 
3859     case Instruction::Load:
3860       return cast<LoadInst>(I)->getPointerOperand();
3861 
3862     case Instruction::AtomicCmpXchg:
3863       return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
3864 
3865     case Instruction::AtomicRMW:
3866       return cast<AtomicRMWInst>(I)->getPointerOperand();
3867 
3868     case Instruction::UDiv:
3869     case Instruction::SDiv:
3870     case Instruction::URem:
3871     case Instruction::SRem:
3872       return I->getOperand(1);
3873 
3874     default:
3875       return nullptr;
3876   }
3877 }
3878 
3879 bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
3880   // We currently only look for uses of poison values within the same basic
3881   // block, as that makes it easier to guarantee that the uses will be
3882   // executed given that PoisonI is executed.
3883   //
3884   // FIXME: Expand this to consider uses beyond the same basic block. To do
3885   // this, look out for the distinction between post-dominance and strong
3886   // post-dominance.
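  // For example (illustrative IR), if %p is full poison, then in
  //   %q = getelementptr i8, i8* %p, i64 1   ; poison propagates through GEPs
  //   store i8 0, i8* %q                     ; UB: poison used as store address
  // the store's pointer operand is a guaranteed-non-poison operand, so this
  // function returns true.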
3887   const BasicBlock *BB = PoisonI->getParent();
3888 
3889   // Set of instructions that we have proved will yield poison if PoisonI
3890   // does.
3891   SmallSet<const Value *, 16> YieldsPoison;
3892   SmallSet<const BasicBlock *, 4> Visited;
3893   YieldsPoison.insert(PoisonI);
3894   Visited.insert(PoisonI->getParent());
3895 
3896   BasicBlock::const_iterator Begin = PoisonI->getIterator(), End = BB->end();
3897 
3898   unsigned Iter = 0;
3899   while (Iter++ < MaxDepth) {
3900     for (auto &I : make_range(Begin, End)) {
3901       if (&I != PoisonI) {
3902         const Value *NotPoison = getGuaranteedNonFullPoisonOp(&I);
3903         if (NotPoison != nullptr && YieldsPoison.count(NotPoison))
3904           return true;
3905         if (!isGuaranteedToTransferExecutionToSuccessor(&I))
3906           return false;
3907       }
3908 
3909       // Mark poison that propagates from I through uses of I.
3910       if (YieldsPoison.count(&I)) {
3911         for (const User *User : I.users()) {
3912           const Instruction *UserI = cast<Instruction>(User);
3913           if (propagatesFullPoison(UserI))
3914             YieldsPoison.insert(User);
3915         }
3916       }
3917     }
3918 
3919     if (auto *NextBB = BB->getSingleSuccessor()) {
3920       if (Visited.insert(NextBB).second) {
3921         BB = NextBB;
3922         Begin = BB->getFirstNonPHI()->getIterator();
3923         End = BB->end();
3924         continue;
3925       }
3926     }
3927 
3928     break;
  }
3930   return false;
3931 }
3932 
3933 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
3934   if (FMF.noNaNs())
3935     return true;
3936 
3937   if (auto *C = dyn_cast<ConstantFP>(V))
3938     return !C->isNaN();
3939   return false;
3940 }
3941 
3942 static bool isKnownNonZero(const Value *V) {
3943   if (auto *C = dyn_cast<ConstantFP>(V))
3944     return !C->isZero();
3945   return false;
3946 }
3947 
3948 /// Match the clamp pattern for float types without caring about NaNs or
3949 /// signed zeros. Given the non-min/max outer cmp/select from the clamp
3950 /// pattern, this function recognizes whether it can be substituted by a
3951 /// "canonical" min/max pattern.
3952 static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
3953                                                Value *CmpLHS, Value *CmpRHS,
3954                                                Value *TrueVal, Value *FalseVal,
3955                                                Value *&LHS, Value *&RHS) {
3956   // Try to match
3957   //   X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
3958   //   X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
3959   // and return description of the outer Max/Min.
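  // For example (constants chosen purely for illustration):
  //   X < 1.0 ? 1.0 : Min(X, 2.0) --> Max(1.0, Min(X, 2.0))
  // i.e. a clamp of X to [1.0, 2.0]. Note that the code below only matches
  // when C1 < C2 (respectively C1 > C2 for the max/min form).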
3960 
3961   // First, check if select has inverse order:
3962   if (CmpRHS == FalseVal) {
3963     std::swap(TrueVal, FalseVal);
3964     Pred = CmpInst::getInversePredicate(Pred);
3965   }
3966 
3967   // Assume success. If there's no match, callers should not use these anyway.
3968   LHS = TrueVal;
3969   RHS = FalseVal;
3970 
3971   const APFloat *FC1;
3972   if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
3973     return {SPF_UNKNOWN, SPNB_NA, false};
3974 
3975   const APFloat *FC2;
3976   switch (Pred) {
3977   case CmpInst::FCMP_OLT:
3978   case CmpInst::FCMP_OLE:
3979   case CmpInst::FCMP_ULT:
3980   case CmpInst::FCMP_ULE:
3981     if (match(FalseVal,
3982               m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)),
3983                           m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
3984         FC1->compare(*FC2) == APFloat::cmpResult::cmpLessThan)
3985       return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
3986     break;
3987   case CmpInst::FCMP_OGT:
3988   case CmpInst::FCMP_OGE:
3989   case CmpInst::FCMP_UGT:
3990   case CmpInst::FCMP_UGE:
3991     if (match(FalseVal,
3992               m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)),
3993                           m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
3994         FC1->compare(*FC2) == APFloat::cmpResult::cmpGreaterThan)
3995       return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
3996     break;
3997   default:
3998     break;
3999   }
4000 
4001   return {SPF_UNKNOWN, SPNB_NA, false};
4002 }
4003 
4004 /// Match non-obvious integer minimum and maximum sequences.
4005 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
4006                                        Value *CmpLHS, Value *CmpRHS,
4007                                        Value *TrueVal, Value *FalseVal,
4008                                        Value *&LHS, Value *&RHS) {
4009   // Assume success. If there's no match, callers should not use these anyway.
4010   LHS = TrueVal;
4011   RHS = FalseVal;
4012 
4013   // Recognize variations of:
4014   // CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
4015   const APInt *C1;
4016   if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
4017     const APInt *C2;
4018 
4019     // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
4020     if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
4021         C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
4022       return {SPF_SMAX, SPNB_NA, false};
4023 
4024     // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
4025     if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
4026         C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
4027       return {SPF_SMIN, SPNB_NA, false};
4028 
4029     // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
4030     if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
4031         C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
4032       return {SPF_UMAX, SPNB_NA, false};
4033 
4034     // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
4035     if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
4036         C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
4037       return {SPF_UMIN, SPNB_NA, false};
4038   }
4039 
4040   if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
4041     return {SPF_UNKNOWN, SPNB_NA, false};
4042 
4043   // Z = X -nsw Y
4044   // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
4045   // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
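  // Because of the nsw flag, X - Y cannot overflow, so comparing X against Y
  // is equivalent to comparing Z against zero.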
4046   if (match(TrueVal, m_Zero()) &&
4047       match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
4048     return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
4049 
4050   // Z = X -nsw Y
4051   // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
4052   // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
4053   if (match(FalseVal, m_Zero()) &&
4054       match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
4055     return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
4056 
4057   if (!match(CmpRHS, m_APInt(C1)))
4058     return {SPF_UNKNOWN, SPNB_NA, false};
4059 
4060   // An unsigned min/max can be written with a signed compare.
4061   const APInt *C2;
4062   if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
4063       (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
4064     // Is the sign bit set?
4065     // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
4066     // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
4067     if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue())
4068       return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
4069 
4070     // Is the sign bit clear?
4071     // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
4072     // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
4073     if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
4074         C2->isMinSignedValue())
4075       return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
4076   }
4077 
4078   // Look through 'not' ops to find disguised signed min/max.
4079   // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
4080   // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
4081   if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
4082       match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2)
4083     return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
4084 
4085   // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X)
4086   // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X)
4087   if (match(FalseVal, m_Not(m_Specific(CmpLHS))) &&
4088       match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2)
4089     return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
4090 
4091   return {SPF_UNKNOWN, SPNB_NA, false};
4092 }
4093 
4094 static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
4095                                               FastMathFlags FMF,
4096                                               Value *CmpLHS, Value *CmpRHS,
4097                                               Value *TrueVal, Value *FalseVal,
4098                                               Value *&LHS, Value *&RHS) {
4099   LHS = CmpLHS;
4100   RHS = CmpRHS;
4101 
4102   // If the predicate is an "or-equal" (FP) predicate, then a min/max over
4103   // signed zeroes may produce inconsistent results between implementations.
4104   //   (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
4105   //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
4106   // Therefore we behave conservatively and only proceed if at least one of the
4107   // operands is known to not be zero, or if we don't care about signed zeroes.
4108   switch (Pred) {
4109   default: break;
4110   case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
4111   case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
4112     if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
4113         !isKnownNonZero(CmpRHS))
4114       return {SPF_UNKNOWN, SPNB_NA, false};
4115   }
4116 
4117   SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
4118   bool Ordered = false;
4119 
4120   // When given one NaN and one non-NaN input:
4121   //   - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
4122   //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the
4123   //     ordered comparison fails), which could be NaN or non-NaN.
4124   // so here we discover exactly what NaN behavior is required/accepted.
4125   if (CmpInst::isFPPredicate(Pred)) {
4126     bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
4127     bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
4128 
4129     if (LHSSafe && RHSSafe) {
4130       // Both operands are known non-NaN.
4131       NaNBehavior = SPNB_RETURNS_ANY;
4132     } else if (CmpInst::isOrdered(Pred)) {
4133       // An ordered comparison will return false when given a NaN, so it
4134       // returns the RHS.
4135       Ordered = true;
4136       if (LHSSafe)
4137         // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
4138         NaNBehavior = SPNB_RETURNS_NAN;
4139       else if (RHSSafe)
4140         NaNBehavior = SPNB_RETURNS_OTHER;
4141       else
4142         // Completely unsafe.
4143         return {SPF_UNKNOWN, SPNB_NA, false};
4144     } else {
4145       Ordered = false;
4146       // An unordered comparison will return true when given a NaN, so it
4147       // returns the LHS.
4148       if (LHSSafe)
4149         // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
4150         NaNBehavior = SPNB_RETURNS_OTHER;
4151       else if (RHSSafe)
4152         NaNBehavior = SPNB_RETURNS_NAN;
4153       else
4154         // Completely unsafe.
4155         return {SPF_UNKNOWN, SPNB_NA, false};
4156     }
4157   }
4158 
4159   if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
4160     std::swap(CmpLHS, CmpRHS);
4161     Pred = CmpInst::getSwappedPredicate(Pred);
4162     if (NaNBehavior == SPNB_RETURNS_NAN)
4163       NaNBehavior = SPNB_RETURNS_OTHER;
4164     else if (NaNBehavior == SPNB_RETURNS_OTHER)
4165       NaNBehavior = SPNB_RETURNS_NAN;
4166     Ordered = !Ordered;
4167   }
4168 
4169   // ([if]cmp X, Y) ? X : Y
4170   if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
4171     switch (Pred) {
4172     default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
4173     case ICmpInst::ICMP_UGT:
4174     case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
4175     case ICmpInst::ICMP_SGT:
4176     case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
4177     case ICmpInst::ICMP_ULT:
4178     case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
4179     case ICmpInst::ICMP_SLT:
4180     case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
4181     case FCmpInst::FCMP_UGT:
4182     case FCmpInst::FCMP_UGE:
4183     case FCmpInst::FCMP_OGT:
4184     case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
4185     case FCmpInst::FCMP_ULT:
4186     case FCmpInst::FCMP_ULE:
4187     case FCmpInst::FCMP_OLT:
4188     case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
4189     }
4190   }
4191 
4192   const APInt *C1;
4193   if (match(CmpRHS, m_APInt(C1))) {
4194     if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
4195         (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
4196 
4197       // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
4198       // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
4199       if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) {
4200         return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
4201       }
4202 
4203       // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
4204       // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
4205       if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) {
4206         return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
4207       }
4208     }
4209   }
4210 
4211   if (CmpInst::isIntPredicate(Pred))
4212     return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
4213 
4214   // According to IEEE 754-2008 5.3.1, minNum(0.0, -0.0) and similar
4215   // operations may return either -0.0 or 0.0, so an fcmp/select pair has
4216   // stricter semantics than minNum. Be conservative in such cases.
4217   if (NaNBehavior != SPNB_RETURNS_ANY ||
4218       (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
4219        !isKnownNonZero(CmpRHS)))
4220     return {SPF_UNKNOWN, SPNB_NA, false};
4221 
4222   return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
4223 }
4224 
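/// Helper for matchSelectPattern when the compare and select operand types
/// differ because of a cast. If V1 is a cast instruction, *CastOp receives
/// its opcode; otherwise nullptr is returned. If V2 is a cast of the same
/// opcode from the same source type, return V2's operand. If V2 is a constant
/// that can be converted to the source type without losing information
/// (checked below by casting it back), return the converted constant.
/// Otherwise return nullptr.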
4225 static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
4226                               Instruction::CastOps *CastOp) {
4227   auto *Cast1 = dyn_cast<CastInst>(V1);
4228   if (!Cast1)
4229     return nullptr;
4230 
4231   *CastOp = Cast1->getOpcode();
4232   Type *SrcTy = Cast1->getSrcTy();
4233   if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
4234     // If V1 and V2 are both the same cast from the same type, look through V1.
4235     if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
4236       return Cast2->getOperand(0);
4237     return nullptr;
4238   }
4239 
4240   auto *C = dyn_cast<Constant>(V2);
4241   if (!C)
4242     return nullptr;
4243 
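  // Try to "undo" the cast: convert the constant C to the cast's source type
  // with the inverse operation. The round-trip check at the end rejects any
  // conversion that loses information.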
4244   Constant *CastedTo = nullptr;
4245   switch (*CastOp) {
4246   case Instruction::ZExt:
4247     if (CmpI->isUnsigned())
4248       CastedTo = ConstantExpr::getTrunc(C, SrcTy);
4249     break;
4250   case Instruction::SExt:
4251     if (CmpI->isSigned())
4252       CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
4253     break;
4254   case Instruction::Trunc:
4255     CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned());
4256     break;
4257   case Instruction::FPTrunc:
4258     CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true);
4259     break;
4260   case Instruction::FPExt:
4261     CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true);
4262     break;
4263   case Instruction::FPToUI:
4264     CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true);
4265     break;
4266   case Instruction::FPToSI:
4267     CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true);
4268     break;
4269   case Instruction::UIToFP:
4270     CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true);
4271     break;
4272   case Instruction::SIToFP:
4273     CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true);
4274     break;
4275   default:
4276     break;
4277   }
4278 
4279   if (!CastedTo)
4280     return nullptr;
4281 
4282   // Make sure the cast doesn't lose any information.
4283   Constant *CastedBack =
4284       ConstantExpr::getCast(*CastOp, CastedTo, C->getType(), true);
4285   if (CastedBack != C)
4286     return nullptr;
4287 
4288   return CastedTo;
4289 }
4290 
4291 SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
4292                                              Instruction::CastOps *CastOp) {
4293   SelectInst *SI = dyn_cast<SelectInst>(V);
4294   if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
4295 
4296   CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
4297   if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
4298 
4299   CmpInst::Predicate Pred = CmpI->getPredicate();
4300   Value *CmpLHS = CmpI->getOperand(0);
4301   Value *CmpRHS = CmpI->getOperand(1);
4302   Value *TrueVal = SI->getTrueValue();
4303   Value *FalseVal = SI->getFalseValue();
4304   FastMathFlags FMF;
4305   if (isa<FPMathOperator>(CmpI))
4306     FMF = CmpI->getFastMathFlags();
4307 
4308   // Bail out early.
4309   if (CmpI->isEquality())
4310     return {SPF_UNKNOWN, SPNB_NA, false};
4311 
4312   // Deal with type mismatches.
4313   if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
4314     if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
4315       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
4316                                   cast<CastInst>(TrueVal)->getOperand(0), C,
4317                                   LHS, RHS);
4318     if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
4319       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
4320                                   C, cast<CastInst>(FalseVal)->getOperand(0),
4321                                   LHS, RHS);
4322   }
4323   return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
4324                               LHS, RHS);
4325 }
4326 
4327 /// Return true if "icmp Pred LHS RHS" is always true.
4328 static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
4329                             const Value *RHS, const DataLayout &DL,
4330                             unsigned Depth) {
4331   assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
4332   if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
4333     return true;
4334 
4335   switch (Pred) {
4336   default:
4337     return false;
4338 
4339   case CmpInst::ICMP_SLE: {
4340     const APInt *C;
4341 
4342     // LHS s<= LHS +_{nsw} C   if C >= 0
4343     if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))))
4344       return !C->isNegative();
4345     return false;
4346   }
4347 
4348   case CmpInst::ICMP_ULE: {
4349     const APInt *C;
4350 
4351     // LHS u<= LHS +_{nuw} C   for any C
4352     if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
4353       return true;
4354 
4355     // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
4356     auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B,
4357                                        const Value *&X,
4358                                        const APInt *&CA, const APInt *&CB) {
4359       if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
4360           match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
4361         return true;
4362 
4363       // If X & C == 0 then (X | C) == X +_{nuw} C
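      // (the 'or' only sets bits that are known to be zero in X, so it acts
      // like an addition that cannot produce a carry)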
4364       if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
4365           match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
4366         KnownBits Known(CA->getBitWidth());
4367         computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr,
4368                          /*CxtI*/ nullptr, /*DT*/ nullptr);
4369         if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero))
4370           return true;
4371       }
4372 
4373       return false;
4374     };
4375 
4376     const Value *X;
4377     const APInt *CLHS, *CRHS;
4378     if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
4379       return CLHS->ule(*CRHS);
4380 
4381     return false;
4382   }
4383   }
4384 }
4385 
4386 /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
4387 /// ALHS ARHS" is true.  Otherwise, return None.
4388 static Optional<bool>
4389 isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
4390                       const Value *ARHS, const Value *BLHS, const Value *BRHS,
4391                       const DataLayout &DL, unsigned Depth) {
4392   switch (Pred) {
4393   default:
4394     return None;
4395 
4396   case CmpInst::ICMP_SLT:
4397   case CmpInst::ICMP_SLE:
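    // By transitivity: BLHS s<= ALHS s<(=) ARHS s<= BRHS implies
    // BLHS s<(=) BRHS. The unsigned cases below are analogous.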
4398     if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) &&
4399         isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth))
4400       return true;
4401     return None;
4402 
4403   case CmpInst::ICMP_ULT:
4404   case CmpInst::ICMP_ULE:
4405     if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) &&
4406         isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth))
4407       return true;
4408     return None;
4409   }
4410 }
4411 
4412 /// Return true if the operands of the two compares match. IsSwappedOps is set
4413 /// to true when the operands match but are swapped.
4414 static bool isMatchingOps(const Value *ALHS, const Value *ARHS,
4415                           const Value *BLHS, const Value *BRHS,
4416                           bool &IsSwappedOps) {
4417 
4418   bool IsMatchingOps = (ALHS == BLHS && ARHS == BRHS);
4419   IsSwappedOps = (ALHS == BRHS && ARHS == BLHS);
4420   return IsMatchingOps || IsSwappedOps;
4421 }
4422 
4423 /// Return true if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS BRHS" is
4424 /// true.  Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS
4425 /// BRHS" is false.  Otherwise, return None if we can't infer anything.
4426 static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
4427                                                     const Value *ALHS,
4428                                                     const Value *ARHS,
4429                                                     CmpInst::Predicate BPred,
4430                                                     const Value *BLHS,
4431                                                     const Value *BRHS,
4432                                                     bool IsSwappedOps) {
4433   // Canonicalize the operands so they're matching.
4434   if (IsSwappedOps) {
4435     std::swap(BLHS, BRHS);
4436     BPred = ICmpInst::getSwappedPredicate(BPred);
4437   }
4438   if (CmpInst::isImpliedTrueByMatchingCmp(APred, BPred))
4439     return true;
4440   if (CmpInst::isImpliedFalseByMatchingCmp(APred, BPred))
4441     return false;
4442 
4443   return None;
4444 }
4445 
4446 /// Return true if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS C2" is
4447 /// true.  Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS
4448 /// C2" is false.  Otherwise, return None if we can't infer anything.
4449 static Optional<bool>
4450 isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
4451                                  const ConstantInt *C1,
4452                                  CmpInst::Predicate BPred,
4453                                  const Value *BLHS, const ConstantInt *C2) {
4454   assert(ALHS == BLHS && "LHS operands must match.");
4455   ConstantRange DomCR =
4456       ConstantRange::makeExactICmpRegion(APred, C1->getValue());
4457   ConstantRange CR =
4458       ConstantRange::makeAllowedICmpRegion(BPred, C2->getValue());
4459   ConstantRange Intersection = DomCR.intersectWith(CR);
4460   ConstantRange Difference = DomCR.difference(CR);
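  // If the two regions do not intersect, the first compare being true forces
  // the second to be false. If the first region is contained in the second,
  // the first compare being true forces the second to be true.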
4461   if (Intersection.isEmptySet())
4462     return false;
4463   if (Difference.isEmptySet())
4464     return true;
4465   return None;
4466 }
4467 
4468 /// Return true if LHS implies RHS is true.  Return false if LHS implies RHS is
4469 /// false.  Otherwise, return None if we can't infer anything.
4470 static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
4471                                          const ICmpInst *RHS,
4472                                          const DataLayout &DL, bool LHSIsTrue,
4473                                          unsigned Depth) {
4474   Value *ALHS = LHS->getOperand(0);
4475   Value *ARHS = LHS->getOperand(1);
4476   // The rest of the logic assumes the LHS condition is true.  If that's not the
4477   // case, invert the predicate to make it so.
4478   ICmpInst::Predicate APred =
4479       LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate();
4480 
4481   Value *BLHS = RHS->getOperand(0);
4482   Value *BRHS = RHS->getOperand(1);
4483   ICmpInst::Predicate BPred = RHS->getPredicate();
4484 
4485   // Can we infer anything when the two compares have matching operands?
4486   bool IsSwappedOps;
4487   if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, IsSwappedOps)) {
4488     if (Optional<bool> Implication = isImpliedCondMatchingOperands(
4489             APred, ALHS, ARHS, BPred, BLHS, BRHS, IsSwappedOps))
4490       return Implication;
4491     // No amount of additional analysis will infer the second condition, so
4492     // early exit.
4493     return None;
4494   }
4495 
4496   // Can we infer anything when the LHS operands match and the RHS operands are
4497   // constants (not necessarily matching)?
4498   if (ALHS == BLHS && isa<ConstantInt>(ARHS) && isa<ConstantInt>(BRHS)) {
4499     if (Optional<bool> Implication = isImpliedCondMatchingImmOperands(
4500             APred, ALHS, cast<ConstantInt>(ARHS), BPred, BLHS,
4501             cast<ConstantInt>(BRHS)))
4502       return Implication;
4503     // No amount of additional analysis will infer the second condition, so
4504     // early exit.
4505     return None;
4506   }
4507 
4508   if (APred == BPred)
4509     return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth);
4510   return None;
4511 }
4512 
4513 /// Return true if LHS implies RHS is true.  Return false if LHS implies RHS is
4514 /// false.  Otherwise, return None if we can't infer anything.  We expect the
4515 /// RHS to be an icmp and the LHS to be an 'and' or an 'or' instruction.
4516 static Optional<bool> isImpliedCondAndOr(const BinaryOperator *LHS,
4517                                          const ICmpInst *RHS,
4518                                          const DataLayout &DL, bool LHSIsTrue,
4519                                          unsigned Depth) {
4520   // The LHS must be an 'or' or an 'and' instruction.
4521   assert((LHS->getOpcode() == Instruction::And ||
4522           LHS->getOpcode() == Instruction::Or) &&
4523          "Expected LHS to be 'and' or 'or'.");
4524 
4525   assert(Depth <= MaxDepth && "Hit recursion limit");
4526 
4527   // If the result of an 'or' is false, then we know both legs of the 'or' are
4528   // false.  Similarly, if the result of an 'and' is true, then we know both
4529   // legs of the 'and' are true.
4530   Value *ALHS, *ARHS;
4531   if ((!LHSIsTrue && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) ||
4532       (LHSIsTrue && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) {
4533     // FIXME: Make this non-recursive.
4534     if (Optional<bool> Implication =
4535             isImpliedCondition(ALHS, RHS, DL, LHSIsTrue, Depth + 1))
4536       return Implication;
4537     if (Optional<bool> Implication =
4538             isImpliedCondition(ARHS, RHS, DL, LHSIsTrue, Depth + 1))
4539       return Implication;
4540     return None;
4541   }
4542   return None;
4543 }
4544 
4545 Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
4546                                         const DataLayout &DL, bool LHSIsTrue,
4547                                         unsigned Depth) {
4548   // Bail out when we hit the limit.
4549   if (Depth == MaxDepth)
4550     return None;
4551 
4552   // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
4553   // example.
4554   if (LHS->getType() != RHS->getType())
4555     return None;
4556 
4557   Type *OpTy = LHS->getType();
4558   assert(OpTy->isIntOrIntVectorTy(1) && "Expected integer type only!");
4559 
4560   // LHS ==> RHS by definition
4561   if (LHS == RHS)
4562     return LHSIsTrue;
4563 
4564   // FIXME: Extend the code below to handle vectors.
4565   if (OpTy->isVectorTy())
4566     return None;
4567 
4568   assert(OpTy->isIntegerTy(1) && "implied by above");
4569 
4570   // Both LHS and RHS are icmps.
4571   const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
4572   const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS);
4573   if (LHSCmp && RHSCmp)
4574     return isImpliedCondICmps(LHSCmp, RHSCmp, DL, LHSIsTrue, Depth);
4575 
4576   // The LHS should be an 'or' or an 'and' instruction.  We expect the RHS to be
4577   // an icmp. FIXME: Add support for and/or on the RHS.
4578   const BinaryOperator *LHSBO = dyn_cast<BinaryOperator>(LHS);
4579   if (LHSBO && RHSCmp) {
4580     if ((LHSBO->getOpcode() == Instruction::And ||
4581          LHSBO->getOpcode() == Instruction::Or))
4582       return isImpliedCondAndOr(LHSBO, RHSCmp, DL, LHSIsTrue, Depth);
4583   }
4584   return None;
4585 }
4586