1 //===- InstCombineCompares.cpp --------------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitICmp and visitFCmp functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/ConstantFolding.h"
19 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/MemoryBuiltins.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Analysis/VectorUtils.h"
23 #include "llvm/IR/ConstantRange.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/GetElementPtrTypeIterator.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/PatternMatch.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/KnownBits.h"
30 
31 using namespace llvm;
32 using namespace PatternMatch;
33 
34 #define DEBUG_TYPE "instcombine"
35 
36 // How many times is a select replaced by one of its operands?
37 STATISTIC(NumSel, "Number of select opts");
38 
39 
40 /// Compute Result = In1+In2, returning true if the result overflowed for this
41 /// type.
42 static bool addWithOverflow(APInt &Result, const APInt &In1,
43                             const APInt &In2, bool IsSigned = false) {
44   bool Overflow;
45   if (IsSigned)
46     Result = In1.sadd_ov(In2, Overflow);
47   else
48     Result = In1.uadd_ov(In2, Overflow);
49 
50   return Overflow;
51 }
52 
53 /// Compute Result = In1-In2, returning true if the result overflowed for this
54 /// type.
55 static bool subWithOverflow(APInt &Result, const APInt &In1,
56                             const APInt &In2, bool IsSigned = false) {
57   bool Overflow;
58   if (IsSigned)
59     Result = In1.ssub_ov(In2, Overflow);
60   else
61     Result = In1.usub_ov(In2, Overflow);
62 
63   return Overflow;
64 }
65 
66 /// Given an icmp instruction, return true if any use of this comparison is a
67 /// branch on sign bit comparison.
68 static bool hasBranchUse(ICmpInst &I) {
69   for (auto *U : I.users())
70     if (isa<BranchInst>(U))
71       return true;
72   return false;
73 }
74 
75 /// Given an exploded icmp instruction, return true if the comparison only
76 /// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the
77 /// result of the comparison is true when the input value is signed.
78 static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
79                            bool &TrueIfSigned) {
80   switch (Pred) {
81   case ICmpInst::ICMP_SLT:   // True if LHS s< 0
82     TrueIfSigned = true;
83     return RHS.isNullValue();
84   case ICmpInst::ICMP_SLE:   // True if LHS s<= RHS and RHS == -1
85     TrueIfSigned = true;
86     return RHS.isAllOnesValue();
87   case ICmpInst::ICMP_SGT:   // True if LHS s> -1
88     TrueIfSigned = false;
89     return RHS.isAllOnesValue();
90   case ICmpInst::ICMP_UGT:
91     // True if LHS u> RHS and RHS == high-bit-mask - 1
92     TrueIfSigned = true;
93     return RHS.isMaxSignedValue();
94   case ICmpInst::ICMP_UGE:
95     // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
96     TrueIfSigned = true;
97     return RHS.isSignMask();
98   default:
99     return false;
100   }
101 }
102 
103 /// Returns true if the exploded icmp can be expressed as a signed comparison
104 /// to zero and updates the predicate accordingly.
105 /// The signedness of the comparison is preserved.
106 /// TODO: Refactor with decomposeBitTestICmp()?
107 static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
108   if (!ICmpInst::isSigned(Pred))
109     return false;
110 
111   if (C.isNullValue())
112     return ICmpInst::isRelational(Pred);
113 
114   if (C.isOneValue()) {
115     if (Pred == ICmpInst::ICMP_SLT) {
116       Pred = ICmpInst::ICMP_SLE;
117       return true;
118     }
119   } else if (C.isAllOnesValue()) {
120     if (Pred == ICmpInst::ICMP_SGT) {
121       Pred = ICmpInst::ICMP_SGE;
122       return true;
123     }
124   }
125 
126   return false;
127 }
128 
129 /// Given a signed integer type and a set of known zero and one bits, compute
130 /// the maximum and minimum values that could have the specified known zero and
131 /// known one bits, returning them in Min/Max.
132 /// TODO: Move to method on KnownBits struct?
133 static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,
134                                                    APInt &Min, APInt &Max) {
135   assert(Known.getBitWidth() == Min.getBitWidth() &&
136          Known.getBitWidth() == Max.getBitWidth() &&
137          "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
138   APInt UnknownBits = ~(Known.Zero|Known.One);
139 
140   // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
141   // bit if it is unknown.
142   Min = Known.One;
143   Max = Known.One|UnknownBits;
144 
145   if (UnknownBits.isNegative()) { // Sign bit is unknown
146     Min.setSignBit();
147     Max.clearSignBit();
148   }
149 }
150 
151 /// Given an unsigned integer type and a set of known zero and one bits, compute
152 /// the maximum and minimum values that could have the specified known zero and
153 /// known one bits, returning them in Min/Max.
154 /// TODO: Move to method on KnownBits struct?
155 static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known,
156                                                      APInt &Min, APInt &Max) {
157   assert(Known.getBitWidth() == Min.getBitWidth() &&
158          Known.getBitWidth() == Max.getBitWidth() &&
159          "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
160   APInt UnknownBits = ~(Known.Zero|Known.One);
161 
162   // The minimum value is when the unknown bits are all zeros.
163   Min = Known.One;
164   // The maximum value is when the unknown bits are all ones.
165   Max = Known.One|UnknownBits;
166 }
167 
168 /// This is called when we see this pattern:
169 ///   cmp pred (load (gep GV, ...)), cmpcst
170 /// where GV is a global variable with a constant initializer. Try to simplify
171 /// this into some simple computation that does not need the load. For example
172 /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
173 ///
174 /// If AndCst is non-null, then the loaded value is masked with that constant
175 /// before doing the comparison. This handles cases like "A[i]&4 == 0".
176 Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
177                                                         GlobalVariable *GV,
178                                                         CmpInst &ICI,
179                                                         ConstantInt *AndCst) {
180   Constant *Init = GV->getInitializer();
181   if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
182     return nullptr;
183 
184   uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
185   // Don't blow up on huge arrays.
186   if (ArrayElementCount > MaxArraySizeForCombine)
187     return nullptr;
188 
189   // There are many forms of this optimization we can handle, for now, just do
190   // the simple index into a single-dimensional array.
191   //
192   // Require: GEP GV, 0, i {{, constant indices}}
193   if (GEP->getNumOperands() < 3 ||
194       !isa<ConstantInt>(GEP->getOperand(1)) ||
195       !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
196       isa<Constant>(GEP->getOperand(2)))
197     return nullptr;
198 
199   // Check that indices after the variable are constants and in-range for the
200   // type they index.  Collect the indices.  This is typically for arrays of
201   // structs.
202   SmallVector<unsigned, 4> LaterIndices;
203 
204   Type *EltTy = Init->getType()->getArrayElementType();
205   for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
206     ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
207     if (!Idx) return nullptr;  // Variable index.
208 
209     uint64_t IdxVal = Idx->getZExtValue();
210     if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
211 
212     if (StructType *STy = dyn_cast<StructType>(EltTy))
213       EltTy = STy->getElementType(IdxVal);
214     else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
215       if (IdxVal >= ATy->getNumElements()) return nullptr;
216       EltTy = ATy->getElementType();
217     } else {
218       return nullptr; // Unknown type.
219     }
220 
221     LaterIndices.push_back(IdxVal);
222   }
223 
224   enum { Overdefined = -3, Undefined = -2 };
225 
226   // Variables for our state machines.
227 
228   // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
229   // "i == 47 | i == 87", where 47 is the first index the condition is true for,
230   // and 87 is the second (and last) index.  FirstTrueElement is -2 when
231   // undefined, otherwise set to the first true element.  SecondTrueElement is
232   // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
233   int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
234 
235   // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
236   // form "i != 47 & i != 87".  Same state transitions as for true elements.
237   int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
238 
239   /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
240   /// define a state machine that triggers for ranges of values that the index
241   /// is true or false for.  This triggers on things like "abbbbc"[i] == 'b'.
242   /// This is -2 when undefined, -3 when overdefined, and otherwise the last
243   /// index in the range (inclusive).  We use -2 for undefined here because we
244   /// use relative comparisons and don't want 0-1 to match -1.
245   int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
246 
247   // MagicBitvector - This is a magic bitvector where we set a bit if the
248   // comparison is true for element 'i'.  If there are 64 elements or less in
249   // the array, this will fully represent all the comparison results.
250   uint64_t MagicBitvector = 0;
251 
252   // Scan the array and see if one of our patterns matches.
253   Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
254   for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
255     Constant *Elt = Init->getAggregateElement(i);
256     if (!Elt) return nullptr;
257 
258     // If this is indexing an array of structures, get the structure element.
259     if (!LaterIndices.empty())
260       Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
261 
262     // If the element is masked, handle it.
263     if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
264 
265     // Find out if the comparison would be true or false for the i'th element.
266     Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
267                                                   CompareRHS, DL, &TLI);
268     // If the result is undef for this element, ignore it.
269     if (isa<UndefValue>(C)) {
270       // Extend range state machines to cover this element in case there is an
271       // undef in the middle of the range.
272       if (TrueRangeEnd == (int)i-1)
273         TrueRangeEnd = i;
274       if (FalseRangeEnd == (int)i-1)
275         FalseRangeEnd = i;
276       continue;
277     }
278 
279     // If we can't compute the result for any of the elements, we have to give
280     // up evaluating the entire conditional.
281     if (!isa<ConstantInt>(C)) return nullptr;
282 
283     // Otherwise, we know if the comparison is true or false for this element,
284     // update our state machines.
285     bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
286 
287     // State machine for single/double/range index comparison.
288     if (IsTrueForElt) {
289       // Update the TrueElement state machine.
290       if (FirstTrueElement == Undefined)
291         FirstTrueElement = TrueRangeEnd = i;  // First true element.
292       else {
293         // Update double-compare state machine.
294         if (SecondTrueElement == Undefined)
295           SecondTrueElement = i;
296         else
297           SecondTrueElement = Overdefined;
298 
299         // Update range state machine.
300         if (TrueRangeEnd == (int)i-1)
301           TrueRangeEnd = i;
302         else
303           TrueRangeEnd = Overdefined;
304       }
305     } else {
306       // Update the FalseElement state machine.
307       if (FirstFalseElement == Undefined)
308         FirstFalseElement = FalseRangeEnd = i; // First false element.
309       else {
310         // Update double-compare state machine.
311         if (SecondFalseElement == Undefined)
312           SecondFalseElement = i;
313         else
314           SecondFalseElement = Overdefined;
315 
316         // Update range state machine.
317         if (FalseRangeEnd == (int)i-1)
318           FalseRangeEnd = i;
319         else
320           FalseRangeEnd = Overdefined;
321       }
322     }
323 
324     // If this element is in range, update our magic bitvector.
325     if (i < 64 && IsTrueForElt)
326       MagicBitvector |= 1ULL << i;
327 
328     // If all of our states become overdefined, bail out early.  Since the
329     // predicate is expensive, only check it every 8 elements.  This is only
330     // really useful for really huge arrays.
331     if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
332         SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
333         FalseRangeEnd == Overdefined)
334       return nullptr;
335   }
336 
337   // Now that we've scanned the entire array, emit our new comparison(s).  We
338   // order the state machines in complexity of the generated code.
339   Value *Idx = GEP->getOperand(2);
340 
341   // If the index is larger than the pointer size of the target, truncate the
342   // index down like the GEP would do implicitly.  We don't have to do this for
343   // an inbounds GEP because the index can't be out of range.
344   if (!GEP->isInBounds()) {
345     Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
346     unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
347     if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
348       Idx = Builder.CreateTrunc(Idx, IntPtrTy);
349   }
350 
351   // If the comparison is only true for one or two elements, emit direct
352   // comparisons.
353   if (SecondTrueElement != Overdefined) {
354     // None true -> false.
355     if (FirstTrueElement == Undefined)
356       return replaceInstUsesWith(ICI, Builder.getFalse());
357 
358     Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
359 
360     // True for one element -> 'i == 47'.
361     if (SecondTrueElement == Undefined)
362       return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
363 
364     // True for two elements -> 'i == 47 | i == 72'.
365     Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
366     Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
367     Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
368     return BinaryOperator::CreateOr(C1, C2);
369   }
370 
371   // If the comparison is only false for one or two elements, emit direct
372   // comparisons.
373   if (SecondFalseElement != Overdefined) {
374     // None false -> true.
375     if (FirstFalseElement == Undefined)
376       return replaceInstUsesWith(ICI, Builder.getTrue());
377 
378     Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
379 
380     // False for one element -> 'i != 47'.
381     if (SecondFalseElement == Undefined)
382       return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
383 
384     // False for two elements -> 'i != 47 & i != 72'.
385     Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
386     Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
387     Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
388     return BinaryOperator::CreateAnd(C1, C2);
389   }
390 
391   // If the comparison can be replaced with a range comparison for the elements
392   // where it is true, emit the range check.
393   if (TrueRangeEnd != Overdefined) {
394     assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
395 
396     // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
397     if (FirstTrueElement) {
398       Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
399       Idx = Builder.CreateAdd(Idx, Offs);
400     }
401 
402     Value *End = ConstantInt::get(Idx->getType(),
403                                   TrueRangeEnd-FirstTrueElement+1);
404     return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
405   }
406 
407   // False range check.
408   if (FalseRangeEnd != Overdefined) {
409     assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
410     // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
411     if (FirstFalseElement) {
412       Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
413       Idx = Builder.CreateAdd(Idx, Offs);
414     }
415 
416     Value *End = ConstantInt::get(Idx->getType(),
417                                   FalseRangeEnd-FirstFalseElement);
418     return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
419   }
420 
421   // If a magic bitvector captures the entire comparison state
422   // of this load, replace it with computation that does:
423   //   ((magic_cst >> i) & 1) != 0
424   {
425     Type *Ty = nullptr;
426 
427     // Look for an appropriate type:
428     // - The type of Idx if the magic fits
429     // - The smallest fitting legal type
430     if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
431       Ty = Idx->getType();
432     else
433       Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
434 
435     if (Ty) {
436       Value *V = Builder.CreateIntCast(Idx, Ty, false);
437       V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
438       V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
439       return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
440     }
441   }
442 
443   return nullptr;
444 }
445 
446 /// Return a value that can be used to compare the *offset* implied by a GEP to
447 /// zero. For example, if we have &A[i], we want to return 'i' for
448 /// "icmp ne i, 0". Note that, in general, indices can be complex, and scales
449 /// are involved. The above expression would also be legal to codegen as
450 /// "icmp ne (i*4), 0" (assuming A is a pointer to i32).
451 /// This latter form is less amenable to optimization though, and we are allowed
452 /// to generate the first by knowing that pointer arithmetic doesn't overflow.
453 ///
454 /// If we can't emit an optimized form for this expression, this returns null.
455 ///
456 static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
457                                           const DataLayout &DL) {
458   gep_type_iterator GTI = gep_type_begin(GEP);
459 
460   // Check to see if this gep only has a single variable index.  If so, and if
461   // any constant indices are a multiple of its scale, then we can compute this
462   // in terms of the scale of the variable index.  For example, if the GEP
463   // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
464   // because the expression will cross zero at the same point.
465   unsigned i, e = GEP->getNumOperands();
466   int64_t Offset = 0;
467   for (i = 1; i != e; ++i, ++GTI) {
468     if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
469       // Compute the aggregate offset of constant indices.
470       if (CI->isZero()) continue;
471 
472       // Handle a struct index, which adds its field offset to the pointer.
473       if (StructType *STy = GTI.getStructTypeOrNull()) {
474         Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
475       } else {
476         uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
477         Offset += Size*CI->getSExtValue();
478       }
479     } else {
480       // Found our variable index.
481       break;
482     }
483   }
484 
485   // If there are no variable indices, we must have a constant offset, just
486   // evaluate it the general way.
487   if (i == e) return nullptr;
488 
489   Value *VariableIdx = GEP->getOperand(i);
490   // Determine the scale factor of the variable element.  For example, this is
491   // 4 if the variable index is into an array of i32.
492   uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType());
493 
494   // Verify that there are no other variable indices.  If so, emit the hard way.
495   for (++i, ++GTI; i != e; ++i, ++GTI) {
496     ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
497     if (!CI) return nullptr;
498 
499     // Compute the aggregate offset of constant indices.
500     if (CI->isZero()) continue;
501 
502     // Handle a struct index, which adds its field offset to the pointer.
503     if (StructType *STy = GTI.getStructTypeOrNull()) {
504       Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
505     } else {
506       uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
507       Offset += Size*CI->getSExtValue();
508     }
509   }
510 
511   // Okay, we know we have a single variable index, which must be a
512   // pointer/array/vector index.  If there is no offset, life is simple, return
513   // the index.
514   Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType());
515   unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
516   if (Offset == 0) {
517     // Cast to intptrty in case a truncation occurs.  If an extension is needed,
518     // we don't need to bother extending: the extension won't affect where the
519     // computation crosses zero.
520     if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
521       VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy);
522     }
523     return VariableIdx;
524   }
525 
526   // Otherwise, there is an index.  The computation we will do will be modulo
527   // the pointer size, so get it.
528   uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
529 
530   Offset &= PtrSizeMask;
531   VariableScale &= PtrSizeMask;
532 
533   // To do this transformation, any constant index must be a multiple of the
534   // variable scale factor.  For example, we can evaluate "12 + 4*i" as "3 + i",
535   // but we can't evaluate "10 + 3*i" in terms of i.  Check that the offset is a
536   // multiple of the variable scale.
537   int64_t NewOffs = Offset / (int64_t)VariableScale;
538   if (Offset != NewOffs*(int64_t)VariableScale)
539     return nullptr;
540 
541   // Okay, we can do this evaluation.  Start by converting the index to intptr.
542   if (VariableIdx->getType() != IntPtrTy)
543     VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy,
544                                             true /*Signed*/);
545   Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
546   return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset");
547 }
548 
549 /// Returns true if we can rewrite Start as a GEP with pointer Base
550 /// and some integer offset. The nodes that need to be re-written
551 /// for this transformation will be added to Explored.
552 static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
553                                   const DataLayout &DL,
554                                   SetVector<Value *> &Explored) {
555   SmallVector<Value *, 16> WorkList(1, Start);
556   Explored.insert(Base);
557 
558   // The following traversal gives us an order which can be used
559   // when doing the final transformation. Since in the final
560   // transformation we create the PHI replacement instructions first,
561   // we don't have to get them in any particular order.
562   //
563   // However, for other instructions we will have to traverse the
564   // operands of an instruction first, which means that we have to
565   // do a post-order traversal.
566   while (!WorkList.empty()) {
567     SetVector<PHINode *> PHIs;
568 
569     while (!WorkList.empty()) {
570       if (Explored.size() >= 100)
571         return false;
572 
573       Value *V = WorkList.back();
574 
575       if (Explored.count(V) != 0) {
576         WorkList.pop_back();
577         continue;
578       }
579 
580       if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
581           !isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
582         // We've found some value that we can't explore which is different from
583         // the base. Therefore we can't do this transformation.
584         return false;
585 
586       if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
587         auto *CI = dyn_cast<CastInst>(V);
588         if (!CI->isNoopCast(DL))
589           return false;
590 
591         if (Explored.count(CI->getOperand(0)) == 0)
592           WorkList.push_back(CI->getOperand(0));
593       }
594 
595       if (auto *GEP = dyn_cast<GEPOperator>(V)) {
596         // We're limiting the GEP to having one index. This will preserve
597         // the original pointer type. We could handle more cases in the
598         // future.
599         if (GEP->getNumIndices() != 1 || !GEP->isInBounds() ||
600             GEP->getType() != Start->getType())
601           return false;
602 
603         if (Explored.count(GEP->getOperand(0)) == 0)
604           WorkList.push_back(GEP->getOperand(0));
605       }
606 
607       if (WorkList.back() == V) {
608         WorkList.pop_back();
609         // We've finished visiting this node, mark it as such.
610         Explored.insert(V);
611       }
612 
613       if (auto *PN = dyn_cast<PHINode>(V)) {
614         // We cannot transform PHIs on unsplittable basic blocks.
615         if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
616           return false;
617         Explored.insert(PN);
618         PHIs.insert(PN);
619       }
620     }
621 
622     // Explore the PHI nodes further.
623     for (auto *PN : PHIs)
624       for (Value *Op : PN->incoming_values())
625         if (Explored.count(Op) == 0)
626           WorkList.push_back(Op);
627   }
628 
629   // Make sure that we can do this. Since we can't insert GEPs in a basic
630   // block before a PHI node, we can't easily do this transformation if
631   // we have PHI node users of transformed instructions.
632   for (Value *Val : Explored) {
633     for (Value *Use : Val->uses()) {
634 
635       auto *PHI = dyn_cast<PHINode>(Use);
636       auto *Inst = dyn_cast<Instruction>(Val);
637 
638       if (Inst == Base || Inst == PHI || !Inst || !PHI ||
639           Explored.count(PHI) == 0)
640         continue;
641 
642       if (PHI->getParent() == Inst->getParent())
643         return false;
644     }
645   }
646   return true;
647 }
648 
649 // Sets the appropriate insert point on Builder where we can add
650 // a replacement Instruction for V (if that is possible).
651 static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
652                               bool Before = true) {
653   if (auto *PHI = dyn_cast<PHINode>(V)) {
654     Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
655     return;
656   }
657   if (auto *I = dyn_cast<Instruction>(V)) {
658     if (!Before)
659       I = &*std::next(I->getIterator());
660     Builder.SetInsertPoint(I);
661     return;
662   }
663   if (auto *A = dyn_cast<Argument>(V)) {
664     // Set the insertion point in the entry block.
665     BasicBlock &Entry = A->getParent()->getEntryBlock();
666     Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
667     return;
668   }
669   // Otherwise, this is a constant and we don't need to set a new
670   // insertion point.
671   assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
672 }
673 
674 /// Returns a re-written value of Start as an indexed GEP using Base as a
675 /// pointer.
676 static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
677                                  const DataLayout &DL,
678                                  SetVector<Value *> &Explored) {
679   // Perform all the substitutions. This is a bit tricky because we can
680   // have cycles in our use-def chains.
681   // 1. Create the PHI nodes without any incoming values.
682   // 2. Create all the other values.
683   // 3. Add the edges for the PHI nodes.
684   // 4. Emit GEPs to get the original pointers.
685   // 5. Remove the original instructions.
686   Type *IndexType = IntegerType::get(
687       Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType()));
688 
689   DenseMap<Value *, Value *> NewInsts;
690   NewInsts[Base] = ConstantInt::getNullValue(IndexType);
691 
692   // Create the new PHI nodes, without adding any incoming values.
693   for (Value *Val : Explored) {
694     if (Val == Base)
695       continue;
696     // Create empty phi nodes. This avoids cyclic dependencies when creating
697     // the remaining instructions.
698     if (auto *PHI = dyn_cast<PHINode>(Val))
699       NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
700                                       PHI->getName() + ".idx", PHI);
701   }
702   IRBuilder<> Builder(Base->getContext());
703 
704   // Create all the other instructions.
705   for (Value *Val : Explored) {
706 
707     if (NewInsts.find(Val) != NewInsts.end())
708       continue;
709 
710     if (auto *CI = dyn_cast<CastInst>(Val)) {
711       NewInsts[CI] = NewInsts[CI->getOperand(0)];
712       continue;
713     }
714     if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
715       Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)]
716                                                   : GEP->getOperand(1);
717       setInsertionPoint(Builder, GEP);
718       // Indices might need to be sign extended. GEPs will magically do
719       // this, but we need to do it ourselves here.
720       if (Index->getType()->getScalarSizeInBits() !=
721           NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
722         Index = Builder.CreateSExtOrTrunc(
723             Index, NewInsts[GEP->getOperand(0)]->getType(),
724             GEP->getOperand(0)->getName() + ".sext");
725       }
726 
727       auto *Op = NewInsts[GEP->getOperand(0)];
728       if (isa<ConstantInt>(Op) && dyn_cast<ConstantInt>(Op)->isZero())
729         NewInsts[GEP] = Index;
730       else
731         NewInsts[GEP] = Builder.CreateNSWAdd(
732             Op, Index, GEP->getOperand(0)->getName() + ".add");
733       continue;
734     }
735     if (isa<PHINode>(Val))
736       continue;
737 
738     llvm_unreachable("Unexpected instruction type");
739   }
740 
741   // Add the incoming values to the PHI nodes.
742   for (Value *Val : Explored) {
743     if (Val == Base)
744       continue;
745     // All the instructions have been created, we can now add edges to the
746     // phi nodes.
747     if (auto *PHI = dyn_cast<PHINode>(Val)) {
748       PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
749       for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
750         Value *NewIncoming = PHI->getIncomingValue(I);
751 
752         if (NewInsts.find(NewIncoming) != NewInsts.end())
753           NewIncoming = NewInsts[NewIncoming];
754 
755         NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
756       }
757     }
758   }
759 
760   for (Value *Val : Explored) {
761     if (Val == Base)
762       continue;
763 
764     // Depending on the type, for external users we have to emit
765     // a GEP or a GEP + ptrtoint.
766     setInsertionPoint(Builder, Val, false);
767 
768     // If required, create an inttoptr instruction for Base.
769     Value *NewBase = Base;
770     if (!Base->getType()->isPointerTy())
771       NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(),
772                                                Start->getName() + "to.ptr");
773 
774     Value *GEP = Builder.CreateInBoundsGEP(
775         Start->getType()->getPointerElementType(), NewBase,
776         makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr");
777 
778     if (!Val->getType()->isPointerTy()) {
779       Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(),
780                                               Val->getName() + ".conv");
781       GEP = Cast;
782     }
783     Val->replaceAllUsesWith(GEP);
784   }
785 
786   return NewInsts[Start];
787 }
788 
789 /// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
790 /// the input Value as a constant indexed GEP. Returns a pair containing
791 /// the GEPs Pointer and Index.
792 static std::pair<Value *, Value *>
793 getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
794   Type *IndexType = IntegerType::get(V->getContext(),
795                                      DL.getPointerTypeSizeInBits(V->getType()));
796 
797   Constant *Index = ConstantInt::getNullValue(IndexType);
798   while (true) {
799     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
800       // We accept only inbouds GEPs here to exclude the possibility of
801       // overflow.
802       if (!GEP->isInBounds())
803         break;
804       if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 &&
805           GEP->getType() == V->getType()) {
806         V = GEP->getOperand(0);
807         Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
808         Index = ConstantExpr::getAdd(
809             Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
810         continue;
811       }
812       break;
813     }
814     if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
815       if (!CI->isNoopCast(DL))
816         break;
817       V = CI->getOperand(0);
818       continue;
819     }
820     if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
821       if (!CI->isNoopCast(DL))
822         break;
823       V = CI->getOperand(0);
824       continue;
825     }
826     break;
827   }
828   return {V, Index};
829 }
830 
831 /// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
832 /// We can look through PHIs, GEPs and casts in order to determine a common base
833 /// between GEPLHS and RHS.
834 static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
835                                               ICmpInst::Predicate Cond,
836                                               const DataLayout &DL) {
837   if (!GEPLHS->hasAllConstantIndices())
838     return nullptr;
839 
840   // Make sure the pointers have the same type.
841   if (GEPLHS->getType() != RHS->getType())
842     return nullptr;
843 
844   Value *PtrBase, *Index;
845   std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
846 
847   // The set of nodes that will take part in this transformation.
848   SetVector<Value *> Nodes;
849 
850   if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
851     return nullptr;
852 
853   // We know we can re-write this as
854   //  ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)
855   // Since we've only looked through inbouds GEPs we know that we
856   // can't have overflow on either side. We can therefore re-write
857   // this as:
858   //   OFFSET1 cmp OFFSET2
859   Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes);
860 
861   // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
862   // GEP having PtrBase as the pointer base, and has returned in NewRHS the
863   // offset. Since Index is the offset of LHS to the base pointer, we will now
864   // compare the offsets instead of comparing the pointers.
865   return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
866 }
867 
868 /// Fold comparisons between a GEP instruction and something else. At this point
869 /// we know that the GEP is on the LHS of the comparison.
870 Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
871                                        ICmpInst::Predicate Cond,
872                                        Instruction &I) {
873   // Don't transform signed compares of GEPs into index compares. Even if the
874   // GEP is inbounds, the final add of the base pointer can have signed overflow
875   // and would change the result of the icmp.
876   // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
877   // the maximum signed value for the pointer type.
878   if (ICmpInst::isSigned(Cond))
879     return nullptr;
880 
881   // Look through bitcasts and addrspacecasts. We do not however want to remove
882   // 0 GEPs.
883   if (!isa<GetElementPtrInst>(RHS))
884     RHS = RHS->stripPointerCasts();
885 
886   Value *PtrBase = GEPLHS->getOperand(0);
887   if (PtrBase == RHS && GEPLHS->isInBounds()) {
888     // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
889     // This transformation (ignoring the base and scales) is valid because we
890     // know pointers can't overflow since the gep is inbounds.  See if we can
891     // output an optimized form.
892     Value *Offset = evaluateGEPOffsetExpression(GEPLHS, *this, DL);
893 
894     // If not, synthesize the offset the hard way.
895     if (!Offset)
896       Offset = EmitGEPOffset(GEPLHS);
897     return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
898                         Constant::getNullValue(Offset->getType()));
899   } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
900     // If the base pointers are different, but the indices are the same, just
901     // compare the base pointer.
902     if (PtrBase != GEPRHS->getOperand(0)) {
903       bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
904       IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
905                         GEPRHS->getOperand(0)->getType();
906       if (IndicesTheSame)
907         for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
908           if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
909             IndicesTheSame = false;
910             break;
911           }
912 
913       // If all indices are the same, just compare the base pointers.
914       if (IndicesTheSame)
915         return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
916 
917       // If we're comparing GEPs with two base pointers that only differ in type
918       // and both GEPs have only constant indices or just one use, then fold
919       // the compare with the adjusted indices.
920       if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
921           (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
922           (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
923           PtrBase->stripPointerCasts() ==
924               GEPRHS->getOperand(0)->stripPointerCasts()) {
925         Value *LOffset = EmitGEPOffset(GEPLHS);
926         Value *ROffset = EmitGEPOffset(GEPRHS);
927 
928         // If we looked through an addrspacecast between different sized address
929         // spaces, the LHS and RHS pointers are different sized
930         // integers. Truncate to the smaller one.
931         Type *LHSIndexTy = LOffset->getType();
932         Type *RHSIndexTy = ROffset->getType();
933         if (LHSIndexTy != RHSIndexTy) {
934           if (LHSIndexTy->getPrimitiveSizeInBits() <
935               RHSIndexTy->getPrimitiveSizeInBits()) {
936             ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
937           } else
938             LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
939         }
940 
941         Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond),
942                                         LOffset, ROffset);
943         return replaceInstUsesWith(I, Cmp);
944       }
945 
946       // Otherwise, the base pointers are different and the indices are
947       // different. Try convert this to an indexed compare by looking through
948       // PHIs/casts.
949       return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
950     }
951 
952     // If one of the GEPs has all zero indices, recurse.
953     if (GEPLHS->hasAllZeroIndices())
954       return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
955                          ICmpInst::getSwappedPredicate(Cond), I);
956 
957     // If the other GEP has all zero indices, recurse.
958     if (GEPRHS->hasAllZeroIndices())
959       return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
960 
961     bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
962     if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
963       // If the GEPs only differ by one index, compare it.
964       unsigned NumDifferences = 0;  // Keep track of # differences.
965       unsigned DiffOperand = 0;     // The operand that differs.
966       for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
967         if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
968           if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
969                    GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
970             // Irreconcilable differences.
971             NumDifferences = 2;
972             break;
973           } else {
974             if (NumDifferences++) break;
975             DiffOperand = i;
976           }
977         }
978 
979       if (NumDifferences == 0)   // SAME GEP?
980         return replaceInstUsesWith(I, // No comparison is needed here.
981                              Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond)));
982 
983       else if (NumDifferences == 1 && GEPsInBounds) {
984         Value *LHSV = GEPLHS->getOperand(DiffOperand);
985         Value *RHSV = GEPRHS->getOperand(DiffOperand);
986         // Make sure we do a signed comparison here.
987         return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
988       }
989     }
990 
991     // Only lower this if the icmp is the only user of the GEP or if we expect
992     // the result to fold to a constant!
993     if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
994         (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
995       // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
996       Value *L = EmitGEPOffset(GEPLHS);
997       Value *R = EmitGEPOffset(GEPRHS);
998       return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
999     }
1000   }
1001 
1002   // Try convert this to an indexed compare by looking through PHIs/casts as a
1003   // last resort.
1004   return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
1005 }
1006 
1007 Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
1008                                          const AllocaInst *Alloca,
1009                                          const Value *Other) {
1010   assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
1011 
1012   // It would be tempting to fold away comparisons between allocas and any
1013   // pointer not based on that alloca (e.g. an argument). However, even
1014   // though such pointers cannot alias, they can still compare equal.
1015   //
1016   // But LLVM doesn't specify where allocas get their memory, so if the alloca
1017   // doesn't escape we can argue that it's impossible to guess its value, and we
1018   // can therefore act as if any such guesses are wrong.
1019   //
1020   // The code below checks that the alloca doesn't escape, and that it's only
1021   // used in a comparison once (the current instruction). The
1022   // single-comparison-use condition ensures that we're trivially folding all
1023   // comparisons against the alloca consistently, and avoids the risk of
1024   // erroneously folding a comparison of the pointer with itself.
1025 
1026   unsigned MaxIter = 32; // Break cycles and bound to constant-time.
1027 
1028   SmallVector<const Use *, 32> Worklist;
1029   for (const Use &U : Alloca->uses()) {
1030     if (Worklist.size() >= MaxIter)
1031       return nullptr;
1032     Worklist.push_back(&U);
1033   }
1034 
1035   unsigned NumCmps = 0;
1036   while (!Worklist.empty()) {
1037     assert(Worklist.size() <= MaxIter);
1038     const Use *U = Worklist.pop_back_val();
1039     const Value *V = U->getUser();
1040     --MaxIter;
1041 
1042     if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
1043         isa<SelectInst>(V)) {
1044       // Track the uses.
1045     } else if (isa<LoadInst>(V)) {
1046       // Loading from the pointer doesn't escape it.
1047       continue;
1048     } else if (const auto *SI = dyn_cast<StoreInst>(V)) {
1049       // Storing *to* the pointer is fine, but storing the pointer escapes it.
1050       if (SI->getValueOperand() == U->get())
1051         return nullptr;
1052       continue;
1053     } else if (isa<ICmpInst>(V)) {
1054       if (NumCmps++)
1055         return nullptr; // Found more than one cmp.
1056       continue;
1057     } else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
1058       switch (Intrin->getIntrinsicID()) {
1059         // These intrinsics don't escape or compare the pointer. Memset is safe
1060         // because we don't allow ptrtoint. Memcpy and memmove are safe because
1061         // we don't allow stores, so src cannot point to V.
1062         case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
1063         case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
1064           continue;
1065         default:
1066           return nullptr;
1067       }
1068     } else {
1069       return nullptr;
1070     }
1071     for (const Use &U : V->uses()) {
1072       if (Worklist.size() >= MaxIter)
1073         return nullptr;
1074       Worklist.push_back(&U);
1075     }
1076   }
1077 
1078   Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
1079   return replaceInstUsesWith(
1080       ICI,
1081       ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
1082 }
1083 
1084 /// Fold "icmp pred (X+CI), X".
1085 Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
1086                                               ICmpInst::Predicate Pred) {
1087   // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
1088   // so the values can never be equal.  Similarly for all other "or equals"
1089   // operators.
1090 
1091   // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
1092   // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
1093   // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
1094   if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
1095     Value *R =
1096       ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
1097     return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
1098   }
1099 
1100   // (X+1) >u X        --> X <u (0-1)        --> X != 255
1101   // (X+2) >u X        --> X <u (0-2)        --> X <u 254
1102   // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
1103   if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
1104     return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
1105 
1106   unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
1107   ConstantInt *SMax = ConstantInt::get(X->getContext(),
1108                                        APInt::getSignedMaxValue(BitWidth));
1109 
1110   // (X+ 1) <s X       --> X >s (MAXSINT-1)          --> X == 127
1111   // (X+ 2) <s X       --> X >s (MAXSINT-2)          --> X >s 125
1112   // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)    --> X >s 0
1113   // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)    --> X >s -1
1114   // (X+ -2) <s X      --> X >s (MAXSINT- -2)        --> X >s 126
1115   // (X+ -1) <s X      --> X >s (MAXSINT- -1)        --> X != 127
1116   if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
1117     return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
1118 
1119   // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
1120   // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
1121   // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
1122   // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
1123   // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
1124   // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128
1125 
1126   assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
1127   Constant *C = Builder.getInt(CI->getValue() - 1);
1128   return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
1129 }
1130 
1131 /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
1132 /// (icmp eq/ne A, Log2(AP2/AP1)) ->
1133 /// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
1134 Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A,
1135                                                  const APInt &AP1,
1136                                                  const APInt &AP2) {
1137   assert(I.isEquality() && "Cannot fold icmp gt/lt");
1138 
1139   auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
1140     if (I.getPredicate() == I.ICMP_NE)
1141       Pred = CmpInst::getInversePredicate(Pred);
1142     return new ICmpInst(Pred, LHS, RHS);
1143   };
1144 
1145   // Don't bother doing any work for cases which InstSimplify handles.
1146   if (AP2.isNullValue())
1147     return nullptr;
1148 
1149   bool IsAShr = isa<AShrOperator>(I.getOperand(0));
1150   if (IsAShr) {
1151     if (AP2.isAllOnesValue())
1152       return nullptr;
1153     if (AP2.isNegative() != AP1.isNegative())
1154       return nullptr;
1155     if (AP2.sgt(AP1))
1156       return nullptr;
1157   }
1158 
1159   if (!AP1)
1160     // 'A' must be large enough to shift out the highest set bit.
1161     return getICmp(I.ICMP_UGT, A,
1162                    ConstantInt::get(A->getType(), AP2.logBase2()));
1163 
1164   if (AP1 == AP2)
1165     return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
1166 
1167   int Shift;
1168   if (IsAShr && AP1.isNegative())
1169     Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
1170   else
1171     Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
1172 
1173   if (Shift > 0) {
1174     if (IsAShr && AP1 == AP2.ashr(Shift)) {
1175       // There are multiple solutions if we are comparing against -1 and the LHS
1176       // of the ashr is not a power of two.
1177       if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
1178         return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
1179       return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1180     } else if (AP1 == AP2.lshr(Shift)) {
1181       return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1182     }
1183   }
1184 
1185   // Shifting const2 will never be equal to const1.
1186   // FIXME: This should always be handled by InstSimplify?
1187   auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
1188   return replaceInstUsesWith(I, TorF);
1189 }
1190 
1191 /// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
1192 /// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
1193 Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A,
1194                                                  const APInt &AP1,
1195                                                  const APInt &AP2) {
1196   assert(I.isEquality() && "Cannot fold icmp gt/lt");
1197 
1198   auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
1199     if (I.getPredicate() == I.ICMP_NE)
1200       Pred = CmpInst::getInversePredicate(Pred);
1201     return new ICmpInst(Pred, LHS, RHS);
1202   };
1203 
1204   // Don't bother doing any work for cases which InstSimplify handles.
1205   if (AP2.isNullValue())
1206     return nullptr;
1207 
1208   unsigned AP2TrailingZeros = AP2.countTrailingZeros();
1209 
1210   if (!AP1 && AP2TrailingZeros != 0)
1211     return getICmp(
1212         I.ICMP_UGE, A,
1213         ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
1214 
1215   if (AP1 == AP2)
1216     return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
1217 
1218   // Get the distance between the lowest bits that are set.
1219   int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
1220 
1221   if (Shift > 0 && AP2.shl(Shift) == AP1)
1222     return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1223 
1224   // Shifting const2 will never be equal to const1.
1225   // FIXME: This should always be handled by InstSimplify?
1226   auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
1227   return replaceInstUsesWith(I, TorF);
1228 }
1229 
1230 /// The caller has matched a pattern of the form:
1231 ///   I = icmp ugt (add (add A, B), CI2), CI1
1232 /// If this is of the form:
1233 ///   sum = a + b
1234 ///   if (sum+128 >u 255)
1235 /// Then replace it with llvm.sadd.with.overflow.i8.
1236 ///
1237 static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
1238                                           ConstantInt *CI2, ConstantInt *CI1,
1239                                           InstCombiner &IC) {
1240   // The transformation we're trying to do here is to transform this into an
1241   // llvm.sadd.with.overflow.  To do this, we have to replace the original add
1242   // with a narrower add, and discard the add-with-constant that is part of the
1243   // range check (if we can't eliminate it, this isn't profitable).
1244 
1245   // In order to eliminate the add-with-constant, the compare can be its only
1246   // use.
1247   Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
1248   if (!AddWithCst->hasOneUse())
1249     return nullptr;
1250 
1251   // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
1252   if (!CI2->getValue().isPowerOf2())
1253     return nullptr;
1254   unsigned NewWidth = CI2->getValue().countTrailingZeros();
1255   if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
1256     return nullptr;
1257 
1258   // The width of the new add formed is 1 more than the bias.
1259   ++NewWidth;
1260 
1261   // Check to see that CI1 is an all-ones value with NewWidth bits.
1262   if (CI1->getBitWidth() == NewWidth ||
1263       CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
1264     return nullptr;
1265 
1266   // This is only really a signed overflow check if the inputs have been
1267   // sign-extended; check for that condition. For example, if CI2 is 2^31 and
1268   // the operands of the add are 64 bits wide, we need at least 33 sign bits.
1269   unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
1270   if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
1271       IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
1272     return nullptr;
1273 
1274   // In order to replace the original add with a narrower
1275   // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
1276   // and truncates that discard the high bits of the add.  Verify that this is
1277   // the case.
1278   Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
1279   for (User *U : OrigAdd->users()) {
1280     if (U == AddWithCst)
1281       continue;
1282 
1283     // Only accept truncates for now.  We would really like a nice recursive
1284     // predicate like SimplifyDemandedBits, but which goes downwards the use-def
1285     // chain to see which bits of a value are actually demanded.  If the
1286     // original add had another add which was then immediately truncated, we
1287     // could still do the transformation.
1288     TruncInst *TI = dyn_cast<TruncInst>(U);
1289     if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
1290       return nullptr;
1291   }
1292 
1293   // If the pattern matches, truncate the inputs to the narrower type and
1294   // use the sadd_with_overflow intrinsic to efficiently compute both the
1295   // result and the overflow bit.
1296   Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
1297   Value *F = Intrinsic::getDeclaration(I.getModule(),
1298                                        Intrinsic::sadd_with_overflow, NewType);
1299 
1300   InstCombiner::BuilderTy &Builder = IC.Builder;
1301 
1302   // Put the new code above the original add, in case there are any uses of the
1303   // add between the add and the compare.
1304   Builder.SetInsertPoint(OrigAdd);
1305 
1306   Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc");
1307   Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc");
1308   CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd");
1309   Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result");
1310   Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType());
1311 
1312   // The inner add was the result of the narrow add, zero extended to the
1313   // wider type.  Replace it with the result computed by the intrinsic.
1314   IC.replaceInstUsesWith(*OrigAdd, ZExt);
1315 
1316   // The original icmp gets replaced with the overflow value.
1317   return ExtractValueInst::Create(Call, 1, "sadd.overflow");
1318 }
1319 
1320 // Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
1321 Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
1322   CmpInst::Predicate Pred = Cmp.getPredicate();
1323   Value *X = Cmp.getOperand(0);
1324 
1325   if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) {
1326     Value *A, *B;
1327     SelectPatternResult SPR = matchSelectPattern(X, A, B);
1328     if (SPR.Flavor == SPF_SMIN) {
1329       if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
1330         return new ICmpInst(Pred, B, Cmp.getOperand(1));
1331       if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
1332         return new ICmpInst(Pred, A, Cmp.getOperand(1));
1333     }
1334   }
1335   return nullptr;
1336 }
1337 
1338 // Fold icmp Pred X, C.
1339 Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
1340   CmpInst::Predicate Pred = Cmp.getPredicate();
1341   Value *X = Cmp.getOperand(0);
1342 
1343   const APInt *C;
1344   if (!match(Cmp.getOperand(1), m_APInt(C)))
1345     return nullptr;
1346 
1347   Value *A = nullptr, *B = nullptr;
1348 
1349   // Match the following pattern, which is a common idiom when writing
1350   // overflow-safe integer arithmetic functions. The source performs an addition
1351   // in wider type and explicitly checks for overflow using comparisons against
1352   // INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic.
1353   //
1354   // TODO: This could probably be generalized to handle other overflow-safe
1355   // operations if we worked out the formulas to compute the appropriate magic
1356   // constants.
1357   //
1358   // sum = a + b
1359   // if (sum+128 >u 255)  ...  -> llvm.sadd.with.overflow.i8
1360   {
1361     ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
1362     if (Pred == ICmpInst::ICMP_UGT &&
1363         match(X, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
1364       if (Instruction *Res = processUGT_ADDCST_ADD(
1365               Cmp, A, B, CI2, cast<ConstantInt>(Cmp.getOperand(1)), *this))
1366         return Res;
1367   }
1368 
1369   // FIXME: Use m_APInt to allow folds for splat constants.
1370   ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1));
1371   if (!CI)
1372     return nullptr;
1373 
1374   // Canonicalize icmp instructions based on dominating conditions.
1375   BasicBlock *Parent = Cmp.getParent();
1376   BasicBlock *Dom = Parent->getSinglePredecessor();
1377   auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
1378   ICmpInst::Predicate Pred2;
1379   BasicBlock *TrueBB, *FalseBB;
1380   ConstantInt *CI2;
1381   if (BI && match(BI, m_Br(m_ICmp(Pred2, m_Specific(X), m_ConstantInt(CI2)),
1382                            TrueBB, FalseBB)) &&
1383       TrueBB != FalseBB) {
1384     ConstantRange CR =
1385         ConstantRange::makeAllowedICmpRegion(Pred, CI->getValue());
1386     ConstantRange DominatingCR =
1387         (Parent == TrueBB)
1388             ? ConstantRange::makeExactICmpRegion(Pred2, CI2->getValue())
1389             : ConstantRange::makeExactICmpRegion(
1390                   CmpInst::getInversePredicate(Pred2), CI2->getValue());
1391     ConstantRange Intersection = DominatingCR.intersectWith(CR);
1392     ConstantRange Difference = DominatingCR.difference(CR);
1393     if (Intersection.isEmptySet())
1394       return replaceInstUsesWith(Cmp, Builder.getFalse());
1395     if (Difference.isEmptySet())
1396       return replaceInstUsesWith(Cmp, Builder.getTrue());
1397 
1398     // If this is a normal comparison, it demands all bits. If it is a sign
1399     // bit comparison, it only demands the sign bit.
1400     bool UnusedBit;
1401     bool IsSignBit = isSignBitCheck(Pred, CI->getValue(), UnusedBit);
1402 
1403     // Canonicalizing a sign bit comparison that gets used in a branch,
1404     // pessimizes codegen by generating branch on zero instruction instead
1405     // of a test and branch. So we avoid canonicalizing in such situations
1406     // because test and branch instruction has better branch displacement
1407     // than compare and branch instruction.
1408     if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
1409       return nullptr;
1410 
1411     if (auto *AI = Intersection.getSingleElement())
1412       return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI));
1413     if (auto *AD = Difference.getSingleElement())
1414       return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD));
1415   }
1416 
1417   return nullptr;
1418 }
1419 
1420 /// Fold icmp (trunc X, Y), C.
1421 Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
1422                                                  TruncInst *Trunc,
1423                                                  const APInt &C) {
1424   ICmpInst::Predicate Pred = Cmp.getPredicate();
1425   Value *X = Trunc->getOperand(0);
1426   if (C.isOneValue() && C.getBitWidth() > 1) {
1427     // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
1428     Value *V = nullptr;
1429     if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
1430       return new ICmpInst(ICmpInst::ICMP_SLT, V,
1431                           ConstantInt::get(V->getType(), 1));
1432   }
1433 
1434   if (Cmp.isEquality() && Trunc->hasOneUse()) {
1435     // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
1436     // of the high bits truncated out of x are known.
1437     unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
1438              SrcBits = X->getType()->getScalarSizeInBits();
1439     KnownBits Known = computeKnownBits(X, 0, &Cmp);
1440 
1441     // If all the high bits are known, we can do this xform.
1442     if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
1443       // Pull in the high bits from known-ones set.
1444       APInt NewRHS = C.zext(SrcBits);
1445       NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
1446       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS));
1447     }
1448   }
1449 
1450   return nullptr;
1451 }
1452 
1453 /// Fold icmp (xor X, Y), C.
1454 Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
1455                                                BinaryOperator *Xor,
1456                                                const APInt &C) {
1457   Value *X = Xor->getOperand(0);
1458   Value *Y = Xor->getOperand(1);
1459   const APInt *XorC;
1460   if (!match(Y, m_APInt(XorC)))
1461     return nullptr;
1462 
1463   // If this is a comparison that tests the signbit (X < 0) or (x > -1),
1464   // fold the xor.
1465   ICmpInst::Predicate Pred = Cmp.getPredicate();
1466   bool TrueIfSigned = false;
1467   if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) {
1468 
1469     // If the sign bit of the XorCst is not set, there is no change to
1470     // the operation, just stop using the Xor.
1471     if (!XorC->isNegative()) {
1472       Cmp.setOperand(0, X);
1473       Worklist.Add(Xor);
1474       return &Cmp;
1475     }
1476 
1477     // Emit the opposite comparison.
1478     if (TrueIfSigned)
1479       return new ICmpInst(ICmpInst::ICMP_SGT, X,
1480                           ConstantInt::getAllOnesValue(X->getType()));
1481     else
1482       return new ICmpInst(ICmpInst::ICMP_SLT, X,
1483                           ConstantInt::getNullValue(X->getType()));
1484   }
1485 
1486   if (Xor->hasOneUse()) {
1487     // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
1488     if (!Cmp.isEquality() && XorC->isSignMask()) {
1489       Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
1490                             : Cmp.getSignedPredicate();
1491       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1492     }
1493 
1494     // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
1495     if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
1496       Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
1497                             : Cmp.getSignedPredicate();
1498       Pred = Cmp.getSwappedPredicate(Pred);
1499       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1500     }
1501   }
1502 
1503   // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
1504   //   iff -C is a power of 2
1505   if (Pred == ICmpInst::ICMP_UGT && *XorC == ~C && (C + 1).isPowerOf2())
1506     return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
1507 
1508   // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
1509   //   iff -C is a power of 2
1510   if (Pred == ICmpInst::ICMP_ULT && *XorC == -C && C.isPowerOf2())
1511     return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
1512 
1513   return nullptr;
1514 }
1515 
1516 /// Fold icmp (and (sh X, Y), C2), C1.
1517 Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
1518                                             const APInt &C1, const APInt &C2) {
1519   BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
1520   if (!Shift || !Shift->isShift())
1521     return nullptr;
1522 
1523   // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could
1524   // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in
1525   // code produced by the clang front-end, for bitfield access.
1526   // This seemingly simple opportunity to fold away a shift turns out to be
1527   // rather complicated. See PR17827 for details.
1528   unsigned ShiftOpcode = Shift->getOpcode();
1529   bool IsShl = ShiftOpcode == Instruction::Shl;
1530   const APInt *C3;
1531   if (match(Shift->getOperand(1), m_APInt(C3))) {
1532     bool CanFold = false;
1533     if (ShiftOpcode == Instruction::Shl) {
1534       // For a left shift, we can fold if the comparison is not signed. We can
1535       // also fold a signed comparison if the mask value and comparison value
1536       // are not negative. These constraints may not be obvious, but we can
1537       // prove that they are correct using an SMT solver.
1538       if (!Cmp.isSigned() || (!C2.isNegative() && !C1.isNegative()))
1539         CanFold = true;
1540     } else {
1541       bool IsAshr = ShiftOpcode == Instruction::AShr;
1542       // For a logical right shift, we can fold if the comparison is not signed.
1543       // We can also fold a signed comparison if the shifted mask value and the
1544       // shifted comparison value are not negative. These constraints may not be
1545       // obvious, but we can prove that they are correct using an SMT solver.
1546       // For an arithmetic shift right we can do the same, if we ensure
1547       // the And doesn't use any bits being shifted in. Normally these would
1548       // be turned into lshr by SimplifyDemandedBits, but not if there is an
1549       // additional user.
1550       if (!IsAshr || (C2.shl(*C3).lshr(*C3) == C2)) {
1551         if (!Cmp.isSigned() ||
1552             (!C2.shl(*C3).isNegative() && !C1.shl(*C3).isNegative()))
1553           CanFold = true;
1554       }
1555     }
1556 
1557     if (CanFold) {
1558       APInt NewCst = IsShl ? C1.lshr(*C3) : C1.shl(*C3);
1559       APInt SameAsC1 = IsShl ? NewCst.shl(*C3) : NewCst.lshr(*C3);
1560       // Check to see if we are shifting out any of the bits being compared.
1561       if (SameAsC1 != C1) {
1562         // If we shifted bits out, the fold is not going to work out. As a
1563         // special case, check to see if this means that the result is always
1564         // true or false now.
1565         if (Cmp.getPredicate() == ICmpInst::ICMP_EQ)
1566           return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType()));
1567         if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
1568           return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType()));
1569       } else {
1570         Cmp.setOperand(1, ConstantInt::get(And->getType(), NewCst));
1571         APInt NewAndCst = IsShl ? C2.lshr(*C3) : C2.shl(*C3);
1572         And->setOperand(1, ConstantInt::get(And->getType(), NewAndCst));
1573         And->setOperand(0, Shift->getOperand(0));
1574         Worklist.Add(Shift); // Shift is dead.
1575         return &Cmp;
1576       }
1577     }
1578   }
1579 
1580   // Turn ((X >> Y) & C2) == 0  into  (X & (C2 << Y)) == 0.  The latter is
1581   // preferable because it allows the C2 << Y expression to be hoisted out of a
1582   // loop if Y is invariant and X is not.
1583   if (Shift->hasOneUse() && C1.isNullValue() && Cmp.isEquality() &&
1584       !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
1585     // Compute C2 << Y.
1586     Value *NewShift =
1587         IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1))
1588               : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1));
1589 
1590     // Compute X & (C2 << Y).
1591     Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift);
1592     Cmp.setOperand(0, NewAnd);
1593     return &Cmp;
1594   }
1595 
1596   return nullptr;
1597 }
1598 
1599 /// Fold icmp (and X, C2), C1.
1600 Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
1601                                                  BinaryOperator *And,
1602                                                  const APInt &C1) {
1603   const APInt *C2;
1604   if (!match(And->getOperand(1), m_APInt(C2)))
1605     return nullptr;
1606 
1607   if (!And->hasOneUse())
1608     return nullptr;
1609 
1610   // If the LHS is an 'and' of a truncate and we can widen the and/compare to
1611   // the input width without changing the value produced, eliminate the cast:
1612   //
1613   // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1'
1614   //
1615   // We can do this transformation if the constants do not have their sign bits
1616   // set or if it is an equality comparison. Extending a relational comparison
1617   // when we're checking the sign bit would not work.
1618   Value *W;
1619   if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) &&
1620       (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) {
1621     // TODO: Is this a good transform for vectors? Wider types may reduce
1622     // throughput. Should this transform be limited (even for scalars) by using
1623     // shouldChangeType()?
1624     if (!Cmp.getType()->isVectorTy()) {
1625       Type *WideType = W->getType();
1626       unsigned WideScalarBits = WideType->getScalarSizeInBits();
1627       Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits));
1628       Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
1629       Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName());
1630       return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
1631     }
1632   }
1633 
1634   if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2))
1635     return I;
1636 
1637   // (icmp pred (and (or (lshr A, B), A), 1), 0) -->
1638   // (icmp pred (and A, (or (shl 1, B), 1), 0))
1639   //
1640   // iff pred isn't signed
1641   if (!Cmp.isSigned() && C1.isNullValue() && And->getOperand(0)->hasOneUse() &&
1642       match(And->getOperand(1), m_One())) {
1643     Constant *One = cast<Constant>(And->getOperand(1));
1644     Value *Or = And->getOperand(0);
1645     Value *A, *B, *LShr;
1646     if (match(Or, m_Or(m_Value(LShr), m_Value(A))) &&
1647         match(LShr, m_LShr(m_Specific(A), m_Value(B)))) {
1648       unsigned UsesRemoved = 0;
1649       if (And->hasOneUse())
1650         ++UsesRemoved;
1651       if (Or->hasOneUse())
1652         ++UsesRemoved;
1653       if (LShr->hasOneUse())
1654         ++UsesRemoved;
1655 
1656       // Compute A & ((1 << B) | 1)
1657       Value *NewOr = nullptr;
1658       if (auto *C = dyn_cast<Constant>(B)) {
1659         if (UsesRemoved >= 1)
1660           NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
1661       } else {
1662         if (UsesRemoved >= 3)
1663           NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
1664                                                      /*HasNUW=*/true),
1665                                    One, Or->getName());
1666       }
1667       if (NewOr) {
1668         Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
1669         Cmp.setOperand(0, NewAnd);
1670         return &Cmp;
1671       }
1672     }
1673   }
1674 
1675   return nullptr;
1676 }
1677 
1678 /// Fold icmp (and X, Y), C.
1679 Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
1680                                                BinaryOperator *And,
1681                                                const APInt &C) {
1682   if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
1683     return I;
1684 
1685   // TODO: These all require that Y is constant too, so refactor with the above.
1686 
1687   // Try to optimize things like "A[i] & 42 == 0" to index computations.
1688   Value *X = And->getOperand(0);
1689   Value *Y = And->getOperand(1);
1690   if (auto *LI = dyn_cast<LoadInst>(X))
1691     if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
1692       if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
1693         if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
1694             !LI->isVolatile() && isa<ConstantInt>(Y)) {
1695           ConstantInt *C2 = cast<ConstantInt>(Y);
1696           if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
1697             return Res;
1698         }
1699 
1700   if (!Cmp.isEquality())
1701     return nullptr;
1702 
1703   // X & -C == -C -> X >  u ~C
1704   // X & -C != -C -> X <= u ~C
1705   //   iff C is a power of 2
1706   if (Cmp.getOperand(1) == Y && (-C).isPowerOf2()) {
1707     auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT
1708                                                           : CmpInst::ICMP_ULE;
1709     return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
1710   }
1711 
1712   // (X & C2) == 0 -> (trunc X) >= 0
1713   // (X & C2) != 0 -> (trunc X) <  0
1714   //   iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
1715   const APInt *C2;
1716   if (And->hasOneUse() && C.isNullValue() && match(Y, m_APInt(C2))) {
1717     int32_t ExactLogBase2 = C2->exactLogBase2();
1718     if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
1719       Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
1720       if (And->getType()->isVectorTy())
1721         NTy = VectorType::get(NTy, And->getType()->getVectorNumElements());
1722       Value *Trunc = Builder.CreateTrunc(X, NTy);
1723       auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
1724                                                             : CmpInst::ICMP_SLT;
1725       return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
1726     }
1727   }
1728 
1729   return nullptr;
1730 }
1731 
1732 /// Fold icmp (or X, Y), C.
1733 Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
1734                                               const APInt &C) {
1735   ICmpInst::Predicate Pred = Cmp.getPredicate();
1736   if (C.isOneValue()) {
1737     // icmp slt signum(V) 1 --> icmp slt V, 1
1738     Value *V = nullptr;
1739     if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
1740       return new ICmpInst(ICmpInst::ICMP_SLT, V,
1741                           ConstantInt::get(V->getType(), 1));
1742   }
1743 
1744   // X | C == C --> X <=u C
1745   // X | C != C --> X  >u C
1746   //   iff C+1 is a power of 2 (C is a bitmask of the low bits)
1747   if (Cmp.isEquality() && Cmp.getOperand(1) == Or->getOperand(1) &&
1748       (C + 1).isPowerOf2()) {
1749     Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
1750     return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1));
1751   }
1752 
1753   if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
1754     return nullptr;
1755 
1756   Value *P, *Q;
1757   if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
1758     // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
1759     // -> and (icmp eq P, null), (icmp eq Q, null).
1760     Value *CmpP =
1761         Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
1762     Value *CmpQ =
1763         Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
1764     auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1765     return BinaryOperator::Create(BOpc, CmpP, CmpQ);
1766   }
1767 
1768   // Are we using xors to bitwise check for a pair of (in)equalities? Convert to
1769   // a shorter form that has more potential to be folded even further.
1770   Value *X1, *X2, *X3, *X4;
1771   if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
1772       match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
1773     // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
1774     // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
1775     Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
1776     Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4);
1777     auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1778     return BinaryOperator::Create(BOpc, Cmp12, Cmp34);
1779   }
1780 
1781   return nullptr;
1782 }
1783 
1784 /// Fold icmp (mul X, Y), C.
1785 Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp,
1786                                                BinaryOperator *Mul,
1787                                                const APInt &C) {
1788   const APInt *MulC;
1789   if (!match(Mul->getOperand(1), m_APInt(MulC)))
1790     return nullptr;
1791 
1792   // If this is a test of the sign bit and the multiply is sign-preserving with
1793   // a constant operand, use the multiply LHS operand instead.
1794   ICmpInst::Predicate Pred = Cmp.getPredicate();
1795   if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) {
1796     if (MulC->isNegative())
1797       Pred = ICmpInst::getSwappedPredicate(Pred);
1798     return new ICmpInst(Pred, Mul->getOperand(0),
1799                         Constant::getNullValue(Mul->getType()));
1800   }
1801 
1802   return nullptr;
1803 }
1804 
1805 /// Fold icmp (shl 1, Y), C.
1806 static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
1807                                    const APInt &C) {
1808   Value *Y;
1809   if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
1810     return nullptr;
1811 
1812   Type *ShiftType = Shl->getType();
1813   unsigned TypeBits = C.getBitWidth();
1814   bool CIsPowerOf2 = C.isPowerOf2();
1815   ICmpInst::Predicate Pred = Cmp.getPredicate();
1816   if (Cmp.isUnsigned()) {
1817     // (1 << Y) pred C -> Y pred Log2(C)
1818     if (!CIsPowerOf2) {
1819       // (1 << Y) <  30 -> Y <= 4
1820       // (1 << Y) <= 30 -> Y <= 4
1821       // (1 << Y) >= 30 -> Y >  4
1822       // (1 << Y) >  30 -> Y >  4
1823       if (Pred == ICmpInst::ICMP_ULT)
1824         Pred = ICmpInst::ICMP_ULE;
1825       else if (Pred == ICmpInst::ICMP_UGE)
1826         Pred = ICmpInst::ICMP_UGT;
1827     }
1828 
1829     // (1 << Y) >= 2147483648 -> Y >= 31 -> Y == 31
1830     // (1 << Y) <  2147483648 -> Y <  31 -> Y != 31
1831     unsigned CLog2 = C.logBase2();
1832     if (CLog2 == TypeBits - 1) {
1833       if (Pred == ICmpInst::ICMP_UGE)
1834         Pred = ICmpInst::ICMP_EQ;
1835       else if (Pred == ICmpInst::ICMP_ULT)
1836         Pred = ICmpInst::ICMP_NE;
1837     }
1838     return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
1839   } else if (Cmp.isSigned()) {
1840     Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
1841     if (C.isAllOnesValue()) {
1842       // (1 << Y) <= -1 -> Y == 31
1843       if (Pred == ICmpInst::ICMP_SLE)
1844         return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
1845 
1846       // (1 << Y) >  -1 -> Y != 31
1847       if (Pred == ICmpInst::ICMP_SGT)
1848         return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
1849     } else if (!C) {
1850       // (1 << Y) <  0 -> Y == 31
1851       // (1 << Y) <= 0 -> Y == 31
1852       if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
1853         return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
1854 
1855       // (1 << Y) >= 0 -> Y != 31
1856       // (1 << Y) >  0 -> Y != 31
1857       if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
1858         return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
1859     }
1860   } else if (Cmp.isEquality() && CIsPowerOf2) {
1861     return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C.logBase2()));
1862   }
1863 
1864   return nullptr;
1865 }
1866 
1867 /// Fold icmp (shl X, Y), C.
1868 Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
1869                                                BinaryOperator *Shl,
1870                                                const APInt &C) {
1871   const APInt *ShiftVal;
1872   if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
1873     return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
1874 
1875   const APInt *ShiftAmt;
1876   if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
1877     return foldICmpShlOne(Cmp, Shl, C);
1878 
1879   // Check that the shift amount is in range. If not, don't perform undefined
1880   // shifts. When the shift is visited, it will be simplified.
1881   unsigned TypeBits = C.getBitWidth();
1882   if (ShiftAmt->uge(TypeBits))
1883     return nullptr;
1884 
1885   ICmpInst::Predicate Pred = Cmp.getPredicate();
1886   Value *X = Shl->getOperand(0);
1887   Type *ShType = Shl->getType();
1888 
1889   // NSW guarantees that we are only shifting out sign bits from the high bits,
1890   // so we can ASHR the compare constant without needing a mask and eliminate
1891   // the shift.
1892   if (Shl->hasNoSignedWrap()) {
1893     if (Pred == ICmpInst::ICMP_SGT) {
1894       // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
1895       APInt ShiftedC = C.ashr(*ShiftAmt);
1896       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1897     }
1898     if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) {
1899       // This is the same code as the SGT case, but assert the pre-condition
1900       // that is needed for this to work with equality predicates.
1901       assert(C.ashr(*ShiftAmt).shl(*ShiftAmt) == C &&
1902              "Compare known true or false was not folded");
1903       APInt ShiftedC = C.ashr(*ShiftAmt);
1904       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1905     }
1906     if (Pred == ICmpInst::ICMP_SLT) {
1907       // SLE is the same as above, but SLE is canonicalized to SLT, so convert:
1908       // (X << S) <=s C is equiv to X <=s (C >> S) for all C
1909       // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX
1910       // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN
1911       assert(!C.isMinSignedValue() && "Unexpected icmp slt");
1912       APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1;
1913       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1914     }
1915     // If this is a signed comparison to 0 and the shift is sign preserving,
1916     // use the shift LHS operand instead; isSignTest may change 'Pred', so only
1917     // do that if we're sure to not continue on in this function.
1918     if (isSignTest(Pred, C))
1919       return new ICmpInst(Pred, X, Constant::getNullValue(ShType));
1920   }
1921 
1922   // NUW guarantees that we are only shifting out zero bits from the high bits,
1923   // so we can LSHR the compare constant without needing a mask and eliminate
1924   // the shift.
1925   if (Shl->hasNoUnsignedWrap()) {
1926     if (Pred == ICmpInst::ICMP_UGT) {
1927       // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt)
1928       APInt ShiftedC = C.lshr(*ShiftAmt);
1929       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1930     }
1931     if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) {
1932       // This is the same code as the UGT case, but assert the pre-condition
1933       // that is needed for this to work with equality predicates.
1934       assert(C.lshr(*ShiftAmt).shl(*ShiftAmt) == C &&
1935              "Compare known true or false was not folded");
1936       APInt ShiftedC = C.lshr(*ShiftAmt);
1937       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1938     }
1939     if (Pred == ICmpInst::ICMP_ULT) {
1940       // ULE is the same as above, but ULE is canonicalized to ULT, so convert:
1941       // (X << S) <=u C is equiv to X <=u (C >> S) for all C
1942       // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
1943       // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
1944       assert(C.ugt(0) && "ult 0 should have been eliminated");
1945       APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1;
1946       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1947     }
1948   }
1949 
1950   if (Cmp.isEquality() && Shl->hasOneUse()) {
1951     // Strength-reduce the shift into an 'and'.
1952     Constant *Mask = ConstantInt::get(
1953         ShType,
1954         APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
1955     Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
1956     Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt));
1957     return new ICmpInst(Pred, And, LShrC);
1958   }
1959 
1960   // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
1961   bool TrueIfSigned = false;
1962   if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) {
1963     // (X << 31) <s 0  --> (X & 1) != 0
1964     Constant *Mask = ConstantInt::get(
1965         ShType,
1966         APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
1967     Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
1968     return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
1969                         And, Constant::getNullValue(ShType));
1970   }
1971 
1972   // Transform (icmp pred iM (shl iM %v, N), C)
1973   // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
1974   // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
1975   // This enables us to get rid of the shift in favor of a trunc that may be
1976   // free on the target. It has the additional benefit of comparing to a
1977   // smaller constant that may be more target-friendly.
1978   unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
1979   if (Shl->hasOneUse() && Amt != 0 && C.countTrailingZeros() >= Amt &&
1980       DL.isLegalInteger(TypeBits - Amt)) {
1981     Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
1982     if (ShType->isVectorTy())
1983       TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements());
1984     Constant *NewC =
1985         ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt));
1986     return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
1987   }
1988 
1989   return nullptr;
1990 }
1991 
1992 /// Fold icmp ({al}shr X, Y), C.
1993 Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
1994                                                BinaryOperator *Shr,
1995                                                const APInt &C) {
1996   // An exact shr only shifts out zero bits, so:
1997   // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
1998   Value *X = Shr->getOperand(0);
1999   CmpInst::Predicate Pred = Cmp.getPredicate();
2000   if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() &&
2001       C.isNullValue())
2002     return new ICmpInst(Pred, X, Cmp.getOperand(1));
2003 
2004   const APInt *ShiftVal;
2005   if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal)))
2006     return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal);
2007 
2008   const APInt *ShiftAmt;
2009   if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
2010     return nullptr;
2011 
2012   // Check that the shift amount is in range. If not, don't perform undefined
2013   // shifts. When the shift is visited it will be simplified.
2014   unsigned TypeBits = C.getBitWidth();
2015   unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
2016   if (ShAmtVal >= TypeBits || ShAmtVal == 0)
2017     return nullptr;
2018 
2019   bool IsAShr = Shr->getOpcode() == Instruction::AShr;
2020   bool IsExact = Shr->isExact();
2021   Type *ShrTy = Shr->getType();
2022   // TODO: If we could guarantee that InstSimplify would handle all of the
2023   // constant-value-based preconditions in the folds below, then we could assert
2024   // those conditions rather than checking them. This is difficult because of
2025   // undef/poison (PR34838).
2026   if (IsAShr) {
2027     if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) {
2028       // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC)
2029       // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC)
2030       APInt ShiftedC = C.shl(ShAmtVal);
2031       if (ShiftedC.ashr(ShAmtVal) == C)
2032         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2033     }
2034     if (Pred == CmpInst::ICMP_SGT) {
2035       // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
2036       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2037       if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
2038           (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
2039         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2040     }
2041   } else {
2042     if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
2043       // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
2044       // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
2045       APInt ShiftedC = C.shl(ShAmtVal);
2046       if (ShiftedC.lshr(ShAmtVal) == C)
2047         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2048     }
2049     if (Pred == CmpInst::ICMP_UGT) {
2050       // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
2051       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2052       if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
2053         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2054     }
2055   }
2056 
2057   if (!Cmp.isEquality())
2058     return nullptr;
2059 
2060   // Handle equality comparisons of shift-by-constant.
2061 
2062   // If the comparison constant changes with the shift, the comparison cannot
2063   // succeed (bits of the comparison constant cannot match the shifted value).
2064   // This should be known by InstSimplify and already be folded to true/false.
2065   assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) ||
2066           (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) &&
2067          "Expected icmp+shr simplify did not occur.");
2068 
2069   // If the bits shifted out are known zero, compare the unshifted value:
2070   //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
2071   if (Shr->isExact())
2072     return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
2073 
2074   if (Shr->hasOneUse()) {
2075     // Canonicalize the shift into an 'and':
2076     // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
2077     APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
2078     Constant *Mask = ConstantInt::get(ShrTy, Val);
2079     Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
2080     return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal));
2081   }
2082 
2083   return nullptr;
2084 }
2085 
2086 /// Fold icmp (udiv X, Y), C.
2087 Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
2088                                                 BinaryOperator *UDiv,
2089                                                 const APInt &C) {
2090   const APInt *C2;
2091   if (!match(UDiv->getOperand(0), m_APInt(C2)))
2092     return nullptr;
2093 
2094   assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
2095 
2096   // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
2097   Value *Y = UDiv->getOperand(1);
2098   if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) {
2099     assert(!C.isMaxValue() &&
2100            "icmp ugt X, UINT_MAX should have been simplified already.");
2101     return new ICmpInst(ICmpInst::ICMP_ULE, Y,
2102                         ConstantInt::get(Y->getType(), C2->udiv(C + 1)));
2103   }
2104 
2105   // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
2106   if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
2107     assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
2108     return new ICmpInst(ICmpInst::ICMP_UGT, Y,
2109                         ConstantInt::get(Y->getType(), C2->udiv(C)));
2110   }
2111 
2112   return nullptr;
2113 }
2114 
2115 /// Fold icmp ({su}div X, Y), C.
2116 Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
2117                                                BinaryOperator *Div,
2118                                                const APInt &C) {
2119   // Fold: icmp pred ([us]div X, C2), C -> range test
2120   // Fold this div into the comparison, producing a range check.
2121   // Determine, based on the divide type, what the range is being
2122   // checked.  If there is an overflow on the low or high side, remember
2123   // it, otherwise compute the range [low, hi) bounding the new value.
2124   // See: InsertRangeTest above for the kinds of replacements possible.
2125   const APInt *C2;
2126   if (!match(Div->getOperand(1), m_APInt(C2)))
2127     return nullptr;
2128 
2129   // FIXME: If the operand types don't match the type of the divide
2130   // then don't attempt this transform. The code below doesn't have the
2131   // logic to deal with a signed divide and an unsigned compare (and
2132   // vice versa). This is because (x /s C2) <s C  produces different
2133   // results than (x /s C2) <u C or (x /u C2) <s C or even
2134   // (x /u C2) <u C.  Simply casting the operands and result won't
2135   // work. :(  The if statement below tests that condition and bails
2136   // if it finds it.
2137   bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
2138   if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
2139     return nullptr;
2140 
2141   // The ProdOV computation fails on divide by 0 and divide by -1. Cases with
2142   // INT_MIN will also fail if the divisor is 1. Although folds of all these
2143   // division-by-constant cases should be present, we can not assert that they
2144   // have happened before we reach this icmp instruction.
2145   if (C2->isNullValue() || C2->isOneValue() ||
2146       (DivIsSigned && C2->isAllOnesValue()))
2147     return nullptr;
2148 
2149   // Compute Prod = C * C2. We are essentially solving an equation of
2150   // form X / C2 = C. We solve for X by multiplying C2 and C.
2151   // By solving for X, we can turn this into a range check instead of computing
2152   // a divide.
2153   APInt Prod = C * *C2;
2154 
2155   // Determine if the product overflows by seeing if the product is not equal to
2156   // the divide. Make sure we do the same kind of divide as in the LHS
2157   // instruction that we're folding.
2158   bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;
2159 
2160   ICmpInst::Predicate Pred = Cmp.getPredicate();
2161 
2162   // If the division is known to be exact, then there is no remainder from the
2163   // divide, so the covered range size is unit, otherwise it is the divisor.
2164   APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;
2165 
2166   // Figure out the interval that is being checked.  For example, a comparison
2167   // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
2168   // Compute this interval based on the constants involved and the signedness of
2169   // the compare/divide.  This computes a half-open interval, keeping track of
2170   // whether either value in the interval overflows.  After analysis each
2171   // overflow variable is set to 0 if it's corresponding bound variable is valid
2172   // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
2173   int LoOverflow = 0, HiOverflow = 0;
2174   APInt LoBound, HiBound;
2175 
2176   if (!DivIsSigned) {  // udiv
2177     // e.g. X/5 op 3  --> [15, 20)
2178     LoBound = Prod;
2179     HiOverflow = LoOverflow = ProdOV;
2180     if (!HiOverflow) {
2181       // If this is not an exact divide, then many values in the range collapse
2182       // to the same result value.
2183       HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
2184     }
2185   } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
2186     if (C.isNullValue()) {       // (X / pos) op 0
2187       // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
2188       LoBound = -(RangeSize - 1);
2189       HiBound = RangeSize;
2190     } else if (C.isStrictlyPositive()) {   // (X / pos) op pos
2191       LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
2192       HiOverflow = LoOverflow = ProdOV;
2193       if (!HiOverflow)
2194         HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
2195     } else {                       // (X / pos) op neg
2196       // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
2197       HiBound = Prod + 1;
2198       LoOverflow = HiOverflow = ProdOV ? -1 : 0;
2199       if (!LoOverflow) {
2200         APInt DivNeg = -RangeSize;
2201         LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
2202       }
2203     }
2204   } else if (C2->isNegative()) { // Divisor is < 0.
2205     if (Div->isExact())
2206       RangeSize.negate();
2207     if (C.isNullValue()) { // (X / neg) op 0
2208       // e.g. X/-5 op 0  --> [-4, 5)
2209       LoBound = RangeSize + 1;
2210       HiBound = -RangeSize;
2211       if (HiBound == *C2) {        // -INTMIN = INTMIN
2212         HiOverflow = 1;            // [INTMIN+1, overflow)
2213         HiBound = APInt();         // e.g. X/INTMIN = 0 --> X > INTMIN
2214       }
2215     } else if (C.isStrictlyPositive()) {   // (X / neg) op pos
2216       // e.g. X/-5 op 3  --> [-19, -14)
2217       HiBound = Prod + 1;
2218       HiOverflow = LoOverflow = ProdOV ? -1 : 0;
2219       if (!LoOverflow)
2220         LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
2221     } else {                       // (X / neg) op neg
2222       LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
2223       LoOverflow = HiOverflow = ProdOV;
2224       if (!HiOverflow)
2225         HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
2226     }
2227 
2228     // Dividing by a negative swaps the condition.  LT <-> GT
2229     Pred = ICmpInst::getSwappedPredicate(Pred);
2230   }
2231 
2232   Value *X = Div->getOperand(0);
2233   switch (Pred) {
2234     default: llvm_unreachable("Unhandled icmp opcode!");
2235     case ICmpInst::ICMP_EQ:
2236       if (LoOverflow && HiOverflow)
2237         return replaceInstUsesWith(Cmp, Builder.getFalse());
2238       if (HiOverflow)
2239         return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
2240                             ICmpInst::ICMP_UGE, X,
2241                             ConstantInt::get(Div->getType(), LoBound));
2242       if (LoOverflow)
2243         return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
2244                             ICmpInst::ICMP_ULT, X,
2245                             ConstantInt::get(Div->getType(), HiBound));
2246       return replaceInstUsesWith(
2247           Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
2248     case ICmpInst::ICMP_NE:
2249       if (LoOverflow && HiOverflow)
2250         return replaceInstUsesWith(Cmp, Builder.getTrue());
2251       if (HiOverflow)
2252         return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
2253                             ICmpInst::ICMP_ULT, X,
2254                             ConstantInt::get(Div->getType(), LoBound));
2255       if (LoOverflow)
2256         return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
2257                             ICmpInst::ICMP_UGE, X,
2258                             ConstantInt::get(Div->getType(), HiBound));
2259       return replaceInstUsesWith(Cmp,
2260                                  insertRangeTest(X, LoBound, HiBound,
2261                                                  DivIsSigned, false));
2262     case ICmpInst::ICMP_ULT:
2263     case ICmpInst::ICMP_SLT:
2264       if (LoOverflow == +1)   // Low bound is greater than input range.
2265         return replaceInstUsesWith(Cmp, Builder.getTrue());
2266       if (LoOverflow == -1)   // Low bound is less than input range.
2267         return replaceInstUsesWith(Cmp, Builder.getFalse());
2268       return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound));
2269     case ICmpInst::ICMP_UGT:
2270     case ICmpInst::ICMP_SGT:
2271       if (HiOverflow == +1)       // High bound greater than input range.
2272         return replaceInstUsesWith(Cmp, Builder.getFalse());
2273       if (HiOverflow == -1)       // High bound less than input range.
2274         return replaceInstUsesWith(Cmp, Builder.getTrue());
2275       if (Pred == ICmpInst::ICMP_UGT)
2276         return new ICmpInst(ICmpInst::ICMP_UGE, X,
2277                             ConstantInt::get(Div->getType(), HiBound));
2278       return new ICmpInst(ICmpInst::ICMP_SGE, X,
2279                           ConstantInt::get(Div->getType(), HiBound));
2280   }
2281 
2282   return nullptr;
2283 }
2284 
2285 /// Fold icmp (sub X, Y), C.
2286 Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
2287                                                BinaryOperator *Sub,
2288                                                const APInt &C) {
2289   Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
2290   ICmpInst::Predicate Pred = Cmp.getPredicate();
2291 
2292   // The following transforms are only worth it if the only user of the subtract
2293   // is the icmp.
2294   if (!Sub->hasOneUse())
2295     return nullptr;
2296 
2297   if (Sub->hasNoSignedWrap()) {
2298     // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
2299     if (Pred == ICmpInst::ICMP_SGT && C.isAllOnesValue())
2300       return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
2301 
2302     // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
2303     if (Pred == ICmpInst::ICMP_SGT && C.isNullValue())
2304       return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
2305 
2306     // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
2307     if (Pred == ICmpInst::ICMP_SLT && C.isNullValue())
2308       return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
2309 
2310     // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
2311     if (Pred == ICmpInst::ICMP_SLT && C.isOneValue())
2312       return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
2313   }
2314 
2315   const APInt *C2;
2316   if (!match(X, m_APInt(C2)))
2317     return nullptr;
2318 
2319   // C2 - Y <u C -> (Y | (C - 1)) == C2
2320   //   iff (C2 & (C - 1)) == C - 1 and C is a power of 2
2321   if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() &&
2322       (*C2 & (C - 1)) == (C - 1))
2323     return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X);
2324 
2325   // C2 - Y >u C -> (Y | C) != C2
2326   //   iff C2 & C == C and C + 1 is a power of 2
2327   if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C)
2328     return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X);
2329 
2330   return nullptr;
2331 }
2332 
2333 /// Fold icmp (add X, Y), C.
2334 Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
2335                                                BinaryOperator *Add,
2336                                                const APInt &C) {
2337   Value *Y = Add->getOperand(1);
2338   const APInt *C2;
2339   if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
2340     return nullptr;
2341 
2342   // Fold icmp pred (add X, C2), C.
2343   Value *X = Add->getOperand(0);
2344   Type *Ty = Add->getType();
2345   CmpInst::Predicate Pred = Cmp.getPredicate();
2346 
2347   // If the add does not wrap, we can always adjust the compare by subtracting
2348   // the constants. Equality comparisons are handled elsewhere. SGE/SLE are
2349   // canonicalized to SGT/SLT.
2350   if (Add->hasNoSignedWrap() &&
2351       (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) {
2352     bool Overflow;
2353     APInt NewC = C.ssub_ov(*C2, Overflow);
2354     // If there is overflow, the result must be true or false.
2355     // TODO: Can we assert there is no overflow because InstSimplify always
2356     // handles those cases?
2357     if (!Overflow)
2358       // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2)
2359       return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC));
2360   }
2361 
2362   auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2);
2363   const APInt &Upper = CR.getUpper();
2364   const APInt &Lower = CR.getLower();
2365   if (Cmp.isSigned()) {
2366     if (Lower.isSignMask())
2367       return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
2368     if (Upper.isSignMask())
2369       return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
2370   } else {
2371     if (Lower.isMinValue())
2372       return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper));
2373     if (Upper.isMinValue())
2374       return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
2375   }
2376 
2377   if (!Add->hasOneUse())
2378     return nullptr;
2379 
2380   // X+C <u C2 -> (X & -C2) == C
2381   //   iff C & (C2-1) == 0
2382   //       C2 is a power of 2
2383   if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0)
2384     return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C),
2385                         ConstantExpr::getNeg(cast<Constant>(Y)));
2386 
2387   // X+C >u C2 -> (X & ~C2) != C
2388   //   iff C & C2 == 0
2389   //       C2+1 is a power of 2
2390   if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0)
2391     return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C),
2392                         ConstantExpr::getNeg(cast<Constant>(Y)));
2393 
2394   return nullptr;
2395 }
2396 
2397 bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
2398                                            Value *&RHS, ConstantInt *&Less,
2399                                            ConstantInt *&Equal,
2400                                            ConstantInt *&Greater) {
2401   // TODO: Generalize this to work with other comparison idioms or ensure
2402   // they get canonicalized into this form.
2403 
2404   // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32
2405   // Greater), where Equal, Less and Greater are placeholders for any three
2406   // constants.
2407   ICmpInst::Predicate PredA, PredB;
2408   if (match(SI->getTrueValue(), m_ConstantInt(Equal)) &&
2409       match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) &&
2410       PredA == ICmpInst::ICMP_EQ &&
2411       match(SI->getFalseValue(),
2412             m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)),
2413                      m_ConstantInt(Less), m_ConstantInt(Greater))) &&
2414       PredB == ICmpInst::ICMP_SLT) {
2415     return true;
2416   }
2417   return false;
2418 }
2419 
2420 Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
2421                                                   SelectInst *Select,
2422                                                   ConstantInt *C) {
2423 
2424   assert(C && "Cmp RHS should be a constant int!");
2425   // If we're testing a constant value against the result of a three way
2426   // comparison, the result can be expressed directly in terms of the
2427   // original values being compared.  Note: We could possibly be more
2428   // aggressive here and remove the hasOneUse test. The original select is
2429   // really likely to simplify or sink when we remove a test of the result.
2430   Value *OrigLHS, *OrigRHS;
2431   ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan;
2432   if (Cmp.hasOneUse() &&
2433       matchThreeWayIntCompare(Select, OrigLHS, OrigRHS, C1LessThan, C2Equal,
2434                               C3GreaterThan)) {
2435     assert(C1LessThan && C2Equal && C3GreaterThan);
2436 
2437     bool TrueWhenLessThan =
2438         ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C)
2439             ->isAllOnesValue();
2440     bool TrueWhenEqual =
2441         ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C)
2442             ->isAllOnesValue();
2443     bool TrueWhenGreaterThan =
2444         ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C)
2445             ->isAllOnesValue();
2446 
2447     // This generates the new instruction that will replace the original Cmp
2448     // Instruction. Instead of enumerating the various combinations when
2449     // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus
2450     // false, we rely on chaining of ORs and future passes of InstCombine to
2451     // simplify the OR further (i.e. a s< b || a == b becomes a s<= b).
2452 
2453     // When none of the three constants satisfy the predicate for the RHS (C),
2454     // the entire original Cmp can be simplified to a false.
2455     Value *Cond = Builder.getFalse();
2456     if (TrueWhenLessThan)
2457       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
2458     if (TrueWhenEqual)
2459       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
2460     if (TrueWhenGreaterThan)
2461       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
2462 
2463     return replaceInstUsesWith(Cmp, Cond);
2464   }
2465   return nullptr;
2466 }
2467 
2468 /// Try to fold integer comparisons with a constant operand: icmp Pred X, C
2469 /// where X is some kind of instruction.
2470 Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
2471   const APInt *C;
2472   if (!match(Cmp.getOperand(1), m_APInt(C)))
2473     return nullptr;
2474 
2475   if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) {
2476     switch (BO->getOpcode()) {
2477     case Instruction::Xor:
2478       if (Instruction *I = foldICmpXorConstant(Cmp, BO, *C))
2479         return I;
2480       break;
2481     case Instruction::And:
2482       if (Instruction *I = foldICmpAndConstant(Cmp, BO, *C))
2483         return I;
2484       break;
2485     case Instruction::Or:
2486       if (Instruction *I = foldICmpOrConstant(Cmp, BO, *C))
2487         return I;
2488       break;
2489     case Instruction::Mul:
2490       if (Instruction *I = foldICmpMulConstant(Cmp, BO, *C))
2491         return I;
2492       break;
2493     case Instruction::Shl:
2494       if (Instruction *I = foldICmpShlConstant(Cmp, BO, *C))
2495         return I;
2496       break;
2497     case Instruction::LShr:
2498     case Instruction::AShr:
2499       if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C))
2500         return I;
2501       break;
2502     case Instruction::UDiv:
2503       if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C))
2504         return I;
2505       LLVM_FALLTHROUGH;
2506     case Instruction::SDiv:
2507       if (Instruction *I = foldICmpDivConstant(Cmp, BO, *C))
2508         return I;
2509       break;
2510     case Instruction::Sub:
2511       if (Instruction *I = foldICmpSubConstant(Cmp, BO, *C))
2512         return I;
2513       break;
2514     case Instruction::Add:
2515       if (Instruction *I = foldICmpAddConstant(Cmp, BO, *C))
2516         return I;
2517       break;
2518     default:
2519       break;
2520     }
2521     // TODO: These folds could be refactored to be part of the above calls.
2522     if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, *C))
2523       return I;
2524   }
2525 
2526   // Match against CmpInst LHS being instructions other than binary operators.
2527 
2528   if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) {
2529     // For now, we only support constant integers while folding the
2530     // ICMP(SELECT)) pattern. We can extend this to support vector of integers
2531     // similar to the cases handled by binary ops above.
2532     if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
2533       if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
2534         return I;
2535   }
2536 
2537   if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) {
2538     if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
2539       return I;
2540   }
2541 
2542   if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
2543     return I;
2544 
2545   return nullptr;
2546 }
2547 
2548 /// Fold an icmp equality instruction with binary operator LHS and constant RHS:
2549 /// icmp eq/ne BO, C.
2550 Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
2551                                                              BinaryOperator *BO,
2552                                                              const APInt &C) {
2553   // TODO: Some of these folds could work with arbitrary constants, but this
2554   // function is limited to scalar and vector splat constants.
2555   if (!Cmp.isEquality())
2556     return nullptr;
2557 
2558   ICmpInst::Predicate Pred = Cmp.getPredicate();
2559   bool isICMP_NE = Pred == ICmpInst::ICMP_NE;
2560   Constant *RHS = cast<Constant>(Cmp.getOperand(1));
2561   Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
2562 
2563   switch (BO->getOpcode()) {
2564   case Instruction::SRem:
2565     // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
2566     if (C.isNullValue() && BO->hasOneUse()) {
2567       const APInt *BOC;
2568       if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
2569         Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
2570         return new ICmpInst(Pred, NewRem,
2571                             Constant::getNullValue(BO->getType()));
2572       }
2573     }
2574     break;
2575   case Instruction::Add: {
2576     // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
2577     const APInt *BOC;
2578     if (match(BOp1, m_APInt(BOC))) {
2579       if (BO->hasOneUse()) {
2580         Constant *SubC = ConstantExpr::getSub(RHS, cast<Constant>(BOp1));
2581         return new ICmpInst(Pred, BOp0, SubC);
2582       }
2583     } else if (C.isNullValue()) {
2584       // Replace ((add A, B) != 0) with (A != -B) if A or B is
2585       // efficiently invertible, or if the add has just this one use.
2586       if (Value *NegVal = dyn_castNegVal(BOp1))
2587         return new ICmpInst(Pred, BOp0, NegVal);
2588       if (Value *NegVal = dyn_castNegVal(BOp0))
2589         return new ICmpInst(Pred, NegVal, BOp1);
2590       if (BO->hasOneUse()) {
2591         Value *Neg = Builder.CreateNeg(BOp1);
2592         Neg->takeName(BO);
2593         return new ICmpInst(Pred, BOp0, Neg);
2594       }
2595     }
2596     break;
2597   }
2598   case Instruction::Xor:
2599     if (BO->hasOneUse()) {
2600       if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
2601         // For the xor case, we can xor two constants together, eliminating
2602         // the explicit xor.
2603         return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
2604       } else if (C.isNullValue()) {
2605         // Replace ((xor A, B) != 0) with (A != B)
2606         return new ICmpInst(Pred, BOp0, BOp1);
2607       }
2608     }
2609     break;
2610   case Instruction::Sub:
2611     if (BO->hasOneUse()) {
2612       const APInt *BOC;
2613       if (match(BOp0, m_APInt(BOC))) {
2614         // Replace ((sub BOC, B) != C) with (B != BOC-C).
2615         Constant *SubC = ConstantExpr::getSub(cast<Constant>(BOp0), RHS);
2616         return new ICmpInst(Pred, BOp1, SubC);
2617       } else if (C.isNullValue()) {
2618         // Replace ((sub A, B) != 0) with (A != B).
2619         return new ICmpInst(Pred, BOp0, BOp1);
2620       }
2621     }
2622     break;
2623   case Instruction::Or: {
2624     const APInt *BOC;
2625     if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
2626       // Comparing if all bits outside of a constant mask are set?
2627       // Replace (X | C) == -1 with (X & ~C) == ~C.
2628       // This removes the -1 constant.
2629       Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1));
2630       Value *And = Builder.CreateAnd(BOp0, NotBOC);
2631       return new ICmpInst(Pred, And, NotBOC);
2632     }
2633     break;
2634   }
2635   case Instruction::And: {
2636     const APInt *BOC;
2637     if (match(BOp1, m_APInt(BOC))) {
2638       // If we have ((X & C) == C), turn it into ((X & C) != 0).
2639       if (C == *BOC && C.isPowerOf2())
2640         return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
2641                             BO, Constant::getNullValue(RHS->getType()));
2642 
2643       // Don't perform the following transforms if the AND has multiple uses
2644       if (!BO->hasOneUse())
2645         break;
2646 
2647       // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
2648       if (BOC->isSignMask()) {
2649         Constant *Zero = Constant::getNullValue(BOp0->getType());
2650         auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
2651         return new ICmpInst(NewPred, BOp0, Zero);
2652       }
2653 
2654       // ((X & ~7) == 0) --> X < 8
2655       if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) {
2656         Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
2657         auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
2658         return new ICmpInst(NewPred, BOp0, NegBOC);
2659       }
2660     }
2661     break;
2662   }
2663   case Instruction::Mul:
2664     if (C.isNullValue() && BO->hasNoSignedWrap()) {
2665       const APInt *BOC;
2666       if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) {
2667         // The trivial case (mul X, 0) is handled by InstSimplify.
2668         // General case : (mul X, C) != 0 iff X != 0
2669         //                (mul X, C) == 0 iff X == 0
2670         return new ICmpInst(Pred, BOp0, Constant::getNullValue(RHS->getType()));
2671       }
2672     }
2673     break;
2674   case Instruction::UDiv:
2675     if (C.isNullValue()) {
2676       // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
2677       auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
2678       return new ICmpInst(NewPred, BOp1, BOp0);
2679     }
2680     break;
2681   default:
2682     break;
2683   }
2684   return nullptr;
2685 }
2686 
2687 /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
2688 Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
2689                                                          const APInt &C) {
2690   IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
2691   if (!II || !Cmp.isEquality())
2692     return nullptr;
2693 
2694   // Handle icmp {eq|ne} <intrinsic>, Constant.
2695   Type *Ty = II->getType();
2696   switch (II->getIntrinsicID()) {
2697   case Intrinsic::bswap:
2698     Worklist.Add(II);
2699     Cmp.setOperand(0, II->getArgOperand(0));
2700     Cmp.setOperand(1, ConstantInt::get(Ty, C.byteSwap()));
2701     return &Cmp;
2702 
2703   case Intrinsic::ctlz:
2704   case Intrinsic::cttz:
2705     // ctz(A) == bitwidth(A)  ->  A == 0 and likewise for !=
2706     if (C == C.getBitWidth()) {
2707       Worklist.Add(II);
2708       Cmp.setOperand(0, II->getArgOperand(0));
2709       Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
2710       return &Cmp;
2711     }
2712     break;
2713 
2714   case Intrinsic::ctpop: {
2715     // popcount(A) == 0  ->  A == 0 and likewise for !=
2716     // popcount(A) == bitwidth(A)  ->  A == -1 and likewise for !=
2717     bool IsZero = C.isNullValue();
2718     if (IsZero || C == C.getBitWidth()) {
2719       Worklist.Add(II);
2720       Cmp.setOperand(0, II->getArgOperand(0));
2721       auto *NewOp =
2722           IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty);
2723       Cmp.setOperand(1, NewOp);
2724       return &Cmp;
2725     }
2726     break;
2727   }
2728   default:
2729     break;
2730   }
2731 
2732   return nullptr;
2733 }
2734 
2735 /// Handle icmp with constant (but not simple integer constant) RHS.
2736 Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
2737   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2738   Constant *RHSC = dyn_cast<Constant>(Op1);
2739   Instruction *LHSI = dyn_cast<Instruction>(Op0);
2740   if (!RHSC || !LHSI)
2741     return nullptr;
2742 
2743   switch (LHSI->getOpcode()) {
2744   case Instruction::GetElementPtr:
2745     // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
2746     if (RHSC->isNullValue() &&
2747         cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
2748       return new ICmpInst(
2749           I.getPredicate(), LHSI->getOperand(0),
2750           Constant::getNullValue(LHSI->getOperand(0)->getType()));
2751     break;
2752   case Instruction::PHI:
2753     // Only fold icmp into the PHI if the phi and icmp are in the same
2754     // block.  If in the same block, we're encouraging jump threading.  If
2755     // not, we are just pessimizing the code by making an i1 phi.
2756     if (LHSI->getParent() == I.getParent())
2757       if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
2758         return NV;
2759     break;
2760   case Instruction::Select: {
2761     // If either operand of the select is a constant, we can fold the
2762     // comparison into the select arms, which will cause one to be
2763     // constant folded and the select turned into a bitwise or.
2764     Value *Op1 = nullptr, *Op2 = nullptr;
2765     ConstantInt *CI = nullptr;
2766     if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
2767       Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
2768       CI = dyn_cast<ConstantInt>(Op1);
2769     }
2770     if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
2771       Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
2772       CI = dyn_cast<ConstantInt>(Op2);
2773     }
2774 
2775     // We only want to perform this transformation if it will not lead to
2776     // additional code. This is true if either both sides of the select
2777     // fold to a constant (in which case the icmp is replaced with a select
2778     // which will usually simplify) or this is the only user of the
2779     // select (in which case we are trading a select+icmp for a simpler
2780     // select+icmp) or all uses of the select can be replaced based on
2781     // dominance information ("Global cases").
2782     bool Transform = false;
2783     if (Op1 && Op2)
2784       Transform = true;
2785     else if (Op1 || Op2) {
2786       // Local case
2787       if (LHSI->hasOneUse())
2788         Transform = true;
2789       // Global cases
2790       else if (CI && !CI->isZero())
2791         // When Op1 is constant try replacing select with second operand.
2792         // Otherwise Op2 is constant and try replacing select with first
2793         // operand.
2794         Transform =
2795             replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, Op1 ? 2 : 1);
2796     }
2797     if (Transform) {
2798       if (!Op1)
2799         Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
2800                                  I.getName());
2801       if (!Op2)
2802         Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
2803                                  I.getName());
2804       return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
2805     }
2806     break;
2807   }
2808   case Instruction::IntToPtr:
2809     // icmp pred inttoptr(X), null -> icmp pred X, 0
2810     if (RHSC->isNullValue() &&
2811         DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
2812       return new ICmpInst(
2813           I.getPredicate(), LHSI->getOperand(0),
2814           Constant::getNullValue(LHSI->getOperand(0)->getType()));
2815     break;
2816 
2817   case Instruction::Load:
2818     // Try to optimize things like "A[i] > 4" to index computations.
2819     if (GetElementPtrInst *GEP =
2820             dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
2821       if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
2822         if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
2823             !cast<LoadInst>(LHSI)->isVolatile())
2824           if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
2825             return Res;
2826     }
2827     break;
2828   }
2829 
2830   return nullptr;
2831 }
2832 
2833 /// Try to fold icmp (binop), X or icmp X, (binop).
2834 /// TODO: A large part of this logic is duplicated in InstSimplify's
2835 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
2836 /// duplication.
2837 Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
2838   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2839 
2840   // Special logic for binary operators.
2841   BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
2842   BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
2843   if (!BO0 && !BO1)
2844     return nullptr;
2845 
2846   const CmpInst::Predicate Pred = I.getPredicate();
2847   bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
2848   if (BO0 && isa<OverflowingBinaryOperator>(BO0))
2849     NoOp0WrapProblem =
2850         ICmpInst::isEquality(Pred) ||
2851         (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
2852         (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
2853   if (BO1 && isa<OverflowingBinaryOperator>(BO1))
2854     NoOp1WrapProblem =
2855         ICmpInst::isEquality(Pred) ||
2856         (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
2857         (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
2858 
2859   // Analyze the case when either Op0 or Op1 is an add instruction.
2860   // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
2861   Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
2862   if (BO0 && BO0->getOpcode() == Instruction::Add) {
2863     A = BO0->getOperand(0);
2864     B = BO0->getOperand(1);
2865   }
2866   if (BO1 && BO1->getOpcode() == Instruction::Add) {
2867     C = BO1->getOperand(0);
2868     D = BO1->getOperand(1);
2869   }
2870 
2871   // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
2872   if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
2873     return new ICmpInst(Pred, A == Op1 ? B : A,
2874                         Constant::getNullValue(Op1->getType()));
2875 
2876   // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
2877   if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
2878     return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
2879                         C == Op0 ? D : C);
2880 
2881   // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
2882   if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem &&
2883       NoOp1WrapProblem &&
2884       // Try not to increase register pressure.
2885       BO0->hasOneUse() && BO1->hasOneUse()) {
2886     // Determine Y and Z in the form icmp (X+Y), (X+Z).
2887     Value *Y, *Z;
2888     if (A == C) {
2889       // C + B == C + D  ->  B == D
2890       Y = B;
2891       Z = D;
2892     } else if (A == D) {
2893       // D + B == C + D  ->  B == C
2894       Y = B;
2895       Z = C;
2896     } else if (B == C) {
2897       // A + C == C + D  ->  A == D
2898       Y = A;
2899       Z = D;
2900     } else {
2901       assert(B == D);
2902       // A + D == C + D  ->  A == C
2903       Y = A;
2904       Z = C;
2905     }
2906     return new ICmpInst(Pred, Y, Z);
2907   }
2908 
2909   // icmp slt (X + -1), Y -> icmp sle X, Y
2910   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
2911       match(B, m_AllOnes()))
2912     return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
2913 
2914   // icmp sge (X + -1), Y -> icmp sgt X, Y
2915   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
2916       match(B, m_AllOnes()))
2917     return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
2918 
2919   // icmp sle (X + 1), Y -> icmp slt X, Y
2920   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
2921     return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
2922 
2923   // icmp sgt (X + 1), Y -> icmp sge X, Y
2924   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
2925     return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
2926 
2927   // icmp sgt X, (Y + -1) -> icmp sge X, Y
2928   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
2929       match(D, m_AllOnes()))
2930     return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
2931 
2932   // icmp sle X, (Y + -1) -> icmp slt X, Y
2933   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
2934       match(D, m_AllOnes()))
2935     return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
2936 
2937   // icmp sge X, (Y + 1) -> icmp sgt X, Y
2938   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
2939     return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
2940 
2941   // icmp slt X, (Y + 1) -> icmp sle X, Y
2942   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
2943     return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
2944 
2945   // TODO: The subtraction-related identities shown below also hold, but
2946   // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
2947   // wouldn't happen even if they were implemented.
2948   //
2949   // icmp ult (X - 1), Y -> icmp ule X, Y
2950   // icmp uge (X - 1), Y -> icmp ugt X, Y
2951   // icmp ugt X, (Y - 1) -> icmp uge X, Y
2952   // icmp ule X, (Y - 1) -> icmp ult X, Y
2953 
2954   // icmp ule (X + 1), Y -> icmp ult X, Y
2955   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
2956     return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
2957 
2958   // icmp ugt (X + 1), Y -> icmp uge X, Y
2959   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
2960     return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
2961 
2962   // icmp uge X, (Y + 1) -> icmp ugt X, Y
2963   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
2964     return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
2965 
2966   // icmp ult X, (Y + 1) -> icmp ule X, Y
2967   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
2968     return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
2969 
2970   // if C1 has greater magnitude than C2:
2971   //  icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
2972   //  s.t. C3 = C1 - C2
2973   //
2974   // if C2 has greater magnitude than C1:
2975   //  icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
2976   //  s.t. C3 = C2 - C1
2977   if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
2978       (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
2979     if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
2980       if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
2981         const APInt &AP1 = C1->getValue();
2982         const APInt &AP2 = C2->getValue();
2983         if (AP1.isNegative() == AP2.isNegative()) {
2984           APInt AP1Abs = C1->getValue().abs();
2985           APInt AP2Abs = C2->getValue().abs();
2986           if (AP1Abs.uge(AP2Abs)) {
2987             ConstantInt *C3 = Builder.getInt(AP1 - AP2);
2988             Value *NewAdd = Builder.CreateNSWAdd(A, C3);
2989             return new ICmpInst(Pred, NewAdd, C);
2990           } else {
2991             ConstantInt *C3 = Builder.getInt(AP2 - AP1);
2992             Value *NewAdd = Builder.CreateNSWAdd(C, C3);
2993             return new ICmpInst(Pred, A, NewAdd);
2994           }
2995         }
2996       }
2997 
2998   // Analyze the case when either Op0 or Op1 is a sub instruction.
2999   // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
3000   A = nullptr;
3001   B = nullptr;
3002   C = nullptr;
3003   D = nullptr;
3004   if (BO0 && BO0->getOpcode() == Instruction::Sub) {
3005     A = BO0->getOperand(0);
3006     B = BO0->getOperand(1);
3007   }
3008   if (BO1 && BO1->getOpcode() == Instruction::Sub) {
3009     C = BO1->getOperand(0);
3010     D = BO1->getOperand(1);
3011   }
3012 
3013   // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
3014   if (A == Op1 && NoOp0WrapProblem)
3015     return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
3016 
3017   // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
3018   if (C == Op0 && NoOp1WrapProblem)
3019     return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
3020 
3021   // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
3022   if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
3023       // Try not to increase register pressure.
3024       BO0->hasOneUse() && BO1->hasOneUse())
3025     return new ICmpInst(Pred, A, C);
3026 
3027   // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
3028   if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
3029       // Try not to increase register pressure.
3030       BO0->hasOneUse() && BO1->hasOneUse())
3031     return new ICmpInst(Pred, D, B);
3032 
3033   // icmp (0-X) < cst --> x > -cst
3034   if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
3035     Value *X;
3036     if (match(BO0, m_Neg(m_Value(X))))
3037       if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
3038         if (!RHSC->isMinValue(/*isSigned=*/true))
3039           return new ICmpInst(I.getSwappedPredicate(), X,
3040                               ConstantExpr::getNeg(RHSC));
3041   }
3042 
3043   BinaryOperator *SRem = nullptr;
3044   // icmp (srem X, Y), Y
3045   if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1))
3046     SRem = BO0;
3047   // icmp Y, (srem X, Y)
3048   else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
3049            Op0 == BO1->getOperand(1))
3050     SRem = BO1;
3051   if (SRem) {
3052     // We don't check hasOneUse to avoid increasing register pressure because
3053     // the value we use is the same value this instruction was already using.
3054     switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
3055     default:
3056       break;
3057     case ICmpInst::ICMP_EQ:
3058       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3059     case ICmpInst::ICMP_NE:
3060       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3061     case ICmpInst::ICMP_SGT:
3062     case ICmpInst::ICMP_SGE:
3063       return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
3064                           Constant::getAllOnesValue(SRem->getType()));
3065     case ICmpInst::ICMP_SLT:
3066     case ICmpInst::ICMP_SLE:
3067       return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
3068                           Constant::getNullValue(SRem->getType()));
3069     }
3070   }
3071 
3072   if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() &&
3073       BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) {
3074     switch (BO0->getOpcode()) {
3075     default:
3076       break;
3077     case Instruction::Add:
3078     case Instruction::Sub:
3079     case Instruction::Xor: {
3080       if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
3081         return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3082 
3083       const APInt *C;
3084       if (match(BO0->getOperand(1), m_APInt(C))) {
3085         // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
3086         if (C->isSignMask()) {
3087           ICmpInst::Predicate NewPred =
3088               I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
3089           return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
3090         }
3091 
3092         // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
3093         if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) {
3094           ICmpInst::Predicate NewPred =
3095               I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
3096           NewPred = I.getSwappedPredicate(NewPred);
3097           return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
3098         }
3099       }
3100       break;
3101     }
3102     case Instruction::Mul: {
3103       if (!I.isEquality())
3104         break;
3105 
3106       const APInt *C;
3107       if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() &&
3108           !C->isOneValue()) {
3109         // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
3110         // Mask = -1 >> count-trailing-zeros(C).
3111         if (unsigned TZs = C->countTrailingZeros()) {
3112           Constant *Mask = ConstantInt::get(
3113               BO0->getType(),
3114               APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
3115           Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask);
3116           Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
3117           return new ICmpInst(Pred, And1, And2);
3118         }
3119         // If there are no trailing zeros in the multiplier, just eliminate
3120         // the multiplies (no masking is needed):
3121         // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y
3122         return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3123       }
3124       break;
3125     }
3126     case Instruction::UDiv:
3127     case Instruction::LShr:
3128       if (I.isSigned() || !BO0->isExact() || !BO1->isExact())
3129         break;
3130       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3131 
3132     case Instruction::SDiv:
3133       if (!I.isEquality() || !BO0->isExact() || !BO1->isExact())
3134         break;
3135       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3136 
3137     case Instruction::AShr:
3138       if (!BO0->isExact() || !BO1->isExact())
3139         break;
3140       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3141 
3142     case Instruction::Shl: {
3143       bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
3144       bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
3145       if (!NUW && !NSW)
3146         break;
3147       if (!NSW && I.isSigned())
3148         break;
3149       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3150     }
3151     }
3152   }
3153 
3154   if (BO0) {
3155     // Transform  A & (L - 1) `ult` L --> L != 0
3156     auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
3157     auto BitwiseAnd = m_c_And(m_Value(), LSubOne);
3158 
3159     if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) {
3160       auto *Zero = Constant::getNullValue(BO0->getType());
3161       return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
3162     }
3163   }
3164 
3165   return nullptr;
3166 }
3167 
3168 /// Fold icmp Pred min|max(X, Y), X.
3169 static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) {
3170   ICmpInst::Predicate Pred = Cmp.getPredicate();
3171   Value *Op0 = Cmp.getOperand(0);
3172   Value *X = Cmp.getOperand(1);
3173 
3174   // Canonicalize minimum or maximum operand to LHS of the icmp.
3175   if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) ||
3176       match(X, m_c_SMax(m_Specific(Op0), m_Value())) ||
3177       match(X, m_c_UMin(m_Specific(Op0), m_Value())) ||
3178       match(X, m_c_UMax(m_Specific(Op0), m_Value()))) {
3179     std::swap(Op0, X);
3180     Pred = Cmp.getSwappedPredicate();
3181   }
3182 
3183   Value *Y;
3184   if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) {
3185     // smin(X, Y)  == X --> X s<= Y
3186     // smin(X, Y) s>= X --> X s<= Y
3187     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE)
3188       return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
3189 
3190     // smin(X, Y) != X --> X s> Y
3191     // smin(X, Y) s< X --> X s> Y
3192     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT)
3193       return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
3194 
3195     // These cases should be handled in InstSimplify:
3196     // smin(X, Y) s<= X --> true
3197     // smin(X, Y) s> X --> false
3198     return nullptr;
3199   }
3200 
3201   if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) {
3202     // smax(X, Y)  == X --> X s>= Y
3203     // smax(X, Y) s<= X --> X s>= Y
3204     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE)
3205       return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
3206 
3207     // smax(X, Y) != X --> X s< Y
3208     // smax(X, Y) s> X --> X s< Y
3209     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT)
3210       return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
3211 
3212     // These cases should be handled in InstSimplify:
3213     // smax(X, Y) s>= X --> true
3214     // smax(X, Y) s< X --> false
3215     return nullptr;
3216   }
3217 
3218   if (match(Op0, m_c_UMin(m_Specific(X), m_Value(Y)))) {
3219     // umin(X, Y)  == X --> X u<= Y
3220     // umin(X, Y) u>= X --> X u<= Y
3221     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_UGE)
3222       return new ICmpInst(ICmpInst::ICMP_ULE, X, Y);
3223 
3224     // umin(X, Y) != X --> X u> Y
3225     // umin(X, Y) u< X --> X u> Y
3226     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT)
3227       return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
3228 
3229     // These cases should be handled in InstSimplify:
3230     // umin(X, Y) u<= X --> true
3231     // umin(X, Y) u> X --> false
3232     return nullptr;
3233   }
3234 
3235   if (match(Op0, m_c_UMax(m_Specific(X), m_Value(Y)))) {
3236     // umax(X, Y)  == X --> X u>= Y
3237     // umax(X, Y) u<= X --> X u>= Y
3238     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_ULE)
3239       return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
3240 
3241     // umax(X, Y) != X --> X u< Y
3242     // umax(X, Y) u> X --> X u< Y
3243     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_UGT)
3244       return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
3245 
3246     // These cases should be handled in InstSimplify:
3247     // umax(X, Y) u>= X --> true
3248     // umax(X, Y) u< X --> false
3249     return nullptr;
3250   }
3251 
3252   return nullptr;
3253 }
3254 
3255 Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
3256   if (!I.isEquality())
3257     return nullptr;
3258 
3259   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3260   const CmpInst::Predicate Pred = I.getPredicate();
3261   Value *A, *B, *C, *D;
3262   if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
3263     if (A == Op1 || B == Op1) { // (A^B) == A  ->  B == 0
3264       Value *OtherVal = A == Op1 ? B : A;
3265       return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
3266     }
3267 
3268     if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
3269       // A^c1 == C^c2 --> A == C^(c1^c2)
3270       ConstantInt *C1, *C2;
3271       if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) &&
3272           Op1->hasOneUse()) {
3273         Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue());
3274         Value *Xor = Builder.CreateXor(C, NC);
3275         return new ICmpInst(Pred, A, Xor);
3276       }
3277 
3278       // A^B == A^D -> B == D
3279       if (A == C)
3280         return new ICmpInst(Pred, B, D);
3281       if (A == D)
3282         return new ICmpInst(Pred, B, C);
3283       if (B == C)
3284         return new ICmpInst(Pred, A, D);
3285       if (B == D)
3286         return new ICmpInst(Pred, A, C);
3287     }
3288   }
3289 
3290   if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
3291     // A == (A^B)  ->  B == 0
3292     Value *OtherVal = A == Op0 ? B : A;
3293     return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
3294   }
3295 
3296   // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
3297   if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
3298       match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
3299     Value *X = nullptr, *Y = nullptr, *Z = nullptr;
3300 
3301     if (A == C) {
3302       X = B;
3303       Y = D;
3304       Z = A;
3305     } else if (A == D) {
3306       X = B;
3307       Y = C;
3308       Z = A;
3309     } else if (B == C) {
3310       X = A;
3311       Y = D;
3312       Z = B;
3313     } else if (B == D) {
3314       X = A;
3315       Y = C;
3316       Z = B;
3317     }
3318 
3319     if (X) { // Build (X^Y) & Z
3320       Op1 = Builder.CreateXor(X, Y);
3321       Op1 = Builder.CreateAnd(Op1, Z);
3322       I.setOperand(0, Op1);
3323       I.setOperand(1, Constant::getNullValue(Op1->getType()));
3324       return &I;
3325     }
3326   }
3327 
3328   // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
3329   // and       (B & (1<<X)-1) == (zext A) --> A == (trunc B)
3330   ConstantInt *Cst1;
3331   if ((Op0->hasOneUse() && match(Op0, m_ZExt(m_Value(A))) &&
3332        match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
3333       (Op1->hasOneUse() && match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
3334        match(Op1, m_ZExt(m_Value(A))))) {
3335     APInt Pow2 = Cst1->getValue() + 1;
3336     if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
3337         Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
3338       return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
3339   }
3340 
3341   // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
3342   // For lshr and ashr pairs.
3343   if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
3344        match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
3345       (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
3346        match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
3347     unsigned TypeBits = Cst1->getBitWidth();
3348     unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
3349     if (ShAmt < TypeBits && ShAmt != 0) {
3350       ICmpInst::Predicate NewPred =
3351           Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
3352       Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
3353       APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
3354       return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal));
3355     }
3356   }
3357 
3358   // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
3359   if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
3360       match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
3361     unsigned TypeBits = Cst1->getBitWidth();
3362     unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
3363     if (ShAmt < TypeBits && ShAmt != 0) {
3364       Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
3365       APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
3366       Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal),
3367                                       I.getName() + ".mask");
3368       return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
3369     }
3370   }
3371 
3372   // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
3373   // "icmp (and X, mask), cst"
3374   uint64_t ShAmt = 0;
3375   if (Op0->hasOneUse() &&
3376       match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) &&
3377       match(Op1, m_ConstantInt(Cst1)) &&
3378       // Only do this when A has multiple uses.  This is most important to do
3379       // when it exposes other optimizations.
3380       !A->hasOneUse()) {
3381     unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
3382 
3383     if (ShAmt < ASize) {
3384       APInt MaskV =
3385           APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
3386       MaskV <<= ShAmt;
3387 
3388       APInt CmpV = Cst1->getValue().zext(ASize);
3389       CmpV <<= ShAmt;
3390 
3391       Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV));
3392       return new ICmpInst(Pred, Mask, Builder.getInt(CmpV));
3393     }
3394   }
3395 
3396   // If both operands are byte-swapped or bit-reversed, just compare the
3397   // original values.
3398   // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant()
3399   // and handle more intrinsics.
3400   if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) ||
3401       (match(Op0, m_BitReverse(m_Value(A))) &&
3402        match(Op1, m_BitReverse(m_Value(B)))))
3403     return new ICmpInst(Pred, A, B);
3404 
3405   return nullptr;
3406 }
3407 
3408 /// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so
3409 /// far.
3410 Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
3411   const CastInst *LHSCI = cast<CastInst>(ICmp.getOperand(0));
3412   Value *LHSCIOp        = LHSCI->getOperand(0);
3413   Type *SrcTy     = LHSCIOp->getType();
3414   Type *DestTy    = LHSCI->getType();
3415   Value *RHSCIOp;
3416 
3417   // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
3418   // integer type is the same size as the pointer type.
3419   if (LHSCI->getOpcode() == Instruction::PtrToInt &&
3420       DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
3421     Value *RHSOp = nullptr;
3422     if (auto *RHSC = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
3423       Value *RHSCIOp = RHSC->getOperand(0);
3424       if (RHSCIOp->getType()->getPointerAddressSpace() ==
3425           LHSCIOp->getType()->getPointerAddressSpace()) {
3426         RHSOp = RHSC->getOperand(0);
3427         // If the pointer types don't match, insert a bitcast.
3428         if (LHSCIOp->getType() != RHSOp->getType())
3429           RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType());
3430       }
3431     } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
3432       RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
3433     }
3434 
3435     if (RHSOp)
3436       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSOp);
3437   }
3438 
3439   // The code below only handles extension cast instructions, so far.
3440   // Enforce this.
3441   if (LHSCI->getOpcode() != Instruction::ZExt &&
3442       LHSCI->getOpcode() != Instruction::SExt)
3443     return nullptr;
3444 
3445   bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
3446   bool isSignedCmp = ICmp.isSigned();
3447 
3448   if (auto *CI = dyn_cast<CastInst>(ICmp.getOperand(1))) {
3449     // Not an extension from the same type?
3450     RHSCIOp = CI->getOperand(0);
3451     if (RHSCIOp->getType() != LHSCIOp->getType())
3452       return nullptr;
3453 
3454     // If the signedness of the two casts doesn't agree (i.e. one is a sext
3455     // and the other is a zext), then we can't handle this.
3456     if (CI->getOpcode() != LHSCI->getOpcode())
3457       return nullptr;
3458 
3459     // Deal with equality cases early.
3460     if (ICmp.isEquality())
3461       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
3462 
3463     // A signed comparison of sign extended values simplifies into a
3464     // signed comparison.
3465     if (isSignedCmp && isSignedExt)
3466       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
3467 
3468     // The other three cases all fold into an unsigned comparison.
3469     return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
3470   }
3471 
3472   // If we aren't dealing with a constant on the RHS, exit early.
3473   auto *C = dyn_cast<Constant>(ICmp.getOperand(1));
3474   if (!C)
3475     return nullptr;
3476 
3477   // Compute the constant that would happen if we truncated to SrcTy then
3478   // re-extended to DestTy.
3479   Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy);
3480   Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
3481 
3482   // If the re-extended constant didn't change...
3483   if (Res2 == C) {
3484     // Deal with equality cases early.
3485     if (ICmp.isEquality())
3486       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
3487 
3488     // A signed comparison of sign extended values simplifies into a
3489     // signed comparison.
3490     if (isSignedExt && isSignedCmp)
3491       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
3492 
3493     // The other three cases all fold into an unsigned comparison.
3494     return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, Res1);
3495   }
3496 
3497   // The re-extended constant changed, partly changed (in the case of a vector),
3498   // or could not be determined to be equal (in the case of a constant
3499   // expression), so the constant cannot be represented in the shorter type.
3500   // Consequently, we cannot emit a simple comparison.
3501   // All the cases that fold to true or false will have already been handled
3502   // by SimplifyICmpInst, so only deal with the tricky case.
3503 
3504   if (isSignedCmp || !isSignedExt || !isa<ConstantInt>(C))
3505     return nullptr;
3506 
3507   // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
3508   // should have been folded away previously and not enter in here.
3509 
3510   // We're performing an unsigned comp with a sign extended value.
3511   // This is true if the input is >= 0. [aka >s -1]
3512   Constant *NegOne = Constant::getAllOnesValue(SrcTy);
3513   Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName());
3514 
3515   // Finally, return the value computed.
3516   if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
3517     return replaceInstUsesWith(ICmp, Result);
3518 
3519   assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
3520   return BinaryOperator::CreateNot(Result);
3521 }
3522 
3523 bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
3524                                          Value *RHS, Instruction &OrigI,
3525                                          Value *&Result, Constant *&Overflow) {
3526   if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
3527     std::swap(LHS, RHS);
3528 
3529   auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
3530     Result = OpResult;
3531     Overflow = OverflowVal;
3532     if (ReuseName)
3533       Result->takeName(&OrigI);
3534     return true;
3535   };
3536 
3537   // If the overflow check was an add followed by a compare, the insertion point
3538   // may be pointing to the compare.  We want to insert the new instructions
3539   // before the add in case there are uses of the add between the add and the
3540   // compare.
3541   Builder.SetInsertPoint(&OrigI);
3542 
3543   switch (OCF) {
3544   case OCF_INVALID:
3545     llvm_unreachable("bad overflow check kind!");
3546 
3547   case OCF_UNSIGNED_ADD: {
3548     OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
3549     if (OR == OverflowResult::NeverOverflows)
3550       return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
3551                        true);
3552 
3553     if (OR == OverflowResult::AlwaysOverflows)
3554       return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true);
3555 
3556     // Fall through uadd into sadd
3557     LLVM_FALLTHROUGH;
3558   }
3559   case OCF_SIGNED_ADD: {
3560     // X + 0 -> {X, false}
3561     if (match(RHS, m_Zero()))
3562       return SetResult(LHS, Builder.getFalse(), false);
3563 
3564     // We can strength reduce this signed add into a regular add if we can prove
3565     // that it will never overflow.
3566     if (OCF == OCF_SIGNED_ADD)
3567       if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
3568         return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(),
3569                          true);
3570     break;
3571   }
3572 
3573   case OCF_UNSIGNED_SUB:
3574   case OCF_SIGNED_SUB: {
3575     // X - 0 -> {X, false}
3576     if (match(RHS, m_Zero()))
3577       return SetResult(LHS, Builder.getFalse(), false);
3578 
3579     if (OCF == OCF_SIGNED_SUB) {
3580       if (willNotOverflowSignedSub(LHS, RHS, OrigI))
3581         return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(),
3582                          true);
3583     } else {
3584       if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
3585         return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
3586                          true);
3587     }
3588     break;
3589   }
3590 
3591   case OCF_UNSIGNED_MUL: {
3592     OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
3593     if (OR == OverflowResult::NeverOverflows)
3594       return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
3595                        true);
3596     if (OR == OverflowResult::AlwaysOverflows)
3597       return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true);
3598     LLVM_FALLTHROUGH;
3599   }
3600   case OCF_SIGNED_MUL:
3601     // X * undef -> undef
3602     if (isa<UndefValue>(RHS))
3603       return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false);
3604 
3605     // X * 0 -> {0, false}
3606     if (match(RHS, m_Zero()))
3607       return SetResult(RHS, Builder.getFalse(), false);
3608 
3609     // X * 1 -> {X, false}
3610     if (match(RHS, m_One()))
3611       return SetResult(LHS, Builder.getFalse(), false);
3612 
3613     if (OCF == OCF_SIGNED_MUL)
3614       if (willNotOverflowSignedMul(LHS, RHS, OrigI))
3615         return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(),
3616                          true);
3617     break;
3618   }
3619 
3620   return false;
3621 }
3622 
3623 /// \brief Recognize and process idiom involving test for multiplication
3624 /// overflow.
3625 ///
3626 /// The caller has matched a pattern of the form:
3627 ///   I = cmp u (mul(zext A, zext B), V
3628 /// The function checks if this is a test for overflow and if so replaces
3629 /// multiplication with call to 'mul.with.overflow' intrinsic.
3630 ///
3631 /// \param I Compare instruction.
3632 /// \param MulVal Result of 'mult' instruction.  It is one of the arguments of
3633 ///               the compare instruction.  Must be of integer type.
3634 /// \param OtherVal The other argument of compare instruction.
3635 /// \returns Instruction which must replace the compare instruction, NULL if no
3636 ///          replacement required.
3637 static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
3638                                          Value *OtherVal, InstCombiner &IC) {
3639   // Don't bother doing this transformation for pointers, don't do it for
3640   // vectors.
3641   if (!isa<IntegerType>(MulVal->getType()))
3642     return nullptr;
3643 
3644   assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
3645   assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
3646   auto *MulInstr = dyn_cast<Instruction>(MulVal);
3647   if (!MulInstr)
3648     return nullptr;
3649   assert(MulInstr->getOpcode() == Instruction::Mul);
3650 
3651   auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
3652        *RHS = cast<ZExtOperator>(MulInstr->getOperand(1));
3653   assert(LHS->getOpcode() == Instruction::ZExt);
3654   assert(RHS->getOpcode() == Instruction::ZExt);
3655   Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
3656 
3657   // Calculate type and width of the result produced by mul.with.overflow.
3658   Type *TyA = A->getType(), *TyB = B->getType();
3659   unsigned WidthA = TyA->getPrimitiveSizeInBits(),
3660            WidthB = TyB->getPrimitiveSizeInBits();
3661   unsigned MulWidth;
3662   Type *MulType;
3663   if (WidthB > WidthA) {
3664     MulWidth = WidthB;
3665     MulType = TyB;
3666   } else {
3667     MulWidth = WidthA;
3668     MulType = TyA;
3669   }
3670 
3671   // In order to replace the original mul with a narrower mul.with.overflow,
3672   // all uses must ignore upper bits of the product.  The number of used low
3673   // bits must be not greater than the width of mul.with.overflow.
3674   if (MulVal->hasNUsesOrMore(2))
3675     for (User *U : MulVal->users()) {
3676       if (U == &I)
3677         continue;
3678       if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
3679         // Check if truncation ignores bits above MulWidth.
3680         unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
3681         if (TruncWidth > MulWidth)
3682           return nullptr;
3683       } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
3684         // Check if AND ignores bits above MulWidth.
3685         if (BO->getOpcode() != Instruction::And)
3686           return nullptr;
3687         if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
3688           const APInt &CVal = CI->getValue();
3689           if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
3690             return nullptr;
3691         } else {
3692           // In this case we could have the operand of the binary operation
3693           // being defined in another block, and performing the replacement
3694           // could break the dominance relation.
3695           return nullptr;
3696         }
3697       } else {
3698         // Other uses prohibit this transformation.
3699         return nullptr;
3700       }
3701     }
3702 
3703   // Recognize patterns
3704   switch (I.getPredicate()) {
3705   case ICmpInst::ICMP_EQ:
3706   case ICmpInst::ICMP_NE:
3707     // Recognize pattern:
3708     //   mulval = mul(zext A, zext B)
3709     //   cmp eq/neq mulval, zext trunc mulval
3710     if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal))
3711       if (Zext->hasOneUse()) {
3712         Value *ZextArg = Zext->getOperand(0);
3713         if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg))
3714           if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth)
3715             break; //Recognized
3716       }
3717 
3718     // Recognize pattern:
3719     //   mulval = mul(zext A, zext B)
3720     //   cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits.
3721     ConstantInt *CI;
3722     Value *ValToMask;
3723     if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
3724       if (ValToMask != MulVal)
3725         return nullptr;
3726       const APInt &CVal = CI->getValue() + 1;
3727       if (CVal.isPowerOf2()) {
3728         unsigned MaskWidth = CVal.logBase2();
3729         if (MaskWidth == MulWidth)
3730           break; // Recognized
3731       }
3732     }
3733     return nullptr;
3734 
3735   case ICmpInst::ICMP_UGT:
3736     // Recognize pattern:
3737     //   mulval = mul(zext A, zext B)
3738     //   cmp ugt mulval, max
3739     if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
3740       APInt MaxVal = APInt::getMaxValue(MulWidth);
3741       MaxVal = MaxVal.zext(CI->getBitWidth());
3742       if (MaxVal.eq(CI->getValue()))
3743         break; // Recognized
3744     }
3745     return nullptr;
3746 
3747   case ICmpInst::ICMP_UGE:
3748     // Recognize pattern:
3749     //   mulval = mul(zext A, zext B)
3750     //   cmp uge mulval, max+1
3751     if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
3752       APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
3753       if (MaxVal.eq(CI->getValue()))
3754         break; // Recognized
3755     }
3756     return nullptr;
3757 
3758   case ICmpInst::ICMP_ULE:
3759     // Recognize pattern:
3760     //   mulval = mul(zext A, zext B)
3761     //   cmp ule mulval, max
3762     if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
3763       APInt MaxVal = APInt::getMaxValue(MulWidth);
3764       MaxVal = MaxVal.zext(CI->getBitWidth());
3765       if (MaxVal.eq(CI->getValue()))
3766         break; // Recognized
3767     }
3768     return nullptr;
3769 
3770   case ICmpInst::ICMP_ULT:
3771     // Recognize pattern:
3772     //   mulval = mul(zext A, zext B)
3773     //   cmp ule mulval, max + 1
3774     if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
3775       APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
3776       if (MaxVal.eq(CI->getValue()))
3777         break; // Recognized
3778     }
3779     return nullptr;
3780 
3781   default:
3782     return nullptr;
3783   }
3784 
3785   InstCombiner::BuilderTy &Builder = IC.Builder;
3786   Builder.SetInsertPoint(MulInstr);
3787 
3788   // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
3789   Value *MulA = A, *MulB = B;
3790   if (WidthA < MulWidth)
3791     MulA = Builder.CreateZExt(A, MulType);
3792   if (WidthB < MulWidth)
3793     MulB = Builder.CreateZExt(B, MulType);
3794   Value *F = Intrinsic::getDeclaration(I.getModule(),
3795                                        Intrinsic::umul_with_overflow, MulType);
3796   CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
3797   IC.Worklist.Add(MulInstr);
3798 
3799   // If there are uses of mul result other than the comparison, we know that
3800   // they are truncation or binary AND. Change them to use result of
3801   // mul.with.overflow and adjust properly mask/size.
3802   if (MulVal->hasNUsesOrMore(2)) {
3803     Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
3804     for (User *U : MulVal->users()) {
3805       if (U == &I || U == OtherVal)
3806         continue;
3807       if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
3808         if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
3809           IC.replaceInstUsesWith(*TI, Mul);
3810         else
3811           TI->setOperand(0, Mul);
3812       } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
3813         assert(BO->getOpcode() == Instruction::And);
3814         // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
3815         ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
3816         APInt ShortMask = CI->getValue().trunc(MulWidth);
3817         Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
3818         Instruction *Zext =
3819             cast<Instruction>(Builder.CreateZExt(ShortAnd, BO->getType()));
3820         IC.Worklist.Add(Zext);
3821         IC.replaceInstUsesWith(*BO, Zext);
3822       } else {
3823         llvm_unreachable("Unexpected Binary operation");
3824       }
3825       IC.Worklist.Add(cast<Instruction>(U));
3826     }
3827   }
3828   if (isa<Instruction>(OtherVal))
3829     IC.Worklist.Add(cast<Instruction>(OtherVal));
3830 
3831   // The original icmp gets replaced with the overflow value, maybe inverted
3832   // depending on predicate.
3833   bool Inverse = false;
3834   switch (I.getPredicate()) {
3835   case ICmpInst::ICMP_NE:
3836     break;
3837   case ICmpInst::ICMP_EQ:
3838     Inverse = true;
3839     break;
3840   case ICmpInst::ICMP_UGT:
3841   case ICmpInst::ICMP_UGE:
3842     if (I.getOperand(0) == MulVal)
3843       break;
3844     Inverse = true;
3845     break;
3846   case ICmpInst::ICMP_ULT:
3847   case ICmpInst::ICMP_ULE:
3848     if (I.getOperand(1) == MulVal)
3849       break;
3850     Inverse = true;
3851     break;
3852   default:
3853     llvm_unreachable("Unexpected predicate");
3854   }
3855   if (Inverse) {
3856     Value *Res = Builder.CreateExtractValue(Call, 1);
3857     return BinaryOperator::CreateNot(Res);
3858   }
3859 
3860   return ExtractValueInst::Create(Call, 1);
3861 }
3862 
3863 /// When performing a comparison against a constant, it is possible that not all
3864 /// the bits in the LHS are demanded. This helper method computes the mask that
3865 /// IS demanded.
3866 static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
3867   const APInt *RHS;
3868   if (!match(I.getOperand(1), m_APInt(RHS)))
3869     return APInt::getAllOnesValue(BitWidth);
3870 
3871   // If this is a normal comparison, it demands all bits. If it is a sign bit
3872   // comparison, it only demands the sign bit.
3873   bool UnusedBit;
3874   if (isSignBitCheck(I.getPredicate(), *RHS, UnusedBit))
3875     return APInt::getSignMask(BitWidth);
3876 
3877   switch (I.getPredicate()) {
3878   // For a UGT comparison, we don't care about any bits that
3879   // correspond to the trailing ones of the comparand.  The value of these
3880   // bits doesn't impact the outcome of the comparison, because any value
3881   // greater than the RHS must differ in a bit higher than these due to carry.
3882   case ICmpInst::ICMP_UGT:
3883     return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingOnes());
3884 
3885   // Similarly, for a ULT comparison, we don't care about the trailing zeros.
3886   // Any value less than the RHS must differ in a higher bit because of carries.
3887   case ICmpInst::ICMP_ULT:
3888     return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingZeros());
3889 
3890   default:
3891     return APInt::getAllOnesValue(BitWidth);
3892   }
3893 }
3894 
3895 /// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst
3896 /// should be swapped.
3897 /// The decision is based on how many times these two operands are reused
3898 /// as subtract operands and their positions in those instructions.
3899 /// The rational is that several architectures use the same instruction for
3900 /// both subtract and cmp, thus it is better if the order of those operands
3901 /// match.
3902 /// \return true if Op0 and Op1 should be swapped.
3903 static bool swapMayExposeCSEOpportunities(const Value * Op0,
3904                                           const Value * Op1) {
3905   // Filter out pointer value as those cannot appears directly in subtract.
3906   // FIXME: we may want to go through inttoptrs or bitcasts.
3907   if (Op0->getType()->isPointerTy())
3908     return false;
3909   // Count every uses of both Op0 and Op1 in a subtract.
3910   // Each time Op0 is the first operand, count -1: swapping is bad, the
3911   // subtract has already the same layout as the compare.
3912   // Each time Op0 is the second operand, count +1: swapping is good, the
3913   // subtract has a different layout as the compare.
3914   // At the end, if the benefit is greater than 0, Op0 should come second to
3915   // expose more CSE opportunities.
3916   int GlobalSwapBenefits = 0;
3917   for (const User *U : Op0->users()) {
3918     const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(U);
3919     if (!BinOp || BinOp->getOpcode() != Instruction::Sub)
3920       continue;
3921     // If Op0 is the first argument, this is not beneficial to swap the
3922     // arguments.
3923     int LocalSwapBenefits = -1;
3924     unsigned Op1Idx = 1;
3925     if (BinOp->getOperand(Op1Idx) == Op0) {
3926       Op1Idx = 0;
3927       LocalSwapBenefits = 1;
3928     }
3929     if (BinOp->getOperand(Op1Idx) != Op1)
3930       continue;
3931     GlobalSwapBenefits += LocalSwapBenefits;
3932   }
3933   return GlobalSwapBenefits > 0;
3934 }
3935 
3936 /// \brief Check that one use is in the same block as the definition and all
3937 /// other uses are in blocks dominated by a given block.
3938 ///
3939 /// \param DI Definition
3940 /// \param UI Use
3941 /// \param DB Block that must dominate all uses of \p DI outside
3942 ///           the parent block
3943 /// \return true when \p UI is the only use of \p DI in the parent block
3944 /// and all other uses of \p DI are in blocks dominated by \p DB.
3945 ///
3946 bool InstCombiner::dominatesAllUses(const Instruction *DI,
3947                                     const Instruction *UI,
3948                                     const BasicBlock *DB) const {
3949   assert(DI && UI && "Instruction not defined\n");
3950   // Ignore incomplete definitions.
3951   if (!DI->getParent())
3952     return false;
3953   // DI and UI must be in the same block.
3954   if (DI->getParent() != UI->getParent())
3955     return false;
3956   // Protect from self-referencing blocks.
3957   if (DI->getParent() == DB)
3958     return false;
3959   for (const User *U : DI->users()) {
3960     auto *Usr = cast<Instruction>(U);
3961     if (Usr != UI && !DT.dominates(DB, Usr->getParent()))
3962       return false;
3963   }
3964   return true;
3965 }
3966 
3967 /// Return true when the instruction sequence within a block is select-cmp-br.
3968 static bool isChainSelectCmpBranch(const SelectInst *SI) {
3969   const BasicBlock *BB = SI->getParent();
3970   if (!BB)
3971     return false;
3972   auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
3973   if (!BI || BI->getNumSuccessors() != 2)
3974     return false;
3975   auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
3976   if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
3977     return false;
3978   return true;
3979 }
3980 
3981 /// \brief True when a select result is replaced by one of its operands
3982 /// in select-icmp sequence. This will eventually result in the elimination
3983 /// of the select.
3984 ///
3985 /// \param SI    Select instruction
3986 /// \param Icmp  Compare instruction
3987 /// \param SIOpd Operand that replaces the select
3988 ///
3989 /// Notes:
3990 /// - The replacement is global and requires dominator information
3991 /// - The caller is responsible for the actual replacement
3992 ///
3993 /// Example:
3994 ///
3995 /// entry:
3996 ///  %4 = select i1 %3, %C* %0, %C* null
3997 ///  %5 = icmp eq %C* %4, null
3998 ///  br i1 %5, label %9, label %7
3999 ///  ...
4000 ///  ; <label>:7                                       ; preds = %entry
4001 ///  %8 = getelementptr inbounds %C* %4, i64 0, i32 0
4002 ///  ...
4003 ///
4004 /// can be transformed to
4005 ///
4006 ///  %5 = icmp eq %C* %0, null
4007 ///  %6 = select i1 %3, i1 %5, i1 true
4008 ///  br i1 %6, label %9, label %7
4009 ///  ...
4010 ///  ; <label>:7                                       ; preds = %entry
4011 ///  %8 = getelementptr inbounds %C* %0, i64 0, i32 0  // replace by %0!
4012 ///
4013 /// Similar when the first operand of the select is a constant or/and
4014 /// the compare is for not equal rather than equal.
4015 ///
4016 /// NOTE: The function is only called when the select and compare constants
4017 /// are equal, the optimization can work only for EQ predicates. This is not a
4018 /// major restriction since a NE compare should be 'normalized' to an equal
4019 /// compare, which usually happens in the combiner and test case
4020 /// select-cmp-br.ll checks for it.
4021 bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
4022                                              const ICmpInst *Icmp,
4023                                              const unsigned SIOpd) {
4024   assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
4025   if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
4026     BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
4027     // The check for the single predecessor is not the best that can be
4028     // done. But it protects efficiently against cases like when SI's
4029     // home block has two successors, Succ and Succ1, and Succ1 predecessor
4030     // of Succ. Then SI can't be replaced by SIOpd because the use that gets
4031     // replaced can be reached on either path. So the uniqueness check
4032     // guarantees that the path all uses of SI (outside SI's parent) are on
4033     // is disjoint from all other paths out of SI. But that information
4034     // is more expensive to compute, and the trade-off here is in favor
4035     // of compile-time. It should also be noticed that we check for a single
4036     // predecessor and not only uniqueness. This to handle the situation when
4037     // Succ and Succ1 points to the same basic block.
4038     if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
4039       NumSel++;
4040       SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
4041       return true;
4042     }
4043   }
4044   return false;
4045 }
4046 
4047 /// Try to fold the comparison based on range information we can get by checking
4048 /// whether bits are known to be zero or one in the inputs.
4049 Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
4050   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4051   Type *Ty = Op0->getType();
4052   ICmpInst::Predicate Pred = I.getPredicate();
4053 
4054   // Get scalar or pointer size.
4055   unsigned BitWidth = Ty->isIntOrIntVectorTy()
4056                           ? Ty->getScalarSizeInBits()
4057                           : DL.getTypeSizeInBits(Ty->getScalarType());
4058 
4059   if (!BitWidth)
4060     return nullptr;
4061 
4062   KnownBits Op0Known(BitWidth);
4063   KnownBits Op1Known(BitWidth);
4064 
4065   if (SimplifyDemandedBits(&I, 0,
4066                            getDemandedBitsLHSMask(I, BitWidth),
4067                            Op0Known, 0))
4068     return &I;
4069 
4070   if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
4071                            Op1Known, 0))
4072     return &I;
4073 
4074   // Given the known and unknown bits, compute a range that the LHS could be
4075   // in.  Compute the Min, Max and RHS values based on the known bits. For the
4076   // EQ and NE we use unsigned values.
4077   APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
4078   APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
4079   if (I.isSigned()) {
4080     computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
4081     computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
4082   } else {
4083     computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
4084     computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
4085   }
4086 
4087   // If Min and Max are known to be the same, then SimplifyDemandedBits
4088   // figured out that the LHS is a constant. Constant fold this now, so that
4089   // code below can assume that Min != Max.
4090   if (!isa<Constant>(Op0) && Op0Min == Op0Max)
4091     return new ICmpInst(Pred, ConstantInt::get(Op0->getType(), Op0Min), Op1);
4092   if (!isa<Constant>(Op1) && Op1Min == Op1Max)
4093     return new ICmpInst(Pred, Op0, ConstantInt::get(Op1->getType(), Op1Min));
4094 
4095   // Based on the range information we know about the LHS, see if we can
4096   // simplify this comparison.  For example, (x&4) < 8 is always true.
4097   switch (Pred) {
4098   default:
4099     llvm_unreachable("Unknown icmp opcode!");
4100   case ICmpInst::ICMP_EQ:
4101   case ICmpInst::ICMP_NE: {
4102     if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) {
4103       return Pred == CmpInst::ICMP_EQ
4104                  ? replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()))
4105                  : replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4106     }
4107 
4108     // If all bits are known zero except for one, then we know at most one bit
4109     // is set. If the comparison is against zero, then this is a check to see if
4110     // *that* bit is set.
4111     APInt Op0KnownZeroInverted = ~Op0Known.Zero;
4112     if (Op1Known.isZero()) {
4113       // If the LHS is an AND with the same constant, look through it.
4114       Value *LHS = nullptr;
4115       const APInt *LHSC;
4116       if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) ||
4117           *LHSC != Op0KnownZeroInverted)
4118         LHS = Op0;
4119 
4120       Value *X;
4121       if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
4122         APInt ValToCheck = Op0KnownZeroInverted;
4123         Type *XTy = X->getType();
4124         if (ValToCheck.isPowerOf2()) {
4125           // ((1 << X) & 8) == 0 -> X != 3
4126           // ((1 << X) & 8) != 0 -> X == 3
4127           auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
4128           auto NewPred = ICmpInst::getInversePredicate(Pred);
4129           return new ICmpInst(NewPred, X, CmpC);
4130         } else if ((++ValToCheck).isPowerOf2()) {
4131           // ((1 << X) & 7) == 0 -> X >= 3
4132           // ((1 << X) & 7) != 0 -> X  < 3
4133           auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
4134           auto NewPred =
4135               Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
4136           return new ICmpInst(NewPred, X, CmpC);
4137         }
4138       }
4139 
4140       // Check if the LHS is 8 >>u x and the result is a power of 2 like 1.
4141       const APInt *CI;
4142       if (Op0KnownZeroInverted.isOneValue() &&
4143           match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
4144         // ((8 >>u X) & 1) == 0 -> X != 3
4145         // ((8 >>u X) & 1) != 0 -> X == 3
4146         unsigned CmpVal = CI->countTrailingZeros();
4147         auto NewPred = ICmpInst::getInversePredicate(Pred);
4148         return new ICmpInst(NewPred, X, ConstantInt::get(X->getType(), CmpVal));
4149       }
4150     }
4151     break;
4152   }
4153   case ICmpInst::ICMP_ULT: {
4154     if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
4155       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4156     if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
4157       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4158     if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
4159       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4160 
4161     const APInt *CmpC;
4162     if (match(Op1, m_APInt(CmpC))) {
4163       // A <u C -> A == C-1 if min(A)+1 == C
4164       if (*CmpC == Op0Min + 1)
4165         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4166                             ConstantInt::get(Op1->getType(), *CmpC - 1));
4167       // X <u C --> X == 0, if the number of zero bits in the bottom of X
4168       // exceeds the log2 of C.
4169       if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
4170         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4171                             Constant::getNullValue(Op1->getType()));
4172     }
4173     break;
4174   }
4175   case ICmpInst::ICMP_UGT: {
4176     if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
4177       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4178     if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B)
4179       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4180     if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
4181       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4182 
4183     const APInt *CmpC;
4184     if (match(Op1, m_APInt(CmpC))) {
4185       // A >u C -> A == C+1 if max(a)-1 == C
4186       if (*CmpC == Op0Max - 1)
4187         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4188                             ConstantInt::get(Op1->getType(), *CmpC + 1));
4189       // X >u C --> X != 0, if the number of zero bits in the bottom of X
4190       // exceeds the log2 of C.
4191       if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
4192         return new ICmpInst(ICmpInst::ICMP_NE, Op0,
4193                             Constant::getNullValue(Op1->getType()));
4194     }
4195     break;
4196   }
4197   case ICmpInst::ICMP_SLT: {
4198     if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C)
4199       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4200     if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
4201       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4202     if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
4203       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4204     const APInt *CmpC;
4205     if (match(Op1, m_APInt(CmpC))) {
4206       if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
4207         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4208                             ConstantInt::get(Op1->getType(), *CmpC - 1));
4209     }
4210     break;
4211   }
4212   case ICmpInst::ICMP_SGT: {
4213     if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
4214       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4215     if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
4216       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4217     if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
4218       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4219     const APInt *CmpC;
4220     if (match(Op1, m_APInt(CmpC))) {
4221       if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
4222         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4223                             ConstantInt::get(Op1->getType(), *CmpC + 1));
4224     }
4225     break;
4226   }
4227   case ICmpInst::ICMP_SGE:
4228     assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
4229     if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
4230       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4231     if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
4232       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4233     if (Op1Min == Op0Max) // A >=s B -> A == B if max(A) == min(B)
4234       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4235     break;
4236   case ICmpInst::ICMP_SLE:
4237     assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
4238     if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
4239       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4240     if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
4241       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4242     if (Op1Max == Op0Min) // A <=s B -> A == B if min(A) == max(B)
4243       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4244     break;
4245   case ICmpInst::ICMP_UGE:
4246     assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
4247     if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
4248       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4249     if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
4250       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4251     if (Op1Min == Op0Max) // A >=u B -> A == B if max(A) == min(B)
4252       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4253     break;
4254   case ICmpInst::ICMP_ULE:
4255     assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
4256     if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
4257       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4258     if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
4259       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4260     if (Op1Max == Op0Min) // A <=u B -> A == B if min(A) == max(B)
4261       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4262     break;
4263   }
4264 
4265   // Turn a signed comparison into an unsigned one if both operands are known to
4266   // have the same sign.
4267   if (I.isSigned() &&
4268       ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) ||
4269        (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
4270     return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
4271 
4272   return nullptr;
4273 }
4274 
4275 /// If we have an icmp le or icmp ge instruction with a constant operand, turn
4276 /// it into the appropriate icmp lt or icmp gt instruction. This transform
4277 /// allows them to be folded in visitICmpInst.
4278 static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
4279   ICmpInst::Predicate Pred = I.getPredicate();
4280   if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGE &&
4281       Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_UGE)
4282     return nullptr;
4283 
4284   Value *Op0 = I.getOperand(0);
4285   Value *Op1 = I.getOperand(1);
4286   auto *Op1C = dyn_cast<Constant>(Op1);
4287   if (!Op1C)
4288     return nullptr;
4289 
4290   // Check if the constant operand can be safely incremented/decremented without
4291   // overflowing/underflowing. For scalars, SimplifyICmpInst has already handled
4292   // the edge cases for us, so we just assert on them. For vectors, we must
4293   // handle the edge cases.
4294   Type *Op1Type = Op1->getType();
4295   bool IsSigned = I.isSigned();
4296   bool IsLE = (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE);
4297   auto *CI = dyn_cast<ConstantInt>(Op1C);
4298   if (CI) {
4299     // A <= MAX -> TRUE ; A >= MIN -> TRUE
4300     assert(IsLE ? !CI->isMaxValue(IsSigned) : !CI->isMinValue(IsSigned));
4301   } else if (Op1Type->isVectorTy()) {
4302     // TODO? If the edge cases for vectors were guaranteed to be handled as they
4303     // are for scalar, we could remove the min/max checks. However, to do that,
4304     // we would have to use insertelement/shufflevector to replace edge values.
4305     unsigned NumElts = Op1Type->getVectorNumElements();
4306     for (unsigned i = 0; i != NumElts; ++i) {
4307       Constant *Elt = Op1C->getAggregateElement(i);
4308       if (!Elt)
4309         return nullptr;
4310 
4311       if (isa<UndefValue>(Elt))
4312         continue;
4313 
4314       // Bail out if we can't determine if this constant is min/max or if we
4315       // know that this constant is min/max.
4316       auto *CI = dyn_cast<ConstantInt>(Elt);
4317       if (!CI || (IsLE ? CI->isMaxValue(IsSigned) : CI->isMinValue(IsSigned)))
4318         return nullptr;
4319     }
4320   } else {
4321     // ConstantExpr?
4322     return nullptr;
4323   }
4324 
4325   // Increment or decrement the constant and set the new comparison predicate:
4326   // ULE -> ULT ; UGE -> UGT ; SLE -> SLT ; SGE -> SGT
4327   Constant *OneOrNegOne = ConstantInt::get(Op1Type, IsLE ? 1 : -1, true);
4328   CmpInst::Predicate NewPred = IsLE ? ICmpInst::ICMP_ULT: ICmpInst::ICMP_UGT;
4329   NewPred = IsSigned ? ICmpInst::getSignedPredicate(NewPred) : NewPred;
4330   return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne));
4331 }
4332 
4333 /// Integer compare with boolean values can always be turned into bitwise ops.
4334 static Instruction *canonicalizeICmpBool(ICmpInst &I,
4335                                          InstCombiner::BuilderTy &Builder) {
4336   Value *A = I.getOperand(0), *B = I.getOperand(1);
4337   assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only");
4338 
4339   // A boolean compared to true/false can be simplified to Op0/true/false in
4340   // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
4341   // Cases not handled by InstSimplify are always 'not' of Op0.
4342   if (match(B, m_Zero())) {
4343     switch (I.getPredicate()) {
4344       case CmpInst::ICMP_EQ:  // A ==   0 -> !A
4345       case CmpInst::ICMP_ULE: // A <=u  0 -> !A
4346       case CmpInst::ICMP_SGE: // A >=s  0 -> !A
4347         return BinaryOperator::CreateNot(A);
4348       default:
4349         llvm_unreachable("ICmp i1 X, C not simplified as expected.");
4350     }
4351   } else if (match(B, m_One())) {
4352     switch (I.getPredicate()) {
4353       case CmpInst::ICMP_NE:  // A !=  1 -> !A
4354       case CmpInst::ICMP_ULT: // A <u  1 -> !A
4355       case CmpInst::ICMP_SGT: // A >s -1 -> !A
4356         return BinaryOperator::CreateNot(A);
4357       default:
4358         llvm_unreachable("ICmp i1 X, C not simplified as expected.");
4359     }
4360   }
4361 
4362   switch (I.getPredicate()) {
4363   default:
4364     llvm_unreachable("Invalid icmp instruction!");
4365   case ICmpInst::ICMP_EQ:
4366     // icmp eq i1 A, B -> ~(A ^ B)
4367     return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
4368 
4369   case ICmpInst::ICMP_NE:
4370     // icmp ne i1 A, B -> A ^ B
4371     return BinaryOperator::CreateXor(A, B);
4372 
4373   case ICmpInst::ICMP_UGT:
4374     // icmp ugt -> icmp ult
4375     std::swap(A, B);
4376     LLVM_FALLTHROUGH;
4377   case ICmpInst::ICMP_ULT:
4378     // icmp ult i1 A, B -> ~A & B
4379     return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
4380 
4381   case ICmpInst::ICMP_SGT:
4382     // icmp sgt -> icmp slt
4383     std::swap(A, B);
4384     LLVM_FALLTHROUGH;
4385   case ICmpInst::ICMP_SLT:
4386     // icmp slt i1 A, B -> A & ~B
4387     return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
4388 
4389   case ICmpInst::ICMP_UGE:
4390     // icmp uge -> icmp ule
4391     std::swap(A, B);
4392     LLVM_FALLTHROUGH;
4393   case ICmpInst::ICMP_ULE:
4394     // icmp ule i1 A, B -> ~A | B
4395     return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
4396 
4397   case ICmpInst::ICMP_SGE:
4398     // icmp sge -> icmp sle
4399     std::swap(A, B);
4400     LLVM_FALLTHROUGH;
4401   case ICmpInst::ICMP_SLE:
4402     // icmp sle i1 A, B -> A | ~B
4403     return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
4404   }
4405 }
4406 
4407 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
4408   bool Changed = false;
4409   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4410   unsigned Op0Cplxity = getComplexity(Op0);
4411   unsigned Op1Cplxity = getComplexity(Op1);
4412 
4413   /// Orders the operands of the compare so that they are listed from most
4414   /// complex to least complex.  This puts constants before unary operators,
4415   /// before binary operators.
4416   if (Op0Cplxity < Op1Cplxity ||
4417       (Op0Cplxity == Op1Cplxity && swapMayExposeCSEOpportunities(Op0, Op1))) {
4418     I.swapOperands();
4419     std::swap(Op0, Op1);
4420     Changed = true;
4421   }
4422 
4423   if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1,
4424                                   SQ.getWithInstruction(&I)))
4425     return replaceInstUsesWith(I, V);
4426 
4427   // Comparing -val or val with non-zero is the same as just comparing val
4428   // ie, abs(val) != 0 -> val != 0
4429   if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) {
4430     Value *Cond, *SelectTrue, *SelectFalse;
4431     if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
4432                             m_Value(SelectFalse)))) {
4433       if (Value *V = dyn_castNegVal(SelectTrue)) {
4434         if (V == SelectFalse)
4435           return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
4436       }
4437       else if (Value *V = dyn_castNegVal(SelectFalse)) {
4438         if (V == SelectTrue)
4439           return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
4440       }
4441     }
4442   }
4443 
4444   if (Op0->getType()->isIntOrIntVectorTy(1))
4445     if (Instruction *Res = canonicalizeICmpBool(I, Builder))
4446       return Res;
4447 
4448   if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
4449     return NewICmp;
4450 
4451   if (Instruction *Res = foldICmpWithConstant(I))
4452     return Res;
4453 
4454   if (Instruction *Res = foldICmpUsingKnownBits(I))
4455     return Res;
4456 
4457   // Test if the ICmpInst instruction is used exclusively by a select as
4458   // part of a minimum or maximum operation. If so, refrain from doing
4459   // any other folding. This helps out other analyses which understand
4460   // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
4461   // and CodeGen. And in this case, at least one of the comparison
4462   // operands has at least one user besides the compare (the select),
4463   // which would often largely negate the benefit of folding anyway.
4464   //
4465   // Do the same for the other patterns recognized by matchSelectPattern.
4466   if (I.hasOneUse())
4467     if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
4468       Value *A, *B;
4469       SelectPatternResult SPR = matchSelectPattern(SI, A, B);
4470       if (SPR.Flavor != SPF_UNKNOWN)
4471         return nullptr;
4472     }
4473 
4474   // Do this after checking for min/max to prevent infinite looping.
4475   if (Instruction *Res = foldICmpWithZero(I))
4476     return Res;
4477 
4478   // FIXME: We only do this after checking for min/max to prevent infinite
4479   // looping caused by a reverse canonicalization of these patterns for min/max.
4480   // FIXME: The organization of folds is a mess. These would naturally go into
4481   // canonicalizeCmpWithConstant(), but we can't move all of the above folds
4482   // down here after the min/max restriction.
4483   ICmpInst::Predicate Pred = I.getPredicate();
4484   const APInt *C;
4485   if (match(Op1, m_APInt(C))) {
4486     // For i32: x >u 2147483647 -> x <s 0  -> true if sign bit set
4487     if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) {
4488       Constant *Zero = Constant::getNullValue(Op0->getType());
4489       return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero);
4490     }
4491 
4492     // For i32: x <u 2147483648 -> x >s -1  -> true if sign bit clear
4493     if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) {
4494       Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
4495       return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
4496     }
4497   }
4498 
4499   if (Instruction *Res = foldICmpInstWithConstant(I))
4500     return Res;
4501 
4502   if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
4503     return Res;
4504 
4505   // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
4506   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
4507     if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
4508       return NI;
4509   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
4510     if (Instruction *NI = foldGEPICmp(GEP, Op0,
4511                            ICmpInst::getSwappedPredicate(I.getPredicate()), I))
4512       return NI;
4513 
4514   // Try to optimize equality comparisons against alloca-based pointers.
4515   if (Op0->getType()->isPointerTy() && I.isEquality()) {
4516     assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
4517     if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
4518       if (Instruction *New = foldAllocaCmp(I, Alloca, Op1))
4519         return New;
4520     if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
4521       if (Instruction *New = foldAllocaCmp(I, Alloca, Op0))
4522         return New;
4523   }
4524 
4525   // Test to see if the operands of the icmp are casted versions of other
4526   // values.  If the ptr->ptr cast can be stripped off both arguments, we do so
4527   // now.
4528   if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
4529     if (Op0->getType()->isPointerTy() &&
4530         (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
4531       // We keep moving the cast from the left operand over to the right
4532       // operand, where it can often be eliminated completely.
4533       Op0 = CI->getOperand(0);
4534 
4535       // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
4536       // so eliminate it as well.
4537       if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
4538         Op1 = CI2->getOperand(0);
4539 
4540       // If Op1 is a constant, we can fold the cast into the constant.
4541       if (Op0->getType() != Op1->getType()) {
4542         if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
4543           Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
4544         } else {
4545           // Otherwise, cast the RHS right before the icmp
4546           Op1 = Builder.CreateBitCast(Op1, Op0->getType());
4547         }
4548       }
4549       return new ICmpInst(I.getPredicate(), Op0, Op1);
4550     }
4551   }
4552 
4553   if (isa<CastInst>(Op0)) {
4554     // Handle the special case of: icmp (cast bool to X), <cst>
4555     // This comes up when you have code like
4556     //   int X = A < B;
4557     //   if (X) ...
4558     // For generality, we handle any zero-extension of any operand comparison
4559     // with a constant or another cast from the same type.
4560     if (isa<Constant>(Op1) || isa<CastInst>(Op1))
4561       if (Instruction *R = foldICmpWithCastAndCast(I))
4562         return R;
4563   }
4564 
4565   if (Instruction *Res = foldICmpBinOp(I))
4566     return Res;
4567 
4568   if (Instruction *Res = foldICmpWithMinMax(I))
4569     return Res;
4570 
4571   {
4572     Value *A, *B;
4573     // Transform (A & ~B) == 0 --> (A & B) != 0
4574     // and       (A & ~B) != 0 --> (A & B) == 0
4575     // if A is a power of 2.
4576     if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
4577         match(Op1, m_Zero()) &&
4578         isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
4579       return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
4580                           Op1);
4581 
4582     // ~X < ~Y --> Y < X
4583     // ~X < C -->  X > ~C
4584     if (match(Op0, m_Not(m_Value(A)))) {
4585       if (match(Op1, m_Not(m_Value(B))))
4586         return new ICmpInst(I.getPredicate(), B, A);
4587 
4588       const APInt *C;
4589       if (match(Op1, m_APInt(C)))
4590         return new ICmpInst(I.getSwappedPredicate(), A,
4591                             ConstantInt::get(Op1->getType(), ~(*C)));
4592     }
4593 
4594     Instruction *AddI = nullptr;
4595     if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
4596                                      m_Instruction(AddI))) &&
4597         isa<IntegerType>(A->getType())) {
4598       Value *Result;
4599       Constant *Overflow;
4600       if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
4601                                 Overflow)) {
4602         replaceInstUsesWith(*AddI, Result);
4603         return replaceInstUsesWith(I, Overflow);
4604       }
4605     }
4606 
4607     // (zext a) * (zext b)  --> llvm.umul.with.overflow.
4608     if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
4609       if (Instruction *R = processUMulZExtIdiom(I, Op0, Op1, *this))
4610         return R;
4611     }
4612     if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
4613       if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this))
4614         return R;
4615     }
4616   }
4617 
4618   if (Instruction *Res = foldICmpEquality(I))
4619     return Res;
4620 
4621   // The 'cmpxchg' instruction returns an aggregate containing the old value and
4622   // an i1 which indicates whether or not we successfully did the swap.
4623   //
4624   // Replace comparisons between the old value and the expected value with the
4625   // indicator that 'cmpxchg' returns.
4626   //
4627   // N.B.  This transform is only valid when the 'cmpxchg' is not permitted to
4628   // spuriously fail.  In those cases, the old value may equal the expected
4629   // value but it is possible for the swap to not occur.
4630   if (I.getPredicate() == ICmpInst::ICMP_EQ)
4631     if (auto *EVI = dyn_cast<ExtractValueInst>(Op0))
4632       if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand()))
4633         if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 &&
4634             !ACXI->isWeak())
4635           return ExtractValueInst::Create(ACXI, 1);
4636 
4637   {
4638     Value *X; ConstantInt *Cst;
4639     // icmp X+Cst, X
4640     if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
4641       return foldICmpAddOpConst(X, Cst, I.getPredicate());
4642 
4643     // icmp X, X+Cst
4644     if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
4645       return foldICmpAddOpConst(X, Cst, I.getSwappedPredicate());
4646   }
4647   return Changed ? &I : nullptr;
4648 }
4649 
4650 /// Fold fcmp ([us]itofp x, cst) if possible.
4651 Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
4652                                                 Constant *RHSC) {
4653   if (!isa<ConstantFP>(RHSC)) return nullptr;
4654   const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
4655 
4656   // Get the width of the mantissa.  We don't want to hack on conversions that
4657   // might lose information from the integer, e.g. "i64 -> float"
4658   int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
4659   if (MantissaWidth == -1) return nullptr;  // Unknown.
4660 
4661   IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
4662 
4663   bool LHSUnsigned = isa<UIToFPInst>(LHSI);
4664 
4665   if (I.isEquality()) {
4666     FCmpInst::Predicate P = I.getPredicate();
4667     bool IsExact = false;
4668     APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned);
4669     RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact);
4670 
4671     // If the floating point constant isn't an integer value, we know if we will
4672     // ever compare equal / not equal to it.
4673     if (!IsExact) {
4674       // TODO: Can never be -0.0 and other non-representable values
4675       APFloat RHSRoundInt(RHS);
4676       RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
4677       if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) {
4678         if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
4679           return replaceInstUsesWith(I, Builder.getFalse());
4680 
4681         assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
4682         return replaceInstUsesWith(I, Builder.getTrue());
4683       }
4684     }
4685 
4686     // TODO: If the constant is exactly representable, is it always OK to do
4687     // equality compares as integer?
4688   }
4689 
4690   // Check to see that the input is converted from an integer type that is small
4691   // enough that preserves all bits.  TODO: check here for "known" sign bits.
4692   // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
4693   unsigned InputSize = IntTy->getScalarSizeInBits();
4694 
4695   // Following test does NOT adjust InputSize downwards for signed inputs,
4696   // because the most negative value still requires all the mantissa bits
4697   // to distinguish it from one less than that value.
4698   if ((int)InputSize > MantissaWidth) {
4699     // Conversion would lose accuracy. Check if loss can impact comparison.
4700     int Exp = ilogb(RHS);
4701     if (Exp == APFloat::IEK_Inf) {
4702       int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
4703       if (MaxExponent < (int)InputSize - !LHSUnsigned)
4704         // Conversion could create infinity.
4705         return nullptr;
4706     } else {
4707       // Note that if RHS is zero or NaN, then Exp is negative
4708       // and first condition is trivially false.
4709       if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
4710         // Conversion could affect comparison.
4711         return nullptr;
4712     }
4713   }
4714 
4715   // Otherwise, we can potentially simplify the comparison.  We know that it
4716   // will always come through as an integer value and we know the constant is
4717   // not a NAN (it would have been previously simplified).
4718   assert(!RHS.isNaN() && "NaN comparison not already folded!");
4719 
4720   ICmpInst::Predicate Pred;
4721   switch (I.getPredicate()) {
4722   default: llvm_unreachable("Unexpected predicate!");
4723   case FCmpInst::FCMP_UEQ:
4724   case FCmpInst::FCMP_OEQ:
4725     Pred = ICmpInst::ICMP_EQ;
4726     break;
4727   case FCmpInst::FCMP_UGT:
4728   case FCmpInst::FCMP_OGT:
4729     Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
4730     break;
4731   case FCmpInst::FCMP_UGE:
4732   case FCmpInst::FCMP_OGE:
4733     Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
4734     break;
4735   case FCmpInst::FCMP_ULT:
4736   case FCmpInst::FCMP_OLT:
4737     Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
4738     break;
4739   case FCmpInst::FCMP_ULE:
4740   case FCmpInst::FCMP_OLE:
4741     Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
4742     break;
4743   case FCmpInst::FCMP_UNE:
4744   case FCmpInst::FCMP_ONE:
4745     Pred = ICmpInst::ICMP_NE;
4746     break;
4747   case FCmpInst::FCMP_ORD:
4748     return replaceInstUsesWith(I, Builder.getTrue());
4749   case FCmpInst::FCMP_UNO:
4750     return replaceInstUsesWith(I, Builder.getFalse());
4751   }
4752 
4753   // Now we know that the APFloat is a normal number, zero or inf.
4754 
4755   // See if the FP constant is too large for the integer.  For example,
4756   // comparing an i8 to 300.0.
4757   unsigned IntWidth = IntTy->getScalarSizeInBits();
4758 
4759   if (!LHSUnsigned) {
4760     // If the RHS value is > SignedMax, fold the comparison.  This handles +INF
4761     // and large values.
4762     APFloat SMax(RHS.getSemantics());
4763     SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
4764                           APFloat::rmNearestTiesToEven);
4765     if (SMax.compare(RHS) == APFloat::cmpLessThan) {  // smax < 13123.0
4766       if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_SLT ||
4767           Pred == ICmpInst::ICMP_SLE)
4768         return replaceInstUsesWith(I, Builder.getTrue());
4769       return replaceInstUsesWith(I, Builder.getFalse());
4770     }
4771   } else {
4772     // If the RHS value is > UnsignedMax, fold the comparison. This handles
4773     // +INF and large values.
4774     APFloat UMax(RHS.getSemantics());
4775     UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
4776                           APFloat::rmNearestTiesToEven);
4777     if (UMax.compare(RHS) == APFloat::cmpLessThan) {  // umax < 13123.0
4778       if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_ULT ||
4779           Pred == ICmpInst::ICMP_ULE)
4780         return replaceInstUsesWith(I, Builder.getTrue());
4781       return replaceInstUsesWith(I, Builder.getFalse());
4782     }
4783   }
4784 
4785   if (!LHSUnsigned) {
4786     // See if the RHS value is < SignedMin.
4787     APFloat SMin(RHS.getSemantics());
4788     SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
4789                           APFloat::rmNearestTiesToEven);
4790     if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
4791       if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
4792           Pred == ICmpInst::ICMP_SGE)
4793         return replaceInstUsesWith(I, Builder.getTrue());
4794       return replaceInstUsesWith(I, Builder.getFalse());
4795     }
4796   } else {
4797     // See if the RHS value is < UnsignedMin.
4798     APFloat SMin(RHS.getSemantics());
4799     SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true,
4800                           APFloat::rmNearestTiesToEven);
4801     if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
4802       if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
4803           Pred == ICmpInst::ICMP_UGE)
4804         return replaceInstUsesWith(I, Builder.getTrue());
4805       return replaceInstUsesWith(I, Builder.getFalse());
4806     }
4807   }
4808 
4809   // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
4810   // [0, UMAX], but it may still be fractional.  See if it is fractional by
4811   // casting the FP value to the integer value and back, checking for equality.
4812   // Don't do this for zero, because -0.0 is not fractional.
4813   Constant *RHSInt = LHSUnsigned
4814     ? ConstantExpr::getFPToUI(RHSC, IntTy)
4815     : ConstantExpr::getFPToSI(RHSC, IntTy);
4816   if (!RHS.isZero()) {
4817     bool Equal = LHSUnsigned
4818       ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
4819       : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
4820     if (!Equal) {
4821       // If we had a comparison against a fractional value, we have to adjust
4822       // the compare predicate and sometimes the value.  RHSC is rounded towards
4823       // zero at this point.
4824       switch (Pred) {
4825       default: llvm_unreachable("Unexpected integer comparison!");
4826       case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
4827         return replaceInstUsesWith(I, Builder.getTrue());
4828       case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
4829         return replaceInstUsesWith(I, Builder.getFalse());
4830       case ICmpInst::ICMP_ULE:
4831         // (float)int <= 4.4   --> int <= 4
4832         // (float)int <= -4.4  --> false
4833         if (RHS.isNegative())
4834           return replaceInstUsesWith(I, Builder.getFalse());
4835         break;
4836       case ICmpInst::ICMP_SLE:
4837         // (float)int <= 4.4   --> int <= 4
4838         // (float)int <= -4.4  --> int < -4
4839         if (RHS.isNegative())
4840           Pred = ICmpInst::ICMP_SLT;
4841         break;
4842       case ICmpInst::ICMP_ULT:
4843         // (float)int < -4.4   --> false
4844         // (float)int < 4.4    --> int <= 4
4845         if (RHS.isNegative())
4846           return replaceInstUsesWith(I, Builder.getFalse());
4847         Pred = ICmpInst::ICMP_ULE;
4848         break;
4849       case ICmpInst::ICMP_SLT:
4850         // (float)int < -4.4   --> int < -4
4851         // (float)int < 4.4    --> int <= 4
4852         if (!RHS.isNegative())
4853           Pred = ICmpInst::ICMP_SLE;
4854         break;
4855       case ICmpInst::ICMP_UGT:
4856         // (float)int > 4.4    --> int > 4
4857         // (float)int > -4.4   --> true
4858         if (RHS.isNegative())
4859           return replaceInstUsesWith(I, Builder.getTrue());
4860         break;
4861       case ICmpInst::ICMP_SGT:
4862         // (float)int > 4.4    --> int > 4
4863         // (float)int > -4.4   --> int >= -4
4864         if (RHS.isNegative())
4865           Pred = ICmpInst::ICMP_SGE;
4866         break;
4867       case ICmpInst::ICMP_UGE:
4868         // (float)int >= -4.4   --> true
4869         // (float)int >= 4.4    --> int > 4
4870         if (RHS.isNegative())
4871           return replaceInstUsesWith(I, Builder.getTrue());
4872         Pred = ICmpInst::ICMP_UGT;
4873         break;
4874       case ICmpInst::ICMP_SGE:
4875         // (float)int >= -4.4   --> int >= -4
4876         // (float)int >= 4.4    --> int > 4
4877         if (!RHS.isNegative())
4878           Pred = ICmpInst::ICMP_SGT;
4879         break;
4880       }
4881     }
4882   }
4883 
4884   // Lower this FP comparison into an appropriate integer version of the
4885   // comparison.
4886   return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
4887 }
4888 
4889 Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
4890   bool Changed = false;
4891 
4892   /// Orders the operands of the compare so that they are listed from most
4893   /// complex to least complex.  This puts constants before unary operators,
4894   /// before binary operators.
4895   if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
4896     I.swapOperands();
4897     Changed = true;
4898   }
4899 
4900   const CmpInst::Predicate Pred = I.getPredicate();
4901   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4902   if (Value *V = SimplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(),
4903                                   SQ.getWithInstruction(&I)))
4904     return replaceInstUsesWith(I, V);
4905 
4906   // Simplify 'fcmp pred X, X'
4907   if (Op0 == Op1) {
4908     switch (Pred) {
4909       default: break;
4910     case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
4911     case FCmpInst::FCMP_ULT:    // True if unordered or less than
4912     case FCmpInst::FCMP_UGT:    // True if unordered or greater than
4913     case FCmpInst::FCMP_UNE:    // True if unordered or not equal
4914       // Canonicalize these to be 'fcmp uno %X, 0.0'.
4915       I.setPredicate(FCmpInst::FCMP_UNO);
4916       I.setOperand(1, Constant::getNullValue(Op0->getType()));
4917       return &I;
4918 
4919     case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
4920     case FCmpInst::FCMP_OEQ:    // True if ordered and equal
4921     case FCmpInst::FCMP_OGE:    // True if ordered and greater than or equal
4922     case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
4923       // Canonicalize these to be 'fcmp ord %X, 0.0'.
4924       I.setPredicate(FCmpInst::FCMP_ORD);
4925       I.setOperand(1, Constant::getNullValue(Op0->getType()));
4926       return &I;
4927     }
4928   }
4929 
4930   // If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
4931   // then canonicalize the operand to 0.0.
4932   if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
4933     if (!match(Op0, m_Zero()) && isKnownNeverNaN(Op0)) {
4934       I.setOperand(0, ConstantFP::getNullValue(Op0->getType()));
4935       return &I;
4936     }
4937     if (!match(Op1, m_Zero()) && isKnownNeverNaN(Op1)) {
4938       I.setOperand(1, ConstantFP::getNullValue(Op0->getType()));
4939       return &I;
4940     }
4941   }
4942 
4943   // Test if the FCmpInst instruction is used exclusively by a select as
4944   // part of a minimum or maximum operation. If so, refrain from doing
4945   // any other folding. This helps out other analyses which understand
4946   // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
4947   // and CodeGen. And in this case, at least one of the comparison
4948   // operands has at least one user besides the compare (the select),
4949   // which would often largely negate the benefit of folding anyway.
4950   if (I.hasOneUse())
4951     if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
4952       Value *A, *B;
4953       SelectPatternResult SPR = matchSelectPattern(SI, A, B);
4954       if (SPR.Flavor != SPF_UNKNOWN)
4955         return nullptr;
4956     }
4957 
4958   // Handle fcmp with constant RHS
4959   if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
4960     if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
4961       switch (LHSI->getOpcode()) {
4962       case Instruction::FPExt: {
4963         // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
4964         FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
4965         ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
4966         if (!RHSF)
4967           break;
4968 
4969         const fltSemantics *Sem;
4970         // FIXME: This shouldn't be here.
4971         if (LHSExt->getSrcTy()->isHalfTy())
4972           Sem = &APFloat::IEEEhalf();
4973         else if (LHSExt->getSrcTy()->isFloatTy())
4974           Sem = &APFloat::IEEEsingle();
4975         else if (LHSExt->getSrcTy()->isDoubleTy())
4976           Sem = &APFloat::IEEEdouble();
4977         else if (LHSExt->getSrcTy()->isFP128Ty())
4978           Sem = &APFloat::IEEEquad();
4979         else if (LHSExt->getSrcTy()->isX86_FP80Ty())
4980           Sem = &APFloat::x87DoubleExtended();
4981         else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
4982           Sem = &APFloat::PPCDoubleDouble();
4983         else
4984           break;
4985 
4986         bool Lossy;
4987         APFloat F = RHSF->getValueAPF();
4988         F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
4989 
4990         // Avoid lossy conversions and denormals. Zero is a special case
4991         // that's OK to convert.
4992         APFloat Fabs = F;
4993         Fabs.clearSign();
4994         if (!Lossy &&
4995             ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
4996                  APFloat::cmpLessThan) || Fabs.isZero()))
4997 
4998           return new FCmpInst(Pred, LHSExt->getOperand(0),
4999                               ConstantFP::get(RHSC->getContext(), F));
5000         break;
5001       }
5002       case Instruction::PHI:
5003         // Only fold fcmp into the PHI if the phi and fcmp are in the same
5004         // block.  If in the same block, we're encouraging jump threading.  If
5005         // not, we are just pessimizing the code by making an i1 phi.
5006         if (LHSI->getParent() == I.getParent())
5007           if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
5008             return NV;
5009         break;
5010       case Instruction::SIToFP:
5011       case Instruction::UIToFP:
5012         if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
5013           return NV;
5014         break;
5015       case Instruction::FSub: {
5016         // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
5017         Value *Op;
5018         if (match(LHSI, m_FNeg(m_Value(Op))))
5019           return new FCmpInst(I.getSwappedPredicate(), Op,
5020                               ConstantExpr::getFNeg(RHSC));
5021         break;
5022       }
5023       case Instruction::Load:
5024         if (GetElementPtrInst *GEP =
5025             dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
5026           if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
5027             if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
5028                 !cast<LoadInst>(LHSI)->isVolatile())
5029               if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
5030                 return Res;
5031         }
5032         break;
5033       case Instruction::Call: {
5034         if (!RHSC->isNullValue())
5035           break;
5036 
5037         CallInst *CI = cast<CallInst>(LHSI);
5038         Intrinsic::ID IID = getIntrinsicForCallSite(CI, &TLI);
5039         if (IID != Intrinsic::fabs)
5040           break;
5041 
5042         // Various optimization for fabs compared with zero.
5043         switch (Pred) {
5044         default:
5045           break;
5046         // fabs(x) < 0 --> false
5047         case FCmpInst::FCMP_OLT:
5048           llvm_unreachable("handled by SimplifyFCmpInst");
5049         // fabs(x) > 0 --> x != 0
5050         case FCmpInst::FCMP_OGT:
5051           return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC);
5052         // fabs(x) <= 0 --> x == 0
5053         case FCmpInst::FCMP_OLE:
5054           return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC);
5055         // fabs(x) >= 0 --> !isnan(x)
5056         case FCmpInst::FCMP_OGE:
5057           return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC);
5058         // fabs(x) == 0 --> x == 0
5059         // fabs(x) != 0 --> x != 0
5060         case FCmpInst::FCMP_OEQ:
5061         case FCmpInst::FCMP_UEQ:
5062         case FCmpInst::FCMP_ONE:
5063         case FCmpInst::FCMP_UNE:
5064           return new FCmpInst(Pred, CI->getArgOperand(0), RHSC);
5065         }
5066       }
5067       }
5068   }
5069 
5070   // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
5071   Value *X, *Y;
5072   if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
5073     return new FCmpInst(I.getSwappedPredicate(), X, Y);
5074 
5075   // fcmp (fpext x), (fpext y) -> fcmp x, y
5076   if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
5077     if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
5078       if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
5079         return new FCmpInst(Pred, LHSExt->getOperand(0), RHSExt->getOperand(0));
5080 
5081   return Changed ? &I : nullptr;
5082 }
5083