1 //===- InstCombineCompares.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visitICmp and visitFCmp functions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/APSInt.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/ConstantFolding.h"
18 #include "llvm/Analysis/InstructionSimplify.h"
19 #include "llvm/Analysis/TargetLibraryInfo.h"
20 #include "llvm/IR/ConstantRange.h"
21 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/GetElementPtrTypeIterator.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/IR/PatternMatch.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/KnownBits.h"
27 #include "llvm/Transforms/InstCombine/InstCombiner.h"
28 
29 using namespace llvm;
30 using namespace PatternMatch;
31 
32 #define DEBUG_TYPE "instcombine"
33 
34 // How many times is a select replaced by one of its operands?
35 STATISTIC(NumSel, "Number of select opts");
36 
37 
38 /// Compute Result = In1+In2, returning true if the result overflowed for this
39 /// type.
40 static bool addWithOverflow(APInt &Result, const APInt &In1,
41                             const APInt &In2, bool IsSigned = false) {
42   bool Overflow;
43   if (IsSigned)
44     Result = In1.sadd_ov(In2, Overflow);
45   else
46     Result = In1.uadd_ov(In2, Overflow);
47 
48   return Overflow;
49 }
50 
51 /// Compute Result = In1-In2, returning true if the result overflowed for this
52 /// type.
53 static bool subWithOverflow(APInt &Result, const APInt &In1,
54                             const APInt &In2, bool IsSigned = false) {
55   bool Overflow;
56   if (IsSigned)
57     Result = In1.ssub_ov(In2, Overflow);
58   else
59     Result = In1.usub_ov(In2, Overflow);
60 
61   return Overflow;
62 }
63 
64 /// Given an icmp instruction, return true if any use of this comparison is a
65 /// branch on sign bit comparison.
66 static bool hasBranchUse(ICmpInst &I) {
67   for (auto *U : I.users())
68     if (isa<BranchInst>(U))
69       return true;
70   return false;
71 }
72 
73 /// Returns true if the exploded icmp can be expressed as a signed comparison
74 /// to zero and updates the predicate accordingly.
75 /// The signedness of the comparison is preserved.
76 /// TODO: Refactor with decomposeBitTestICmp()?
77 static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
78   if (!ICmpInst::isSigned(Pred))
79     return false;
80 
81   if (C.isNullValue())
82     return ICmpInst::isRelational(Pred);
83 
84   if (C.isOneValue()) {
85     if (Pred == ICmpInst::ICMP_SLT) {
86       Pred = ICmpInst::ICMP_SLE;
87       return true;
88     }
89   } else if (C.isAllOnesValue()) {
90     if (Pred == ICmpInst::ICMP_SGT) {
91       Pred = ICmpInst::ICMP_SGE;
92       return true;
93     }
94   }
95 
96   return false;
97 }
98 
99 /// This is called when we see this pattern:
100 ///   cmp pred (load (gep GV, ...)), cmpcst
101 /// where GV is a global variable with a constant initializer. Try to simplify
102 /// this into some simple computation that does not need the load. For example
103 /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
104 ///
105 /// If AndCst is non-null, then the loaded value is masked with that constant
106 /// before doing the comparison. This handles cases like "A[i]&4 == 0".
107 Instruction *
108 InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
109                                                GlobalVariable *GV, CmpInst &ICI,
110                                                ConstantInt *AndCst) {
111   Constant *Init = GV->getInitializer();
112   if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
113     return nullptr;
114 
115   uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
116   // Don't blow up on huge arrays.
117   if (ArrayElementCount > MaxArraySizeForCombine)
118     return nullptr;
119 
120   // There are many forms of this optimization we can handle, for now, just do
121   // the simple index into a single-dimensional array.
122   //
123   // Require: GEP GV, 0, i {{, constant indices}}
124   if (GEP->getNumOperands() < 3 ||
125       !isa<ConstantInt>(GEP->getOperand(1)) ||
126       !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
127       isa<Constant>(GEP->getOperand(2)))
128     return nullptr;
129 
130   // Check that indices after the variable are constants and in-range for the
131   // type they index.  Collect the indices.  This is typically for arrays of
132   // structs.
133   SmallVector<unsigned, 4> LaterIndices;
134 
135   Type *EltTy = Init->getType()->getArrayElementType();
136   for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
137     ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
138     if (!Idx) return nullptr;  // Variable index.
139 
140     uint64_t IdxVal = Idx->getZExtValue();
141     if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
142 
143     if (StructType *STy = dyn_cast<StructType>(EltTy))
144       EltTy = STy->getElementType(IdxVal);
145     else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
146       if (IdxVal >= ATy->getNumElements()) return nullptr;
147       EltTy = ATy->getElementType();
148     } else {
149       return nullptr; // Unknown type.
150     }
151 
152     LaterIndices.push_back(IdxVal);
153   }
154 
155   enum { Overdefined = -3, Undefined = -2 };
156 
157   // Variables for our state machines.
158 
159   // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
160   // "i == 47 | i == 87", where 47 is the first index the condition is true for,
161   // and 87 is the second (and last) index.  FirstTrueElement is -2 when
162   // undefined, otherwise set to the first true element.  SecondTrueElement is
163   // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
164   int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
165 
166   // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
167   // form "i != 47 & i != 87".  Same state transitions as for true elements.
168   int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
169 
170   /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
171   /// define a state machine that triggers for ranges of values that the index
172   /// is true or false for.  This triggers on things like "abbbbc"[i] == 'b'.
173   /// This is -2 when undefined, -3 when overdefined, and otherwise the last
174   /// index in the range (inclusive).  We use -2 for undefined here because we
175   /// use relative comparisons and don't want 0-1 to match -1.
176   int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
177 
178   // MagicBitvector - This is a magic bitvector where we set a bit if the
179   // comparison is true for element 'i'.  If there are 64 elements or less in
180   // the array, this will fully represent all the comparison results.
181   uint64_t MagicBitvector = 0;
182 
183   // Scan the array and see if one of our patterns matches.
184   Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
185   for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
186     Constant *Elt = Init->getAggregateElement(i);
187     if (!Elt) return nullptr;
188 
189     // If this is indexing an array of structures, get the structure element.
190     if (!LaterIndices.empty())
191       Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
192 
193     // If the element is masked, handle it.
194     if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
195 
196     // Find out if the comparison would be true or false for the i'th element.
197     Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
198                                                   CompareRHS, DL, &TLI);
199     // If the result is undef for this element, ignore it.
200     if (isa<UndefValue>(C)) {
201       // Extend range state machines to cover this element in case there is an
202       // undef in the middle of the range.
203       if (TrueRangeEnd == (int)i-1)
204         TrueRangeEnd = i;
205       if (FalseRangeEnd == (int)i-1)
206         FalseRangeEnd = i;
207       continue;
208     }
209 
210     // If we can't compute the result for any of the elements, we have to give
211     // up evaluating the entire conditional.
212     if (!isa<ConstantInt>(C)) return nullptr;
213 
214     // Otherwise, we know if the comparison is true or false for this element,
215     // update our state machines.
216     bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
217 
218     // State machine for single/double/range index comparison.
219     if (IsTrueForElt) {
220       // Update the TrueElement state machine.
221       if (FirstTrueElement == Undefined)
222         FirstTrueElement = TrueRangeEnd = i;  // First true element.
223       else {
224         // Update double-compare state machine.
225         if (SecondTrueElement == Undefined)
226           SecondTrueElement = i;
227         else
228           SecondTrueElement = Overdefined;
229 
230         // Update range state machine.
231         if (TrueRangeEnd == (int)i-1)
232           TrueRangeEnd = i;
233         else
234           TrueRangeEnd = Overdefined;
235       }
236     } else {
237       // Update the FalseElement state machine.
238       if (FirstFalseElement == Undefined)
239         FirstFalseElement = FalseRangeEnd = i; // First false element.
240       else {
241         // Update double-compare state machine.
242         if (SecondFalseElement == Undefined)
243           SecondFalseElement = i;
244         else
245           SecondFalseElement = Overdefined;
246 
247         // Update range state machine.
248         if (FalseRangeEnd == (int)i-1)
249           FalseRangeEnd = i;
250         else
251           FalseRangeEnd = Overdefined;
252       }
253     }
254 
255     // If this element is in range, update our magic bitvector.
256     if (i < 64 && IsTrueForElt)
257       MagicBitvector |= 1ULL << i;
258 
259     // If all of our states become overdefined, bail out early.  Since the
260     // predicate is expensive, only check it every 8 elements.  This is only
261     // really useful for really huge arrays.
262     if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
263         SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
264         FalseRangeEnd == Overdefined)
265       return nullptr;
266   }
267 
268   // Now that we've scanned the entire array, emit our new comparison(s).  We
269   // order the state machines in complexity of the generated code.
270   Value *Idx = GEP->getOperand(2);
271 
272   if (!GEP->isInBounds()) {
273     // If the index is larger than the pointer size of the target, truncate the
274     // index down like the GEP would do implicitly.  We don't have to do this
275     // for an inbounds GEP because the index can't be out of range.
276     Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
277     unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
278     if (Idx->getType()->getPrimitiveSizeInBits().getFixedSize() > PtrSize)
279       Idx = Builder.CreateTrunc(Idx, IntPtrTy);
280 
281     unsigned ElementSize =
282         DL.getTypeAllocSize(Init->getType()->getArrayElementType());
283 
284     // If inbounds keyword is not present, Idx * ElementSize can overflow.
285     // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
286     // Then, there are two possible values for Idx to match offset 0:
287     // 0x00..00, 0x80..00.
288     // Emitting 'icmp eq Idx, 0' isn't correct in this case because the
289     // comparison is false if Idx was 0x80..00.
290     // We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
291     if (countTrailingZeros(ElementSize) != 0) {
292       Value *Mask = ConstantInt::getSigned(Idx->getType(), -1);
293       Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize));
294       Idx = Builder.CreateAnd(Idx, Mask);
295     }
296   }
297 
298   // If the comparison is only true for one or two elements, emit direct
299   // comparisons.
300   if (SecondTrueElement != Overdefined) {
301     // None true -> false.
302     if (FirstTrueElement == Undefined)
303       return replaceInstUsesWith(ICI, Builder.getFalse());
304 
305     Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
306 
307     // True for one element -> 'i == 47'.
308     if (SecondTrueElement == Undefined)
309       return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
310 
311     // True for two elements -> 'i == 47 | i == 72'.
312     Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
313     Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
314     Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
315     return BinaryOperator::CreateOr(C1, C2);
316   }
317 
318   // If the comparison is only false for one or two elements, emit direct
319   // comparisons.
320   if (SecondFalseElement != Overdefined) {
321     // None false -> true.
322     if (FirstFalseElement == Undefined)
323       return replaceInstUsesWith(ICI, Builder.getTrue());
324 
325     Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
326 
327     // False for one element -> 'i != 47'.
328     if (SecondFalseElement == Undefined)
329       return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
330 
331     // False for two elements -> 'i != 47 & i != 72'.
332     Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
333     Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
334     Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
335     return BinaryOperator::CreateAnd(C1, C2);
336   }
337 
338   // If the comparison can be replaced with a range comparison for the elements
339   // where it is true, emit the range check.
340   if (TrueRangeEnd != Overdefined) {
341     assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
342 
343     // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
344     if (FirstTrueElement) {
345       Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
346       Idx = Builder.CreateAdd(Idx, Offs);
347     }
348 
349     Value *End = ConstantInt::get(Idx->getType(),
350                                   TrueRangeEnd-FirstTrueElement+1);
351     return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
352   }
353 
354   // False range check.
355   if (FalseRangeEnd != Overdefined) {
356     assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
357     // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
358     if (FirstFalseElement) {
359       Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
360       Idx = Builder.CreateAdd(Idx, Offs);
361     }
362 
363     Value *End = ConstantInt::get(Idx->getType(),
364                                   FalseRangeEnd-FirstFalseElement);
365     return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
366   }
367 
368   // If a magic bitvector captures the entire comparison state
369   // of this load, replace it with computation that does:
370   //   ((magic_cst >> i) & 1) != 0
371   {
372     Type *Ty = nullptr;
373 
374     // Look for an appropriate type:
375     // - The type of Idx if the magic fits
376     // - The smallest fitting legal type
377     if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
378       Ty = Idx->getType();
379     else
380       Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
381 
382     if (Ty) {
383       Value *V = Builder.CreateIntCast(Idx, Ty, false);
384       V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
385       V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
386       return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
387     }
388   }
389 
390   return nullptr;
391 }
392 
393 /// Return a value that can be used to compare the *offset* implied by a GEP to
394 /// zero. For example, if we have &A[i], we want to return 'i' for
395 /// "icmp ne i, 0". Note that, in general, indices can be complex, and scales
396 /// are involved. The above expression would also be legal to codegen as
397 /// "icmp ne (i*4), 0" (assuming A is a pointer to i32).
398 /// This latter form is less amenable to optimization though, and we are allowed
399 /// to generate the first by knowing that pointer arithmetic doesn't overflow.
400 ///
401 /// If we can't emit an optimized form for this expression, this returns null.
402 ///
403 static Value *evaluateGEPOffsetExpression(User *GEP, InstCombinerImpl &IC,
404                                           const DataLayout &DL) {
405   gep_type_iterator GTI = gep_type_begin(GEP);
406 
407   // Check to see if this gep only has a single variable index.  If so, and if
408   // any constant indices are a multiple of its scale, then we can compute this
409   // in terms of the scale of the variable index.  For example, if the GEP
410   // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
411   // because the expression will cross zero at the same point.
412   unsigned i, e = GEP->getNumOperands();
413   int64_t Offset = 0;
414   for (i = 1; i != e; ++i, ++GTI) {
415     if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
416       // Compute the aggregate offset of constant indices.
417       if (CI->isZero()) continue;
418 
419       // Handle a struct index, which adds its field offset to the pointer.
420       if (StructType *STy = GTI.getStructTypeOrNull()) {
421         Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
422       } else {
423         uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
424         Offset += Size*CI->getSExtValue();
425       }
426     } else {
427       // Found our variable index.
428       break;
429     }
430   }
431 
432   // If there are no variable indices, we must have a constant offset, just
433   // evaluate it the general way.
434   if (i == e) return nullptr;
435 
436   Value *VariableIdx = GEP->getOperand(i);
437   // Determine the scale factor of the variable element.  For example, this is
438   // 4 if the variable index is into an array of i32.
439   uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType());
440 
441   // Verify that there are no other variable indices.  If so, emit the hard way.
442   for (++i, ++GTI; i != e; ++i, ++GTI) {
443     ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
444     if (!CI) return nullptr;
445 
446     // Compute the aggregate offset of constant indices.
447     if (CI->isZero()) continue;
448 
449     // Handle a struct index, which adds its field offset to the pointer.
450     if (StructType *STy = GTI.getStructTypeOrNull()) {
451       Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
452     } else {
453       uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
454       Offset += Size*CI->getSExtValue();
455     }
456   }
457 
458   // Okay, we know we have a single variable index, which must be a
459   // pointer/array/vector index.  If there is no offset, life is simple, return
460   // the index.
461   Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType());
462   unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
463   if (Offset == 0) {
464     // Cast to intptrty in case a truncation occurs.  If an extension is needed,
465     // we don't need to bother extending: the extension won't affect where the
466     // computation crosses zero.
467     if (VariableIdx->getType()->getPrimitiveSizeInBits().getFixedSize() >
468         IntPtrWidth) {
469       VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy);
470     }
471     return VariableIdx;
472   }
473 
474   // Otherwise, there is an index.  The computation we will do will be modulo
475   // the pointer size.
476   Offset = SignExtend64(Offset, IntPtrWidth);
477   VariableScale = SignExtend64(VariableScale, IntPtrWidth);
478 
479   // To do this transformation, any constant index must be a multiple of the
480   // variable scale factor.  For example, we can evaluate "12 + 4*i" as "3 + i",
481   // but we can't evaluate "10 + 3*i" in terms of i.  Check that the offset is a
482   // multiple of the variable scale.
483   int64_t NewOffs = Offset / (int64_t)VariableScale;
484   if (Offset != NewOffs*(int64_t)VariableScale)
485     return nullptr;
486 
487   // Okay, we can do this evaluation.  Start by converting the index to intptr.
488   if (VariableIdx->getType() != IntPtrTy)
489     VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy,
490                                             true /*Signed*/);
491   Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
492   return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset");
493 }
494 
495 /// Returns true if we can rewrite Start as a GEP with pointer Base
496 /// and some integer offset. The nodes that need to be re-written
497 /// for this transformation will be added to Explored.
498 static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
499                                   const DataLayout &DL,
500                                   SetVector<Value *> &Explored) {
501   SmallVector<Value *, 16> WorkList(1, Start);
502   Explored.insert(Base);
503 
504   // The following traversal gives us an order which can be used
505   // when doing the final transformation. Since in the final
506   // transformation we create the PHI replacement instructions first,
507   // we don't have to get them in any particular order.
508   //
509   // However, for other instructions we will have to traverse the
510   // operands of an instruction first, which means that we have to
511   // do a post-order traversal.
512   while (!WorkList.empty()) {
513     SetVector<PHINode *> PHIs;
514 
515     while (!WorkList.empty()) {
516       if (Explored.size() >= 100)
517         return false;
518 
519       Value *V = WorkList.back();
520 
521       if (Explored.contains(V)) {
522         WorkList.pop_back();
523         continue;
524       }
525 
526       if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
527           !isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
528         // We've found some value that we can't explore which is different from
529         // the base. Therefore we can't do this transformation.
530         return false;
531 
532       if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
533         auto *CI = cast<CastInst>(V);
534         if (!CI->isNoopCast(DL))
535           return false;
536 
537         if (Explored.count(CI->getOperand(0)) == 0)
538           WorkList.push_back(CI->getOperand(0));
539       }
540 
541       if (auto *GEP = dyn_cast<GEPOperator>(V)) {
542         // We're limiting the GEP to having one index. This will preserve
543         // the original pointer type. We could handle more cases in the
544         // future.
545         if (GEP->getNumIndices() != 1 || !GEP->isInBounds() ||
546             GEP->getType() != Start->getType())
547           return false;
548 
549         if (Explored.count(GEP->getOperand(0)) == 0)
550           WorkList.push_back(GEP->getOperand(0));
551       }
552 
553       if (WorkList.back() == V) {
554         WorkList.pop_back();
555         // We've finished visiting this node, mark it as such.
556         Explored.insert(V);
557       }
558 
559       if (auto *PN = dyn_cast<PHINode>(V)) {
560         // We cannot transform PHIs on unsplittable basic blocks.
561         if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
562           return false;
563         Explored.insert(PN);
564         PHIs.insert(PN);
565       }
566     }
567 
568     // Explore the PHI nodes further.
569     for (auto *PN : PHIs)
570       for (Value *Op : PN->incoming_values())
571         if (Explored.count(Op) == 0)
572           WorkList.push_back(Op);
573   }
574 
575   // Make sure that we can do this. Since we can't insert GEPs in a basic
576   // block before a PHI node, we can't easily do this transformation if
577   // we have PHI node users of transformed instructions.
578   for (Value *Val : Explored) {
579     for (Value *Use : Val->uses()) {
580 
581       auto *PHI = dyn_cast<PHINode>(Use);
582       auto *Inst = dyn_cast<Instruction>(Val);
583 
584       if (Inst == Base || Inst == PHI || !Inst || !PHI ||
585           Explored.count(PHI) == 0)
586         continue;
587 
588       if (PHI->getParent() == Inst->getParent())
589         return false;
590     }
591   }
592   return true;
593 }
594 
595 // Sets the appropriate insert point on Builder where we can add
596 // a replacement Instruction for V (if that is possible).
597 static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
598                               bool Before = true) {
599   if (auto *PHI = dyn_cast<PHINode>(V)) {
600     Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
601     return;
602   }
603   if (auto *I = dyn_cast<Instruction>(V)) {
604     if (!Before)
605       I = &*std::next(I->getIterator());
606     Builder.SetInsertPoint(I);
607     return;
608   }
609   if (auto *A = dyn_cast<Argument>(V)) {
610     // Set the insertion point in the entry block.
611     BasicBlock &Entry = A->getParent()->getEntryBlock();
612     Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
613     return;
614   }
615   // Otherwise, this is a constant and we don't need to set a new
616   // insertion point.
617   assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
618 }
619 
620 /// Returns a re-written value of Start as an indexed GEP using Base as a
621 /// pointer.
622 static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
623                                  const DataLayout &DL,
624                                  SetVector<Value *> &Explored) {
625   // Perform all the substitutions. This is a bit tricky because we can
626   // have cycles in our use-def chains.
627   // 1. Create the PHI nodes without any incoming values.
628   // 2. Create all the other values.
629   // 3. Add the edges for the PHI nodes.
630   // 4. Emit GEPs to get the original pointers.
631   // 5. Remove the original instructions.
632   Type *IndexType = IntegerType::get(
633       Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType()));
634 
635   DenseMap<Value *, Value *> NewInsts;
636   NewInsts[Base] = ConstantInt::getNullValue(IndexType);
637 
638   // Create the new PHI nodes, without adding any incoming values.
639   for (Value *Val : Explored) {
640     if (Val == Base)
641       continue;
642     // Create empty phi nodes. This avoids cyclic dependencies when creating
643     // the remaining instructions.
644     if (auto *PHI = dyn_cast<PHINode>(Val))
645       NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
646                                       PHI->getName() + ".idx", PHI);
647   }
648   IRBuilder<> Builder(Base->getContext());
649 
650   // Create all the other instructions.
651   for (Value *Val : Explored) {
652 
653     if (NewInsts.find(Val) != NewInsts.end())
654       continue;
655 
656     if (auto *CI = dyn_cast<CastInst>(Val)) {
657       // Don't get rid of the intermediate variable here; the store can grow
658       // the map which will invalidate the reference to the input value.
659       Value *V = NewInsts[CI->getOperand(0)];
660       NewInsts[CI] = V;
661       continue;
662     }
663     if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
664       Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)]
665                                                   : GEP->getOperand(1);
666       setInsertionPoint(Builder, GEP);
667       // Indices might need to be sign extended. GEPs will magically do
668       // this, but we need to do it ourselves here.
669       if (Index->getType()->getScalarSizeInBits() !=
670           NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
671         Index = Builder.CreateSExtOrTrunc(
672             Index, NewInsts[GEP->getOperand(0)]->getType(),
673             GEP->getOperand(0)->getName() + ".sext");
674       }
675 
676       auto *Op = NewInsts[GEP->getOperand(0)];
677       if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
678         NewInsts[GEP] = Index;
679       else
680         NewInsts[GEP] = Builder.CreateNSWAdd(
681             Op, Index, GEP->getOperand(0)->getName() + ".add");
682       continue;
683     }
684     if (isa<PHINode>(Val))
685       continue;
686 
687     llvm_unreachable("Unexpected instruction type");
688   }
689 
690   // Add the incoming values to the PHI nodes.
691   for (Value *Val : Explored) {
692     if (Val == Base)
693       continue;
694     // All the instructions have been created, we can now add edges to the
695     // phi nodes.
696     if (auto *PHI = dyn_cast<PHINode>(Val)) {
697       PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
698       for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
699         Value *NewIncoming = PHI->getIncomingValue(I);
700 
701         if (NewInsts.find(NewIncoming) != NewInsts.end())
702           NewIncoming = NewInsts[NewIncoming];
703 
704         NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
705       }
706     }
707   }
708 
709   for (Value *Val : Explored) {
710     if (Val == Base)
711       continue;
712 
713     // Depending on the type, for external users we have to emit
714     // a GEP or a GEP + ptrtoint.
715     setInsertionPoint(Builder, Val, false);
716 
717     // If required, create an inttoptr instruction for Base.
718     Value *NewBase = Base;
719     if (!Base->getType()->isPointerTy())
720       NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(),
721                                                Start->getName() + "to.ptr");
722 
723     Value *GEP = Builder.CreateInBoundsGEP(
724         Start->getType()->getPointerElementType(), NewBase,
725         makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr");
726 
727     if (!Val->getType()->isPointerTy()) {
728       Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(),
729                                               Val->getName() + ".conv");
730       GEP = Cast;
731     }
732     Val->replaceAllUsesWith(GEP);
733   }
734 
735   return NewInsts[Start];
736 }
737 
738 /// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
739 /// the input Value as a constant indexed GEP. Returns a pair containing
740 /// the GEPs Pointer and Index.
741 static std::pair<Value *, Value *>
742 getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
743   Type *IndexType = IntegerType::get(V->getContext(),
744                                      DL.getIndexTypeSizeInBits(V->getType()));
745 
746   Constant *Index = ConstantInt::getNullValue(IndexType);
747   while (true) {
748     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
749       // We accept only inbouds GEPs here to exclude the possibility of
750       // overflow.
751       if (!GEP->isInBounds())
752         break;
753       if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 &&
754           GEP->getType() == V->getType()) {
755         V = GEP->getOperand(0);
756         Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
757         Index = ConstantExpr::getAdd(
758             Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
759         continue;
760       }
761       break;
762     }
763     if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
764       if (!CI->isNoopCast(DL))
765         break;
766       V = CI->getOperand(0);
767       continue;
768     }
769     if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
770       if (!CI->isNoopCast(DL))
771         break;
772       V = CI->getOperand(0);
773       continue;
774     }
775     break;
776   }
777   return {V, Index};
778 }
779 
780 /// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
781 /// We can look through PHIs, GEPs and casts in order to determine a common base
782 /// between GEPLHS and RHS.
783 static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
784                                               ICmpInst::Predicate Cond,
785                                               const DataLayout &DL) {
786   // FIXME: Support vector of pointers.
787   if (GEPLHS->getType()->isVectorTy())
788     return nullptr;
789 
790   if (!GEPLHS->hasAllConstantIndices())
791     return nullptr;
792 
793   // Make sure the pointers have the same type.
794   if (GEPLHS->getType() != RHS->getType())
795     return nullptr;
796 
797   Value *PtrBase, *Index;
798   std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
799 
800   // The set of nodes that will take part in this transformation.
801   SetVector<Value *> Nodes;
802 
803   if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
804     return nullptr;
805 
806   // We know we can re-write this as
807   //  ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)
808   // Since we've only looked through inbouds GEPs we know that we
809   // can't have overflow on either side. We can therefore re-write
810   // this as:
811   //   OFFSET1 cmp OFFSET2
812   Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes);
813 
814   // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
815   // GEP having PtrBase as the pointer base, and has returned in NewRHS the
816   // offset. Since Index is the offset of LHS to the base pointer, we will now
817   // compare the offsets instead of comparing the pointers.
818   return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
819 }
820 
/// Fold comparisons between a GEP instruction and something else. At this point
/// we know that the GEP is on the LHS of the comparison.
///
/// \param GEPLHS the GEP on the left-hand side of the compare.
/// \param RHS    the other compare operand; pointer casts on it are stripped
///               unless it is itself a GetElementPtrInst.
/// \param Cond   the predicate of the compare. Signed predicates are never
///               folded here.
/// \param I      the compare being folded. It is used for its function and
///               result type, and its uses are replaced by some of the folds.
/// \returns a new instruction (or the result of replaceInstUsesWith) when a
/// fold applies, otherwise whatever transformToIndexedCompare produces, which
/// may be nullptr.
Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
                                           ICmpInst::Predicate Cond,
                                           Instruction &I) {
  // Don't transform signed compares of GEPs into index compares. Even if the
  // GEP is inbounds, the final add of the base pointer can have signed overflow
  // and would change the result of the icmp.
  // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
  // the maximum signed value for the pointer type.
  if (ICmpInst::isSigned(Cond))
    return nullptr;

  // Look through bitcasts and addrspacecasts. We do not however want to remove
  // 0 GEPs.
  if (!isa<GetElementPtrInst>(RHS))
    RHS = RHS->stripPointerCasts();

  Value *PtrBase = GEPLHS->getOperand(0);
  // FIXME: Support vector pointer GEPs.
  if (PtrBase == RHS && GEPLHS->isInBounds() &&
      !GEPLHS->getType()->isVectorTy()) {
    // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
    // This transformation (ignoring the base and scales) is valid because we
    // know pointers can't overflow since the gep is inbounds.  See if we can
    // output an optimized form.
    Value *Offset = evaluateGEPOffsetExpression(GEPLHS, *this, DL);

    // If not, synthesize the offset the hard way.
    if (!Offset)
      Offset = EmitGEPOffset(GEPLHS);
    // The offset is compared against zero with the signed form of the
    // (unsigned or equality) pointer predicate.
    return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
                        Constant::getNullValue(Offset->getType()));
  }

  if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) &&
      isa<Constant>(RHS) && cast<Constant>(RHS)->isNullValue() &&
      !NullPointerIsDefined(I.getFunction(),
                            RHS->getType()->getPointerAddressSpace())) {
    // For most address spaces, an allocation can't be placed at null, but null
    // itself is treated as a 0 size allocation in the in bounds rules.  Thus,
    // the only valid inbounds address derived from null, is null itself.
    // Thus, we have four cases to consider:
    // 1) Base == nullptr, Offset == 0 -> inbounds, null
    // 2) Base == nullptr, Offset != 0 -> poison as the result is out of bounds
    // 3) Base != nullptr, Offset == (-base) -> poison (crossing allocations)
    // 4) Base != nullptr, Offset != (-base) -> nonnull (and possibly poison)
    //
    // (Note if we're indexing a type of size 0, that simply collapses into one
    //  of the buckets above.)
    //
    // In general, we're allowed to make values less poison (i.e. remove
    //   sources of full UB), so in this case, we just select between the two
    //   non-poison cases (1 and 4 above).
    //
    // For vectors, we apply the same reasoning on a per-lane basis.
    auto *Base = GEPLHS->getPointerOperand();
    // A scalar base feeding a vector GEP is splatted so that both operands of
    // the new compare have the same (vector) shape.
    if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) {
      auto EC = cast<VectorType>(GEPLHS->getType())->getElementCount();
      Base = Builder.CreateVectorSplat(EC, Base);
    }
    return new ICmpInst(Cond, Base,
                        ConstantExpr::getPointerBitCastOrAddrSpaceCast(
                            cast<Constant>(RHS), Base->getType()));
  } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
    // If the base pointers are different, but the indices are the same, just
    // compare the base pointer.
    if (PtrBase != GEPRHS->getOperand(0)) {
      // "The same" requires equal operand counts, equal base pointer types and
      // pairwise identical index operands.
      bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
      IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
                        GEPRHS->getOperand(0)->getType();
      if (IndicesTheSame)
        for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
          if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
            IndicesTheSame = false;
            break;
          }

      // If all indices are the same, just compare the base pointers.
      // The base compare is only used when its result type equals the type of
      // the compare being replaced.
      Type *BaseType = GEPLHS->getOperand(0)->getType();
      if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
        return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));

      // If we're comparing GEPs with two base pointers that only differ in type
      // and both GEPs have only constant indices or just one use, then fold
      // the compare with the adjusted indices.
      // FIXME: Support vector of pointers.
      if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
          (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
          (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
          PtrBase->stripPointerCasts() ==
              GEPRHS->getOperand(0)->stripPointerCasts() &&
          !GEPLHS->getType()->isVectorTy()) {
        Value *LOffset = EmitGEPOffset(GEPLHS);
        Value *ROffset = EmitGEPOffset(GEPRHS);

        // If we looked through an addrspacecast between different sized address
        // spaces, the LHS and RHS pointers are different sized
        // integers. Truncate to the smaller one.
        Type *LHSIndexTy = LOffset->getType();
        Type *RHSIndexTy = ROffset->getType();
        if (LHSIndexTy != RHSIndexTy) {
          if (LHSIndexTy->getPrimitiveSizeInBits().getFixedSize() <
              RHSIndexTy->getPrimitiveSizeInBits().getFixedSize()) {
            ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
          } else
            LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
        }

        // The compare is emitted through the Builder here, so the original
        // instruction's uses are replaced instead of returning a new
        // instruction.
        Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond),
                                        LOffset, ROffset);
        return replaceInstUsesWith(I, Cmp);
      }

      // Otherwise, the base pointers are different and the indices are
      // different. Try convert this to an indexed compare by looking through
      // PHIs/casts.
      return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
    }

    // From here on both GEPs share the same base pointer operand.

    // If one of the GEPs has all zero indices, recurse.
    // The operands trade places in the recursive call, hence the swapped
    // predicate.
    // FIXME: Handle vector of pointers.
    if (!GEPLHS->getType()->isVectorTy() && GEPLHS->hasAllZeroIndices())
      return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
                         ICmpInst::getSwappedPredicate(Cond), I);

    // If the other GEP has all zero indices, recurse.
    // FIXME: Handle vector of pointers.
    if (!GEPRHS->getType()->isVectorTy() && GEPRHS->hasAllZeroIndices())
      return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);

    bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
    if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
      // If the GEPs only differ by one index, compare it.
      unsigned NumDifferences = 0;  // Keep track of # differences.
      unsigned DiffOperand = 0;     // The operand that differs.
      for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
        if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
          Type *LHSType = GEPLHS->getOperand(i)->getType();
          Type *RHSType = GEPRHS->getOperand(i)->getType();
          // FIXME: Better support for vector of pointers.
          if (LHSType->getPrimitiveSizeInBits() !=
                   RHSType->getPrimitiveSizeInBits() ||
              (GEPLHS->getType()->isVectorTy() &&
               (!LHSType->isVectorTy() || !RHSType->isVectorTy()))) {
            // Irreconcilable differences.
            NumDifferences = 2;
            break;
          }

          // The post-increment yields the previous count, so the scan stops as
          // soon as a second difference is seen.
          if (NumDifferences++) break;
          DiffOperand = i;
        }

      if (NumDifferences == 0)   // SAME GEP?
        return replaceInstUsesWith(I, // No comparison is needed here.
          ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond)));

      else if (NumDifferences == 1 && GEPsInBounds) {
        Value *LHSV = GEPLHS->getOperand(DiffOperand);
        Value *RHSV = GEPRHS->getOperand(DiffOperand);
        // Make sure we do a signed comparison here.
        return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
      }
    }

    // Only lower this if the icmp is the only user of the GEP or if we expect
    // the result to fold to a constant!
    if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
        (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
      // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
      Value *L = EmitGEPOffset(GEPLHS);
      Value *R = EmitGEPOffset(GEPRHS);
      return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
    }
  }

  // Try convert this to an indexed compare by looking through PHIs/casts as a
  // last resort.
  return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
}
1002 
1003 Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
1004                                              const AllocaInst *Alloca,
1005                                              const Value *Other) {
1006   assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
1007 
1008   // It would be tempting to fold away comparisons between allocas and any
1009   // pointer not based on that alloca (e.g. an argument). However, even
1010   // though such pointers cannot alias, they can still compare equal.
1011   //
1012   // But LLVM doesn't specify where allocas get their memory, so if the alloca
1013   // doesn't escape we can argue that it's impossible to guess its value, and we
1014   // can therefore act as if any such guesses are wrong.
1015   //
1016   // The code below checks that the alloca doesn't escape, and that it's only
1017   // used in a comparison once (the current instruction). The
1018   // single-comparison-use condition ensures that we're trivially folding all
1019   // comparisons against the alloca consistently, and avoids the risk of
1020   // erroneously folding a comparison of the pointer with itself.
1021 
1022   unsigned MaxIter = 32; // Break cycles and bound to constant-time.
1023 
1024   SmallVector<const Use *, 32> Worklist;
1025   for (const Use &U : Alloca->uses()) {
1026     if (Worklist.size() >= MaxIter)
1027       return nullptr;
1028     Worklist.push_back(&U);
1029   }
1030 
1031   unsigned NumCmps = 0;
1032   while (!Worklist.empty()) {
1033     assert(Worklist.size() <= MaxIter);
1034     const Use *U = Worklist.pop_back_val();
1035     const Value *V = U->getUser();
1036     --MaxIter;
1037 
1038     if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
1039         isa<SelectInst>(V)) {
1040       // Track the uses.
1041     } else if (isa<LoadInst>(V)) {
1042       // Loading from the pointer doesn't escape it.
1043       continue;
1044     } else if (const auto *SI = dyn_cast<StoreInst>(V)) {
1045       // Storing *to* the pointer is fine, but storing the pointer escapes it.
1046       if (SI->getValueOperand() == U->get())
1047         return nullptr;
1048       continue;
1049     } else if (isa<ICmpInst>(V)) {
1050       if (NumCmps++)
1051         return nullptr; // Found more than one cmp.
1052       continue;
1053     } else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
1054       switch (Intrin->getIntrinsicID()) {
1055         // These intrinsics don't escape or compare the pointer. Memset is safe
1056         // because we don't allow ptrtoint. Memcpy and memmove are safe because
1057         // we don't allow stores, so src cannot point to V.
1058         case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
1059         case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
1060           continue;
1061         default:
1062           return nullptr;
1063       }
1064     } else {
1065       return nullptr;
1066     }
1067     for (const Use &U : V->uses()) {
1068       if (Worklist.size() >= MaxIter)
1069         return nullptr;
1070       Worklist.push_back(&U);
1071     }
1072   }
1073 
1074   Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
1075   return replaceInstUsesWith(
1076       ICI,
1077       ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
1078 }
1079 
1080 /// Fold "icmp pred (X+C), X".
1081 Instruction *InstCombinerImpl::foldICmpAddOpConst(Value *X, const APInt &C,
1082                                                   ICmpInst::Predicate Pred) {
1083   // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
1084   // so the values can never be equal.  Similarly for all other "or equals"
1085   // operators.
1086   assert(!!C && "C should not be zero!");
1087 
1088   // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
1089   // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
1090   // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
1091   if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
1092     Constant *R = ConstantInt::get(X->getType(),
1093                                    APInt::getMaxValue(C.getBitWidth()) - C);
1094     return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
1095   }
1096 
1097   // (X+1) >u X        --> X <u (0-1)        --> X != 255
1098   // (X+2) >u X        --> X <u (0-2)        --> X <u 254
1099   // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
1100   if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
1101     return new ICmpInst(ICmpInst::ICMP_ULT, X,
1102                         ConstantInt::get(X->getType(), -C));
1103 
1104   APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());
1105 
1106   // (X+ 1) <s X       --> X >s (MAXSINT-1)          --> X == 127
1107   // (X+ 2) <s X       --> X >s (MAXSINT-2)          --> X >s 125
1108   // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)    --> X >s 0
1109   // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)    --> X >s -1
1110   // (X+ -2) <s X      --> X >s (MAXSINT- -2)        --> X >s 126
1111   // (X+ -1) <s X      --> X >s (MAXSINT- -1)        --> X != 127
1112   if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
1113     return new ICmpInst(ICmpInst::ICMP_SGT, X,
1114                         ConstantInt::get(X->getType(), SMax - C));
1115 
1116   // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
1117   // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
1118   // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
1119   // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
1120   // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
1121   // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128
1122 
1123   assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
1124   return new ICmpInst(ICmpInst::ICMP_SLT, X,
1125                       ConstantInt::get(X->getType(), SMax - (C - 1)));
1126 }
1127 
1128 /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
1129 /// (icmp eq/ne A, Log2(AP2/AP1)) ->
1130 /// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
1131 Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
1132                                                      const APInt &AP1,
1133                                                      const APInt &AP2) {
1134   assert(I.isEquality() && "Cannot fold icmp gt/lt");
1135 
1136   auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
1137     if (I.getPredicate() == I.ICMP_NE)
1138       Pred = CmpInst::getInversePredicate(Pred);
1139     return new ICmpInst(Pred, LHS, RHS);
1140   };
1141 
1142   // Don't bother doing any work for cases which InstSimplify handles.
1143   if (AP2.isNullValue())
1144     return nullptr;
1145 
1146   bool IsAShr = isa<AShrOperator>(I.getOperand(0));
1147   if (IsAShr) {
1148     if (AP2.isAllOnesValue())
1149       return nullptr;
1150     if (AP2.isNegative() != AP1.isNegative())
1151       return nullptr;
1152     if (AP2.sgt(AP1))
1153       return nullptr;
1154   }
1155 
1156   if (!AP1)
1157     // 'A' must be large enough to shift out the highest set bit.
1158     return getICmp(I.ICMP_UGT, A,
1159                    ConstantInt::get(A->getType(), AP2.logBase2()));
1160 
1161   if (AP1 == AP2)
1162     return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
1163 
1164   int Shift;
1165   if (IsAShr && AP1.isNegative())
1166     Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
1167   else
1168     Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
1169 
1170   if (Shift > 0) {
1171     if (IsAShr && AP1 == AP2.ashr(Shift)) {
1172       // There are multiple solutions if we are comparing against -1 and the LHS
1173       // of the ashr is not a power of two.
1174       if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
1175         return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
1176       return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1177     } else if (AP1 == AP2.lshr(Shift)) {
1178       return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1179     }
1180   }
1181 
1182   // Shifting const2 will never be equal to const1.
1183   // FIXME: This should always be handled by InstSimplify?
1184   auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
1185   return replaceInstUsesWith(I, TorF);
1186 }
1187 
1188 /// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
1189 /// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
1190 Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A,
1191                                                      const APInt &AP1,
1192                                                      const APInt &AP2) {
1193   assert(I.isEquality() && "Cannot fold icmp gt/lt");
1194 
1195   auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
1196     if (I.getPredicate() == I.ICMP_NE)
1197       Pred = CmpInst::getInversePredicate(Pred);
1198     return new ICmpInst(Pred, LHS, RHS);
1199   };
1200 
1201   // Don't bother doing any work for cases which InstSimplify handles.
1202   if (AP2.isNullValue())
1203     return nullptr;
1204 
1205   unsigned AP2TrailingZeros = AP2.countTrailingZeros();
1206 
1207   if (!AP1 && AP2TrailingZeros != 0)
1208     return getICmp(
1209         I.ICMP_UGE, A,
1210         ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
1211 
1212   if (AP1 == AP2)
1213     return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
1214 
1215   // Get the distance between the lowest bits that are set.
1216   int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
1217 
1218   if (Shift > 0 && AP2.shl(Shift) == AP1)
1219     return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
1220 
1221   // Shifting const2 will never be equal to const1.
1222   // FIXME: This should always be handled by InstSimplify?
1223   auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
1224   return replaceInstUsesWith(I, TorF);
1225 }
1226 
1227 /// The caller has matched a pattern of the form:
1228 ///   I = icmp ugt (add (add A, B), CI2), CI1
1229 /// If this is of the form:
1230 ///   sum = a + b
1231 ///   if (sum+128 >u 255)
1232 /// Then replace it with llvm.sadd.with.overflow.i8.
1233 ///
1234 static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
1235                                           ConstantInt *CI2, ConstantInt *CI1,
1236                                           InstCombinerImpl &IC) {
1237   // The transformation we're trying to do here is to transform this into an
1238   // llvm.sadd.with.overflow.  To do this, we have to replace the original add
1239   // with a narrower add, and discard the add-with-constant that is part of the
1240   // range check (if we can't eliminate it, this isn't profitable).
1241 
1242   // In order to eliminate the add-with-constant, the compare can be its only
1243   // use.
1244   Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
1245   if (!AddWithCst->hasOneUse())
1246     return nullptr;
1247 
1248   // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
1249   if (!CI2->getValue().isPowerOf2())
1250     return nullptr;
1251   unsigned NewWidth = CI2->getValue().countTrailingZeros();
1252   if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
1253     return nullptr;
1254 
1255   // The width of the new add formed is 1 more than the bias.
1256   ++NewWidth;
1257 
1258   // Check to see that CI1 is an all-ones value with NewWidth bits.
1259   if (CI1->getBitWidth() == NewWidth ||
1260       CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
1261     return nullptr;
1262 
1263   // This is only really a signed overflow check if the inputs have been
1264   // sign-extended; check for that condition. For example, if CI2 is 2^31 and
1265   // the operands of the add are 64 bits wide, we need at least 33 sign bits.
1266   unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
1267   if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
1268       IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
1269     return nullptr;
1270 
1271   // In order to replace the original add with a narrower
1272   // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
1273   // and truncates that discard the high bits of the add.  Verify that this is
1274   // the case.
1275   Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
1276   for (User *U : OrigAdd->users()) {
1277     if (U == AddWithCst)
1278       continue;
1279 
1280     // Only accept truncates for now.  We would really like a nice recursive
1281     // predicate like SimplifyDemandedBits, but which goes downwards the use-def
1282     // chain to see which bits of a value are actually demanded.  If the
1283     // original add had another add which was then immediately truncated, we
1284     // could still do the transformation.
1285     TruncInst *TI = dyn_cast<TruncInst>(U);
1286     if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
1287       return nullptr;
1288   }
1289 
1290   // If the pattern matches, truncate the inputs to the narrower type and
1291   // use the sadd_with_overflow intrinsic to efficiently compute both the
1292   // result and the overflow bit.
1293   Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
1294   Function *F = Intrinsic::getDeclaration(
1295       I.getModule(), Intrinsic::sadd_with_overflow, NewType);
1296 
1297   InstCombiner::BuilderTy &Builder = IC.Builder;
1298 
1299   // Put the new code above the original add, in case there are any uses of the
1300   // add between the add and the compare.
1301   Builder.SetInsertPoint(OrigAdd);
1302 
1303   Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc");
1304   Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc");
1305   CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd");
1306   Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result");
1307   Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType());
1308 
1309   // The inner add was the result of the narrow add, zero extended to the
1310   // wider type.  Replace it with the result computed by the intrinsic.
1311   IC.replaceInstUsesWith(*OrigAdd, ZExt);
1312   IC.eraseInstFromFunction(*OrigAdd);
1313 
1314   // The original icmp gets replaced with the overflow value.
1315   return ExtractValueInst::Create(Call, 1, "sadd.overflow");
1316 }
1317 
1318 /// If we have:
1319 ///   icmp eq/ne (urem/srem %x, %y), 0
1320 /// iff %y is a power-of-two, we can replace this with a bit test:
1321 ///   icmp eq/ne (and %x, (add %y, -1)), 0
1322 Instruction *InstCombinerImpl::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) {
1323   // This fold is only valid for equality predicates.
1324   if (!I.isEquality())
1325     return nullptr;
1326   ICmpInst::Predicate Pred;
1327   Value *X, *Y, *Zero;
1328   if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))),
1329                         m_CombineAnd(m_Zero(), m_Value(Zero)))))
1330     return nullptr;
1331   if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I))
1332     return nullptr;
1333   // This may increase instruction count, we don't enforce that Y is a constant.
1334   Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType()));
1335   Value *Masked = Builder.CreateAnd(X, Mask);
1336   return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero);
1337 }
1338 
1339 /// Fold equality-comparison between zero and any (maybe truncated) right-shift
1340 /// by one-less-than-bitwidth into a sign test on the original value.
1341 Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) {
1342   Instruction *Val;
1343   ICmpInst::Predicate Pred;
1344   if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero())))
1345     return nullptr;
1346 
1347   Value *X;
1348   Type *XTy;
1349 
1350   Constant *C;
1351   if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) {
1352     XTy = X->getType();
1353     unsigned XBitWidth = XTy->getScalarSizeInBits();
1354     if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
1355                                      APInt(XBitWidth, XBitWidth - 1))))
1356       return nullptr;
1357   } else if (isa<BinaryOperator>(Val) &&
1358              (X = reassociateShiftAmtsOfTwoSameDirectionShifts(
1359                   cast<BinaryOperator>(Val), SQ.getWithInstruction(Val),
1360                   /*AnalyzeForSignBitExtraction=*/true))) {
1361     XTy = X->getType();
1362   } else
1363     return nullptr;
1364 
1365   return ICmpInst::Create(Instruction::ICmp,
1366                           Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_SGE
1367                                                     : ICmpInst::ICMP_SLT,
1368                           X, ConstantInt::getNullValue(XTy));
1369 }
1370 
1371 // Handle  icmp pred X, 0
1372 Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
1373   CmpInst::Predicate Pred = Cmp.getPredicate();
1374   if (!match(Cmp.getOperand(1), m_Zero()))
1375     return nullptr;
1376 
1377   // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
1378   if (Pred == ICmpInst::ICMP_SGT) {
1379     Value *A, *B;
1380     SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B);
1381     if (SPR.Flavor == SPF_SMIN) {
1382       if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
1383         return new ICmpInst(Pred, B, Cmp.getOperand(1));
1384       if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
1385         return new ICmpInst(Pred, A, Cmp.getOperand(1));
1386     }
1387   }
1388 
1389   if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp))
1390     return New;
1391 
1392   // Given:
1393   //   icmp eq/ne (urem %x, %y), 0
1394   // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
1395   //   icmp eq/ne %x, 0
1396   Value *X, *Y;
1397   if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) &&
1398       ICmpInst::isEquality(Pred)) {
1399     KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
1400     KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
1401     if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
1402       return new ICmpInst(Pred, X, Cmp.getOperand(1));
1403   }
1404 
1405   return nullptr;
1406 }
1407 
1408 /// Fold icmp Pred X, C.
1409 /// TODO: This code structure does not make sense. The saturating add fold
1410 /// should be moved to some other helper and extended as noted below (it is also
1411 /// possible that code has been made unnecessary - do we canonicalize IR to
1412 /// overflow/saturating intrinsics or not?).
1413 Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
1414   // Match the following pattern, which is a common idiom when writing
1415   // overflow-safe integer arithmetic functions. The source performs an addition
1416   // in wider type and explicitly checks for overflow using comparisons against
1417   // INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic.
1418   //
1419   // TODO: This could probably be generalized to handle other overflow-safe
1420   // operations if we worked out the formulas to compute the appropriate magic
1421   // constants.
1422   //
1423   // sum = a + b
1424   // if (sum+128 >u 255)  ...  -> llvm.sadd.with.overflow.i8
1425   CmpInst::Predicate Pred = Cmp.getPredicate();
1426   Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1);
1427   Value *A, *B;
1428   ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI
1429   if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) &&
1430       match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
1431     if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this))
1432       return Res;
1433 
1434   // icmp(phi(C1, C2, ...), C) -> phi(icmp(C1, C), icmp(C2, C), ...).
1435   Constant *C = dyn_cast<Constant>(Op1);
1436   if (!C)
1437     return nullptr;
1438 
1439   if (auto *Phi = dyn_cast<PHINode>(Op0))
1440     if (all_of(Phi->operands(), [](Value *V) { return isa<Constant>(V); })) {
1441       Type *Ty = Cmp.getType();
1442       Builder.SetInsertPoint(Phi);
1443       PHINode *NewPhi =
1444           Builder.CreatePHI(Ty, Phi->getNumOperands());
1445       for (BasicBlock *Predecessor : predecessors(Phi->getParent())) {
1446         auto *Input =
1447             cast<Constant>(Phi->getIncomingValueForBlock(Predecessor));
1448         auto *BoolInput = ConstantExpr::getCompare(Pred, Input, C);
1449         NewPhi->addIncoming(BoolInput, Predecessor);
1450       }
1451       NewPhi->takeName(&Cmp);
1452       return replaceInstUsesWith(Cmp, NewPhi);
1453     }
1454 
1455   return nullptr;
1456 }
1457 
1458 /// Canonicalize icmp instructions based on dominating conditions.
1459 Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
1460   // This is a cheap/incomplete check for dominance - just match a single
1461   // predecessor with a conditional branch.
1462   BasicBlock *CmpBB = Cmp.getParent();
1463   BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1464   if (!DomBB)
1465     return nullptr;
1466 
1467   Value *DomCond;
1468   BasicBlock *TrueBB, *FalseBB;
1469   if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1470     return nullptr;
1471 
1472   assert((TrueBB == CmpBB || FalseBB == CmpBB) &&
1473          "Predecessor block does not point to successor?");
1474 
1475   // The branch should get simplified. Don't bother simplifying this condition.
1476   if (TrueBB == FalseBB)
1477     return nullptr;
1478 
1479   // Try to simplify this compare to T/F based on the dominating condition.
1480   Optional<bool> Imp = isImpliedCondition(DomCond, &Cmp, DL, TrueBB == CmpBB);
1481   if (Imp)
1482     return replaceInstUsesWith(Cmp, ConstantInt::get(Cmp.getType(), *Imp));
1483 
1484   CmpInst::Predicate Pred = Cmp.getPredicate();
1485   Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
1486   ICmpInst::Predicate DomPred;
1487   const APInt *C, *DomC;
1488   if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))) &&
1489       match(Y, m_APInt(C))) {
1490     // We have 2 compares of a variable with constants. Calculate the constant
1491     // ranges of those compares to see if we can transform the 2nd compare:
1492     // DomBB:
1493     //   DomCond = icmp DomPred X, DomC
1494     //   br DomCond, CmpBB, FalseBB
1495     // CmpBB:
1496     //   Cmp = icmp Pred X, C
1497     ConstantRange CR = ConstantRange::makeAllowedICmpRegion(Pred, *C);
1498     ConstantRange DominatingCR =
1499         (CmpBB == TrueBB) ? ConstantRange::makeExactICmpRegion(DomPred, *DomC)
1500                           : ConstantRange::makeExactICmpRegion(
1501                                 CmpInst::getInversePredicate(DomPred), *DomC);
1502     ConstantRange Intersection = DominatingCR.intersectWith(CR);
1503     ConstantRange Difference = DominatingCR.difference(CR);
1504     if (Intersection.isEmptySet())
1505       return replaceInstUsesWith(Cmp, Builder.getFalse());
1506     if (Difference.isEmptySet())
1507       return replaceInstUsesWith(Cmp, Builder.getTrue());
1508 
1509     // Canonicalizing a sign bit comparison that gets used in a branch,
1510     // pessimizes codegen by generating branch on zero instruction instead
1511     // of a test and branch. So we avoid canonicalizing in such situations
1512     // because test and branch instruction has better branch displacement
1513     // than compare and branch instruction.
1514     bool UnusedBit;
1515     bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
1516     if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
1517       return nullptr;
1518 
1519     // Avoid an infinite loop with min/max canonicalization.
1520     // TODO: This will be unnecessary if we canonicalize to min/max intrinsics.
1521     if (Cmp.hasOneUse() &&
1522         match(Cmp.user_back(), m_MaxOrMin(m_Value(), m_Value())))
1523       return nullptr;
1524 
1525     if (const APInt *EqC = Intersection.getSingleElement())
1526       return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
1527     if (const APInt *NeC = Difference.getSingleElement())
1528       return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
1529   }
1530 
1531   return nullptr;
1532 }
1533 
1534 /// Fold icmp (trunc X, Y), C.
1535 Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
1536                                                      TruncInst *Trunc,
1537                                                      const APInt &C) {
1538   ICmpInst::Predicate Pred = Cmp.getPredicate();
1539   Value *X = Trunc->getOperand(0);
1540   if (C.isOneValue() && C.getBitWidth() > 1) {
1541     // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
1542     Value *V = nullptr;
1543     if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
1544       return new ICmpInst(ICmpInst::ICMP_SLT, V,
1545                           ConstantInt::get(V->getType(), 1));
1546   }
1547 
1548   unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
1549            SrcBits = X->getType()->getScalarSizeInBits();
1550   if (Cmp.isEquality() && Trunc->hasOneUse()) {
1551     // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
1552     // of the high bits truncated out of x are known.
1553     KnownBits Known = computeKnownBits(X, 0, &Cmp);
1554 
1555     // If all the high bits are known, we can do this xform.
1556     if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
1557       // Pull in the high bits from known-ones set.
1558       APInt NewRHS = C.zext(SrcBits);
1559       NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
1560       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS));
1561     }
1562   }
1563 
1564   // Look through truncated right-shift of the sign-bit for a sign-bit check:
1565   // trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] < 0  --> ShOp <  0
1566   // trunc iN (ShOp >> ShAmtC) to i[N - ShAmtC] > -1 --> ShOp > -1
1567   Value *ShOp;
1568   const APInt *ShAmtC;
1569   bool TrueIfSigned;
1570   if (isSignBitCheck(Pred, C, TrueIfSigned) &&
1571       match(X, m_Shr(m_Value(ShOp), m_APInt(ShAmtC))) &&
1572       DstBits == SrcBits - ShAmtC->getZExtValue()) {
1573     return TrueIfSigned
1574                ? new ICmpInst(ICmpInst::ICMP_SLT, ShOp,
1575                               ConstantInt::getNullValue(X->getType()))
1576                : new ICmpInst(ICmpInst::ICMP_SGT, ShOp,
1577                               ConstantInt::getAllOnesValue(X->getType()));
1578   }
1579 
1580   return nullptr;
1581 }
1582 
1583 /// Fold icmp (xor X, Y), C.
1584 Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp,
1585                                                    BinaryOperator *Xor,
1586                                                    const APInt &C) {
1587   Value *X = Xor->getOperand(0);
1588   Value *Y = Xor->getOperand(1);
1589   const APInt *XorC;
1590   if (!match(Y, m_APInt(XorC)))
1591     return nullptr;
1592 
1593   // If this is a comparison that tests the signbit (X < 0) or (x > -1),
1594   // fold the xor.
1595   ICmpInst::Predicate Pred = Cmp.getPredicate();
1596   bool TrueIfSigned = false;
1597   if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) {
1598 
1599     // If the sign bit of the XorCst is not set, there is no change to
1600     // the operation, just stop using the Xor.
1601     if (!XorC->isNegative())
1602       return replaceOperand(Cmp, 0, X);
1603 
1604     // Emit the opposite comparison.
1605     if (TrueIfSigned)
1606       return new ICmpInst(ICmpInst::ICMP_SGT, X,
1607                           ConstantInt::getAllOnesValue(X->getType()));
1608     else
1609       return new ICmpInst(ICmpInst::ICMP_SLT, X,
1610                           ConstantInt::getNullValue(X->getType()));
1611   }
1612 
1613   if (Xor->hasOneUse()) {
1614     // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
1615     if (!Cmp.isEquality() && XorC->isSignMask()) {
1616       Pred = Cmp.getFlippedSignednessPredicate();
1617       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1618     }
1619 
1620     // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
1621     if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
1622       Pred = Cmp.getFlippedSignednessPredicate();
1623       Pred = Cmp.getSwappedPredicate(Pred);
1624       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1625     }
1626   }
1627 
1628   // Mask constant magic can eliminate an 'xor' with unsigned compares.
1629   if (Pred == ICmpInst::ICMP_UGT) {
1630     // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
1631     if (*XorC == ~C && (C + 1).isPowerOf2())
1632       return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
1633     // (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
1634     if (*XorC == C && (C + 1).isPowerOf2())
1635       return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
1636   }
1637   if (Pred == ICmpInst::ICMP_ULT) {
1638     // (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
1639     if (*XorC == -C && C.isPowerOf2())
1640       return new ICmpInst(ICmpInst::ICMP_UGT, X,
1641                           ConstantInt::get(X->getType(), ~C));
1642     // (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
1643     if (*XorC == C && (-C).isPowerOf2())
1644       return new ICmpInst(ICmpInst::ICMP_UGT, X,
1645                           ConstantInt::get(X->getType(), ~C));
1646   }
1647   return nullptr;
1648 }
1649 
1650 /// Fold icmp (and (sh X, Y), C2), C1.
1651 Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp,
1652                                                 BinaryOperator *And,
1653                                                 const APInt &C1,
1654                                                 const APInt &C2) {
1655   BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
1656   if (!Shift || !Shift->isShift())
1657     return nullptr;
1658 
1659   // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could
1660   // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in
1661   // code produced by the clang front-end, for bitfield access.
1662   // This seemingly simple opportunity to fold away a shift turns out to be
1663   // rather complicated. See PR17827 for details.
1664   unsigned ShiftOpcode = Shift->getOpcode();
1665   bool IsShl = ShiftOpcode == Instruction::Shl;
1666   const APInt *C3;
1667   if (match(Shift->getOperand(1), m_APInt(C3))) {
1668     APInt NewAndCst, NewCmpCst;
1669     bool AnyCmpCstBitsShiftedOut;
1670     if (ShiftOpcode == Instruction::Shl) {
1671       // For a left shift, we can fold if the comparison is not signed. We can
1672       // also fold a signed comparison if the mask value and comparison value
1673       // are not negative. These constraints may not be obvious, but we can
1674       // prove that they are correct using an SMT solver.
1675       if (Cmp.isSigned() && (C2.isNegative() || C1.isNegative()))
1676         return nullptr;
1677 
1678       NewCmpCst = C1.lshr(*C3);
1679       NewAndCst = C2.lshr(*C3);
1680       AnyCmpCstBitsShiftedOut = NewCmpCst.shl(*C3) != C1;
1681     } else if (ShiftOpcode == Instruction::LShr) {
1682       // For a logical right shift, we can fold if the comparison is not signed.
1683       // We can also fold a signed comparison if the shifted mask value and the
1684       // shifted comparison value are not negative. These constraints may not be
1685       // obvious, but we can prove that they are correct using an SMT solver.
1686       NewCmpCst = C1.shl(*C3);
1687       NewAndCst = C2.shl(*C3);
1688       AnyCmpCstBitsShiftedOut = NewCmpCst.lshr(*C3) != C1;
1689       if (Cmp.isSigned() && (NewAndCst.isNegative() || NewCmpCst.isNegative()))
1690         return nullptr;
1691     } else {
1692       // For an arithmetic shift, check that both constants don't use (in a
1693       // signed sense) the top bits being shifted out.
1694       assert(ShiftOpcode == Instruction::AShr && "Unknown shift opcode");
1695       NewCmpCst = C1.shl(*C3);
1696       NewAndCst = C2.shl(*C3);
1697       AnyCmpCstBitsShiftedOut = NewCmpCst.ashr(*C3) != C1;
1698       if (NewAndCst.ashr(*C3) != C2)
1699         return nullptr;
1700     }
1701 
1702     if (AnyCmpCstBitsShiftedOut) {
1703       // If we shifted bits out, the fold is not going to work out. As a
1704       // special case, check to see if this means that the result is always
1705       // true or false now.
1706       if (Cmp.getPredicate() == ICmpInst::ICMP_EQ)
1707         return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType()));
1708       if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
1709         return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType()));
1710     } else {
1711       Value *NewAnd = Builder.CreateAnd(
1712           Shift->getOperand(0), ConstantInt::get(And->getType(), NewAndCst));
1713       return new ICmpInst(Cmp.getPredicate(),
1714           NewAnd, ConstantInt::get(And->getType(), NewCmpCst));
1715     }
1716   }
1717 
1718   // Turn ((X >> Y) & C2) == 0  into  (X & (C2 << Y)) == 0.  The latter is
1719   // preferable because it allows the C2 << Y expression to be hoisted out of a
1720   // loop if Y is invariant and X is not.
1721   if (Shift->hasOneUse() && C1.isNullValue() && Cmp.isEquality() &&
1722       !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
1723     // Compute C2 << Y.
1724     Value *NewShift =
1725         IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1))
1726               : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1));
1727 
1728     // Compute X & (C2 << Y).
1729     Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift);
1730     return replaceOperand(Cmp, 0, NewAnd);
1731   }
1732 
1733   return nullptr;
1734 }
1735 
1736 /// Fold icmp (and X, C2), C1.
1737 Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
1738                                                      BinaryOperator *And,
1739                                                      const APInt &C1) {
1740   bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE;
1741 
1742   // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
1743   // TODO: We canonicalize to the longer form for scalars because we have
1744   // better analysis/folds for icmp, and codegen may be better with icmp.
1745   if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isNullValue() &&
1746       match(And->getOperand(1), m_One()))
1747     return new TruncInst(And->getOperand(0), Cmp.getType());
1748 
1749   const APInt *C2;
1750   Value *X;
1751   if (!match(And, m_And(m_Value(X), m_APInt(C2))))
1752     return nullptr;
1753 
1754   // Don't perform the following transforms if the AND has multiple uses
1755   if (!And->hasOneUse())
1756     return nullptr;
1757 
1758   if (Cmp.isEquality() && C1.isNullValue()) {
1759     // Restrict this fold to single-use 'and' (PR10267).
1760     // Replace (and X, (1 << size(X)-1) != 0) with X s< 0
1761     if (C2->isSignMask()) {
1762       Constant *Zero = Constant::getNullValue(X->getType());
1763       auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
1764       return new ICmpInst(NewPred, X, Zero);
1765     }
1766 
1767     // Restrict this fold only for single-use 'and' (PR10267).
1768     // ((%x & C) == 0) --> %x u< (-C)  iff (-C) is power of two.
1769     if ((~(*C2) + 1).isPowerOf2()) {
1770       Constant *NegBOC =
1771           ConstantExpr::getNeg(cast<Constant>(And->getOperand(1)));
1772       auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
1773       return new ICmpInst(NewPred, X, NegBOC);
1774     }
1775   }
1776 
1777   // If the LHS is an 'and' of a truncate and we can widen the and/compare to
1778   // the input width without changing the value produced, eliminate the cast:
1779   //
1780   // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1'
1781   //
1782   // We can do this transformation if the constants do not have their sign bits
1783   // set or if it is an equality comparison. Extending a relational comparison
1784   // when we're checking the sign bit would not work.
1785   Value *W;
1786   if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) &&
1787       (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) {
1788     // TODO: Is this a good transform for vectors? Wider types may reduce
1789     // throughput. Should this transform be limited (even for scalars) by using
1790     // shouldChangeType()?
1791     if (!Cmp.getType()->isVectorTy()) {
1792       Type *WideType = W->getType();
1793       unsigned WideScalarBits = WideType->getScalarSizeInBits();
1794       Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits));
1795       Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
1796       Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName());
1797       return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
1798     }
1799   }
1800 
1801   if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2))
1802     return I;
1803 
1804   // (icmp pred (and (or (lshr A, B), A), 1), 0) -->
1805   // (icmp pred (and A, (or (shl 1, B), 1), 0))
1806   //
1807   // iff pred isn't signed
1808   if (!Cmp.isSigned() && C1.isNullValue() && And->getOperand(0)->hasOneUse() &&
1809       match(And->getOperand(1), m_One())) {
1810     Constant *One = cast<Constant>(And->getOperand(1));
1811     Value *Or = And->getOperand(0);
1812     Value *A, *B, *LShr;
1813     if (match(Or, m_Or(m_Value(LShr), m_Value(A))) &&
1814         match(LShr, m_LShr(m_Specific(A), m_Value(B)))) {
1815       unsigned UsesRemoved = 0;
1816       if (And->hasOneUse())
1817         ++UsesRemoved;
1818       if (Or->hasOneUse())
1819         ++UsesRemoved;
1820       if (LShr->hasOneUse())
1821         ++UsesRemoved;
1822 
1823       // Compute A & ((1 << B) | 1)
1824       Value *NewOr = nullptr;
1825       if (auto *C = dyn_cast<Constant>(B)) {
1826         if (UsesRemoved >= 1)
1827           NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
1828       } else {
1829         if (UsesRemoved >= 3)
1830           NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
1831                                                      /*HasNUW=*/true),
1832                                    One, Or->getName());
1833       }
1834       if (NewOr) {
1835         Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
1836         return replaceOperand(Cmp, 0, NewAnd);
1837       }
1838     }
1839   }
1840 
1841   return nullptr;
1842 }
1843 
1844 /// Fold icmp (and X, Y), C.
1845 Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
1846                                                    BinaryOperator *And,
1847                                                    const APInt &C) {
1848   if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
1849     return I;
1850 
1851   // TODO: These all require that Y is constant too, so refactor with the above.
1852 
1853   // Try to optimize things like "A[i] & 42 == 0" to index computations.
1854   Value *X = And->getOperand(0);
1855   Value *Y = And->getOperand(1);
1856   if (auto *LI = dyn_cast<LoadInst>(X))
1857     if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
1858       if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
1859         if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
1860             !LI->isVolatile() && isa<ConstantInt>(Y)) {
1861           ConstantInt *C2 = cast<ConstantInt>(Y);
1862           if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
1863             return Res;
1864         }
1865 
1866   if (!Cmp.isEquality())
1867     return nullptr;
1868 
1869   // X & -C == -C -> X >  u ~C
1870   // X & -C != -C -> X <= u ~C
1871   //   iff C is a power of 2
1872   if (Cmp.getOperand(1) == Y && (-C).isPowerOf2()) {
1873     auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT
1874                                                           : CmpInst::ICMP_ULE;
1875     return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
1876   }
1877 
1878   // (X & C2) == 0 -> (trunc X) >= 0
1879   // (X & C2) != 0 -> (trunc X) <  0
1880   //   iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
1881   const APInt *C2;
1882   if (And->hasOneUse() && C.isNullValue() && match(Y, m_APInt(C2))) {
1883     int32_t ExactLogBase2 = C2->exactLogBase2();
1884     if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
1885       Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
1886       if (auto *AndVTy = dyn_cast<VectorType>(And->getType()))
1887         NTy = VectorType::get(NTy, AndVTy->getElementCount());
1888       Value *Trunc = Builder.CreateTrunc(X, NTy);
1889       auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
1890                                                             : CmpInst::ICMP_SLT;
1891       return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
1892     }
1893   }
1894 
1895   return nullptr;
1896 }
1897 
1898 /// Fold icmp (or X, Y), C.
1899 Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
1900                                                   BinaryOperator *Or,
1901                                                   const APInt &C) {
1902   ICmpInst::Predicate Pred = Cmp.getPredicate();
1903   if (C.isOneValue()) {
1904     // icmp slt signum(V) 1 --> icmp slt V, 1
1905     Value *V = nullptr;
1906     if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
1907       return new ICmpInst(ICmpInst::ICMP_SLT, V,
1908                           ConstantInt::get(V->getType(), 1));
1909   }
1910 
1911   Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1);
1912   const APInt *MaskC;
1913   if (match(OrOp1, m_APInt(MaskC)) && Cmp.isEquality()) {
1914     if (*MaskC == C && (C + 1).isPowerOf2()) {
1915       // X | C == C --> X <=u C
1916       // X | C != C --> X  >u C
1917       //   iff C+1 is a power of 2 (C is a bitmask of the low bits)
1918       Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
1919       return new ICmpInst(Pred, OrOp0, OrOp1);
1920     }
1921 
1922     // More general: canonicalize 'equality with set bits mask' to
1923     // 'equality with clear bits mask'.
1924     // (X | MaskC) == C --> (X & ~MaskC) == C ^ MaskC
1925     // (X | MaskC) != C --> (X & ~MaskC) != C ^ MaskC
1926     if (Or->hasOneUse()) {
1927       Value *And = Builder.CreateAnd(OrOp0, ~(*MaskC));
1928       Constant *NewC = ConstantInt::get(Or->getType(), C ^ (*MaskC));
1929       return new ICmpInst(Pred, And, NewC);
1930     }
1931   }
1932 
1933   if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
1934     return nullptr;
1935 
1936   Value *P, *Q;
1937   if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
1938     // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
1939     // -> and (icmp eq P, null), (icmp eq Q, null).
1940     Value *CmpP =
1941         Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
1942     Value *CmpQ =
1943         Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
1944     auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1945     return BinaryOperator::Create(BOpc, CmpP, CmpQ);
1946   }
1947 
1948   // Are we using xors to bitwise check for a pair of (in)equalities? Convert to
1949   // a shorter form that has more potential to be folded even further.
1950   Value *X1, *X2, *X3, *X4;
1951   if (match(OrOp0, m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
1952       match(OrOp1, m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
1953     // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
1954     // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
1955     Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
1956     Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4);
1957     auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1958     return BinaryOperator::Create(BOpc, Cmp12, Cmp34);
1959   }
1960 
1961   return nullptr;
1962 }
1963 
1964 /// Fold icmp (mul X, Y), C.
1965 Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
1966                                                    BinaryOperator *Mul,
1967                                                    const APInt &C) {
1968   const APInt *MulC;
1969   if (!match(Mul->getOperand(1), m_APInt(MulC)))
1970     return nullptr;
1971 
1972   // If this is a test of the sign bit and the multiply is sign-preserving with
1973   // a constant operand, use the multiply LHS operand instead.
1974   ICmpInst::Predicate Pred = Cmp.getPredicate();
1975   if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) {
1976     if (MulC->isNegative())
1977       Pred = ICmpInst::getSwappedPredicate(Pred);
1978     return new ICmpInst(Pred, Mul->getOperand(0),
1979                         Constant::getNullValue(Mul->getType()));
1980   }
1981 
1982   // If the multiply does not wrap, try to divide the compare constant by the
1983   // multiplication factor.
1984   if (Cmp.isEquality() && !MulC->isNullValue()) {
1985     // (mul nsw X, MulC) == C --> X == C /s MulC
1986     if (Mul->hasNoSignedWrap() && C.srem(*MulC).isNullValue()) {
1987       Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC));
1988       return new ICmpInst(Pred, Mul->getOperand(0), NewC);
1989     }
1990     // (mul nuw X, MulC) == C --> X == C /u MulC
1991     if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isNullValue()) {
1992       Constant *NewC = ConstantInt::get(Mul->getType(), C.udiv(*MulC));
1993       return new ICmpInst(Pred, Mul->getOperand(0), NewC);
1994     }
1995   }
1996 
1997   return nullptr;
1998 }
1999 
2000 /// Fold icmp (shl 1, Y), C.
2001 static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
2002                                    const APInt &C) {
2003   Value *Y;
2004   if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
2005     return nullptr;
2006 
2007   Type *ShiftType = Shl->getType();
2008   unsigned TypeBits = C.getBitWidth();
2009   bool CIsPowerOf2 = C.isPowerOf2();
2010   ICmpInst::Predicate Pred = Cmp.getPredicate();
2011   if (Cmp.isUnsigned()) {
2012     // (1 << Y) pred C -> Y pred Log2(C)
2013     if (!CIsPowerOf2) {
2014       // (1 << Y) <  30 -> Y <= 4
2015       // (1 << Y) <= 30 -> Y <= 4
2016       // (1 << Y) >= 30 -> Y >  4
2017       // (1 << Y) >  30 -> Y >  4
2018       if (Pred == ICmpInst::ICMP_ULT)
2019         Pred = ICmpInst::ICMP_ULE;
2020       else if (Pred == ICmpInst::ICMP_UGE)
2021         Pred = ICmpInst::ICMP_UGT;
2022     }
2023 
2024     // (1 << Y) >= 2147483648 -> Y >= 31 -> Y == 31
2025     // (1 << Y) <  2147483648 -> Y <  31 -> Y != 31
2026     unsigned CLog2 = C.logBase2();
2027     if (CLog2 == TypeBits - 1) {
2028       if (Pred == ICmpInst::ICMP_UGE)
2029         Pred = ICmpInst::ICMP_EQ;
2030       else if (Pred == ICmpInst::ICMP_ULT)
2031         Pred = ICmpInst::ICMP_NE;
2032     }
2033     return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
2034   } else if (Cmp.isSigned()) {
2035     Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
2036     if (C.isAllOnesValue()) {
2037       // (1 << Y) <= -1 -> Y == 31
2038       if (Pred == ICmpInst::ICMP_SLE)
2039         return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
2040 
2041       // (1 << Y) >  -1 -> Y != 31
2042       if (Pred == ICmpInst::ICMP_SGT)
2043         return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
2044     } else if (!C) {
2045       // (1 << Y) <  0 -> Y == 31
2046       // (1 << Y) <= 0 -> Y == 31
2047       if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
2048         return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
2049 
2050       // (1 << Y) >= 0 -> Y != 31
2051       // (1 << Y) >  0 -> Y != 31
2052       if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
2053         return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
2054     }
2055   } else if (Cmp.isEquality() && CIsPowerOf2) {
2056     return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C.logBase2()));
2057   }
2058 
2059   return nullptr;
2060 }
2061 
2062 /// Fold icmp (shl X, Y), C.
2063 Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
2064                                                    BinaryOperator *Shl,
2065                                                    const APInt &C) {
2066   const APInt *ShiftVal;
2067   if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
2068     return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
2069 
2070   const APInt *ShiftAmt;
2071   if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
2072     return foldICmpShlOne(Cmp, Shl, C);
2073 
2074   // Check that the shift amount is in range. If not, don't perform undefined
2075   // shifts. When the shift is visited, it will be simplified.
2076   unsigned TypeBits = C.getBitWidth();
2077   if (ShiftAmt->uge(TypeBits))
2078     return nullptr;
2079 
2080   ICmpInst::Predicate Pred = Cmp.getPredicate();
2081   Value *X = Shl->getOperand(0);
2082   Type *ShType = Shl->getType();
2083 
2084   // NSW guarantees that we are only shifting out sign bits from the high bits,
2085   // so we can ASHR the compare constant without needing a mask and eliminate
2086   // the shift.
2087   if (Shl->hasNoSignedWrap()) {
2088     if (Pred == ICmpInst::ICMP_SGT) {
2089       // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
2090       APInt ShiftedC = C.ashr(*ShiftAmt);
2091       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2092     }
2093     if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2094         C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) {
2095       APInt ShiftedC = C.ashr(*ShiftAmt);
2096       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2097     }
2098     if (Pred == ICmpInst::ICMP_SLT) {
2099       // SLE is the same as above, but SLE is canonicalized to SLT, so convert:
2100       // (X << S) <=s C is equiv to X <=s (C >> S) for all C
2101       // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX
2102       // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN
2103       assert(!C.isMinSignedValue() && "Unexpected icmp slt");
2104       APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1;
2105       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2106     }
2107     // If this is a signed comparison to 0 and the shift is sign preserving,
2108     // use the shift LHS operand instead; isSignTest may change 'Pred', so only
2109     // do that if we're sure to not continue on in this function.
2110     if (isSignTest(Pred, C))
2111       return new ICmpInst(Pred, X, Constant::getNullValue(ShType));
2112   }
2113 
2114   // NUW guarantees that we are only shifting out zero bits from the high bits,
2115   // so we can LSHR the compare constant without needing a mask and eliminate
2116   // the shift.
2117   if (Shl->hasNoUnsignedWrap()) {
2118     if (Pred == ICmpInst::ICMP_UGT) {
2119       // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt)
2120       APInt ShiftedC = C.lshr(*ShiftAmt);
2121       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2122     }
2123     if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
2124         C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) {
2125       APInt ShiftedC = C.lshr(*ShiftAmt);
2126       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2127     }
2128     if (Pred == ICmpInst::ICMP_ULT) {
2129       // ULE is the same as above, but ULE is canonicalized to ULT, so convert:
2130       // (X << S) <=u C is equiv to X <=u (C >> S) for all C
2131       // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
2132       // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
2133       assert(C.ugt(0) && "ult 0 should have been eliminated");
2134       APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1;
2135       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
2136     }
2137   }
2138 
2139   if (Cmp.isEquality() && Shl->hasOneUse()) {
2140     // Strength-reduce the shift into an 'and'.
2141     Constant *Mask = ConstantInt::get(
2142         ShType,
2143         APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
2144     Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
2145     Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt));
2146     return new ICmpInst(Pred, And, LShrC);
2147   }
2148 
2149   // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
2150   bool TrueIfSigned = false;
2151   if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) {
2152     // (X << 31) <s 0  --> (X & 1) != 0
2153     Constant *Mask = ConstantInt::get(
2154         ShType,
2155         APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
2156     Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
2157     return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
2158                         And, Constant::getNullValue(ShType));
2159   }
2160 
2161   // Simplify 'shl' inequality test into 'and' equality test.
2162   if (Cmp.isUnsigned() && Shl->hasOneUse()) {
2163     // (X l<< C2) u<=/u> C1 iff C1+1 is power of two -> X & (~C1 l>> C2) ==/!= 0
2164     if ((C + 1).isPowerOf2() &&
2165         (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)) {
2166       Value *And = Builder.CreateAnd(X, (~C).lshr(ShiftAmt->getZExtValue()));
2167       return new ICmpInst(Pred == ICmpInst::ICMP_ULE ? ICmpInst::ICMP_EQ
2168                                                      : ICmpInst::ICMP_NE,
2169                           And, Constant::getNullValue(ShType));
2170     }
2171     // (X l<< C2) u</u>= C1 iff C1 is power of two -> X & (-C1 l>> C2) ==/!= 0
2172     if (C.isPowerOf2() &&
2173         (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
2174       Value *And =
2175           Builder.CreateAnd(X, (~(C - 1)).lshr(ShiftAmt->getZExtValue()));
2176       return new ICmpInst(Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_EQ
2177                                                      : ICmpInst::ICMP_NE,
2178                           And, Constant::getNullValue(ShType));
2179     }
2180   }
2181 
2182   // Transform (icmp pred iM (shl iM %v, N), C)
2183   // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
2184   // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
2185   // This enables us to get rid of the shift in favor of a trunc that may be
2186   // free on the target. It has the additional benefit of comparing to a
2187   // smaller constant that may be more target-friendly.
2188   unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
2189   if (Shl->hasOneUse() && Amt != 0 && C.countTrailingZeros() >= Amt &&
2190       DL.isLegalInteger(TypeBits - Amt)) {
2191     Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
2192     if (auto *ShVTy = dyn_cast<VectorType>(ShType))
2193       TruncTy = VectorType::get(TruncTy, ShVTy->getElementCount());
2194     Constant *NewC =
2195         ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt));
2196     return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
2197   }
2198 
2199   return nullptr;
2200 }
2201 
2202 /// Fold icmp ({al}shr X, Y), C.
2203 Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
2204                                                    BinaryOperator *Shr,
2205                                                    const APInt &C) {
2206   // An exact shr only shifts out zero bits, so:
2207   // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
2208   Value *X = Shr->getOperand(0);
2209   CmpInst::Predicate Pred = Cmp.getPredicate();
2210   if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() &&
2211       C.isNullValue())
2212     return new ICmpInst(Pred, X, Cmp.getOperand(1));
2213 
2214   const APInt *ShiftVal;
2215   if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal)))
2216     return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal);
2217 
2218   const APInt *ShiftAmt;
2219   if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
2220     return nullptr;
2221 
2222   // Check that the shift amount is in range. If not, don't perform undefined
2223   // shifts. When the shift is visited it will be simplified.
2224   unsigned TypeBits = C.getBitWidth();
2225   unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
2226   if (ShAmtVal >= TypeBits || ShAmtVal == 0)
2227     return nullptr;
2228 
2229   bool IsAShr = Shr->getOpcode() == Instruction::AShr;
2230   bool IsExact = Shr->isExact();
2231   Type *ShrTy = Shr->getType();
2232   // TODO: If we could guarantee that InstSimplify would handle all of the
2233   // constant-value-based preconditions in the folds below, then we could assert
2234   // those conditions rather than checking them. This is difficult because of
2235   // undef/poison (PR34838).
2236   if (IsAShr) {
2237     if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) {
2238       // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC)
2239       // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC)
2240       APInt ShiftedC = C.shl(ShAmtVal);
2241       if (ShiftedC.ashr(ShAmtVal) == C)
2242         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2243     }
2244     if (Pred == CmpInst::ICMP_SGT) {
2245       // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
2246       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2247       if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
2248           (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
2249         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2250     }
2251 
2252     // If the compare constant has significant bits above the lowest sign-bit,
2253     // then convert an unsigned cmp to a test of the sign-bit:
2254     // (ashr X, ShiftC) u> C --> X s< 0
2255     // (ashr X, ShiftC) u< C --> X s> -1
2256     if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) {
2257       if (Pred == CmpInst::ICMP_UGT) {
2258         return new ICmpInst(CmpInst::ICMP_SLT, X,
2259                             ConstantInt::getNullValue(ShrTy));
2260       }
2261       if (Pred == CmpInst::ICMP_ULT) {
2262         return new ICmpInst(CmpInst::ICMP_SGT, X,
2263                             ConstantInt::getAllOnesValue(ShrTy));
2264       }
2265     }
2266   } else {
2267     if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
2268       // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
2269       // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
2270       APInt ShiftedC = C.shl(ShAmtVal);
2271       if (ShiftedC.lshr(ShAmtVal) == C)
2272         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2273     }
2274     if (Pred == CmpInst::ICMP_UGT) {
2275       // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
2276       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2277       if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
2278         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2279     }
2280   }
2281 
2282   if (!Cmp.isEquality())
2283     return nullptr;
2284 
2285   // Handle equality comparisons of shift-by-constant.
2286 
2287   // If the comparison constant changes with the shift, the comparison cannot
2288   // succeed (bits of the comparison constant cannot match the shifted value).
2289   // This should be known by InstSimplify and already be folded to true/false.
2290   assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) ||
2291           (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) &&
2292          "Expected icmp+shr simplify did not occur.");
2293 
2294   // If the bits shifted out are known zero, compare the unshifted value:
2295   //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
2296   if (Shr->isExact())
2297     return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
2298 
2299   if (C.isNullValue()) {
2300     // == 0 is u< 1.
2301     if (Pred == CmpInst::ICMP_EQ)
2302       return new ICmpInst(CmpInst::ICMP_ULT, X,
2303                           ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal)));
2304     else
2305       return new ICmpInst(CmpInst::ICMP_UGT, X,
2306                           ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1));
2307   }
2308 
2309   if (Shr->hasOneUse()) {
2310     // Canonicalize the shift into an 'and':
2311     // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
2312     APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
2313     Constant *Mask = ConstantInt::get(ShrTy, Val);
2314     Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
2315     return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal));
2316   }
2317 
2318   return nullptr;
2319 }
2320 
2321 Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
2322                                                     BinaryOperator *SRem,
2323                                                     const APInt &C) {
2324   // Match an 'is positive' or 'is negative' comparison of remainder by a
2325   // constant power-of-2 value:
2326   // (X % pow2C) sgt/slt 0
2327   const ICmpInst::Predicate Pred = Cmp.getPredicate();
2328   if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT)
2329     return nullptr;
2330 
2331   // TODO: The one-use check is standard because we do not typically want to
2332   //       create longer instruction sequences, but this might be a special-case
2333   //       because srem is not good for analysis or codegen.
2334   if (!SRem->hasOneUse())
2335     return nullptr;
2336 
2337   const APInt *DivisorC;
2338   if (!C.isNullValue() || !match(SRem->getOperand(1), m_Power2(DivisorC)))
2339     return nullptr;
2340 
2341   // Mask off the sign bit and the modulo bits (low-bits).
2342   Type *Ty = SRem->getType();
2343   APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits());
2344   Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1));
2345   Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC);
2346 
2347   // For 'is positive?' check that the sign-bit is clear and at least 1 masked
2348   // bit is set. Example:
2349   // (i8 X % 32) s> 0 --> (X & 159) s> 0
2350   if (Pred == ICmpInst::ICMP_SGT)
2351     return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty));
2352 
2353   // For 'is negative?' check that the sign-bit is set and at least 1 masked
2354   // bit is set. Example:
2355   // (i16 X % 4) s< 0 --> (X & 32771) u> 32768
2356   return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask));
2357 }
2358 
2359 /// Fold icmp (udiv X, Y), C.
2360 Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp,
2361                                                     BinaryOperator *UDiv,
2362                                                     const APInt &C) {
2363   const APInt *C2;
2364   if (!match(UDiv->getOperand(0), m_APInt(C2)))
2365     return nullptr;
2366 
2367   assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
2368 
2369   // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
2370   Value *Y = UDiv->getOperand(1);
2371   if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) {
2372     assert(!C.isMaxValue() &&
2373            "icmp ugt X, UINT_MAX should have been simplified already.");
2374     return new ICmpInst(ICmpInst::ICMP_ULE, Y,
2375                         ConstantInt::get(Y->getType(), C2->udiv(C + 1)));
2376   }
2377 
2378   // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
2379   if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
2380     assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
2381     return new ICmpInst(ICmpInst::ICMP_UGT, Y,
2382                         ConstantInt::get(Y->getType(), C2->udiv(C)));
2383   }
2384 
2385   return nullptr;
2386 }
2387 
/// Fold icmp ({su}div X, Y), C.
///
/// When the divisor is a constant C2, rewrites the compare of the quotient as
/// a compare (or range test) of the dividend X, so the divide no longer feeds
/// the compare.
///
/// \param Cmp the compare whose first operand is \p Div.
/// \param Div the udiv/sdiv instruction being compared.
/// \param C   the constant on the right-hand side of the compare.
/// \returns a replacement instruction (possibly the result of
/// replaceInstUsesWith), or nullptr when the divisor is not a constant, the
/// signedness of a relational compare does not match the divide, or the
/// divisor is 0, 1, or (for sdiv) -1.
Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
                                                   BinaryOperator *Div,
                                                   const APInt &C) {
  // Fold: icmp pred ([us]div X, C2), C -> range test
  // Fold this div into the comparison, producing a range check.
  // Determine, based on the divide type, what the range is being
  // checked.  If there is an overflow on the low or high side, remember
  // it, otherwise compute the range [low, hi) bounding the new value.
  // See: InsertRangeTest above for the kinds of replacements possible.
  const APInt *C2;
  if (!match(Div->getOperand(1), m_APInt(C2)))
    return nullptr;

  // FIXME: If the operand types don't match the type of the divide
  // then don't attempt this transform. The code below doesn't have the
  // logic to deal with a signed divide and an unsigned compare (and
  // vice versa). This is because (x /s C2) <s C  produces different
  // results than (x /s C2) <u C or (x /s C2) <s C or even
  // (x /u C2) <u C.  Simply casting the operands and result won't
  // work. :(  The if statement below tests that condition and bails
  // if it finds it.
  bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
  if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
    return nullptr;

  // The ProdOV computation fails on divide by 0 and divide by -1. Cases with
  // INT_MIN will also fail if the divisor is 1. Although folds of all these
  // division-by-constant cases should be present, we can not assert that they
  // have happened before we reach this icmp instruction.
  if (C2->isNullValue() || C2->isOneValue() ||
      (DivIsSigned && C2->isAllOnesValue()))
    return nullptr;

  // Compute Prod = C * C2. We are essentially solving an equation of
  // form X / C2 = C. We solve for X by multiplying C2 and C.
  // By solving for X, we can turn this into a range check instead of computing
  // a divide.
  APInt Prod = C * *C2;

  // Determine if the product overflows by seeing if the product is not equal to
  // the divide. Make sure we do the same kind of divide as in the LHS
  // instruction that we're folding.
  bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;

  // Local copy: it is swapped below when the divisor is negative.
  ICmpInst::Predicate Pred = Cmp.getPredicate();

  // If the division is known to be exact, then there is no remainder from the
  // divide, so the covered range size is unit, otherwise it is the divisor.
  APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;

  // Figure out the interval that is being checked.  For example, a comparison
  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
  // Compute this interval based on the constants involved and the signedness of
  // the compare/divide.  This computes a half-open interval, keeping track of
  // whether either value in the interval overflows.  After analysis each
  // overflow variable is set to 0 if its corresponding bound variable is valid
  // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
  int LoOverflow = 0, HiOverflow = 0;
  APInt LoBound, HiBound;

  if (!DivIsSigned) {  // udiv
    // e.g. X/5 op 3  --> [15, 20)
    LoBound = Prod;
    // A 'true' ProdOV converts to +1 here, i.e. overflow off the top end.
    HiOverflow = LoOverflow = ProdOV;
    if (!HiOverflow) {
      // If this is not an exact divide, then many values in the range collapse
      // to the same result value.
      HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
    }
  } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
    if (C.isNullValue()) {       // (X / pos) op 0
      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
      LoBound = -(RangeSize - 1);
      HiBound = RangeSize;
    } else if (C.isStrictlyPositive()) {   // (X / pos) op pos
      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
      HiOverflow = LoOverflow = ProdOV;
      if (!HiOverflow)
        HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
    } else {                       // (X / pos) op neg
      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
      HiBound = Prod + 1;
      // The product is negative here, so an overflow is off the bottom end.
      LoOverflow = HiOverflow = ProdOV ? -1 : 0;
      if (!LoOverflow) {
        APInt DivNeg = -RangeSize;
        LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
      }
    }
  } else if (C2->isNegative()) { // Divisor is < 0.
    // For an exact divide RangeSize was set to 1 above; negate it so that it
    // has the same sign as the (negative) divisor in the computations below.
    if (Div->isExact())
      RangeSize.negate();
    if (C.isNullValue()) { // (X / neg) op 0
      // e.g. X/-5 op 0  --> [-4, 5)
      LoBound = RangeSize + 1;
      HiBound = -RangeSize;
      if (HiBound == *C2) {        // -INTMIN = INTMIN
        HiOverflow = 1;            // [INTMIN+1, overflow)
        HiBound = APInt();         // e.g. X/INTMIN = 0 --> X > INTMIN
      }
    } else if (C.isStrictlyPositive()) {   // (X / neg) op pos
      // e.g. X/-5 op 3  --> [-19, -14)
      HiBound = Prod + 1;
      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
      if (!LoOverflow)
        LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
    } else {                       // (X / neg) op neg
      LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
      LoOverflow = HiOverflow = ProdOV;
      if (!HiOverflow)
        HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
    }

    // Dividing by a negative swaps the condition.  LT <-> GT
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  // Emit the replacement in terms of the dividend X and the computed bounds.
  Value *X = Div->getOperand(0);
  switch (Pred) {
    default: llvm_unreachable("Unhandled icmp opcode!");
    case ICmpInst::ICMP_EQ:
      // X must lie in [LoBound, HiBound). If both bounds overflowed, no X
      // qualifies; if only one overflowed, a single compare against the other
      // bound is enough.
      if (LoOverflow && HiOverflow)
        return replaceInstUsesWith(Cmp, Builder.getFalse());
      if (HiOverflow)
        return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                            ICmpInst::ICMP_UGE, X,
                            ConstantInt::get(Div->getType(), LoBound));
      if (LoOverflow)
        return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                            ICmpInst::ICMP_ULT, X,
                            ConstantInt::get(Div->getType(), HiBound));
      return replaceInstUsesWith(
          Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
    case ICmpInst::ICMP_NE:
      // Exact inverse of the ICMP_EQ case above.
      if (LoOverflow && HiOverflow)
        return replaceInstUsesWith(Cmp, Builder.getTrue());
      if (HiOverflow)
        return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                            ICmpInst::ICMP_ULT, X,
                            ConstantInt::get(Div->getType(), LoBound));
      if (LoOverflow)
        return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                            ICmpInst::ICMP_UGE, X,
                            ConstantInt::get(Div->getType(), HiBound));
      return replaceInstUsesWith(Cmp,
                                 insertRangeTest(X, LoBound, HiBound,
                                                 DivIsSigned, false));
    case ICmpInst::ICMP_ULT:
    case ICmpInst::ICMP_SLT:
      // The quotient is less than C exactly when X is below the low bound.
      if (LoOverflow == +1)   // Low bound is greater than input range.
        return replaceInstUsesWith(Cmp, Builder.getTrue());
      if (LoOverflow == -1)   // Low bound is less than input range.
        return replaceInstUsesWith(Cmp, Builder.getFalse());
      return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound));
    case ICmpInst::ICMP_UGT:
    case ICmpInst::ICMP_SGT:
      // The quotient is greater than C exactly when X is at or above the
      // (exclusive) high bound, hence the non-strict compare below.
      if (HiOverflow == +1)       // High bound greater than input range.
        return replaceInstUsesWith(Cmp, Builder.getFalse());
      if (HiOverflow == -1)       // High bound less than input range.
        return replaceInstUsesWith(Cmp, Builder.getTrue());
      if (Pred == ICmpInst::ICMP_UGT)
        return new ICmpInst(ICmpInst::ICMP_UGE, X,
                            ConstantInt::get(Div->getType(), HiBound));
      return new ICmpInst(ICmpInst::ICMP_SGE, X,
                          ConstantInt::get(Div->getType(), HiBound));
  }

  // Not reached: every case of the switch above returns.
  return nullptr;
}
2557 
2558 /// Fold icmp (sub X, Y), C.
2559 Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
2560                                                    BinaryOperator *Sub,
2561                                                    const APInt &C) {
2562   Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
2563   ICmpInst::Predicate Pred = Cmp.getPredicate();
2564   const APInt *C2;
2565   APInt SubResult;
2566 
2567   // icmp eq/ne (sub C, Y), C -> icmp eq/ne Y, 0
2568   if (match(X, m_APInt(C2)) && *C2 == C && Cmp.isEquality())
2569     return new ICmpInst(Cmp.getPredicate(), Y,
2570                         ConstantInt::get(Y->getType(), 0));
2571 
2572   // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C)
2573   if (match(X, m_APInt(C2)) &&
2574       ((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) ||
2575        (Cmp.isSigned() && Sub->hasNoSignedWrap())) &&
2576       !subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
2577     return new ICmpInst(Cmp.getSwappedPredicate(), Y,
2578                         ConstantInt::get(Y->getType(), SubResult));
2579 
2580   // The following transforms are only worth it if the only user of the subtract
2581   // is the icmp.
2582   if (!Sub->hasOneUse())
2583     return nullptr;
2584 
2585   if (Sub->hasNoSignedWrap()) {
2586     // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
2587     if (Pred == ICmpInst::ICMP_SGT && C.isAllOnesValue())
2588       return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
2589 
2590     // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
2591     if (Pred == ICmpInst::ICMP_SGT && C.isNullValue())
2592       return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
2593 
2594     // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
2595     if (Pred == ICmpInst::ICMP_SLT && C.isNullValue())
2596       return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
2597 
2598     // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
2599     if (Pred == ICmpInst::ICMP_SLT && C.isOneValue())
2600       return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
2601   }
2602 
2603   if (!match(X, m_APInt(C2)))
2604     return nullptr;
2605 
2606   // C2 - Y <u C -> (Y | (C - 1)) == C2
2607   //   iff (C2 & (C - 1)) == C - 1 and C is a power of 2
2608   if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() &&
2609       (*C2 & (C - 1)) == (C - 1))
2610     return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X);
2611 
2612   // C2 - Y >u C -> (Y | C) != C2
2613   //   iff C2 & C == C and C + 1 is a power of 2
2614   if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C)
2615     return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X);
2616 
2617   return nullptr;
2618 }
2619 
2620 /// Fold icmp (add X, Y), C.
2621 Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
2622                                                    BinaryOperator *Add,
2623                                                    const APInt &C) {
2624   Value *Y = Add->getOperand(1);
2625   const APInt *C2;
2626   if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
2627     return nullptr;
2628 
2629   // Fold icmp pred (add X, C2), C.
2630   Value *X = Add->getOperand(0);
2631   Type *Ty = Add->getType();
2632   CmpInst::Predicate Pred = Cmp.getPredicate();
2633 
2634   // If the add does not wrap, we can always adjust the compare by subtracting
2635   // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
2636   // are canonicalized to SGT/SLT/UGT/ULT.
2637   if ((Add->hasNoSignedWrap() &&
2638        (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
2639       (Add->hasNoUnsignedWrap() &&
2640        (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
2641     bool Overflow;
2642     APInt NewC =
2643         Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow);
2644     // If there is overflow, the result must be true or false.
2645     // TODO: Can we assert there is no overflow because InstSimplify always
2646     // handles those cases?
2647     if (!Overflow)
2648       // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2)
2649       return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC));
2650   }
2651 
2652   auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2);
2653   const APInt &Upper = CR.getUpper();
2654   const APInt &Lower = CR.getLower();
2655   if (Cmp.isSigned()) {
2656     if (Lower.isSignMask())
2657       return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
2658     if (Upper.isSignMask())
2659       return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
2660   } else {
2661     if (Lower.isMinValue())
2662       return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper));
2663     if (Upper.isMinValue())
2664       return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
2665   }
2666 
2667   if (!Add->hasOneUse())
2668     return nullptr;
2669 
2670   // X+C <u C2 -> (X & -C2) == C
2671   //   iff C & (C2-1) == 0
2672   //       C2 is a power of 2
2673   if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0)
2674     return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C),
2675                         ConstantExpr::getNeg(cast<Constant>(Y)));
2676 
2677   // X+C >u C2 -> (X & ~C2) != C
2678   //   iff C & C2 == 0
2679   //       C2+1 is a power of 2
2680   if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0)
2681     return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C),
2682                         ConstantExpr::getNeg(cast<Constant>(Y)));
2683 
2684   return nullptr;
2685 }
2686 
2687 bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
2688                                                Value *&RHS, ConstantInt *&Less,
2689                                                ConstantInt *&Equal,
2690                                                ConstantInt *&Greater) {
2691   // TODO: Generalize this to work with other comparison idioms or ensure
2692   // they get canonicalized into this form.
2693 
2694   // select i1 (a == b),
2695   //        i32 Equal,
2696   //        i32 (select i1 (a < b), i32 Less, i32 Greater)
2697   // where Equal, Less and Greater are placeholders for any three constants.
2698   ICmpInst::Predicate PredA;
2699   if (!match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) ||
2700       !ICmpInst::isEquality(PredA))
2701     return false;
2702   Value *EqualVal = SI->getTrueValue();
2703   Value *UnequalVal = SI->getFalseValue();
2704   // We still can get non-canonical predicate here, so canonicalize.
2705   if (PredA == ICmpInst::ICMP_NE)
2706     std::swap(EqualVal, UnequalVal);
2707   if (!match(EqualVal, m_ConstantInt(Equal)))
2708     return false;
2709   ICmpInst::Predicate PredB;
2710   Value *LHS2, *RHS2;
2711   if (!match(UnequalVal, m_Select(m_ICmp(PredB, m_Value(LHS2), m_Value(RHS2)),
2712                                   m_ConstantInt(Less), m_ConstantInt(Greater))))
2713     return false;
2714   // We can get predicate mismatch here, so canonicalize if possible:
2715   // First, ensure that 'LHS' match.
2716   if (LHS2 != LHS) {
2717     // x sgt y <--> y slt x
2718     std::swap(LHS2, RHS2);
2719     PredB = ICmpInst::getSwappedPredicate(PredB);
2720   }
2721   if (LHS2 != LHS)
2722     return false;
2723   // We also need to canonicalize 'RHS'.
2724   if (PredB == ICmpInst::ICMP_SGT && isa<Constant>(RHS2)) {
2725     // x sgt C-1  <-->  x sge C  <-->  not(x slt C)
2726     auto FlippedStrictness =
2727         InstCombiner::getFlippedStrictnessPredicateAndConstant(
2728             PredB, cast<Constant>(RHS2));
2729     if (!FlippedStrictness)
2730       return false;
2731     assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check");
2732     RHS2 = FlippedStrictness->second;
2733     // And kind-of perform the result swap.
2734     std::swap(Less, Greater);
2735     PredB = ICmpInst::ICMP_SLT;
2736   }
2737   return PredB == ICmpInst::ICMP_SLT && RHS == RHS2;
2738 }
2739 
2740 Instruction *InstCombinerImpl::foldICmpSelectConstant(ICmpInst &Cmp,
2741                                                       SelectInst *Select,
2742                                                       ConstantInt *C) {
2743 
2744   assert(C && "Cmp RHS should be a constant int!");
2745   // If we're testing a constant value against the result of a three way
2746   // comparison, the result can be expressed directly in terms of the
2747   // original values being compared.  Note: We could possibly be more
2748   // aggressive here and remove the hasOneUse test. The original select is
2749   // really likely to simplify or sink when we remove a test of the result.
2750   Value *OrigLHS, *OrigRHS;
2751   ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan;
2752   if (Cmp.hasOneUse() &&
2753       matchThreeWayIntCompare(Select, OrigLHS, OrigRHS, C1LessThan, C2Equal,
2754                               C3GreaterThan)) {
2755     assert(C1LessThan && C2Equal && C3GreaterThan);
2756 
2757     bool TrueWhenLessThan =
2758         ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C)
2759             ->isAllOnesValue();
2760     bool TrueWhenEqual =
2761         ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C)
2762             ->isAllOnesValue();
2763     bool TrueWhenGreaterThan =
2764         ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C)
2765             ->isAllOnesValue();
2766 
2767     // This generates the new instruction that will replace the original Cmp
2768     // Instruction. Instead of enumerating the various combinations when
2769     // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus
2770     // false, we rely on chaining of ORs and future passes of InstCombine to
2771     // simplify the OR further (i.e. a s< b || a == b becomes a s<= b).
2772 
2773     // When none of the three constants satisfy the predicate for the RHS (C),
2774     // the entire original Cmp can be simplified to a false.
2775     Value *Cond = Builder.getFalse();
2776     if (TrueWhenLessThan)
2777       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT,
2778                                                        OrigLHS, OrigRHS));
2779     if (TrueWhenEqual)
2780       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ,
2781                                                        OrigLHS, OrigRHS));
2782     if (TrueWhenGreaterThan)
2783       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT,
2784                                                        OrigLHS, OrigRHS));
2785 
2786     return replaceInstUsesWith(Cmp, Cond);
2787   }
2788   return nullptr;
2789 }
2790 
2791 static Instruction *foldICmpBitCast(ICmpInst &Cmp,
2792                                     InstCombiner::BuilderTy &Builder) {
2793   auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0));
2794   if (!Bitcast)
2795     return nullptr;
2796 
2797   ICmpInst::Predicate Pred = Cmp.getPredicate();
2798   Value *Op1 = Cmp.getOperand(1);
2799   Value *BCSrcOp = Bitcast->getOperand(0);
2800 
2801   // Make sure the bitcast doesn't change the number of vector elements.
2802   if (Bitcast->getSrcTy()->getScalarSizeInBits() ==
2803           Bitcast->getDestTy()->getScalarSizeInBits()) {
2804     // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
2805     Value *X;
2806     if (match(BCSrcOp, m_SIToFP(m_Value(X)))) {
2807       // icmp  eq (bitcast (sitofp X)), 0 --> icmp  eq X, 0
2808       // icmp  ne (bitcast (sitofp X)), 0 --> icmp  ne X, 0
2809       // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
2810       // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
2811       if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
2812            Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
2813           match(Op1, m_Zero()))
2814         return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
2815 
2816       // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
2817       if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
2818         return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
2819 
2820       // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
2821       if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
2822         return new ICmpInst(Pred, X,
2823                             ConstantInt::getAllOnesValue(X->getType()));
2824     }
2825 
2826     // Zero-equality checks are preserved through unsigned floating-point casts:
2827     // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
2828     // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
2829     if (match(BCSrcOp, m_UIToFP(m_Value(X))))
2830       if (Cmp.isEquality() && match(Op1, m_Zero()))
2831         return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
2832 
2833     // If this is a sign-bit test of a bitcast of a casted FP value, eliminate
2834     // the FP extend/truncate because that cast does not change the sign-bit.
2835     // This is true for all standard IEEE-754 types and the X86 80-bit type.
2836     // The sign-bit is always the most significant bit in those types.
2837     const APInt *C;
2838     bool TrueIfSigned;
2839     if (match(Op1, m_APInt(C)) && Bitcast->hasOneUse() &&
2840         InstCombiner::isSignBitCheck(Pred, *C, TrueIfSigned)) {
2841       if (match(BCSrcOp, m_FPExt(m_Value(X))) ||
2842           match(BCSrcOp, m_FPTrunc(m_Value(X)))) {
2843         // (bitcast (fpext/fptrunc X)) to iX) < 0 --> (bitcast X to iY) < 0
2844         // (bitcast (fpext/fptrunc X)) to iX) > -1 --> (bitcast X to iY) > -1
2845         Type *XType = X->getType();
2846 
2847         // We can't currently handle Power style floating point operations here.
2848         if (!(XType->isPPC_FP128Ty() || BCSrcOp->getType()->isPPC_FP128Ty())) {
2849 
2850           Type *NewType = Builder.getIntNTy(XType->getScalarSizeInBits());
2851           if (auto *XVTy = dyn_cast<VectorType>(XType))
2852             NewType = VectorType::get(NewType, XVTy->getElementCount());
2853           Value *NewBitcast = Builder.CreateBitCast(X, NewType);
2854           if (TrueIfSigned)
2855             return new ICmpInst(ICmpInst::ICMP_SLT, NewBitcast,
2856                                 ConstantInt::getNullValue(NewType));
2857           else
2858             return new ICmpInst(ICmpInst::ICMP_SGT, NewBitcast,
2859                                 ConstantInt::getAllOnesValue(NewType));
2860         }
2861       }
2862     }
2863   }
2864 
2865   // Test to see if the operands of the icmp are casted versions of other
2866   // values. If the ptr->ptr cast can be stripped off both arguments, do so.
2867   if (Bitcast->getType()->isPointerTy() &&
2868       (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
2869     // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
2870     // so eliminate it as well.
2871     if (auto *BC2 = dyn_cast<BitCastInst>(Op1))
2872       Op1 = BC2->getOperand(0);
2873 
2874     Op1 = Builder.CreateBitCast(Op1, BCSrcOp->getType());
2875     return new ICmpInst(Pred, BCSrcOp, Op1);
2876   }
2877 
2878   // Folding: icmp <pred> iN X, C
2879   //  where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
2880   //    and C is a splat of a K-bit pattern
2881   //    and SC is a constant vector = <C', C', C', ..., C'>
2882   // Into:
2883   //   %E = extractelement <M x iK> %vec, i32 C'
2884   //   icmp <pred> iK %E, trunc(C)
2885   const APInt *C;
2886   if (!match(Cmp.getOperand(1), m_APInt(C)) ||
2887       !Bitcast->getType()->isIntegerTy() ||
2888       !Bitcast->getSrcTy()->isIntOrIntVectorTy())
2889     return nullptr;
2890 
2891   Value *Vec;
2892   ArrayRef<int> Mask;
2893   if (match(BCSrcOp, m_Shuffle(m_Value(Vec), m_Undef(), m_Mask(Mask)))) {
2894     // Check whether every element of Mask is the same constant
2895     if (is_splat(Mask)) {
2896       auto *VecTy = cast<VectorType>(BCSrcOp->getType());
2897       auto *EltTy = cast<IntegerType>(VecTy->getElementType());
2898       if (C->isSplat(EltTy->getBitWidth())) {
2899         // Fold the icmp based on the value of C
2900         // If C is M copies of an iK sized bit pattern,
2901         // then:
2902         //   =>  %E = extractelement <N x iK> %vec, i32 Elem
2903         //       icmp <pred> iK %SplatVal, <pattern>
2904         Value *Elem = Builder.getInt32(Mask[0]);
2905         Value *Extract = Builder.CreateExtractElement(Vec, Elem);
2906         Value *NewC = ConstantInt::get(EltTy, C->trunc(EltTy->getBitWidth()));
2907         return new ICmpInst(Pred, Extract, NewC);
2908       }
2909     }
2910   }
2911   return nullptr;
2912 }
2913 
2914 /// Try to fold integer comparisons with a constant operand: icmp Pred X, C
2915 /// where X is some kind of instruction.
2916 Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) {
2917   const APInt *C;
2918   if (!match(Cmp.getOperand(1), m_APInt(C)))
2919     return nullptr;
2920 
2921   if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) {
2922     switch (BO->getOpcode()) {
2923     case Instruction::Xor:
2924       if (Instruction *I = foldICmpXorConstant(Cmp, BO, *C))
2925         return I;
2926       break;
2927     case Instruction::And:
2928       if (Instruction *I = foldICmpAndConstant(Cmp, BO, *C))
2929         return I;
2930       break;
2931     case Instruction::Or:
2932       if (Instruction *I = foldICmpOrConstant(Cmp, BO, *C))
2933         return I;
2934       break;
2935     case Instruction::Mul:
2936       if (Instruction *I = foldICmpMulConstant(Cmp, BO, *C))
2937         return I;
2938       break;
2939     case Instruction::Shl:
2940       if (Instruction *I = foldICmpShlConstant(Cmp, BO, *C))
2941         return I;
2942       break;
2943     case Instruction::LShr:
2944     case Instruction::AShr:
2945       if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C))
2946         return I;
2947       break;
2948     case Instruction::SRem:
2949       if (Instruction *I = foldICmpSRemConstant(Cmp, BO, *C))
2950         return I;
2951       break;
2952     case Instruction::UDiv:
2953       if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C))
2954         return I;
2955       LLVM_FALLTHROUGH;
2956     case Instruction::SDiv:
2957       if (Instruction *I = foldICmpDivConstant(Cmp, BO, *C))
2958         return I;
2959       break;
2960     case Instruction::Sub:
2961       if (Instruction *I = foldICmpSubConstant(Cmp, BO, *C))
2962         return I;
2963       break;
2964     case Instruction::Add:
2965       if (Instruction *I = foldICmpAddConstant(Cmp, BO, *C))
2966         return I;
2967       break;
2968     default:
2969       break;
2970     }
2971     // TODO: These folds could be refactored to be part of the above calls.
2972     if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, *C))
2973       return I;
2974   }
2975 
2976   // Match against CmpInst LHS being instructions other than binary operators.
2977 
2978   if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) {
2979     // For now, we only support constant integers while folding the
2980     // ICMP(SELECT)) pattern. We can extend this to support vector of integers
2981     // similar to the cases handled by binary ops above.
2982     if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
2983       if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
2984         return I;
2985   }
2986 
2987   if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) {
2988     if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
2989       return I;
2990   }
2991 
2992   if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
2993     if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
2994       return I;
2995 
2996   return nullptr;
2997 }
2998 
2999 /// Fold an icmp equality instruction with binary operator LHS and constant RHS:
3000 /// icmp eq/ne BO, C.
3001 Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
3002     ICmpInst &Cmp, BinaryOperator *BO, const APInt &C) {
3003   // TODO: Some of these folds could work with arbitrary constants, but this
3004   // function is limited to scalar and vector splat constants.
3005   if (!Cmp.isEquality())
3006     return nullptr;
3007 
3008   ICmpInst::Predicate Pred = Cmp.getPredicate();
3009   bool isICMP_NE = Pred == ICmpInst::ICMP_NE;
3010   Constant *RHS = cast<Constant>(Cmp.getOperand(1));
3011   Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
3012 
3013   switch (BO->getOpcode()) {
3014   case Instruction::SRem:
3015     // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
3016     if (C.isNullValue() && BO->hasOneUse()) {
3017       const APInt *BOC;
3018       if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
3019         Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
3020         return new ICmpInst(Pred, NewRem,
3021                             Constant::getNullValue(BO->getType()));
3022       }
3023     }
3024     break;
3025   case Instruction::Add: {
3026     // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
3027     if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
3028       if (BO->hasOneUse())
3029         return new ICmpInst(Pred, BOp0, ConstantExpr::getSub(RHS, BOC));
3030     } else if (C.isNullValue()) {
3031       // Replace ((add A, B) != 0) with (A != -B) if A or B is
3032       // efficiently invertible, or if the add has just this one use.
3033       if (Value *NegVal = dyn_castNegVal(BOp1))
3034         return new ICmpInst(Pred, BOp0, NegVal);
3035       if (Value *NegVal = dyn_castNegVal(BOp0))
3036         return new ICmpInst(Pred, NegVal, BOp1);
3037       if (BO->hasOneUse()) {
3038         Value *Neg = Builder.CreateNeg(BOp1);
3039         Neg->takeName(BO);
3040         return new ICmpInst(Pred, BOp0, Neg);
3041       }
3042     }
3043     break;
3044   }
3045   case Instruction::Xor:
3046     if (BO->hasOneUse()) {
3047       if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
3048         // For the xor case, we can xor two constants together, eliminating
3049         // the explicit xor.
3050         return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
3051       } else if (C.isNullValue()) {
3052         // Replace ((xor A, B) != 0) with (A != B)
3053         return new ICmpInst(Pred, BOp0, BOp1);
3054       }
3055     }
3056     break;
3057   case Instruction::Sub:
3058     if (BO->hasOneUse()) {
3059       // Only check for constant LHS here, as constant RHS will be canonicalized
3060       // to add and use the fold above.
3061       if (Constant *BOC = dyn_cast<Constant>(BOp0)) {
3062         // Replace ((sub BOC, B) != C) with (B != BOC-C).
3063         return new ICmpInst(Pred, BOp1, ConstantExpr::getSub(BOC, RHS));
3064       } else if (C.isNullValue()) {
3065         // Replace ((sub A, B) != 0) with (A != B).
3066         return new ICmpInst(Pred, BOp0, BOp1);
3067       }
3068     }
3069     break;
3070   case Instruction::Or: {
3071     const APInt *BOC;
3072     if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
3073       // Comparing if all bits outside of a constant mask are set?
3074       // Replace (X | C) == -1 with (X & ~C) == ~C.
3075       // This removes the -1 constant.
3076       Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1));
3077       Value *And = Builder.CreateAnd(BOp0, NotBOC);
3078       return new ICmpInst(Pred, And, NotBOC);
3079     }
3080     break;
3081   }
3082   case Instruction::And: {
3083     const APInt *BOC;
3084     if (match(BOp1, m_APInt(BOC))) {
3085       // If we have ((X & C) == C), turn it into ((X & C) != 0).
3086       if (C == *BOC && C.isPowerOf2())
3087         return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
3088                             BO, Constant::getNullValue(RHS->getType()));
3089     }
3090     break;
3091   }
3092   case Instruction::UDiv:
3093     if (C.isNullValue()) {
3094       // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
3095       auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
3096       return new ICmpInst(NewPred, BOp1, BOp0);
3097     }
3098     break;
3099   default:
3100     break;
3101   }
3102   return nullptr;
3103 }
3104 
3105 /// Fold an equality icmp with LLVM intrinsic and constant operand.
3106 Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
3107     ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) {
3108   Type *Ty = II->getType();
3109   unsigned BitWidth = C.getBitWidth();
3110   switch (II->getIntrinsicID()) {
3111   case Intrinsic::abs:
3112     // abs(A) == 0  ->  A == 0
3113     // abs(A) == INT_MIN  ->  A == INT_MIN
3114     if (C.isNullValue() || C.isMinSignedValue())
3115       return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
3116                           ConstantInt::get(Ty, C));
3117     break;
3118 
3119   case Intrinsic::bswap:
3120     // bswap(A) == C  ->  A == bswap(C)
3121     return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
3122                         ConstantInt::get(Ty, C.byteSwap()));
3123 
3124   case Intrinsic::ctlz:
3125   case Intrinsic::cttz: {
3126     // ctz(A) == bitwidth(A)  ->  A == 0 and likewise for !=
3127     if (C == BitWidth)
3128       return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
3129                           ConstantInt::getNullValue(Ty));
3130 
3131     // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
3132     // and Mask1 has bits 0..C+1 set. Similar for ctl, but for high bits.
3133     // Limit to one use to ensure we don't increase instruction count.
3134     unsigned Num = C.getLimitedValue(BitWidth);
3135     if (Num != BitWidth && II->hasOneUse()) {
3136       bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
3137       APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
3138                                : APInt::getHighBitsSet(BitWidth, Num + 1);
3139       APInt Mask2 = IsTrailing
3140         ? APInt::getOneBitSet(BitWidth, Num)
3141         : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
3142       return new ICmpInst(Cmp.getPredicate(),
3143           Builder.CreateAnd(II->getArgOperand(0), Mask1),
3144           ConstantInt::get(Ty, Mask2));
3145     }
3146     break;
3147   }
3148 
3149   case Intrinsic::ctpop: {
3150     // popcount(A) == 0  ->  A == 0 and likewise for !=
3151     // popcount(A) == bitwidth(A)  ->  A == -1 and likewise for !=
3152     bool IsZero = C.isNullValue();
3153     if (IsZero || C == BitWidth)
3154       return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
3155           IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty));
3156 
3157     break;
3158   }
3159 
3160   case Intrinsic::uadd_sat: {
3161     // uadd.sat(a, b) == 0  ->  (a | b) == 0
3162     if (C.isNullValue()) {
3163       Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1));
3164       return new ICmpInst(Cmp.getPredicate(), Or, Constant::getNullValue(Ty));
3165     }
3166     break;
3167   }
3168 
3169   case Intrinsic::usub_sat: {
3170     // usub.sat(a, b) == 0  ->  a <= b
3171     if (C.isNullValue()) {
3172       ICmpInst::Predicate NewPred = Cmp.getPredicate() == ICmpInst::ICMP_EQ
3173           ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
3174       return new ICmpInst(NewPred, II->getArgOperand(0), II->getArgOperand(1));
3175     }
3176     break;
3177   }
3178   default:
3179     break;
3180   }
3181 
3182   return nullptr;
3183 }
3184 
3185 /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
3186 Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
3187                                                              IntrinsicInst *II,
3188                                                              const APInt &C) {
3189   if (Cmp.isEquality())
3190     return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
3191 
3192   Type *Ty = II->getType();
3193   unsigned BitWidth = C.getBitWidth();
3194   ICmpInst::Predicate Pred = Cmp.getPredicate();
3195   switch (II->getIntrinsicID()) {
3196   case Intrinsic::ctpop: {
3197     // (ctpop X > BitWidth - 1) --> X == -1
3198     Value *X = II->getArgOperand(0);
3199     if (C == BitWidth - 1 && Pred == ICmpInst::ICMP_UGT)
3200       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, X,
3201                              ConstantInt::getAllOnesValue(Ty));
3202     // (ctpop X < BitWidth) --> X != -1
3203     if (C == BitWidth && Pred == ICmpInst::ICMP_ULT)
3204       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, X,
3205                              ConstantInt::getAllOnesValue(Ty));
3206     break;
3207   }
3208   case Intrinsic::ctlz: {
3209     // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
3210     if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
3211       unsigned Num = C.getLimitedValue();
3212       APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
3213       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
3214                              II->getArgOperand(0), ConstantInt::get(Ty, Limit));
3215     }
3216 
3217     // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
3218     if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) {
3219       unsigned Num = C.getLimitedValue();
3220       APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
3221       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
3222                              II->getArgOperand(0), ConstantInt::get(Ty, Limit));
3223     }
3224     break;
3225   }
3226   case Intrinsic::cttz: {
3227     // Limit to one use to ensure we don't increase instruction count.
3228     if (!II->hasOneUse())
3229       return nullptr;
3230 
3231     // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
3232     if (Pred == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
3233       APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
3234       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
3235                              Builder.CreateAnd(II->getArgOperand(0), Mask),
3236                              ConstantInt::getNullValue(Ty));
3237     }
3238 
3239     // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
3240     if (Pred == ICmpInst::ICMP_ULT && C.uge(1) && C.ule(BitWidth)) {
3241       APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
3242       return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
3243                              Builder.CreateAnd(II->getArgOperand(0), Mask),
3244                              ConstantInt::getNullValue(Ty));
3245     }
3246     break;
3247   }
3248   default:
3249     break;
3250   }
3251 
3252   return nullptr;
3253 }
3254 
3255 /// Handle icmp with constant (but not simple integer constant) RHS.
3256 Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
3257   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3258   Constant *RHSC = dyn_cast<Constant>(Op1);
3259   Instruction *LHSI = dyn_cast<Instruction>(Op0);
3260   if (!RHSC || !LHSI)
3261     return nullptr;
3262 
3263   switch (LHSI->getOpcode()) {
3264   case Instruction::GetElementPtr:
3265     // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
3266     if (RHSC->isNullValue() &&
3267         cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
3268       return new ICmpInst(
3269           I.getPredicate(), LHSI->getOperand(0),
3270           Constant::getNullValue(LHSI->getOperand(0)->getType()));
3271     break;
3272   case Instruction::PHI:
3273     // Only fold icmp into the PHI if the phi and icmp are in the same
3274     // block.  If in the same block, we're encouraging jump threading.  If
3275     // not, we are just pessimizing the code by making an i1 phi.
3276     if (LHSI->getParent() == I.getParent())
3277       if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
3278         return NV;
3279     break;
3280   case Instruction::Select: {
3281     // If either operand of the select is a constant, we can fold the
3282     // comparison into the select arms, which will cause one to be
3283     // constant folded and the select turned into a bitwise or.
3284     Value *Op1 = nullptr, *Op2 = nullptr;
3285     ConstantInt *CI = nullptr;
3286     if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
3287       Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
3288       CI = dyn_cast<ConstantInt>(Op1);
3289     }
3290     if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
3291       Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
3292       CI = dyn_cast<ConstantInt>(Op2);
3293     }
3294 
3295     // We only want to perform this transformation if it will not lead to
3296     // additional code. This is true if either both sides of the select
3297     // fold to a constant (in which case the icmp is replaced with a select
3298     // which will usually simplify) or this is the only user of the
3299     // select (in which case we are trading a select+icmp for a simpler
3300     // select+icmp) or all uses of the select can be replaced based on
3301     // dominance information ("Global cases").
3302     bool Transform = false;
3303     if (Op1 && Op2)
3304       Transform = true;
3305     else if (Op1 || Op2) {
3306       // Local case
3307       if (LHSI->hasOneUse())
3308         Transform = true;
3309       // Global cases
3310       else if (CI && !CI->isZero())
3311         // When Op1 is constant try replacing select with second operand.
3312         // Otherwise Op2 is constant and try replacing select with first
3313         // operand.
3314         Transform =
3315             replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, Op1 ? 2 : 1);
3316     }
3317     if (Transform) {
3318       if (!Op1)
3319         Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
3320                                  I.getName());
3321       if (!Op2)
3322         Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
3323                                  I.getName());
3324       return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
3325     }
3326     break;
3327   }
3328   case Instruction::IntToPtr:
3329     // icmp pred inttoptr(X), null -> icmp pred X, 0
3330     if (RHSC->isNullValue() &&
3331         DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
3332       return new ICmpInst(
3333           I.getPredicate(), LHSI->getOperand(0),
3334           Constant::getNullValue(LHSI->getOperand(0)->getType()));
3335     break;
3336 
3337   case Instruction::Load:
3338     // Try to optimize things like "A[i] > 4" to index computations.
3339     if (GetElementPtrInst *GEP =
3340             dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
3341       if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
3342         if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
3343             !cast<LoadInst>(LHSI)->isVolatile())
3344           if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
3345             return Res;
3346     }
3347     break;
3348   }
3349 
3350   return nullptr;
3351 }
3352 
3353 /// Some comparisons can be simplified.
3354 /// In this case, we are looking for comparisons that look like
3355 /// a check for a lossy truncation.
3356 /// Folds:
3357 ///   icmp SrcPred (x & Mask), x    to    icmp DstPred x, Mask
3358 /// Where Mask is some pattern that produces all-ones in low bits:
3359 ///    (-1 >> y)
3360 ///    ((-1 << y) >> y)     <- non-canonical, has extra uses
3361 ///   ~(-1 << y)
3362 ///    ((1 << y) + (-1))    <- non-canonical, has extra uses
3363 /// The Mask can be a constant, too.
3364 /// For some predicates, the operands are commutative.
3365 /// For others, x can only be on a specific side.
3366 static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
3367                                           InstCombiner::BuilderTy &Builder) {
3368   ICmpInst::Predicate SrcPred;
3369   Value *X, *M, *Y;
3370   auto m_VariableMask = m_CombineOr(
3371       m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
3372                   m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
3373       m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
3374                   m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
3375   auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
3376   if (!match(&I, m_c_ICmp(SrcPred,
3377                           m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
3378                           m_Deferred(X))))
3379     return nullptr;
3380 
3381   ICmpInst::Predicate DstPred;
3382   switch (SrcPred) {
3383   case ICmpInst::Predicate::ICMP_EQ:
3384     //  x & (-1 >> y) == x    ->    x u<= (-1 >> y)
3385     DstPred = ICmpInst::Predicate::ICMP_ULE;
3386     break;
3387   case ICmpInst::Predicate::ICMP_NE:
3388     //  x & (-1 >> y) != x    ->    x u> (-1 >> y)
3389     DstPred = ICmpInst::Predicate::ICMP_UGT;
3390     break;
3391   case ICmpInst::Predicate::ICMP_ULT:
3392     //  x & (-1 >> y) u< x    ->    x u> (-1 >> y)
3393     //  x u> x & (-1 >> y)    ->    x u> (-1 >> y)
3394     DstPred = ICmpInst::Predicate::ICMP_UGT;
3395     break;
3396   case ICmpInst::Predicate::ICMP_UGE:
3397     //  x & (-1 >> y) u>= x    ->    x u<= (-1 >> y)
3398     //  x u<= x & (-1 >> y)    ->    x u<= (-1 >> y)
3399     DstPred = ICmpInst::Predicate::ICMP_ULE;
3400     break;
3401   case ICmpInst::Predicate::ICMP_SLT:
3402     //  x & (-1 >> y) s< x    ->    x s> (-1 >> y)
3403     //  x s> x & (-1 >> y)    ->    x s> (-1 >> y)
3404     if (!match(M, m_Constant())) // Can not do this fold with non-constant.
3405       return nullptr;
3406     if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
3407       return nullptr;
3408     DstPred = ICmpInst::Predicate::ICMP_SGT;
3409     break;
3410   case ICmpInst::Predicate::ICMP_SGE:
3411     //  x & (-1 >> y) s>= x    ->    x s<= (-1 >> y)
3412     //  x s<= x & (-1 >> y)    ->    x s<= (-1 >> y)
3413     if (!match(M, m_Constant())) // Can not do this fold with non-constant.
3414       return nullptr;
3415     if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
3416       return nullptr;
3417     DstPred = ICmpInst::Predicate::ICMP_SLE;
3418     break;
3419   case ICmpInst::Predicate::ICMP_SGT:
3420   case ICmpInst::Predicate::ICMP_SLE:
3421     return nullptr;
3422   case ICmpInst::Predicate::ICMP_UGT:
3423   case ICmpInst::Predicate::ICMP_ULE:
3424     llvm_unreachable("Instsimplify took care of commut. variant");
3425     break;
3426   default:
3427     llvm_unreachable("All possible folds are handled.");
3428   }
3429 
3430   // The mask value may be a vector constant that has undefined elements. But it
3431   // may not be safe to propagate those undefs into the new compare, so replace
3432   // those elements by copying an existing, defined, and safe scalar constant.
3433   Type *OpTy = M->getType();
3434   auto *VecC = dyn_cast<Constant>(M);
3435   auto *OpVTy = dyn_cast<FixedVectorType>(OpTy);
3436   if (OpVTy && VecC && VecC->containsUndefOrPoisonElement()) {
3437     Constant *SafeReplacementConstant = nullptr;
3438     for (unsigned i = 0, e = OpVTy->getNumElements(); i != e; ++i) {
3439       if (!isa<UndefValue>(VecC->getAggregateElement(i))) {
3440         SafeReplacementConstant = VecC->getAggregateElement(i);
3441         break;
3442       }
3443     }
3444     assert(SafeReplacementConstant && "Failed to find undef replacement");
3445     M = Constant::replaceUndefsWith(VecC, SafeReplacementConstant);
3446   }
3447 
3448   return Builder.CreateICmp(DstPred, X, M);
3449 }
3450 
3451 /// Some comparisons can be simplified.
3452 /// In this case, we are looking for comparisons that look like
3453 /// a check for a lossy signed truncation.
3454 /// Folds:   (MaskedBits is a constant.)
3455 ///   ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
3456 /// Into:
3457 ///   (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
3458 /// Where  KeptBits = bitwidth(%x) - MaskedBits
3459 static Value *
3460 foldICmpWithTruncSignExtendedVal(ICmpInst &I,
3461                                  InstCombiner::BuilderTy &Builder) {
3462   ICmpInst::Predicate SrcPred;
3463   Value *X;
3464   const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
3465   // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
3466   if (!match(&I, m_c_ICmp(SrcPred,
3467                           m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
3468                                           m_APInt(C1))),
3469                           m_Deferred(X))))
3470     return nullptr;
3471 
3472   // Potential handling of non-splats: for each element:
3473   //  * if both are undef, replace with constant 0.
3474   //    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
3475   //  * if both are not undef, and are different, bailout.
3476   //  * else, only one is undef, then pick the non-undef one.
3477 
3478   // The shift amount must be equal.
3479   if (*C0 != *C1)
3480     return nullptr;
3481   const APInt &MaskedBits = *C0;
3482   assert(MaskedBits != 0 && "shift by zero should be folded away already.");
3483 
3484   ICmpInst::Predicate DstPred;
3485   switch (SrcPred) {
3486   case ICmpInst::Predicate::ICMP_EQ:
3487     // ((%x << MaskedBits) a>> MaskedBits) == %x
3488     //   =>
3489     // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
3490     DstPred = ICmpInst::Predicate::ICMP_ULT;
3491     break;
3492   case ICmpInst::Predicate::ICMP_NE:
3493     // ((%x << MaskedBits) a>> MaskedBits) != %x
3494     //   =>
3495     // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
3496     DstPred = ICmpInst::Predicate::ICMP_UGE;
3497     break;
3498   // FIXME: are more folds possible?
3499   default:
3500     return nullptr;
3501   }
3502 
3503   auto *XType = X->getType();
3504   const unsigned XBitWidth = XType->getScalarSizeInBits();
3505   const APInt BitWidth = APInt(XBitWidth, XBitWidth);
3506   assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched");
3507 
3508   // KeptBits = bitwidth(%x) - MaskedBits
3509   const APInt KeptBits = BitWidth - MaskedBits;
3510   assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable");
3511   // ICmpCst = (1 << KeptBits)
3512   const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
3513   assert(ICmpCst.isPowerOf2());
3514   // AddCst = (1 << (KeptBits-1))
3515   const APInt AddCst = ICmpCst.lshr(1);
3516   assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2());
3517 
3518   // T0 = add %x, AddCst
3519   Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
3520   // T1 = T0 DstPred ICmpCst
3521   Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
3522 
3523   return T1;
3524 }
3525 
3526 // Given pattern:
3527 //   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
3528 // we should move shifts to the same hand of 'and', i.e. rewrite as
3529 //   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
3530 // We are only interested in opposite logical shifts here.
3531 // One of the shifts can be truncated.
3532 // If we can, we want to end up creating 'lshr' shift.
3533 static Value *
3534 foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
3535                                            InstCombiner::BuilderTy &Builder) {
3536   if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
3537       !I.getOperand(0)->hasOneUse())
3538     return nullptr;
3539 
3540   auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
3541 
3542   // Look for an 'and' of two logical shifts, one of which may be truncated.
3543   // We use m_TruncOrSelf() on the RHS to correctly handle commutative case.
3544   Instruction *XShift, *MaybeTruncation, *YShift;
3545   if (!match(
3546           I.getOperand(0),
3547           m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
3548                   m_CombineAnd(m_TruncOrSelf(m_CombineAnd(
3549                                    m_AnyLogicalShift, m_Instruction(YShift))),
3550                                m_Instruction(MaybeTruncation)))))
3551     return nullptr;
3552 
3553   // We potentially looked past 'trunc', but only when matching YShift,
3554   // therefore YShift must have the widest type.
3555   Instruction *WidestShift = YShift;
3556   // Therefore XShift must have the shallowest type.
3557   // Or they both have identical types if there was no truncation.
3558   Instruction *NarrowestShift = XShift;
3559 
3560   Type *WidestTy = WidestShift->getType();
3561   Type *NarrowestTy = NarrowestShift->getType();
3562   assert(NarrowestTy == I.getOperand(0)->getType() &&
3563          "We did not look past any shifts while matching XShift though.");
3564   bool HadTrunc = WidestTy != I.getOperand(0)->getType();
3565 
3566   // If YShift is a 'lshr', swap the shifts around.
3567   if (match(YShift, m_LShr(m_Value(), m_Value())))
3568     std::swap(XShift, YShift);
3569 
3570   // The shifts must be in opposite directions.
3571   auto XShiftOpcode = XShift->getOpcode();
3572   if (XShiftOpcode == YShift->getOpcode())
3573     return nullptr; // Do not care about same-direction shifts here.
3574 
3575   Value *X, *XShAmt, *Y, *YShAmt;
3576   match(XShift, m_BinOp(m_Value(X), m_ZExtOrSelf(m_Value(XShAmt))));
3577   match(YShift, m_BinOp(m_Value(Y), m_ZExtOrSelf(m_Value(YShAmt))));
3578 
3579   // If one of the values being shifted is a constant, then we will end with
3580   // and+icmp, and [zext+]shift instrs will be constant-folded. If they are not,
3581   // however, we will need to ensure that we won't increase instruction count.
3582   if (!isa<Constant>(X) && !isa<Constant>(Y)) {
3583     // At least one of the hands of the 'and' should be one-use shift.
3584     if (!match(I.getOperand(0),
3585                m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
3586       return nullptr;
3587     if (HadTrunc) {
3588       // Due to the 'trunc', we will need to widen X. For that either the old
3589       // 'trunc' or the shift amt in the non-truncated shift should be one-use.
3590       if (!MaybeTruncation->hasOneUse() &&
3591           !NarrowestShift->getOperand(1)->hasOneUse())
3592         return nullptr;
3593     }
3594   }
3595 
3596   // We have two shift amounts from two different shifts. The types of those
3597   // shift amounts may not match. If that's the case let's bailout now.
3598   if (XShAmt->getType() != YShAmt->getType())
3599     return nullptr;
3600 
3601   // As input, we have the following pattern:
3602   //   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
3603   // We want to rewrite that as:
3604   //   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
3605   // While we know that originally (Q+K) would not overflow
3606   // (because  2 * (N-1) u<= iN -1), we have looked past extensions of
3607   // shift amounts. so it may now overflow in smaller bitwidth.
3608   // To ensure that does not happen, we need to ensure that the total maximal
3609   // shift amount is still representable in that smaller bit width.
3610   unsigned MaximalPossibleTotalShiftAmount =
3611       (WidestTy->getScalarSizeInBits() - 1) +
3612       (NarrowestTy->getScalarSizeInBits() - 1);
3613   APInt MaximalRepresentableShiftAmount =
3614       APInt::getAllOnesValue(XShAmt->getType()->getScalarSizeInBits());
3615   if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
3616     return nullptr;
3617 
3618   // Can we fold (XShAmt+YShAmt) ?
3619   auto *NewShAmt = dyn_cast_or_null<Constant>(
3620       SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
3621                       /*isNUW=*/false, SQ.getWithInstruction(&I)));
3622   if (!NewShAmt)
3623     return nullptr;
3624   NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
3625   unsigned WidestBitWidth = WidestTy->getScalarSizeInBits();
3626 
3627   // Is the new shift amount smaller than the bit width?
3628   // FIXME: could also rely on ConstantRange.
3629   if (!match(NewShAmt,
3630              m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
3631                                 APInt(WidestBitWidth, WidestBitWidth))))
3632     return nullptr;
3633 
3634   // An extra legality check is needed if we had trunc-of-lshr.
3635   if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) {
3636     auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ,
3637                     WidestShift]() {
3638       // It isn't obvious whether it's worth it to analyze non-constants here.
3639       // Also, let's basically give up on non-splat cases, pessimizing vectors.
3640       // If *any* of these preconditions matches we can perform the fold.
3641       Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy()
3642                                     ? NewShAmt->getSplatValue()
3643                                     : NewShAmt;
3644       // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold.
3645       if (NewShAmtSplat &&
3646           (NewShAmtSplat->isNullValue() ||
3647            NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1))
3648         return true;
3649       // We consider *min* leading zeros so a single outlier
3650       // blocks the transform as opposed to allowing it.
3651       if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) {
3652         KnownBits Known = computeKnownBits(C, SQ.DL);
3653         unsigned MinLeadZero = Known.countMinLeadingZeros();
3654         // If the value being shifted has at most lowest bit set we can fold.
3655         unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
3656         if (MaxActiveBits <= 1)
3657           return true;
3658         // Precondition:  NewShAmt u<= countLeadingZeros(C)
3659         if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero))
3660           return true;
3661       }
3662       if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) {
3663         KnownBits Known = computeKnownBits(C, SQ.DL);
3664         unsigned MinLeadZero = Known.countMinLeadingZeros();
3665         // If the value being shifted has at most lowest bit set we can fold.
3666         unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero;
3667         if (MaxActiveBits <= 1)
3668           return true;
3669         // Precondition:  ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C)
3670         if (NewShAmtSplat) {
3671           APInt AdjNewShAmt =
3672               (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger();
3673           if (AdjNewShAmt.ule(MinLeadZero))
3674             return true;
3675         }
3676       }
3677       return false; // Can't tell if it's ok.
3678     };
3679     if (!CanFold())
3680       return nullptr;
3681   }
3682 
3683   // All good, we can do this fold.
3684   X = Builder.CreateZExt(X, WidestTy);
3685   Y = Builder.CreateZExt(Y, WidestTy);
3686   // The shift is the same that was for X.
3687   Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
3688                   ? Builder.CreateLShr(X, NewShAmt)
3689                   : Builder.CreateShl(X, NewShAmt);
3690   Value *T1 = Builder.CreateAnd(T0, Y);
3691   return Builder.CreateICmp(I.getPredicate(), T1,
3692                             Constant::getNullValue(WidestTy));
3693 }
3694 
3695 /// Fold
3696 ///   (-1 u/ x) u< y
3697 ///   ((x * y) u/ x) != y
3698 /// to
3699 ///   @llvm.umul.with.overflow(x, y) plus extraction of overflow bit
3700 /// Note that the comparison is commutative, while inverted (u>=, ==) predicate
3701 /// will mean that we are looking for the opposite answer.
3702 Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
3703   ICmpInst::Predicate Pred;
3704   Value *X, *Y;
3705   Instruction *Mul;
3706   bool NeedNegation;
3707   // Look for: (-1 u/ x) u</u>= y
3708   if (!I.isEquality() &&
3709       match(&I, m_c_ICmp(Pred, m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
3710                          m_Value(Y)))) {
3711     Mul = nullptr;
3712 
3713     // Are we checking that overflow does not happen, or does happen?
3714     switch (Pred) {
3715     case ICmpInst::Predicate::ICMP_ULT:
3716       NeedNegation = false;
3717       break; // OK
3718     case ICmpInst::Predicate::ICMP_UGE:
3719       NeedNegation = true;
3720       break; // OK
3721     default:
3722       return nullptr; // Wrong predicate.
3723     }
3724   } else // Look for: ((x * y) u/ x) !=/== y
3725       if (I.isEquality() &&
3726           match(&I, m_c_ICmp(Pred, m_Value(Y),
3727                              m_OneUse(m_UDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y),
3728                                                                   m_Value(X)),
3729                                                           m_Instruction(Mul)),
3730                                              m_Deferred(X)))))) {
3731     NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ;
3732   } else
3733     return nullptr;
3734 
3735   BuilderTy::InsertPointGuard Guard(Builder);
3736   // If the pattern included (x * y), we'll want to insert new instructions
3737   // right before that original multiplication so that we can replace it.
3738   bool MulHadOtherUses = Mul && !Mul->hasOneUse();
3739   if (MulHadOtherUses)
3740     Builder.SetInsertPoint(Mul);
3741 
3742   Function *F = Intrinsic::getDeclaration(
3743       I.getModule(), Intrinsic::umul_with_overflow, X->getType());
3744   CallInst *Call = Builder.CreateCall(F, {X, Y}, "umul");
3745 
3746   // If the multiplication was used elsewhere, to ensure that we don't leave
3747   // "duplicate" instructions, replace uses of that original multiplication
3748   // with the multiplication result from the with.overflow intrinsic.
3749   if (MulHadOtherUses)
3750     replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "umul.val"));
3751 
3752   Value *Res = Builder.CreateExtractValue(Call, 1, "umul.ov");
3753   if (NeedNegation) // This technically increases instruction count.
3754     Res = Builder.CreateNot(Res, "umul.not.ov");
3755 
3756   // If we replaced the mul, erase it. Do this after all uses of Builder,
3757   // as the mul is used as insertion point.
3758   if (MulHadOtherUses)
3759     eraseInstFromFunction(*Mul);
3760 
3761   return Res;
3762 }
3763 
3764 static Instruction *foldICmpXNegX(ICmpInst &I) {
3765   CmpInst::Predicate Pred;
3766   Value *X;
3767   if (!match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X))))
3768     return nullptr;
3769 
3770   if (ICmpInst::isSigned(Pred))
3771     Pred = ICmpInst::getSwappedPredicate(Pred);
3772   else if (ICmpInst::isUnsigned(Pred))
3773     Pred = ICmpInst::getSignedPredicate(Pred);
3774   // else for equality-comparisons just keep the predicate.
3775 
3776   return ICmpInst::Create(Instruction::ICmp, Pred, X,
3777                           Constant::getNullValue(X->getType()), I.getName());
3778 }
3779 
/// Try to fold icmp (binop), X or icmp X, (binop).
///
/// At least one operand of \p I has to be a BinaryOperator, otherwise nothing
/// is attempted. The folds are tried in the order in which they appear below,
/// and the first one that applies returns.
///
/// \param I  The integer comparison to fold.
/// \param SQ Simplification query. A copy that is bound to \p I provides the
///           context for the isKnownNonZero() queries.
/// \returns a newly created replacement compare, the result of
///          replaceInstUsesWith() for folds that produce a plain value, or
///          nullptr if no fold applies.
///
/// TODO: A large part of this logic is duplicated in InstSimplify's
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
/// duplication.
Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
                                             const SimplifyQuery &SQ) {
  const SimplifyQuery Q = SQ.getWithInstruction(&I);
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // Special logic for binary operators.
  // BO0/BO1 are null if the respective operand is not a binary operator.
  BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
  BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
  if (!BO0 && !BO1)
    return nullptr;

  // Comparison of a value with its own nsw negation.
  if (Instruction *NewICmp = foldICmpXNegX(I))
    return NewICmp;

  const CmpInst::Predicate Pred = I.getPredicate();
  Value *X;

  // Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
  // (Op1 + X) u</u>= Op1 --> ~Op1 u</u>= X
  if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
      (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE))
    return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
  // Op0 u>/u<= (Op0 + X) --> X u>/u<= ~Op0
  if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
      (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
    return new ICmpInst(Pred, X, Builder.CreateNot(Op0));

  // NoOpNWrapProblem tells whether the overflowing binop on that side may be
  // looked through for this predicate: equality predicates always qualify,
  // unsigned predicates need 'nuw' and signed predicates need 'nsw'. The flag
  // stays false if the operand is not an overflowing binary operator.
  bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
  if (BO0 && isa<OverflowingBinaryOperator>(BO0))
    NoOp0WrapProblem =
        ICmpInst::isEquality(Pred) ||
        (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
        (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
  if (BO1 && isa<OverflowingBinaryOperator>(BO1))
    NoOp1WrapProblem =
        ICmpInst::isEquality(Pred) ||
        (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
        (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());

  // Analyze the case when either Op0 or Op1 is an add instruction.
  // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
  if (BO0 && BO0->getOpcode() == Instruction::Add) {
    A = BO0->getOperand(0);
    B = BO0->getOperand(1);
  }
  if (BO1 && BO1->getOpcode() == Instruction::Add) {
    C = BO1->getOperand(0);
    D = BO1->getOperand(1);
  }

  // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
  // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow.
  if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
    return new ICmpInst(Pred, A == Op1 ? B : A,
                        Constant::getNullValue(Op1->getType()));

  // icmp C, (C+D) -> icmp 0, D for equalities or if there is no overflow.
  // icmp D, (C+D) -> icmp 0, C for equalities or if there is no overflow.
  if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
    return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
                        C == Op0 ? D : C);

  // icmp (A+B), (A+D) -> icmp B, D for equalities or if there is no overflow.
  // The shared addend may be any one of the four operand pairings.
  if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem &&
      NoOp1WrapProblem) {
    // Determine Y and Z in the form icmp (X+Y), (X+Z).
    Value *Y, *Z;
    if (A == C) {
      // C + B == C + D  ->  B == D
      Y = B;
      Z = D;
    } else if (A == D) {
      // D + B == C + D  ->  B == C
      Y = B;
      Z = C;
    } else if (B == C) {
      // A + C == C + D  ->  A == D
      Y = A;
      Z = D;
    } else {
      assert(B == D);
      // A + D == C + D  ->  A == C
      Y = A;
      Z = C;
    }
    return new ICmpInst(Pred, Y, Z);
  }

  // The folds below absorb an addition of +1/-1 into the strictness of a
  // signed predicate. The wrap check guarantees 'nsw' here, because the
  // predicates involved are all signed.

  // icmp slt (A + -1), Op1 -> icmp sle A, Op1
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
      match(B, m_AllOnes()))
    return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);

  // icmp sge (A + -1), Op1 -> icmp sgt A, Op1
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
      match(B, m_AllOnes()))
    return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);

  // icmp sle (A + 1), Op1 -> icmp slt A, Op1
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
    return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);

  // icmp sgt (A + 1), Op1 -> icmp sge A, Op1
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
    return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);

  // icmp sgt Op0, (C + -1) -> icmp sge Op0, C
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
      match(D, m_AllOnes()))
    return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);

  // icmp sle Op0, (C + -1) -> icmp slt Op0, C
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
      match(D, m_AllOnes()))
    return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);

  // icmp sge Op0, (C + 1) -> icmp sgt Op0, C
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
    return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);

  // icmp slt Op0, (C + 1) -> icmp sle Op0, C
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
    return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);

  // TODO: The subtraction-related identities shown below also hold, but
  // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
  // wouldn't happen even if they were implemented.
  //
  // icmp ult (A - 1), Op1 -> icmp ule A, Op1
  // icmp uge (A - 1), Op1 -> icmp ugt A, Op1
  // icmp ugt Op0, (C - 1) -> icmp uge Op0, C
  // icmp ule Op0, (C - 1) -> icmp ult Op0, C

  // Unsigned counterparts for an addition of +1. The wrap check guarantees
  // 'nuw' here, because the predicates involved are all unsigned.

  // icmp ule (A + 1), Op1 -> icmp ult A, Op1
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
    return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);

  // icmp ugt (A + 1), Op1 -> icmp uge A, Op1
  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
    return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);

  // icmp uge Op0, (C + 1) -> icmp ugt Op0, C
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
    return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);

  // icmp ult Op0, (C + 1) -> icmp ule Op0, C
  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
    return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);

  // if C1 has greater magnitude than C2:
  //  icmp (A + C1), (C + C2) -> icmp (A + C3), C
  //  s.t. C3 = C1 - C2
  //
  // if C2 has greater magnitude than C1:
  //  icmp (A + C1), (C + C2) -> icmp A, (C + C3)
  //  s.t. C3 = C2 - C1
  //
  // Only done for signed and equality predicates, if both constants have the
  // same sign and at least one of the adds has no other users.
  if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
      (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
    if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
      if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
        const APInt &AP1 = C1->getValue();
        const APInt &AP2 = C2->getValue();
        if (AP1.isNegative() == AP2.isNegative()) {
          APInt AP1Abs = C1->getValue().abs();
          APInt AP2Abs = C2->getValue().abs();
          if (AP1Abs.uge(AP2Abs)) {
            ConstantInt *C3 = Builder.getInt(AP1 - AP2);
            // 'nuw' is only carried over if the new constant is not
            // unsigned-greater than the one it replaces.
            bool HasNUW = BO0->hasNoUnsignedWrap() && C3->getValue().ule(AP1);
            bool HasNSW = BO0->hasNoSignedWrap();
            Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
            return new ICmpInst(Pred, NewAdd, C);
          } else {
            ConstantInt *C3 = Builder.getInt(AP2 - AP1);
            // Same reasoning as above, for the add on the other side.
            bool HasNUW = BO1->hasNoUnsignedWrap() && C3->getValue().ule(AP2);
            bool HasNSW = BO1->hasNoSignedWrap();
            Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
            return new ICmpInst(Pred, A, NewAdd);
          }
        }
      }

  // Analyze the case when either Op0 or Op1 is a sub instruction.
  // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
  // A..D are reset first, so from here on they only describe subtractions.
  A = nullptr;
  B = nullptr;
  C = nullptr;
  D = nullptr;
  if (BO0 && BO0->getOpcode() == Instruction::Sub) {
    A = BO0->getOperand(0);
    B = BO0->getOperand(1);
  }
  if (BO1 && BO1->getOpcode() == Instruction::Sub) {
    C = BO1->getOperand(0);
    D = BO1->getOperand(1);
  }

  // icmp (A-B), A -> icmp 0, B for equalities or if there is no overflow.
  if (A == Op1 && NoOp0WrapProblem)
    return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
  // icmp C, (C-D) -> icmp D, 0 for equalities or if there is no overflow.
  if (C == Op0 && NoOp1WrapProblem)
    return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));

  // Convert sub-with-unsigned-overflow comparisons into a comparison of args.
  // These do not require any no-wrap flags on the subtraction.
  // (A - B) u>/u<= A --> B u>/u<= A
  if (A == Op1 && (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
    return new ICmpInst(Pred, B, A);
  // C u</u>= (C - D) --> C u</u>= D
  if (C == Op0 && (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE))
    return new ICmpInst(Pred, C, D);
  // (A - B) u>=/u< A --> B u>/u<= A  iff B != 0
  if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) &&
      isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
    return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A);
  // C u<=/u> (C - D) --> C u</u>= D  iff D != 0
  if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) &&
      isKnownNonZero(D, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
    return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D);

  // icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow.
  if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem)
    return new ICmpInst(Pred, A, C);

  // icmp (A-B), (A-D) -> icmp D, B for equalities or if there is no overflow.
  if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem)
    return new ICmpInst(Pred, D, B);

  // icmp (0-X) < cst --> x > -cst
  // Only for signed predicates, and only if the constant is not the minimal
  // signed value.
  if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
    Value *X;
    if (match(BO0, m_Neg(m_Value(X))))
      if (Constant *RHSC = dyn_cast<Constant>(Op1))
        if (RHSC->isNotMinSignedValue())
          return new ICmpInst(I.getSwappedPredicate(), X,
                              ConstantExpr::getNeg(RHSC));
  }

  {
    // Try to remove shared constant multiplier from equality comparison:
    // X * C == Y * C (with no overflowing/aliasing) --> X == Y
    // That is allowed if C has no trailing zero bits (i.e. is odd), or if
    // both multiplications are 'nsw', or both are 'nuw'.
    Value *X, *Y;
    const APInt *C;
    if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 &&
        match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality())
      if (!C->countTrailingZeros() ||
          (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) ||
          (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap()))
      return new ICmpInst(Pred, X, Y);
  }

  // Comparison of a signed remainder with its own divisor.
  BinaryOperator *SRem = nullptr;
  // icmp (srem X, Y), Y
  if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1))
    SRem = BO0;
  // icmp Y, (srem X, Y)
  else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
           Op0 == BO1->getOperand(1))
    SRem = BO1;
  if (SRem) {
    // We don't check hasOneUse to avoid increasing register pressure because
    // the value we use is the same value this instruction was already using.
    // The predicate is normalized to the form 'icmp Pred Y, (srem X, Y)', so
    // the cases below only have to deal with the divisor being on the LHS.
    switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
    default:
      break;
    case ICmpInst::ICMP_EQ:
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    case ICmpInst::ICMP_NE:
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    case ICmpInst::ICMP_SGT:
    case ICmpInst::ICMP_SGE:
      // Y s>/s>= (srem X, Y) --> Y s> -1
      return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
                          Constant::getAllOnesValue(SRem->getType()));
    case ICmpInst::ICMP_SLT:
    case ICmpInst::ICMP_SLE:
      // Y s</s<= (srem X, Y) --> Y s< 0
      return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
                          Constant::getNullValue(SRem->getType()));
    }
  }

  // Both sides are one-use binary operators with the same opcode and the same
  // second operand: try to compare the first operands directly.
  if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() &&
      BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) {
    switch (BO0->getOpcode()) {
    default:
      break;
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::Xor: {
      if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
        return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));

      const APInt *C;
      if (match(BO0->getOperand(1), m_APInt(C))) {
        // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
        // (this check is not restricted to 'xor', it also applies to the
        // 'add' and 'sub' opcodes handled by this case)
        if (C->isSignMask()) {
          ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate();
          return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
        }

        // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
        // (s/u' is the predicate with flipped signedness that is additionally
        // swapped)
        if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) {
          ICmpInst::Predicate NewPred = I.getFlippedSignednessPredicate();
          NewPred = I.getSwappedPredicate(NewPred);
          return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
        }
      }
      break;
    }
    case Instruction::Mul: {
      if (!I.isEquality())
        break;

      const APInt *C;
      if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() &&
          !C->isOneValue()) {
        // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
        // Mask = -1 >> count-trailing-zeros(C).
        // Nothing is done here for a C without trailing zero bits.
        if (unsigned TZs = C->countTrailingZeros()) {
          Constant *Mask = ConstantInt::get(
              BO0->getType(),
              APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
          Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask);
          Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
          return new ICmpInst(Pred, And1, And2);
        }
      }
      break;
    }
    case Instruction::UDiv:
    case Instruction::LShr:
      // Both operations have to be 'exact'; signed predicates are not
      // handled.
      if (I.isSigned() || !BO0->isExact() || !BO1->isExact())
        break;
      return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));

    case Instruction::SDiv:
      // Both divisions have to be 'exact'; equality predicates only.
      if (!I.isEquality() || !BO0->isExact() || !BO1->isExact())
        break;
      return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));

    case Instruction::AShr:
      // Both shifts have to be 'exact'; any predicate is accepted.
      if (!BO0->isExact() || !BO1->isExact())
        break;
      return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));

    case Instruction::Shl: {
      // Both shifts need to share a no-wrap flag, and a signed predicate
      // specifically requires the shared flag to be 'nsw'.
      bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
      bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
      if (!NUW && !NSW)
        break;
      if (!NSW && I.isSigned())
        break;
      return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
    }
    }
  }

  if (BO0) {
    // Transform  A & (L - 1) `ult` L --> L != 0
    // Here L is Op1, and (L - 1) is matched in its 'add L, -1' form.
    auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
    auto BitwiseAnd = m_c_And(m_Value(), LSubOne);

    if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) {
      auto *Zero = Constant::getNullValue(BO0->getType());
      return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
    }
  }

  // The remaining folds are implemented by helpers that return a plain Value
  // (or null), which then replaces all uses of the compare.
  if (Value *V = foldUnsignedMultiplicationOverflowCheck(I))
    return replaceInstUsesWith(I, V);

  if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
    return replaceInstUsesWith(I, V);

  if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
    return replaceInstUsesWith(I, V);

  if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder))
    return replaceInstUsesWith(I, V);

  return nullptr;
}
4165 
4166 /// Fold icmp Pred min|max(X, Y), X.
4167 static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) {
4168   ICmpInst::Predicate Pred = Cmp.getPredicate();
4169   Value *Op0 = Cmp.getOperand(0);
4170   Value *X = Cmp.getOperand(1);
4171 
4172   // Canonicalize minimum or maximum operand to LHS of the icmp.
4173   if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) ||
4174       match(X, m_c_SMax(m_Specific(Op0), m_Value())) ||
4175       match(X, m_c_UMin(m_Specific(Op0), m_Value())) ||
4176       match(X, m_c_UMax(m_Specific(Op0), m_Value()))) {
4177     std::swap(Op0, X);
4178     Pred = Cmp.getSwappedPredicate();
4179   }
4180 
4181   Value *Y;
4182   if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) {
4183     // smin(X, Y)  == X --> X s<= Y
4184     // smin(X, Y) s>= X --> X s<= Y
4185     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE)
4186       return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
4187 
4188     // smin(X, Y) != X --> X s> Y
4189     // smin(X, Y) s< X --> X s> Y
4190     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT)
4191       return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
4192 
4193     // These cases should be handled in InstSimplify:
4194     // smin(X, Y) s<= X --> true
4195     // smin(X, Y) s> X --> false
4196     return nullptr;
4197   }
4198 
4199   if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) {
4200     // smax(X, Y)  == X --> X s>= Y
4201     // smax(X, Y) s<= X --> X s>= Y
4202     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE)
4203       return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
4204 
4205     // smax(X, Y) != X --> X s< Y
4206     // smax(X, Y) s> X --> X s< Y
4207     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT)
4208       return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
4209 
4210     // These cases should be handled in InstSimplify:
4211     // smax(X, Y) s>= X --> true
4212     // smax(X, Y) s< X --> false
4213     return nullptr;
4214   }
4215 
4216   if (match(Op0, m_c_UMin(m_Specific(X), m_Value(Y)))) {
4217     // umin(X, Y)  == X --> X u<= Y
4218     // umin(X, Y) u>= X --> X u<= Y
4219     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_UGE)
4220       return new ICmpInst(ICmpInst::ICMP_ULE, X, Y);
4221 
4222     // umin(X, Y) != X --> X u> Y
4223     // umin(X, Y) u< X --> X u> Y
4224     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT)
4225       return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
4226 
4227     // These cases should be handled in InstSimplify:
4228     // umin(X, Y) u<= X --> true
4229     // umin(X, Y) u> X --> false
4230     return nullptr;
4231   }
4232 
4233   if (match(Op0, m_c_UMax(m_Specific(X), m_Value(Y)))) {
4234     // umax(X, Y)  == X --> X u>= Y
4235     // umax(X, Y) u<= X --> X u>= Y
4236     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_ULE)
4237       return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
4238 
4239     // umax(X, Y) != X --> X u< Y
4240     // umax(X, Y) u> X --> X u< Y
4241     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_UGT)
4242       return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
4243 
4244     // These cases should be handled in InstSimplify:
4245     // umax(X, Y) u>= X --> true
4246     // umax(X, Y) u< X --> false
4247     return nullptr;
4248   }
4249 
4250   return nullptr;
4251 }
4252 
/// Try to simplify an equality compare (icmp eq / icmp ne) whose operands have
/// related structure.
///
/// The folds are attempted in this order, and the first one that applies wins:
///   - xor on one or both sides sharing an operand or using constants,
///   - 'and' on both sides sharing an operand,
///   - zext compared against a low-bit mask,
///   - identical constant right/left shifts on both sides,
///   - trunc(lshr X, C) compared against a constant,
///   - bswap/bitreverse applied to both sides,
///   - power-of-2-or-zero idioms, rewritten in terms of ctpop.
///
/// Helper instructions needed by a fold are created through Builder.
///
/// \param I The compare to simplify; anything but eq/ne is rejected.
/// \returns a new instruction meant to replace \p I, or nullptr if no fold
///          applies.
Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
  if (!I.isEquality())
    return nullptr;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  const CmpInst::Predicate Pred = I.getPredicate();
  // Scratch bindings reused by the pattern matches below. Note that the final
  // ctpop fold depends on what is left in A after the preceding matches.
  Value *A, *B, *C, *D;
  if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
    if (A == Op1 || B == Op1) { // (A^B) == A  ->  B == 0
      Value *OtherVal = A == Op1 ? B : A;
      return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
    }

    if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
      // A^c1 == C^c2 --> A == C^(c1^c2)
      // A new xor is created here, so the RHS xor is required to have a single
      // use (presumably to avoid growing the instruction count).
      ConstantInt *C1, *C2;
      if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) &&
          Op1->hasOneUse()) {
        Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue());
        Value *Xor = Builder.CreateXor(C, NC);
        return new ICmpInst(Pred, A, Xor);
      }

      // A^B == A^D -> B == D
      // All four ways the two xors can share an operand are checked.
      if (A == C)
        return new ICmpInst(Pred, B, D);
      if (A == D)
        return new ICmpInst(Pred, B, C);
      if (B == C)
        return new ICmpInst(Pred, A, D);
      if (B == D)
        return new ICmpInst(Pred, A, C);
    }
  }

  if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
    // A == (A^B)  ->  B == 0
    Value *OtherVal = A == Op0 ? B : A;
    return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
  }

  // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
  if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
      match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
    // Z is the operand common to both 'and's; X and Y are the remaining ones.
    // X stays null when the two 'and's share no operand.
    Value *X = nullptr, *Y = nullptr, *Z = nullptr;

    if (A == C) {
      X = B;
      Y = D;
      Z = A;
    } else if (A == D) {
      X = B;
      Y = C;
      Z = A;
    } else if (B == C) {
      X = A;
      Y = D;
      Z = B;
    } else if (B == D) {
      X = A;
      Y = C;
      Z = B;
    }

    if (X) { // Build (X^Y) & Z
      Op1 = Builder.CreateXor(X, Y);
      Op1 = Builder.CreateAnd(Op1, Z);
      return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
    }
  }

  // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
  // and       (B & (1<<X)-1) == (zext A) --> A == (trunc B)
  ConstantInt *Cst1;
  if ((Op0->hasOneUse() && match(Op0, m_ZExt(m_Value(A))) &&
       match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
      (Op1->hasOneUse() && match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
       match(Op1, m_ZExt(m_Value(A))))) {
    // The mask must be exactly the low bits covering A's integer type:
    // mask + 1 is a power of two whose log2 equals A's bit width.
    APInt Pow2 = Cst1->getValue() + 1;
    if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
        Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
      return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
  }

  // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
  // For lshr and ashr pairs.
  if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
       match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
      (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
       match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
    unsigned TypeBits = Cst1->getBitWidth();
    // The shift amount is clamped to TypeBits, so the range check below
    // rejects shifts by the full width or more, as well as shifts by zero.
    unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
    if (ShAmt < TypeBits && ShAmt != 0) {
      // eq becomes "u< (1 << C)"; ne becomes the inverse, "u>= (1 << C)".
      ICmpInst::Predicate NewPred =
          Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
      Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
      APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
      return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal));
    }
  }

  // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
  if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
      match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
    unsigned TypeBits = Cst1->getBitWidth();
    unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
    if (ShAmt < TypeBits && ShAmt != 0) {
      Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
      // Keep only the low (TypeBits - C) bits: the ones that survive the shl.
      APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
      Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal),
                                      I.getName() + ".mask");
      return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
    }
  }

  // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
  // "icmp (and X, mask), cst"
  uint64_t ShAmt = 0;
  if (Op0->hasOneUse() &&
      match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) &&
      match(Op1, m_ConstantInt(Cst1)) &&
      // Only do this when A has multiple uses.  This is most important to do
      // when it exposes other optimizations.
      !A->hasOneUse()) {
    unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();

    if (ShAmt < ASize) {
      // Mask selecting, inside A, the bits that the trunc would have kept.
      APInt MaskV =
          APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
      MaskV <<= ShAmt;

      // Widen the compared constant to A's width and move it to the same
      // bit position as the mask.
      APInt CmpV = Cst1->getValue().zext(ASize);
      CmpV <<= ShAmt;

      Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV));
      return new ICmpInst(Pred, Mask, Builder.getInt(CmpV));
    }
  }

  // If both operands are byte-swapped or bit-reversed, just compare the
  // original values.
  // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant()
  // and handle more intrinsics.
  if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) ||
      (match(Op0, m_BitReverse(m_Value(A))) &&
       match(Op1, m_BitReverse(m_Value(B)))))
    return new ICmpInst(Pred, A, B);

  // Canonicalize checking for a power-of-2-or-zero value:
  // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
  // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants)
  // From here on A doubles as a flag: it is cleared unless this form matched,
  // and it may be set again by the (A & -A) form below.
  if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()),
                                   m_Deferred(A)))) ||
      !match(Op1, m_ZeroInt()))
    A = nullptr;

  // (A & -A) == A --> ctpop(A) < 2 (four commuted variants)
  // (-A & A) != A --> ctpop(A) > 1 (four commuted variants)
  if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1)))))
    A = Op1;
  else if (match(Op1,
                 m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0)))))
    A = Op0;

  // A is non-null only if one of the two power-of-2-or-zero forms matched.
  if (A) {
    Type *Ty = A->getType();
    CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
    return Pred == ICmpInst::ICMP_EQ
        ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop, ConstantInt::get(Ty, 2))
        : new ICmpInst(ICmpInst::ICMP_UGT, CtPop, ConstantInt::get(Ty, 1));
  }

  return nullptr;
}
4427 
4428 static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp,
4429                                            InstCombiner::BuilderTy &Builder) {
4430   assert(isa<CastInst>(ICmp.getOperand(0)) && "Expected cast for operand 0");
4431   auto *CastOp0 = cast<CastInst>(ICmp.getOperand(0));
4432   Value *X;
4433   if (!match(CastOp0, m_ZExtOrSExt(m_Value(X))))
4434     return nullptr;
4435 
4436   bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt;
4437   bool IsSignedCmp = ICmp.isSigned();
4438   if (auto *CastOp1 = dyn_cast<CastInst>(ICmp.getOperand(1))) {
4439     // If the signedness of the two casts doesn't agree (i.e. one is a sext
4440     // and the other is a zext), then we can't handle this.
4441     // TODO: This is too strict. We can handle some predicates (equality?).
4442     if (CastOp0->getOpcode() != CastOp1->getOpcode())
4443       return nullptr;
4444 
4445     // Not an extension from the same type?
4446     Value *Y = CastOp1->getOperand(0);
4447     Type *XTy = X->getType(), *YTy = Y->getType();
4448     if (XTy != YTy) {
4449       // One of the casts must have one use because we are creating a new cast.
4450       if (!CastOp0->hasOneUse() && !CastOp1->hasOneUse())
4451         return nullptr;
4452       // Extend the narrower operand to the type of the wider operand.
4453       if (XTy->getScalarSizeInBits() < YTy->getScalarSizeInBits())
4454         X = Builder.CreateCast(CastOp0->getOpcode(), X, YTy);
4455       else if (YTy->getScalarSizeInBits() < XTy->getScalarSizeInBits())
4456         Y = Builder.CreateCast(CastOp0->getOpcode(), Y, XTy);
4457       else
4458         return nullptr;
4459     }
4460 
4461     // (zext X) == (zext Y) --> X == Y
4462     // (sext X) == (sext Y) --> X == Y
4463     if (ICmp.isEquality())
4464       return new ICmpInst(ICmp.getPredicate(), X, Y);
4465 
4466     // A signed comparison of sign extended values simplifies into a
4467     // signed comparison.
4468     if (IsSignedCmp && IsSignedExt)
4469       return new ICmpInst(ICmp.getPredicate(), X, Y);
4470 
4471     // The other three cases all fold into an unsigned comparison.
4472     return new ICmpInst(ICmp.getUnsignedPredicate(), X, Y);
4473   }
4474 
4475   // Below here, we are only folding a compare with constant.
4476   auto *C = dyn_cast<Constant>(ICmp.getOperand(1));
4477   if (!C)
4478     return nullptr;
4479 
4480   // Compute the constant that would happen if we truncated to SrcTy then
4481   // re-extended to DestTy.
4482   Type *SrcTy = CastOp0->getSrcTy();
4483   Type *DestTy = CastOp0->getDestTy();
4484   Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy);
4485   Constant *Res2 = ConstantExpr::getCast(CastOp0->getOpcode(), Res1, DestTy);
4486 
4487   // If the re-extended constant didn't change...
4488   if (Res2 == C) {
4489     if (ICmp.isEquality())
4490       return new ICmpInst(ICmp.getPredicate(), X, Res1);
4491 
4492     // A signed comparison of sign extended values simplifies into a
4493     // signed comparison.
4494     if (IsSignedExt && IsSignedCmp)
4495       return new ICmpInst(ICmp.getPredicate(), X, Res1);
4496 
4497     // The other three cases all fold into an unsigned comparison.
4498     return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res1);
4499   }
4500 
4501   // The re-extended constant changed, partly changed (in the case of a vector),
4502   // or could not be determined to be equal (in the case of a constant
4503   // expression), so the constant cannot be represented in the shorter type.
4504   // All the cases that fold to true or false will have already been handled
4505   // by SimplifyICmpInst, so only deal with the tricky case.
4506   if (IsSignedCmp || !IsSignedExt || !isa<ConstantInt>(C))
4507     return nullptr;
4508 
4509   // Is source op positive?
4510   // icmp ult (sext X), C --> icmp sgt X, -1
4511   if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
4512     return new ICmpInst(CmpInst::ICMP_SGT, X, Constant::getAllOnesValue(SrcTy));
4513 
4514   // Is source op negative?
4515   // icmp ugt (sext X), C --> icmp slt X, 0
4516   assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
4517   return new ICmpInst(CmpInst::ICMP_SLT, X, Constant::getNullValue(SrcTy));
4518 }
4519 
4520 /// Handle icmp (cast x), (cast or constant).
4521 Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
4522   auto *CastOp0 = dyn_cast<CastInst>(ICmp.getOperand(0));
4523   if (!CastOp0)
4524     return nullptr;
4525   if (!isa<Constant>(ICmp.getOperand(1)) && !isa<CastInst>(ICmp.getOperand(1)))
4526     return nullptr;
4527 
4528   Value *Op0Src = CastOp0->getOperand(0);
4529   Type *SrcTy = CastOp0->getSrcTy();
4530   Type *DestTy = CastOp0->getDestTy();
4531 
4532   // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
4533   // integer type is the same size as the pointer type.
4534   auto CompatibleSizes = [&](Type *SrcTy, Type *DestTy) {
4535     if (isa<VectorType>(SrcTy)) {
4536       SrcTy = cast<VectorType>(SrcTy)->getElementType();
4537       DestTy = cast<VectorType>(DestTy)->getElementType();
4538     }
4539     return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth();
4540   };
4541   if (CastOp0->getOpcode() == Instruction::PtrToInt &&
4542       CompatibleSizes(SrcTy, DestTy)) {
4543     Value *NewOp1 = nullptr;
4544     if (auto *PtrToIntOp1 = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
4545       Value *PtrSrc = PtrToIntOp1->getOperand(0);
4546       if (PtrSrc->getType()->getPointerAddressSpace() ==
4547           Op0Src->getType()->getPointerAddressSpace()) {
4548         NewOp1 = PtrToIntOp1->getOperand(0);
4549         // If the pointer types don't match, insert a bitcast.
4550         if (Op0Src->getType() != NewOp1->getType())
4551           NewOp1 = Builder.CreateBitCast(NewOp1, Op0Src->getType());
4552       }
4553     } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
4554       NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy);
4555     }
4556 
4557     if (NewOp1)
4558       return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1);
4559   }
4560 
4561   return foldICmpWithZextOrSext(ICmp, Builder);
4562 }
4563 
4564 static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) {
4565   switch (BinaryOp) {
4566     default:
4567       llvm_unreachable("Unsupported binary op");
4568     case Instruction::Add:
4569     case Instruction::Sub:
4570       return match(RHS, m_Zero());
4571     case Instruction::Mul:
4572       return match(RHS, m_One());
4573   }
4574 }
4575 
4576 OverflowResult
4577 InstCombinerImpl::computeOverflow(Instruction::BinaryOps BinaryOp,
4578                                   bool IsSigned, Value *LHS, Value *RHS,
4579                                   Instruction *CxtI) const {
4580   switch (BinaryOp) {
4581     default:
4582       llvm_unreachable("Unsupported binary op");
4583     case Instruction::Add:
4584       if (IsSigned)
4585         return computeOverflowForSignedAdd(LHS, RHS, CxtI);
4586       else
4587         return computeOverflowForUnsignedAdd(LHS, RHS, CxtI);
4588     case Instruction::Sub:
4589       if (IsSigned)
4590         return computeOverflowForSignedSub(LHS, RHS, CxtI);
4591       else
4592         return computeOverflowForUnsignedSub(LHS, RHS, CxtI);
4593     case Instruction::Mul:
4594       if (IsSigned)
4595         return computeOverflowForSignedMul(LHS, RHS, CxtI);
4596       else
4597         return computeOverflowForUnsignedMul(LHS, RHS, CxtI);
4598   }
4599 }
4600 
4601 bool InstCombinerImpl::OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp,
4602                                              bool IsSigned, Value *LHS,
4603                                              Value *RHS, Instruction &OrigI,
4604                                              Value *&Result,
4605                                              Constant *&Overflow) {
4606   if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
4607     std::swap(LHS, RHS);
4608 
4609   // If the overflow check was an add followed by a compare, the insertion point
4610   // may be pointing to the compare.  We want to insert the new instructions
4611   // before the add in case there are uses of the add between the add and the
4612   // compare.
4613   Builder.SetInsertPoint(&OrigI);
4614 
4615   Type *OverflowTy = Type::getInt1Ty(LHS->getContext());
4616   if (auto *LHSTy = dyn_cast<VectorType>(LHS->getType()))
4617     OverflowTy = VectorType::get(OverflowTy, LHSTy->getElementCount());
4618 
4619   if (isNeutralValue(BinaryOp, RHS)) {
4620     Result = LHS;
4621     Overflow = ConstantInt::getFalse(OverflowTy);
4622     return true;
4623   }
4624 
4625   switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) {
4626     case OverflowResult::MayOverflow:
4627       return false;
4628     case OverflowResult::AlwaysOverflowsLow:
4629     case OverflowResult::AlwaysOverflowsHigh:
4630       Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
4631       Result->takeName(&OrigI);
4632       Overflow = ConstantInt::getTrue(OverflowTy);
4633       return true;
4634     case OverflowResult::NeverOverflows:
4635       Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
4636       Result->takeName(&OrigI);
4637       Overflow = ConstantInt::getFalse(OverflowTy);
4638       if (auto *Inst = dyn_cast<Instruction>(Result)) {
4639         if (IsSigned)
4640           Inst->setHasNoSignedWrap();
4641         else
4642           Inst->setHasNoUnsignedWrap();
4643       }
4644       return true;
4645   }
4646 
4647   llvm_unreachable("Unexpected overflow result");
4648 }
4649 
/// Recognize and process idiom involving test for multiplication
/// overflow.
///
/// The caller has matched a pattern of the form:
///   I = cmp u (mul(zext A, zext B)), V
/// The function checks if this is a test for overflow and if so replaces
/// multiplication with call to 'umul.with.overflow' intrinsic.
///
/// \param I Compare instruction.
/// \param MulVal Result of 'mul' instruction.  It is one of the arguments of
///               the compare instruction.  Non-integer types (pointers,
///               vectors) are rejected.
/// \param OtherVal The other argument of compare instruction.
/// \param IC The combiner, used for its IR builder, its worklist and for
///           replacing uses.
/// \returns Instruction which must replace the compare instruction, NULL if no
///          replacement required.
static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
                                         Value *OtherVal,
                                         InstCombinerImpl &IC) {
  // Don't bother doing this transformation for pointers, don't do it for
  // vectors.
  if (!isa<IntegerType>(MulVal->getType()))
    return nullptr;

  assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
  assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
  auto *MulInstr = dyn_cast<Instruction>(MulVal);
  if (!MulInstr)
    return nullptr;
  assert(MulInstr->getOpcode() == Instruction::Mul);

  // Both multiplicands are expected to be zero extensions (see the pattern
  // the caller matched); cast<> asserts on anything else.
  auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
       *RHS = cast<ZExtOperator>(MulInstr->getOperand(1));
  assert(LHS->getOpcode() == Instruction::ZExt);
  assert(RHS->getOpcode() == Instruction::ZExt);
  Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);

  // Calculate type and width of the result produced by mul.with.overflow.
  // This is the wider of the two un-extended operand types.
  Type *TyA = A->getType(), *TyB = B->getType();
  unsigned WidthA = TyA->getPrimitiveSizeInBits(),
           WidthB = TyB->getPrimitiveSizeInBits();
  unsigned MulWidth;
  Type *MulType;
  if (WidthB > WidthA) {
    MulWidth = WidthB;
    MulType = TyB;
  } else {
    MulWidth = WidthA;
    MulType = TyA;
  }

  // In order to replace the original mul with a narrower mul.with.overflow,
  // all uses must ignore upper bits of the product.  The number of used low
  // bits must be not greater than the width of mul.with.overflow.
  if (MulVal->hasNUsesOrMore(2))
    for (User *U : MulVal->users()) {
      if (U == &I)
        continue;
      if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
        // Check if truncation ignores bits above MulWidth.
        unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
        if (TruncWidth > MulWidth)
          return nullptr;
      } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
        // Check if AND ignores bits above MulWidth.
        if (BO->getOpcode() != Instruction::And)
          return nullptr;
        if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
          // The number of significant bits in the mask must fit in MulWidth.
          const APInt &CVal = CI->getValue();
          if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
            return nullptr;
        } else {
          // In this case we could have the operand of the binary operation
          // being defined in another block, and performing the replacement
          // could break the dominance relation.
          return nullptr;
        }
      } else {
        // Other uses prohibit this transformation.
        return nullptr;
      }
    }

  // Recognize patterns
  // Each case either breaks out of the switch (overflow test recognized) or
  // returns nullptr.
  switch (I.getPredicate()) {
  case ICmpInst::ICMP_EQ:
  case ICmpInst::ICMP_NE:
    // Recognize pattern:
    //   mulval = mul(zext A, zext B)
    //   cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits.
    ConstantInt *CI;
    Value *ValToMask;
    if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
      if (ValToMask != MulVal)
        return nullptr;
      // mask + 1 must be exactly 2^MulWidth.
      const APInt &CVal = CI->getValue() + 1;
      if (CVal.isPowerOf2()) {
        unsigned MaskWidth = CVal.logBase2();
        if (MaskWidth == MulWidth)
          break; // Recognized
      }
    }
    return nullptr;

  case ICmpInst::ICMP_UGT:
    // Recognize pattern:
    //   mulval = mul(zext A, zext B)
    //   cmp ugt mulval, max
    // where max is the largest unsigned value representable in MulWidth bits.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
      APInt MaxVal = APInt::getMaxValue(MulWidth);
      MaxVal = MaxVal.zext(CI->getBitWidth());
      if (MaxVal.eq(CI->getValue()))
        break; // Recognized
    }
    return nullptr;

  case ICmpInst::ICMP_UGE:
    // Recognize pattern:
    //   mulval = mul(zext A, zext B)
    //   cmp uge mulval, max+1
    // where max+1 is 2^MulWidth, i.e. only bit MulWidth is set.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
      APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
      if (MaxVal.eq(CI->getValue()))
        break; // Recognized
    }
    return nullptr;

  case ICmpInst::ICMP_ULE:
    // Recognize pattern:
    //   mulval = mul(zext A, zext B)
    //   cmp ule mulval, max
    if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
      APInt MaxVal = APInt::getMaxValue(MulWidth);
      MaxVal = MaxVal.zext(CI->getBitWidth());
      if (MaxVal.eq(CI->getValue()))
        break; // Recognized
    }
    return nullptr;

  case ICmpInst::ICMP_ULT:
    // Recognize pattern:
    //   mulval = mul(zext A, zext B)
    //   cmp ult mulval, max + 1
    if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
      APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
      if (MaxVal.eq(CI->getValue()))
        break; // Recognized
    }
    return nullptr;

  default:
    return nullptr;
  }

  // New instructions are placed before the original multiplication.
  InstCombiner::BuilderTy &Builder = IC.Builder;
  Builder.SetInsertPoint(MulInstr);

  // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
  // The narrower operand, if any, is zero-extended to MulType first.
  Value *MulA = A, *MulB = B;
  if (WidthA < MulWidth)
    MulA = Builder.CreateZExt(A, MulType);
  if (WidthB < MulWidth)
    MulB = Builder.CreateZExt(B, MulType);
  Function *F = Intrinsic::getDeclaration(
      I.getModule(), Intrinsic::umul_with_overflow, MulType);
  CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
  IC.addToWorklist(MulInstr);

  // If there are uses of mul result other than the comparison, we know that
  // they are truncation or binary AND. Change them to use result of
  // mul.with.overflow and adjust properly mask/size.
  if (MulVal->hasNUsesOrMore(2)) {
    Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
    // The loop body rewrites users of MulVal, which changes MulVal's use
    // list, hence the early-increment range.
    for (User *U : make_early_inc_range(MulVal->users())) {
      // Skip the compare itself and, for the eq/ne pattern, the masking
      // 'and' that is the compare's other operand.
      if (U == &I || U == OtherVal)
        continue;
      if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
        // A trunc to exactly MulWidth bits is the narrow product itself;
        // a narrower trunc is re-pointed at the narrow product.
        if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
          IC.replaceInstUsesWith(*TI, Mul);
        else
          TI->setOperand(0, Mul);
      } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
        assert(BO->getOpcode() == Instruction::And);
        // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
        ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
        APInt ShortMask = CI->getValue().trunc(MulWidth);
        Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
        Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType());
        IC.replaceInstUsesWith(*BO, Zext);
      } else {
        llvm_unreachable("Unexpected Binary operation");
      }
      IC.addToWorklist(cast<Instruction>(U));
    }
  }
  if (isa<Instruction>(OtherVal))
    IC.addToWorklist(cast<Instruction>(OtherVal));

  // The original icmp gets replaced with the overflow value, maybe inverted
  // depending on predicate.
  // "Inverse" means the compare is true exactly when there is NO overflow.
  bool Inverse = false;
  switch (I.getPredicate()) {
  case ICmpInst::ICMP_NE:
    break;
  case ICmpInst::ICMP_EQ:
    Inverse = true;
    break;
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    // "mulval u> max" / "mulval u>= max+1" is the overflow condition itself
    // when the product is the first operand.
    if (I.getOperand(0) == MulVal)
      break;
    Inverse = true;
    break;
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    // With the product as the second operand, "max+1 u<= mulval" /
    // "max u< mulval" is the overflow condition itself.
    if (I.getOperand(1) == MulVal)
      break;
    Inverse = true;
    break;
  default:
    llvm_unreachable("Unexpected predicate");
  }
  if (Inverse) {
    Value *Res = Builder.CreateExtractValue(Call, 1);
    return BinaryOperator::CreateNot(Res);
  }

  return ExtractValueInst::Create(Call, 1);
}
4877 
4878 /// When performing a comparison against a constant, it is possible that not all
4879 /// the bits in the LHS are demanded. This helper method computes the mask that
4880 /// IS demanded.
4881 static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
4882   const APInt *RHS;
4883   if (!match(I.getOperand(1), m_APInt(RHS)))
4884     return APInt::getAllOnesValue(BitWidth);
4885 
4886   // If this is a normal comparison, it demands all bits. If it is a sign bit
4887   // comparison, it only demands the sign bit.
4888   bool UnusedBit;
4889   if (InstCombiner::isSignBitCheck(I.getPredicate(), *RHS, UnusedBit))
4890     return APInt::getSignMask(BitWidth);
4891 
4892   switch (I.getPredicate()) {
4893   // For a UGT comparison, we don't care about any bits that
4894   // correspond to the trailing ones of the comparand.  The value of these
4895   // bits doesn't impact the outcome of the comparison, because any value
4896   // greater than the RHS must differ in a bit higher than these due to carry.
4897   case ICmpInst::ICMP_UGT:
4898     return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingOnes());
4899 
4900   // Similarly, for a ULT comparison, we don't care about the trailing zeros.
4901   // Any value less than the RHS must differ in a higher bit because of carries.
4902   case ICmpInst::ICMP_ULT:
4903     return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingZeros());
4904 
4905   default:
4906     return APInt::getAllOnesValue(BitWidth);
4907   }
4908 }
4909 
4910 /// Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst
4911 /// should be swapped.
4912 /// The decision is based on how many times these two operands are reused
4913 /// as subtract operands and their positions in those instructions.
4914 /// The rationale is that several architectures use the same instruction for
4915 /// both subtract and cmp. Thus, it is better if the order of those operands
4916 /// match.
4917 /// \return true if Op0 and Op1 should be swapped.
4918 static bool swapMayExposeCSEOpportunities(const Value *Op0, const Value *Op1) {
4919   // Filter out pointer values as those cannot appear directly in subtract.
4920   // FIXME: we may want to go through inttoptrs or bitcasts.
4921   if (Op0->getType()->isPointerTy())
4922     return false;
4923   // If a subtract already has the same operands as a compare, swapping would be
4924   // bad. If a subtract has the same operands as a compare but in reverse order,
4925   // then swapping is good.
4926   int GoodToSwap = 0;
4927   for (const User *U : Op0->users()) {
4928     if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
4929       GoodToSwap++;
4930     else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
4931       GoodToSwap--;
4932   }
4933   return GoodToSwap > 0;
4934 }
4935 
4936 /// Check that one use is in the same block as the definition and all
4937 /// other uses are in blocks dominated by a given block.
4938 ///
4939 /// \param DI Definition
4940 /// \param UI Use
4941 /// \param DB Block that must dominate all uses of \p DI outside
4942 ///           the parent block
4943 /// \return true when \p UI is the only use of \p DI in the parent block
4944 /// and all other uses of \p DI are in blocks dominated by \p DB.
4945 ///
4946 bool InstCombinerImpl::dominatesAllUses(const Instruction *DI,
4947                                         const Instruction *UI,
4948                                         const BasicBlock *DB) const {
4949   assert(DI && UI && "Instruction not defined\n");
4950   // Ignore incomplete definitions.
4951   if (!DI->getParent())
4952     return false;
4953   // DI and UI must be in the same block.
4954   if (DI->getParent() != UI->getParent())
4955     return false;
4956   // Protect from self-referencing blocks.
4957   if (DI->getParent() == DB)
4958     return false;
4959   for (const User *U : DI->users()) {
4960     auto *Usr = cast<Instruction>(U);
4961     if (Usr != UI && !DT.dominates(DB, Usr->getParent()))
4962       return false;
4963   }
4964   return true;
4965 }
4966 
4967 /// Return true when the instruction sequence within a block is select-cmp-br.
4968 static bool isChainSelectCmpBranch(const SelectInst *SI) {
4969   const BasicBlock *BB = SI->getParent();
4970   if (!BB)
4971     return false;
4972   auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
4973   if (!BI || BI->getNumSuccessors() != 2)
4974     return false;
4975   auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
4976   if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
4977     return false;
4978   return true;
4979 }
4980 
4981 /// True when a select result is replaced by one of its operands
4982 /// in select-icmp sequence. This will eventually result in the elimination
4983 /// of the select.
4984 ///
4985 /// \param SI    Select instruction
4986 /// \param Icmp  Compare instruction
4987 /// \param SIOpd Operand that replaces the select
4988 ///
4989 /// Notes:
4990 /// - The replacement is global and requires dominator information
4991 /// - The caller is responsible for the actual replacement
4992 ///
4993 /// Example:
4994 ///
4995 /// entry:
4996 ///  %4 = select i1 %3, %C* %0, %C* null
4997 ///  %5 = icmp eq %C* %4, null
4998 ///  br i1 %5, label %9, label %7
4999 ///  ...
5000 ///  ; <label>:7                                       ; preds = %entry
5001 ///  %8 = getelementptr inbounds %C* %4, i64 0, i32 0
5002 ///  ...
5003 ///
5004 /// can be transformed to
5005 ///
5006 ///  %5 = icmp eq %C* %0, null
5007 ///  %6 = select i1 %3, i1 %5, i1 true
5008 ///  br i1 %6, label %9, label %7
5009 ///  ...
5010 ///  ; <label>:7                                       ; preds = %entry
5011 ///  %8 = getelementptr inbounds %C* %0, i64 0, i32 0  // replace by %0!
5012 ///
5013 /// Similar when the first operand of the select is a constant or/and
5014 /// the compare is for not equal rather than equal.
5015 ///
/// NOTE: This function is only called when the select and compare constants
/// are equal, and the optimization works only for EQ predicates. This is not
/// a major restriction, since an NE compare should be 'normalized' to an EQ
/// compare; that usually happens in the combiner, and the test case
/// select-cmp-br.ll checks for it.
5021 bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI,
5022                                                  const ICmpInst *Icmp,
5023                                                  const unsigned SIOpd) {
5024   assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
5025   if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
5026     BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
5027     // The check for the single predecessor is not the best that can be
5028     // done. But it protects efficiently against cases like when SI's
5029     // home block has two successors, Succ and Succ1, and Succ1 predecessor
5030     // of Succ. Then SI can't be replaced by SIOpd because the use that gets
5031     // replaced can be reached on either path. So the uniqueness check
5032     // guarantees that the path all uses of SI (outside SI's parent) are on
5033     // is disjoint from all other paths out of SI. But that information
5034     // is more expensive to compute, and the trade-off here is in favor
5035     // of compile-time. It should also be noticed that we check for a single
5036     // predecessor and not only uniqueness. This to handle the situation when
5037     // Succ and Succ1 points to the same basic block.
5038     if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
5039       NumSel++;
5040       SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
5041       return true;
5042     }
5043   }
5044   return false;
5045 }
5046 
/// Try to fold the comparison based on range information we can get by checking
/// whether bits are known to be zero or one in the inputs.
///
/// \param I Compare to simplify. The bit width is taken from the scalar
///          integer type of its operands, or from the data layout's pointer
///          size when the operands are not integers.
/// \return \p I itself when SimplifyDemandedBits reported a change for one of
/// its operands, the result of replaceInstUsesWith() when the compare folds to
/// a constant, a newly created ICmpInst when an equivalent simpler compare was
/// found, or nullptr when none of the folds apply.
Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = Op0->getType();
  ICmpInst::Predicate Pred = I.getPredicate();

  // Get scalar or pointer size.
  unsigned BitWidth = Ty->isIntOrIntVectorTy()
                          ? Ty->getScalarSizeInBits()
                          : DL.getPointerTypeSizeInBits(Ty->getScalarType());

  // Nothing can be said about a zero-width value.
  if (!BitWidth)
    return nullptr;

  KnownBits Op0Known(BitWidth);
  KnownBits Op1Known(BitWidth);

  // Compute known bits for the LHS, demanding only the bits selected by
  // getDemandedBitsLHSMask(). If the call reports a simplification, return the
  // compare itself rather than a new instruction.
  if (SimplifyDemandedBits(&I, 0,
                           getDemandedBitsLHSMask(I, BitWidth),
                           Op0Known, 0))
    return &I;

  // Same for the RHS, except that every bit is demanded.
  if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
                           Op1Known, 0))
    return &I;

  // Given the known and unknown bits, compute a range that the LHS could be
  // in.  Compute the Min, Max and RHS values based on the known bits. For the
  // EQ and NE we use unsigned values.
  APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
  APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
  if (I.isSigned()) {
    Op0Min = Op0Known.getSignedMinValue();
    Op0Max = Op0Known.getSignedMaxValue();
    Op1Min = Op1Known.getSignedMinValue();
    Op1Max = Op1Known.getSignedMaxValue();
  } else {
    Op0Min = Op0Known.getMinValue();
    Op0Max = Op0Known.getMaxValue();
    Op1Min = Op1Known.getMinValue();
    Op1Max = Op1Known.getMaxValue();
  }

  // If Min and Max are known to be the same, then SimplifyDemandedBits figured
  // out that the LHS or RHS is a constant. Constant fold this now, so that
  // code below can assume that Min != Max.
  if (!isa<Constant>(Op0) && Op0Min == Op0Max)
    return new ICmpInst(Pred, ConstantExpr::getIntegerValue(Ty, Op0Min), Op1);
  if (!isa<Constant>(Op1) && Op1Min == Op1Max)
    return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));

  // Based on the range information we know about the LHS, see if we can
  // simplify this comparison.  For example, (x&4) < 8 is always true.
  switch (Pred) {
  default:
    llvm_unreachable("Unknown icmp opcode!");
  case ICmpInst::ICMP_EQ:
  case ICmpInst::ICMP_NE: {
    // Disjoint ranges: the operands can never be equal, so EQ is false and NE
    // is true.
    if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
      return replaceInstUsesWith(
          I, ConstantInt::getBool(I.getType(), Pred == CmpInst::ICMP_NE));

    // If all bits are known zero except for one, then we know at most one bit
    // is set. If the comparison is against zero, then this is a check to see if
    // *that* bit is set.
    APInt Op0KnownZeroInverted = ~Op0Known.Zero;
    if (Op1Known.isZero()) {
      // If the LHS is an AND with the same constant, look through it.
      Value *LHS = nullptr;
      const APInt *LHSC;
      if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) ||
          *LHSC != Op0KnownZeroInverted)
        LHS = Op0;

      Value *X;
      if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
        APInt ValToCheck = Op0KnownZeroInverted;
        Type *XTy = X->getType();
        if (ValToCheck.isPowerOf2()) {
          // ((1 << X) & 8) == 0 -> X != 3
          // ((1 << X) & 8) != 0 -> X == 3
          auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
          auto NewPred = ICmpInst::getInversePredicate(Pred);
          return new ICmpInst(NewPred, X, CmpC);
        } else if ((++ValToCheck).isPowerOf2()) {
          // The possibly-set bits form a low mask (mask + 1 is a power of 2).
          // ((1 << X) & 7) == 0 -> X >= 3
          // ((1 << X) & 7) != 0 -> X  < 3
          auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
          auto NewPred =
              Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
          return new ICmpInst(NewPred, X, CmpC);
        }
      }

      // Check if the LHS is 8 >>u x and the result is a power of 2 like 1.
      const APInt *CI;
      if (Op0KnownZeroInverted.isOneValue() &&
          match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
        // ((8 >>u X) & 1) == 0 -> X != 3
        // ((8 >>u X) & 1) != 0 -> X == 3
        unsigned CmpVal = CI->countTrailingZeros();
        auto NewPred = ICmpInst::getInversePredicate(Pred);
        return new ICmpInst(NewPred, X, ConstantInt::get(X->getType(), CmpVal));
      }
    }
    break;
  }
  case ICmpInst::ICMP_ULT: {
    if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);

    const APInt *CmpC;
    if (match(Op1, m_APInt(CmpC))) {
      // A <u C -> A == C-1 if min(A)+1 == C
      if (*CmpC == Op0Min + 1)
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            ConstantInt::get(Op1->getType(), *CmpC - 1));
      // X <u C --> X == 0, if the number of zero bits in the bottom of X
      // exceeds the log2 of C.
      if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            Constant::getNullValue(Op1->getType()));
    }
    break;
  }
  case ICmpInst::ICMP_UGT: {
    if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);

    const APInt *CmpC;
    if (match(Op1, m_APInt(CmpC))) {
      // A >u C -> A == C+1 if max(A)-1 == C
      if (*CmpC == Op0Max - 1)
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            ConstantInt::get(Op1->getType(), *CmpC + 1));
      // X >u C --> X != 0, if the number of zero bits in the bottom of X
      // exceeds the log2 of C.
      if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
        return new ICmpInst(ICmpInst::ICMP_NE, Op0,
                            Constant::getNullValue(Op1->getType()));
    }
    break;
  }
  case ICmpInst::ICMP_SLT: {
    if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
    const APInt *CmpC;
    if (match(Op1, m_APInt(CmpC))) {
      if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            ConstantInt::get(Op1->getType(), *CmpC - 1));
    }
    break;
  }
  case ICmpInst::ICMP_SGT: {
    if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
    const APInt *CmpC;
    if (match(Op1, m_APInt(CmpC))) {
      if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            ConstantInt::get(Op1->getType(), *CmpC + 1));
    }
    break;
  }
  // The non-strict predicates below assert that the RHS is not a ConstantInt;
  // such compares are expected to have been rewritten before reaching here.
  case ICmpInst::ICMP_SGE:
    assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
    if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Min == Op0Max) // A >=s B -> A == B if max(A) == min(B)
      return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
    break;
  case ICmpInst::ICMP_SLE:
    assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
    if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Max == Op0Min) // A <=s B -> A == B if min(A) == max(B)
      return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
    break;
  case ICmpInst::ICMP_UGE:
    assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
    if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Min == Op0Max) // A >=u B -> A == B if max(A) == min(B)
      return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
    break;
  case ICmpInst::ICMP_ULE:
    assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
    if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Max == Op0Min) // A <=u B -> A == B if min(A) == max(B)
      return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
    break;
  }

  // Turn a signed comparison into an unsigned one if both operands are known to
  // have the same sign.
  if (I.isSigned() &&
      ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) ||
       (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
    return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);

  return nullptr;
}
5276 
5277 llvm::Optional<std::pair<CmpInst::Predicate, Constant *>>
5278 InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
5279                                                        Constant *C) {
5280   assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
5281          "Only for relational integer predicates.");
5282 
5283   Type *Type = C->getType();
5284   bool IsSigned = ICmpInst::isSigned(Pred);
5285 
5286   CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
5287   bool WillIncrement =
5288       UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;
5289 
5290   // Check if the constant operand can be safely incremented/decremented
5291   // without overflowing/underflowing.
5292   auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
5293     return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
5294   };
5295 
5296   Constant *SafeReplacementConstant = nullptr;
5297   if (auto *CI = dyn_cast<ConstantInt>(C)) {
5298     // Bail out if the constant can't be safely incremented/decremented.
5299     if (!ConstantIsOk(CI))
5300       return llvm::None;
5301   } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) {
5302     unsigned NumElts = FVTy->getNumElements();
5303     for (unsigned i = 0; i != NumElts; ++i) {
5304       Constant *Elt = C->getAggregateElement(i);
5305       if (!Elt)
5306         return llvm::None;
5307 
5308       if (isa<UndefValue>(Elt))
5309         continue;
5310 
5311       // Bail out if we can't determine if this constant is min/max or if we
5312       // know that this constant is min/max.
5313       auto *CI = dyn_cast<ConstantInt>(Elt);
5314       if (!CI || !ConstantIsOk(CI))
5315         return llvm::None;
5316 
5317       if (!SafeReplacementConstant)
5318         SafeReplacementConstant = CI;
5319     }
5320   } else {
5321     // ConstantExpr?
5322     return llvm::None;
5323   }
5324 
5325   // It may not be safe to change a compare predicate in the presence of
5326   // undefined elements, so replace those elements with the first safe constant
5327   // that we found.
5328   // TODO: in case of poison, it is safe; let's replace undefs only.
5329   if (C->containsUndefOrPoisonElement()) {
5330     assert(SafeReplacementConstant && "Replacement constant not set");
5331     C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
5332   }
5333 
5334   CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);
5335 
5336   // Increment or decrement the constant.
5337   Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true);
5338   Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne);
5339 
5340   return std::make_pair(NewPred, NewC);
5341 }
5342 
5343 /// If we have an icmp le or icmp ge instruction with a constant operand, turn
5344 /// it into the appropriate icmp lt or icmp gt instruction. This transform
5345 /// allows them to be folded in visitICmpInst.
5346 static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
5347   ICmpInst::Predicate Pred = I.getPredicate();
5348   if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) ||
5349       InstCombiner::isCanonicalPredicate(Pred))
5350     return nullptr;
5351 
5352   Value *Op0 = I.getOperand(0);
5353   Value *Op1 = I.getOperand(1);
5354   auto *Op1C = dyn_cast<Constant>(Op1);
5355   if (!Op1C)
5356     return nullptr;
5357 
5358   auto FlippedStrictness =
5359       InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, Op1C);
5360   if (!FlippedStrictness)
5361     return nullptr;
5362 
5363   return new ICmpInst(FlippedStrictness->first, Op0, FlippedStrictness->second);
5364 }
5365 
5366 /// If we have a comparison with a non-canonical predicate, if we can update
5367 /// all the users, invert the predicate and adjust all the users.
5368 CmpInst *InstCombinerImpl::canonicalizeICmpPredicate(CmpInst &I) {
5369   // Is the predicate already canonical?
5370   CmpInst::Predicate Pred = I.getPredicate();
5371   if (InstCombiner::isCanonicalPredicate(Pred))
5372     return nullptr;
5373 
5374   // Can all users be adjusted to predicate inversion?
5375   if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
5376     return nullptr;
5377 
5378   // Ok, we can canonicalize comparison!
5379   // Let's first invert the comparison's predicate.
5380   I.setPredicate(CmpInst::getInversePredicate(Pred));
5381   I.setName(I.getName() + ".not");
5382 
5383   // And, adapt users.
5384   freelyInvertAllUsersOf(&I);
5385 
5386   return &I;
5387 }
5388 
5389 /// Integer compare with boolean values can always be turned into bitwise ops.
5390 static Instruction *canonicalizeICmpBool(ICmpInst &I,
5391                                          InstCombiner::BuilderTy &Builder) {
5392   Value *A = I.getOperand(0), *B = I.getOperand(1);
5393   assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only");
5394 
5395   // A boolean compared to true/false can be simplified to Op0/true/false in
5396   // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
5397   // Cases not handled by InstSimplify are always 'not' of Op0.
5398   if (match(B, m_Zero())) {
5399     switch (I.getPredicate()) {
5400       case CmpInst::ICMP_EQ:  // A ==   0 -> !A
5401       case CmpInst::ICMP_ULE: // A <=u  0 -> !A
5402       case CmpInst::ICMP_SGE: // A >=s  0 -> !A
5403         return BinaryOperator::CreateNot(A);
5404       default:
5405         llvm_unreachable("ICmp i1 X, C not simplified as expected.");
5406     }
5407   } else if (match(B, m_One())) {
5408     switch (I.getPredicate()) {
5409       case CmpInst::ICMP_NE:  // A !=  1 -> !A
5410       case CmpInst::ICMP_ULT: // A <u  1 -> !A
5411       case CmpInst::ICMP_SGT: // A >s -1 -> !A
5412         return BinaryOperator::CreateNot(A);
5413       default:
5414         llvm_unreachable("ICmp i1 X, C not simplified as expected.");
5415     }
5416   }
5417 
5418   switch (I.getPredicate()) {
5419   default:
5420     llvm_unreachable("Invalid icmp instruction!");
5421   case ICmpInst::ICMP_EQ:
5422     // icmp eq i1 A, B -> ~(A ^ B)
5423     return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
5424 
5425   case ICmpInst::ICMP_NE:
5426     // icmp ne i1 A, B -> A ^ B
5427     return BinaryOperator::CreateXor(A, B);
5428 
5429   case ICmpInst::ICMP_UGT:
5430     // icmp ugt -> icmp ult
5431     std::swap(A, B);
5432     LLVM_FALLTHROUGH;
5433   case ICmpInst::ICMP_ULT:
5434     // icmp ult i1 A, B -> ~A & B
5435     return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
5436 
5437   case ICmpInst::ICMP_SGT:
5438     // icmp sgt -> icmp slt
5439     std::swap(A, B);
5440     LLVM_FALLTHROUGH;
5441   case ICmpInst::ICMP_SLT:
5442     // icmp slt i1 A, B -> A & ~B
5443     return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
5444 
5445   case ICmpInst::ICMP_UGE:
5446     // icmp uge -> icmp ule
5447     std::swap(A, B);
5448     LLVM_FALLTHROUGH;
5449   case ICmpInst::ICMP_ULE:
5450     // icmp ule i1 A, B -> ~A | B
5451     return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
5452 
5453   case ICmpInst::ICMP_SGE:
5454     // icmp sge -> icmp sle
5455     std::swap(A, B);
5456     LLVM_FALLTHROUGH;
5457   case ICmpInst::ICMP_SLE:
5458     // icmp sle i1 A, B -> A | ~B
5459     return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
5460   }
5461 }
5462 
5463 // Transform pattern like:
5464 //   (1 << Y) u<= X  or  ~(-1 << Y) u<  X  or  ((1 << Y)+(-1)) u<  X
5465 //   (1 << Y) u>  X  or  ~(-1 << Y) u>= X  or  ((1 << Y)+(-1)) u>= X
5466 // Into:
5467 //   (X l>> Y) != 0
5468 //   (X l>> Y) == 0
5469 static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp,
5470                                             InstCombiner::BuilderTy &Builder) {
5471   ICmpInst::Predicate Pred, NewPred;
5472   Value *X, *Y;
5473   if (match(&Cmp,
5474             m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) {
5475     switch (Pred) {
5476     case ICmpInst::ICMP_ULE:
5477       NewPred = ICmpInst::ICMP_NE;
5478       break;
5479     case ICmpInst::ICMP_UGT:
5480       NewPred = ICmpInst::ICMP_EQ;
5481       break;
5482     default:
5483       return nullptr;
5484     }
5485   } else if (match(&Cmp, m_c_ICmp(Pred,
5486                                   m_OneUse(m_CombineOr(
5487                                       m_Not(m_Shl(m_AllOnes(), m_Value(Y))),
5488                                       m_Add(m_Shl(m_One(), m_Value(Y)),
5489                                             m_AllOnes()))),
5490                                   m_Value(X)))) {
5491     // The variant with 'add' is not canonical, (the variant with 'not' is)
5492     // we only get it because it has extra uses, and can't be canonicalized,
5493 
5494     switch (Pred) {
5495     case ICmpInst::ICMP_ULT:
5496       NewPred = ICmpInst::ICMP_NE;
5497       break;
5498     case ICmpInst::ICMP_UGE:
5499       NewPred = ICmpInst::ICMP_EQ;
5500       break;
5501     default:
5502       return nullptr;
5503     }
5504   } else
5505     return nullptr;
5506 
5507   Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits");
5508   Constant *Zero = Constant::getNullValue(NewX->getType());
5509   return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero);
5510 }
5511 
5512 static Instruction *foldVectorCmp(CmpInst &Cmp,
5513                                   InstCombiner::BuilderTy &Builder) {
5514   const CmpInst::Predicate Pred = Cmp.getPredicate();
5515   Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
5516   Value *V1, *V2;
5517   ArrayRef<int> M;
5518   if (!match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(M))))
5519     return nullptr;
5520 
5521   // If both arguments of the cmp are shuffles that use the same mask and
5522   // shuffle within a single vector, move the shuffle after the cmp:
5523   // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M
5524   Type *V1Ty = V1->getType();
5525   if (match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(M))) &&
5526       V1Ty == V2->getType() && (LHS->hasOneUse() || RHS->hasOneUse())) {
5527     Value *NewCmp = Builder.CreateCmp(Pred, V1, V2);
5528     return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M);
5529   }
5530 
5531   // Try to canonicalize compare with splatted operand and splat constant.
5532   // TODO: We could generalize this for more than splats. See/use the code in
5533   //       InstCombiner::foldVectorBinop().
5534   Constant *C;
5535   if (!LHS->hasOneUse() || !match(RHS, m_Constant(C)))
5536     return nullptr;
5537 
5538   // Length-changing splats are ok, so adjust the constants as needed:
5539   // cmp (shuffle V1, M), C --> shuffle (cmp V1, C'), M
5540   Constant *ScalarC = C->getSplatValue(/* AllowUndefs */ true);
5541   int MaskSplatIndex;
5542   if (ScalarC && match(M, m_SplatOrUndefMask(MaskSplatIndex))) {
5543     // We allow undefs in matching, but this transform removes those for safety.
5544     // Demanded elements analysis should be able to recover some/all of that.
5545     C = ConstantVector::getSplat(cast<VectorType>(V1Ty)->getElementCount(),
5546                                  ScalarC);
5547     SmallVector<int, 8> NewM(M.size(), MaskSplatIndex);
5548     Value *NewCmp = Builder.CreateCmp(Pred, V1, C);
5549     return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()),
5550                                  NewM);
5551   }
5552 
5553   return nullptr;
5554 }
5555 
5556 // extract(uadd.with.overflow(A, B), 0) ult A
5557 //  -> extract(uadd.with.overflow(A, B), 1)
5558 static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
5559   CmpInst::Predicate Pred = I.getPredicate();
5560   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
5561 
5562   Value *UAddOv;
5563   Value *A, *B;
5564   auto UAddOvResultPat = m_ExtractValue<0>(
5565       m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A), m_Value(B)));
5566   if (match(Op0, UAddOvResultPat) &&
5567       ((Pred == ICmpInst::ICMP_ULT && (Op1 == A || Op1 == B)) ||
5568        (Pred == ICmpInst::ICMP_EQ && match(Op1, m_ZeroInt()) &&
5569         (match(A, m_One()) || match(B, m_One()))) ||
5570        (Pred == ICmpInst::ICMP_NE && match(Op1, m_AllOnes()) &&
5571         (match(A, m_AllOnes()) || match(B, m_AllOnes())))))
5572     // extract(uadd.with.overflow(A, B), 0) < A
5573     // extract(uadd.with.overflow(A, 1), 0) == 0
5574     // extract(uadd.with.overflow(A, -1), 0) != -1
5575     UAddOv = cast<ExtractValueInst>(Op0)->getAggregateOperand();
5576   else if (match(Op1, UAddOvResultPat) &&
5577            Pred == ICmpInst::ICMP_UGT && (Op0 == A || Op0 == B))
5578     // A > extract(uadd.with.overflow(A, B), 0)
5579     UAddOv = cast<ExtractValueInst>(Op1)->getAggregateOperand();
5580   else
5581     return nullptr;
5582 
5583   return ExtractValueInst::Create(UAddOv, 1);
5584 }
5585 
5586 Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
5587   bool Changed = false;
5588   const SimplifyQuery Q = SQ.getWithInstruction(&I);
5589   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
5590   unsigned Op0Cplxity = getComplexity(Op0);
5591   unsigned Op1Cplxity = getComplexity(Op1);
5592 
5593   /// Orders the operands of the compare so that they are listed from most
5594   /// complex to least complex.  This puts constants before unary operators,
5595   /// before binary operators.
5596   if (Op0Cplxity < Op1Cplxity ||
5597       (Op0Cplxity == Op1Cplxity && swapMayExposeCSEOpportunities(Op0, Op1))) {
5598     I.swapOperands();
5599     std::swap(Op0, Op1);
5600     Changed = true;
5601   }
5602 
5603   if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
5604     return replaceInstUsesWith(I, V);
5605 
5606   // Comparing -val or val with non-zero is the same as just comparing val
5607   // ie, abs(val) != 0 -> val != 0
5608   if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) {
5609     Value *Cond, *SelectTrue, *SelectFalse;
5610     if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
5611                             m_Value(SelectFalse)))) {
5612       if (Value *V = dyn_castNegVal(SelectTrue)) {
5613         if (V == SelectFalse)
5614           return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
5615       }
5616       else if (Value *V = dyn_castNegVal(SelectFalse)) {
5617         if (V == SelectTrue)
5618           return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
5619       }
5620     }
5621   }
5622 
5623   if (Op0->getType()->isIntOrIntVectorTy(1))
5624     if (Instruction *Res = canonicalizeICmpBool(I, Builder))
5625       return Res;
5626 
5627   if (Instruction *Res = canonicalizeCmpWithConstant(I))
5628     return Res;
5629 
5630   if (Instruction *Res = canonicalizeICmpPredicate(I))
5631     return Res;
5632 
5633   if (Instruction *Res = foldICmpWithConstant(I))
5634     return Res;
5635 
5636   if (Instruction *Res = foldICmpWithDominatingICmp(I))
5637     return Res;
5638 
5639   if (Instruction *Res = foldICmpBinOp(I, Q))
5640     return Res;
5641 
5642   if (Instruction *Res = foldICmpUsingKnownBits(I))
5643     return Res;
5644 
5645   // Test if the ICmpInst instruction is used exclusively by a select as
5646   // part of a minimum or maximum operation. If so, refrain from doing
5647   // any other folding. This helps out other analyses which understand
5648   // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
5649   // and CodeGen. And in this case, at least one of the comparison
5650   // operands has at least one user besides the compare (the select),
5651   // which would often largely negate the benefit of folding anyway.
5652   //
5653   // Do the same for the other patterns recognized by matchSelectPattern.
5654   if (I.hasOneUse())
5655     if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
5656       Value *A, *B;
5657       SelectPatternResult SPR = matchSelectPattern(SI, A, B);
5658       if (SPR.Flavor != SPF_UNKNOWN)
5659         return nullptr;
5660     }
5661 
5662   // Do this after checking for min/max to prevent infinite looping.
5663   if (Instruction *Res = foldICmpWithZero(I))
5664     return Res;
5665 
5666   // FIXME: We only do this after checking for min/max to prevent infinite
5667   // looping caused by a reverse canonicalization of these patterns for min/max.
5668   // FIXME: The organization of folds is a mess. These would naturally go into
5669   // canonicalizeCmpWithConstant(), but we can't move all of the above folds
5670   // down here after the min/max restriction.
5671   ICmpInst::Predicate Pred = I.getPredicate();
5672   const APInt *C;
5673   if (match(Op1, m_APInt(C))) {
5674     // For i32: x >u 2147483647 -> x <s 0  -> true if sign bit set
5675     if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) {
5676       Constant *Zero = Constant::getNullValue(Op0->getType());
5677       return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero);
5678     }
5679 
5680     // For i32: x <u 2147483648 -> x >s -1  -> true if sign bit clear
5681     if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) {
5682       Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
5683       return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
5684     }
5685   }
5686 
5687   if (Instruction *Res = foldICmpInstWithConstant(I))
5688     return Res;
5689 
5690   // Try to match comparison as a sign bit test. Intentionally do this after
5691   // foldICmpInstWithConstant() to potentially let other folds to happen first.
5692   if (Instruction *New = foldSignBitTest(I))
5693     return New;
5694 
5695   if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
5696     return Res;
5697 
5698   // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
5699   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
5700     if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
5701       return NI;
5702   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
5703     if (Instruction *NI = foldGEPICmp(GEP, Op0,
5704                            ICmpInst::getSwappedPredicate(I.getPredicate()), I))
5705       return NI;
5706 
5707   // Try to optimize equality comparisons against alloca-based pointers.
5708   if (Op0->getType()->isPointerTy() && I.isEquality()) {
5709     assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
5710     if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0)))
5711       if (Instruction *New = foldAllocaCmp(I, Alloca, Op1))
5712         return New;
5713     if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1)))
5714       if (Instruction *New = foldAllocaCmp(I, Alloca, Op0))
5715         return New;
5716   }
5717 
5718   if (Instruction *Res = foldICmpBitCast(I, Builder))
5719     return Res;
5720 
5721   // TODO: Hoist this above the min/max bailout.
5722   if (Instruction *R = foldICmpWithCastOp(I))
5723     return R;
5724 
5725   if (Instruction *Res = foldICmpWithMinMax(I))
5726     return Res;
5727 
5728   {
5729     Value *A, *B;
5730     // Transform (A & ~B) == 0 --> (A & B) != 0
5731     // and       (A & ~B) != 0 --> (A & B) == 0
5732     // if A is a power of 2.
5733     if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
5734         match(Op1, m_Zero()) &&
5735         isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
5736       return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
5737                           Op1);
5738 
5739     // ~X < ~Y --> Y < X
5740     // ~X < C -->  X > ~C
5741     if (match(Op0, m_Not(m_Value(A)))) {
5742       if (match(Op1, m_Not(m_Value(B))))
5743         return new ICmpInst(I.getPredicate(), B, A);
5744 
5745       const APInt *C;
5746       if (match(Op1, m_APInt(C)))
5747         return new ICmpInst(I.getSwappedPredicate(), A,
5748                             ConstantInt::get(Op1->getType(), ~(*C)));
5749     }
5750 
5751     Instruction *AddI = nullptr;
5752     if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
5753                                      m_Instruction(AddI))) &&
5754         isa<IntegerType>(A->getType())) {
5755       Value *Result;
5756       Constant *Overflow;
5757       // m_UAddWithOverflow can match patterns that do not include  an explicit
5758       // "add" instruction, so check the opcode of the matched op.
5759       if (AddI->getOpcode() == Instruction::Add &&
5760           OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, A, B, *AddI,
5761                                 Result, Overflow)) {
5762         replaceInstUsesWith(*AddI, Result);
5763         eraseInstFromFunction(*AddI);
5764         return replaceInstUsesWith(I, Overflow);
5765       }
5766     }
5767 
5768     // (zext a) * (zext b)  --> llvm.umul.with.overflow.
5769     if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
5770       if (Instruction *R = processUMulZExtIdiom(I, Op0, Op1, *this))
5771         return R;
5772     }
5773     if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
5774       if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this))
5775         return R;
5776     }
5777   }
5778 
5779   if (Instruction *Res = foldICmpEquality(I))
5780     return Res;
5781 
5782   if (Instruction *Res = foldICmpOfUAddOv(I))
5783     return Res;
5784 
5785   // The 'cmpxchg' instruction returns an aggregate containing the old value and
5786   // an i1 which indicates whether or not we successfully did the swap.
5787   //
5788   // Replace comparisons between the old value and the expected value with the
5789   // indicator that 'cmpxchg' returns.
5790   //
5791   // N.B.  This transform is only valid when the 'cmpxchg' is not permitted to
5792   // spuriously fail.  In those cases, the old value may equal the expected
5793   // value but it is possible for the swap to not occur.
5794   if (I.getPredicate() == ICmpInst::ICMP_EQ)
5795     if (auto *EVI = dyn_cast<ExtractValueInst>(Op0))
5796       if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand()))
5797         if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 &&
5798             !ACXI->isWeak())
5799           return ExtractValueInst::Create(ACXI, 1);
5800 
5801   {
5802     Value *X;
5803     const APInt *C;
5804     // icmp X+Cst, X
5805     if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
5806       return foldICmpAddOpConst(X, *C, I.getPredicate());
5807 
5808     // icmp X, X+Cst
5809     if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
5810       return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
5811   }
5812 
5813   if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
5814     return Res;
5815 
5816   if (I.getType()->isVectorTy())
5817     if (Instruction *Res = foldVectorCmp(I, Builder))
5818       return Res;
5819 
5820   return Changed ? &I : nullptr;
5821 }
5822 
/// Fold fcmp ([us]itofp x, cst) if possible.
///
/// \param I    The fcmp whose left operand is \p LHSI and right operand is
///             \p RHSC.
/// \param LHSI The int-to-FP conversion feeding the compare. It is treated as
///             unsigned when it is a UIToFPInst and as signed otherwise.
/// \param RHSC The constant being compared against. Anything that is not a
///             ConstantFP is rejected.
/// \returns Either the result of replacing \p I with a constant true/false, a
///          new integer compare of the un-converted operand against an integer
///          constant, or nullptr when no fold is performed.
Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I,
                                                    Instruction *LHSI,
                                                    Constant *RHSC) {
  if (!isa<ConstantFP>(RHSC)) return nullptr;
  const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();

  // Get the width of the mantissa.  We don't want to hack on conversions that
  // might lose information from the integer, e.g. "i64 -> float"
  int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
  if (MantissaWidth == -1) return nullptr;  // Unknown.

  // The integer type of the value being converted (operand 0 of the cast).
  IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());

  // Signedness of the conversion decides which integer predicates and which
  // integer range limits are used below.
  bool LHSUnsigned = isa<UIToFPInst>(LHSI);

  if (I.isEquality()) {
    FCmpInst::Predicate P = I.getPredicate();
    bool IsExact = false;
    APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned);
    // Only IsExact is consumed from this conversion; RHSCvt itself is not used
    // afterwards.
    RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact);

    // If the floating point constant isn't an integer value, we know if we will
    // ever compare equal / not equal to it.
    if (!IsExact) {
      // TODO: Can never be -0.0 and other non-representable values
      APFloat RHSRoundInt(RHS);
      RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
      // A constant that changes when rounded to an integral value is
      // fractional: equality folds to false, inequality folds to true.
      if (RHS != RHSRoundInt) {
        if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
          return replaceInstUsesWith(I, Builder.getFalse());

        assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
        return replaceInstUsesWith(I, Builder.getTrue());
      }
    }

    // TODO: If the constant is exactly representable, is it always OK to do
    // equality compares as integer?
  }

  // Check to see that the input is converted from an integer type that is small
  // enough that preserves all bits.  TODO: check here for "known" sign bits.
  // This would allow us to handle (sitofp (x >>s 62) to float) if x is i64 f.e.
  unsigned InputSize = IntTy->getScalarSizeInBits();

  // Following test does NOT adjust InputSize downwards for signed inputs,
  // because the most negative value still requires all the mantissa bits
  // to distinguish it from one less than that value.
  if ((int)InputSize > MantissaWidth) {
    // Conversion would lose accuracy. Check if loss can impact comparison.
    int Exp = ilogb(RHS);
    if (Exp == APFloat::IEK_Inf) {
      // RHS is an infinity: compare the exponent of the largest finite value
      // of this format with the integer width (minus one when signed).
      int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
      if (MaxExponent < (int)InputSize - !LHSUnsigned)
        // Conversion could create infinity.
        return nullptr;
    } else {
      // Note that if RHS is zero or NaN, then Exp is negative
      // and first condition is trivially false.
      if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
        // Conversion could affect comparison.
        return nullptr;
    }
  }

  // Otherwise, we can potentially simplify the comparison.  We know that it
  // will always come through as an integer value and we know the constant is
  // not a NAN (it would have been previously simplified).
  assert(!RHS.isNaN() && "NaN comparison not already folded!");

  // Map the FP predicate onto the integer predicate with the same ordering
  // meaning. Ordered and unordered variants map to the same integer predicate;
  // ORD and UNO fold directly to true and false.
  ICmpInst::Predicate Pred;
  switch (I.getPredicate()) {
  default: llvm_unreachable("Unexpected predicate!");
  case FCmpInst::FCMP_UEQ:
  case FCmpInst::FCMP_OEQ:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case FCmpInst::FCMP_UGT:
  case FCmpInst::FCMP_OGT:
    Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
    break;
  case FCmpInst::FCMP_UGE:
  case FCmpInst::FCMP_OGE:
    Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
    break;
  case FCmpInst::FCMP_ULT:
  case FCmpInst::FCMP_OLT:
    Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
    break;
  case FCmpInst::FCMP_ULE:
  case FCmpInst::FCMP_OLE:
    Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
    break;
  case FCmpInst::FCMP_UNE:
  case FCmpInst::FCMP_ONE:
    Pred = ICmpInst::ICMP_NE;
    break;
  case FCmpInst::FCMP_ORD:
    return replaceInstUsesWith(I, Builder.getTrue());
  case FCmpInst::FCMP_UNO:
    return replaceInstUsesWith(I, Builder.getFalse());
  }

  // Now we know that the APFloat is a normal number, zero or inf.

  // See if the FP constant is too large for the integer.  For example,
  // comparing an i8 to 300.0.
  unsigned IntWidth = IntTy->getScalarSizeInBits();

  if (!LHSUnsigned) {
    // If the RHS value is > SignedMax, fold the comparison.  This handles +INF
    // and large values.
    APFloat SMax(RHS.getSemantics());
    SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
                          APFloat::rmNearestTiesToEven);
    if (SMax < RHS) { // smax < 13123.0
      // Every input is below RHS: "!=", "<" and "<=" are true, the rest false.
      if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_SLT ||
          Pred == ICmpInst::ICMP_SLE)
        return replaceInstUsesWith(I, Builder.getTrue());
      return replaceInstUsesWith(I, Builder.getFalse());
    }
  } else {
    // If the RHS value is > UnsignedMax, fold the comparison. This handles
    // +INF and large values.
    APFloat UMax(RHS.getSemantics());
    UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
                          APFloat::rmNearestTiesToEven);
    if (UMax < RHS) { // umax < 13123.0
      // Every input is below RHS: "!=", "<" and "<=" are true, the rest false.
      if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_ULT ||
          Pred == ICmpInst::ICMP_ULE)
        return replaceInstUsesWith(I, Builder.getTrue());
      return replaceInstUsesWith(I, Builder.getFalse());
    }
  }

  if (!LHSUnsigned) {
    // See if the RHS value is < SignedMin.
    APFloat SMin(RHS.getSemantics());
    SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
                          APFloat::rmNearestTiesToEven);
    if (SMin > RHS) { // smin > 12312.0
      // Every input is above RHS: "!=", ">" and ">=" are true, the rest false.
      if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
          Pred == ICmpInst::ICMP_SGE)
        return replaceInstUsesWith(I, Builder.getTrue());
      return replaceInstUsesWith(I, Builder.getFalse());
    }
  } else {
    // See if the RHS value is < UnsignedMin.
    APFloat UMin(RHS.getSemantics());
    UMin.convertFromAPInt(APInt::getMinValue(IntWidth), false,
                          APFloat::rmNearestTiesToEven);
    if (UMin > RHS) { // umin > 12312.0
      // Every input is above RHS: "!=", ">" and ">=" are true, the rest false.
      if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
          Pred == ICmpInst::ICMP_UGE)
        return replaceInstUsesWith(I, Builder.getTrue());
      return replaceInstUsesWith(I, Builder.getFalse());
    }
  }

  // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
  // [0, UMAX], but it may still be fractional.  See if it is fractional by
  // casting the FP value to the integer value and back, checking for equality.
  // Don't do this for zero, because -0.0 is not fractional.
  Constant *RHSInt = LHSUnsigned
    ? ConstantExpr::getFPToUI(RHSC, IntTy)
    : ConstantExpr::getFPToSI(RHSC, IntTy);
  if (!RHS.isZero()) {
    // Round-trip RHSInt back to FP; a mismatch with RHSC means RHSC had a
    // fractional part.
    bool Equal = LHSUnsigned
      ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
      : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
    if (!Equal) {
      // If we had a comparison against a fractional value, we have to adjust
      // the compare predicate and sometimes the value.  RHSInt holds RHSC
      // rounded towards zero at this point.
      switch (Pred) {
      default: llvm_unreachable("Unexpected integer comparison!");
      case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
        return replaceInstUsesWith(I, Builder.getTrue());
      case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
        return replaceInstUsesWith(I, Builder.getFalse());
      case ICmpInst::ICMP_ULE:
        // (float)int <= 4.4   --> int <= 4
        // (float)int <= -4.4  --> false
        if (RHS.isNegative())
          return replaceInstUsesWith(I, Builder.getFalse());
        break;
      case ICmpInst::ICMP_SLE:
        // (float)int <= 4.4   --> int <= 4
        // (float)int <= -4.4  --> int < -4
        if (RHS.isNegative())
          Pred = ICmpInst::ICMP_SLT;
        break;
      case ICmpInst::ICMP_ULT:
        // (float)int < -4.4   --> false
        // (float)int < 4.4    --> int <= 4
        if (RHS.isNegative())
          return replaceInstUsesWith(I, Builder.getFalse());
        Pred = ICmpInst::ICMP_ULE;
        break;
      case ICmpInst::ICMP_SLT:
        // (float)int < -4.4   --> int < -4
        // (float)int < 4.4    --> int <= 4
        if (!RHS.isNegative())
          Pred = ICmpInst::ICMP_SLE;
        break;
      case ICmpInst::ICMP_UGT:
        // (float)int > 4.4    --> int > 4
        // (float)int > -4.4   --> true
        if (RHS.isNegative())
          return replaceInstUsesWith(I, Builder.getTrue());
        break;
      case ICmpInst::ICMP_SGT:
        // (float)int > 4.4    --> int > 4
        // (float)int > -4.4   --> int >= -4
        if (RHS.isNegative())
          Pred = ICmpInst::ICMP_SGE;
        break;
      case ICmpInst::ICMP_UGE:
        // (float)int >= -4.4   --> true
        // (float)int >= 4.4    --> int > 4
        if (RHS.isNegative())
          return replaceInstUsesWith(I, Builder.getTrue());
        Pred = ICmpInst::ICMP_UGT;
        break;
      case ICmpInst::ICMP_SGE:
        // (float)int >= -4.4   --> int >= -4
        // (float)int >= 4.4    --> int > 4
        if (!RHS.isNegative())
          Pred = ICmpInst::ICMP_SGT;
        break;
      }
    }
  }

  // Lower this FP comparison into an appropriate integer version of the
  // comparison.
  return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
}
6062 
6063 /// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary.
6064 static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
6065                                               Constant *RHSC) {
6066   // When C is not 0.0 and infinities are not allowed:
6067   // (C / X) < 0.0 is a sign-bit test of X
6068   // (C / X) < 0.0 --> X < 0.0 (if C is positive)
6069   // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate)
6070   //
6071   // Proof:
6072   // Multiply (C / X) < 0.0 by X * X / C.
6073   // - X is non zero, if it is the flag 'ninf' is violated.
6074   // - C defines the sign of X * X * C. Thus it also defines whether to swap
6075   //   the predicate. C is also non zero by definition.
6076   //
6077   // Thus X * X / C is non zero and the transformation is valid. [qed]
6078 
6079   FCmpInst::Predicate Pred = I.getPredicate();
6080 
6081   // Check that predicates are valid.
6082   if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
6083       (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
6084     return nullptr;
6085 
6086   // Check that RHS operand is zero.
6087   if (!match(RHSC, m_AnyZeroFP()))
6088     return nullptr;
6089 
6090   // Check fastmath flags ('ninf').
6091   if (!LHSI->hasNoInfs() || !I.hasNoInfs())
6092     return nullptr;
6093 
6094   // Check the properties of the dividend. It must not be zero to avoid a
6095   // division by zero (see Proof).
6096   const APFloat *C;
6097   if (!match(LHSI->getOperand(0), m_APFloat(C)))
6098     return nullptr;
6099 
6100   if (C->isZero())
6101     return nullptr;
6102 
6103   // Get swapped predicate if necessary.
6104   if (C->isNegative())
6105     Pred = I.getSwappedPredicate();
6106 
6107   return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I);
6108 }
6109 
6110 /// Optimize fabs(X) compared with zero.
6111 static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) {
6112   Value *X;
6113   if (!match(I.getOperand(0), m_FAbs(m_Value(X))) ||
6114       !match(I.getOperand(1), m_PosZeroFP()))
6115     return nullptr;
6116 
6117   auto replacePredAndOp0 = [&IC](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
6118     I->setPredicate(P);
6119     return IC.replaceOperand(*I, 0, X);
6120   };
6121 
6122   switch (I.getPredicate()) {
6123   case FCmpInst::FCMP_UGE:
6124   case FCmpInst::FCMP_OLT:
6125     // fabs(X) >= 0.0 --> true
6126     // fabs(X) <  0.0 --> false
6127     llvm_unreachable("fcmp should have simplified");
6128 
6129   case FCmpInst::FCMP_OGT:
6130     // fabs(X) > 0.0 --> X != 0.0
6131     return replacePredAndOp0(&I, FCmpInst::FCMP_ONE, X);
6132 
6133   case FCmpInst::FCMP_UGT:
6134     // fabs(X) u> 0.0 --> X u!= 0.0
6135     return replacePredAndOp0(&I, FCmpInst::FCMP_UNE, X);
6136 
6137   case FCmpInst::FCMP_OLE:
6138     // fabs(X) <= 0.0 --> X == 0.0
6139     return replacePredAndOp0(&I, FCmpInst::FCMP_OEQ, X);
6140 
6141   case FCmpInst::FCMP_ULE:
6142     // fabs(X) u<= 0.0 --> X u== 0.0
6143     return replacePredAndOp0(&I, FCmpInst::FCMP_UEQ, X);
6144 
6145   case FCmpInst::FCMP_OGE:
6146     // fabs(X) >= 0.0 --> !isnan(X)
6147     assert(!I.hasNoNaNs() && "fcmp should have simplified");
6148     return replacePredAndOp0(&I, FCmpInst::FCMP_ORD, X);
6149 
6150   case FCmpInst::FCMP_ULT:
6151     // fabs(X) u< 0.0 --> isnan(X)
6152     assert(!I.hasNoNaNs() && "fcmp should have simplified");
6153     return replacePredAndOp0(&I, FCmpInst::FCMP_UNO, X);
6154 
6155   case FCmpInst::FCMP_OEQ:
6156   case FCmpInst::FCMP_UEQ:
6157   case FCmpInst::FCMP_ONE:
6158   case FCmpInst::FCMP_UNE:
6159   case FCmpInst::FCMP_ORD:
6160   case FCmpInst::FCMP_UNO:
6161     // Look through the fabs() because it doesn't change anything but the sign.
6162     // fabs(X) == 0.0 --> X == 0.0,
6163     // fabs(X) != 0.0 --> X != 0.0
6164     // isnan(fabs(X)) --> isnan(X)
6165     // !isnan(fabs(X) --> !isnan(X)
6166     return replacePredAndOp0(&I, I.getPredicate(), X);
6167 
6168   default:
6169     return nullptr;
6170   }
6171 }
6172 
/// Visit an fcmp instruction and try to simplify or canonicalize it.
///
/// The folds below are attempted in order and the first one that applies
/// decides the return value: a newly created instruction, the result of
/// replaceInstUsesWith / replaceOperand, &I when \p I was only modified in
/// place, or nullptr when nothing changed (or when folding is deliberately
/// skipped for a recognized select pattern).
Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
  bool Changed = false;

  /// Orders the operands of the compare so that they are listed from most
  /// complex to least complex.  This puts constants before unary operators,
  /// before binary operators.
  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
    I.swapOperands();
    Changed = true;
  }

  // Read predicate and operands only after the possible swap above.
  const CmpInst::Predicate Pred = I.getPredicate();
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  if (Value *V = SimplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  // Simplify 'fcmp pred X, X'
  Type *OpType = Op0->getType();
  assert(OpType == Op1->getType() && "fcmp with different-typed operands?");
  if (Op0 == Op1) {
    switch (Pred) {
      default: break;
    case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
    case FCmpInst::FCMP_ULT:    // True if unordered or less than
    case FCmpInst::FCMP_UGT:    // True if unordered or greater than
    case FCmpInst::FCMP_UNE:    // True if unordered or not equal
      // Canonicalize these to be 'fcmp uno %X, 0.0'.
      I.setPredicate(FCmpInst::FCMP_UNO);
      I.setOperand(1, Constant::getNullValue(OpType));
      return &I;

    case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
    case FCmpInst::FCMP_OEQ:    // True if ordered and equal
    case FCmpInst::FCMP_OGE:    // True if ordered and greater than or equal
    case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
      // Canonicalize these to be 'fcmp ord %X, 0.0'.
      I.setPredicate(FCmpInst::FCMP_ORD);
      I.setOperand(1, Constant::getNullValue(OpType));
      return &I;
    }
  }

  // If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
  // then canonicalize the operand to 0.0.
  // The m_PosZeroFP() check skips operands that are already +0.0.
  if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
    if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI))
      return replaceOperand(I, 0, ConstantFP::getNullValue(OpType));

    if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI))
      return replaceOperand(I, 1, ConstantFP::getNullValue(OpType));
  }

  // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
  Value *X, *Y;
  if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
    return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);

  // Test if the FCmpInst instruction is used exclusively by a select as
  // part of a minimum or maximum operation. If so, refrain from doing
  // any other folding. This helps out other analyses which understand
  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
  // and CodeGen. And in this case, at least one of the comparison
  // operands has at least one user besides the compare (the select),
  // which would often largely negate the benefit of folding anyway.
  if (I.hasOneUse())
    if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
      Value *A, *B;
      SelectPatternResult SPR = matchSelectPattern(SI, A, B);
      // Note: this returns nullptr even if the operands were swapped above.
      if (SPR.Flavor != SPF_UNKNOWN)
        return nullptr;
    }

  // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
  // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
  if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP()))
    return replaceOperand(I, 1, ConstantFP::getNullValue(OpType));

  // Handle fcmp with instruction LHS and constant RHS.
  Instruction *LHSI;
  Constant *RHSC;
  if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) {
    switch (LHSI->getOpcode()) {
    case Instruction::PHI:
      // Only fold fcmp into the PHI if the phi and fcmp are in the same
      // block.  If in the same block, we're encouraging jump threading.  If
      // not, we are just pessimizing the code by making an i1 phi.
      if (LHSI->getParent() == I.getParent())
        if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
          return NV;
      break;
    case Instruction::SIToFP:
    case Instruction::UIToFP:
      // fcmp ([us]itofp x), cst: try to turn it into an integer compare.
      if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
        return NV;
      break;
    case Instruction::FDiv:
      // fcmp (C / X), 0.0: try to reduce it to a compare of X against zero.
      if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC))
        return NV;
      break;
    case Instruction::Load:
      // Non-volatile load through a GEP whose base is a constant global with a
      // definitive initializer: try to fold the compare of the loaded value.
      if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
        if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
          if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
              !cast<LoadInst>(LHSI)->isVolatile())
            if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
              return Res;
      break;
  }
  }

  // fabs(X) compared with +0.0.
  if (Instruction *R = foldFabsWithFcmpZero(I, *this))
    return R;

  if (match(Op0, m_FNeg(m_Value(X)))) {
    // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
    Constant *C;
    if (match(Op1, m_Constant(C))) {
      Constant *NegC = ConstantExpr::getFNeg(C);
      return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I);
    }
  }

  if (match(Op0, m_FPExt(m_Value(X)))) {
    // fcmp (fpext X), (fpext Y) -> fcmp X, Y
    // Only valid when both sources have the same (narrower) type.
    if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
      return new FCmpInst(Pred, X, Y, "", &I);

    // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
    const APFloat *C;
    if (match(Op1, m_APFloat(C))) {
      const fltSemantics &FPSem =
          X->getType()->getScalarType()->getFltSemantics();
      bool Lossy;
      // Convert a copy of C to X's format; Lossy reports whether information
      // was lost in the conversion.
      APFloat TruncC = *C;
      TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy);

      // Avoid lossy conversions and denormals.
      // Zero is a special case that's OK to convert.
      APFloat Fabs = TruncC;
      Fabs.clearSign();
      if (!Lossy &&
          (!(Fabs < APFloat::getSmallestNormalized(FPSem)) || Fabs.isZero())) {
        Constant *NewC = ConstantFP::get(X->getType(), TruncC);
        return new FCmpInst(Pred, X, NewC, "", &I);
      }
    }
  }

  // Convert a sign-bit test of an FP value into a cast and integer compare.
  // TODO: Simplify if the copysign constant is 0.0 or NaN.
  // TODO: Handle non-zero compare constants.
  // TODO: Handle other predicates.
  const APFloat *C;
  if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::copysign>(m_APFloat(C),
                                                           m_Value(X)))) &&
      match(Op1, m_AnyZeroFP()) && !C->isZero() && !C->isNaN()) {
    // Integer type with the same scalar bit width as X, widened to a vector
    // with the same element count when the compare operates on vectors.
    Type *IntType = Builder.getIntNTy(X->getType()->getScalarSizeInBits());
    if (auto *VecTy = dyn_cast<VectorType>(OpType))
      IntType = VectorType::get(IntType, VecTy->getElementCount());

    // copysign(non-zero constant, X) < 0.0 --> (bitcast X) < 0
    if (Pred == FCmpInst::FCMP_OLT) {
      Value *IntX = Builder.CreateBitCast(X, IntType);
      return new ICmpInst(ICmpInst::ICMP_SLT, IntX,
                          ConstantInt::getNullValue(IntType));
    }
  }

  // Vector-specific compare folds.
  if (I.getType()->isVectorTy())
    if (Instruction *Res = foldVectorCmp(I, Builder))
      return Res;

  // No fold applied; report whether the operand swap at the top modified I.
  return Changed ? &I : nullptr;
}
6348