1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/CodeGen/CallingConvLower.h"
16 #include "llvm/CodeGen/CodeGenCommonISel.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineJumpTableInfo.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/SelectionDAG.h"
22 #include "llvm/CodeGen/TargetRegisterInfo.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCExpr.h"
29 #include "llvm/Support/DivisionByConstantInfo.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/KnownBits.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include <cctype>
35 using namespace llvm;
36 
37 /// NOTE: The TargetMachine owns TLOF.
// All state lives in TargetLoweringBase; this class only layers the
// SelectionDAG lowering hooks on top, so there is nothing extra to initialize.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
40 
// Default implementation: returns null, i.e. "no name known for this
// target-specific opcode". Targets override this for debug dumps.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
44 
45 bool TargetLowering::isPositionIndependent() const {
46   return getTargetMachine().isPositionIndependent();
47 }
48 
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef})
    CallerAttrs.removeAttribute(Attr);

  // Any return attribute that survived the filtering above could change the
  // return-value ABI, so conservatively refuse the tail call.
  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // NOTE(review): as written this check is unreachable -- hasAttributes()
  // above already returned false if ZExt/SExt (or anything else) remained.
  // Kept byte-for-byte for safety; confirm against upstream before removing.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
79 
80 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
81     const uint32_t *CallerPreservedMask,
82     const SmallVectorImpl<CCValAssign> &ArgLocs,
83     const SmallVectorImpl<SDValue> &OutVals) const {
84   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
85     const CCValAssign &ArgLoc = ArgLocs[I];
86     if (!ArgLoc.isRegLoc())
87       continue;
88     MCRegister Reg = ArgLoc.getLocReg();
89     // Only look at callee saved registers.
90     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
91       continue;
92     // Check that we pass the value used for the caller.
93     // (We look for a CopyFromReg reading a virtual register that is used
94     //  for the function live-in value of register Reg)
95     SDValue Value = OutVals[I];
96     if (Value->getOpcode() == ISD::AssertZext)
97       Value = Value.getOperand(0);
98     if (Value->getOpcode() != ISD::CopyFromReg)
99       return false;
100     Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
101     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
102       return false;
103   }
104   return true;
105 }
106 
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror every ABI-relevant parameter attribute of argument ArgIdx into the
  // corresponding ArgListEntry flag.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // The four pointee-carrying ABI attributes are mutually exclusive; at most
  // one of them may set IndirectType below.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // Fall back to the parameter's own alignment when no explicit stack
    // alignment was provided.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
139 
140 /// Generate a libcall taking the given operands as arguments and returning a
141 /// result of type RetVT.
142 std::pair<SDValue, SDValue>
143 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
144                             ArrayRef<SDValue> Ops,
145                             MakeLibCallOptions CallOptions,
146                             const SDLoc &dl,
147                             SDValue InChain) const {
148   if (!InChain)
149     InChain = DAG.getEntryNode();
150 
151   TargetLowering::ArgListTy Args;
152   Args.reserve(Ops.size());
153 
154   TargetLowering::ArgListEntry Entry;
155   for (unsigned i = 0; i < Ops.size(); ++i) {
156     SDValue NewOp = Ops[i];
157     Entry.Node = NewOp;
158     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
159     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
160                                                  CallOptions.IsSExt);
161     Entry.IsZExt = !Entry.IsSExt;
162 
163     if (CallOptions.IsSoften &&
164         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
165       Entry.IsSExt = Entry.IsZExt = false;
166     }
167     Args.push_back(Entry);
168   }
169 
170   if (LC == RTLIB::UNKNOWN_LIBCALL)
171     report_fatal_error("Unsupported library call operation!");
172   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
173                                          getPointerTy(DAG.getDataLayout()));
174 
175   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
176   TargetLowering::CallLoweringInfo CLI(DAG);
177   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
178   bool zeroExtend = !signExtend;
179 
180   if (CallOptions.IsSoften &&
181       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
182     signExtend = zeroExtend = false;
183   }
184 
185   CLI.setDebugLoc(dl)
186       .setChain(InChain)
187       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
188       .setNoReturn(CallOptions.DoesNotReturn)
189       .setDiscardResult(!CallOptions.IsReturnValueUsed)
190       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
191       .setSExtResult(signExtend)
192       .setZExtResult(zeroExtend);
193   return LowerCallTo(CLI);
194 }
195 
/// Choose the sequence of load/store value types used to lower a memcpy /
/// memset described by Op, appending them to MemOps. Returns false if the
/// operation cannot be lowered in at most Limit operations (or at all).
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // A fixed-destination-alignment memcpy whose source is less aligned than
  // its destination is not handled by this generic lowering.
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type for the whole operation first.
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily emit the widest usable type for each remaining chunk, shrinking
  // the type whenever it no longer fits in the bytes left.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Step down through the simple integer types until one is safe; i8
        // is the unconditional fallback.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    // Too many operations: give up so the caller can emit a libcall instead.
    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
284 
285 /// Soften the operands of a comparison. This code is shared among BR_CC,
286 /// SELECT_CC, and SETCC handlers.
287 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
288                                          SDValue &NewLHS, SDValue &NewRHS,
289                                          ISD::CondCode &CCCode,
290                                          const SDLoc &dl, const SDValue OldLHS,
291                                          const SDValue OldRHS) const {
292   SDValue Chain;
293   return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
294                              OldRHS, Chain);
295 }
296 
297 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
298                                          SDValue &NewLHS, SDValue &NewRHS,
299                                          ISD::CondCode &CCCode,
300                                          const SDLoc &dl, const SDValue OldLHS,
301                                          const SDValue OldRHS,
302                                          SDValue &Chain,
303                                          bool IsSignaling) const {
304   // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
305   // not supporting it. We can update this code when libgcc provides such
306   // functions.
307 
308   assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
309          && "Unsupported setcc type!");
310 
311   // Expand into one or more soft-fp libcall(s).
312   RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
313   bool ShouldInvertCC = false;
314   switch (CCCode) {
315   case ISD::SETEQ:
316   case ISD::SETOEQ:
317     LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
318           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
319           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
320     break;
321   case ISD::SETNE:
322   case ISD::SETUNE:
323     LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
324           (VT == MVT::f64) ? RTLIB::UNE_F64 :
325           (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
326     break;
327   case ISD::SETGE:
328   case ISD::SETOGE:
329     LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
330           (VT == MVT::f64) ? RTLIB::OGE_F64 :
331           (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
332     break;
333   case ISD::SETLT:
334   case ISD::SETOLT:
335     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
336           (VT == MVT::f64) ? RTLIB::OLT_F64 :
337           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
338     break;
339   case ISD::SETLE:
340   case ISD::SETOLE:
341     LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
342           (VT == MVT::f64) ? RTLIB::OLE_F64 :
343           (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
344     break;
345   case ISD::SETGT:
346   case ISD::SETOGT:
347     LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
348           (VT == MVT::f64) ? RTLIB::OGT_F64 :
349           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
350     break;
351   case ISD::SETO:
352     ShouldInvertCC = true;
353     LLVM_FALLTHROUGH;
354   case ISD::SETUO:
355     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
356           (VT == MVT::f64) ? RTLIB::UO_F64 :
357           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
358     break;
359   case ISD::SETONE:
360     // SETONE = O && UNE
361     ShouldInvertCC = true;
362     LLVM_FALLTHROUGH;
363   case ISD::SETUEQ:
364     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
365           (VT == MVT::f64) ? RTLIB::UO_F64 :
366           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
367     LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
368           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
369           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
370     break;
371   default:
372     // Invert CC for unordered comparisons
373     ShouldInvertCC = true;
374     switch (CCCode) {
375     case ISD::SETULT:
376       LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
377             (VT == MVT::f64) ? RTLIB::OGE_F64 :
378             (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
379       break;
380     case ISD::SETULE:
381       LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
382             (VT == MVT::f64) ? RTLIB::OGT_F64 :
383             (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
384       break;
385     case ISD::SETUGT:
386       LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
387             (VT == MVT::f64) ? RTLIB::OLE_F64 :
388             (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
389       break;
390     case ISD::SETUGE:
391       LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
392             (VT == MVT::f64) ? RTLIB::OLT_F64 :
393             (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
394       break;
395     default: llvm_unreachable("Do not know how to soften this setcc!");
396     }
397   }
398 
399   // Use the target specific return value for comparions lib calls.
400   EVT RetVT = getCmpLibcallReturnType();
401   SDValue Ops[2] = {NewLHS, NewRHS};
402   TargetLowering::MakeLibCallOptions CallOptions;
403   EVT OpsVT[2] = { OldLHS.getValueType(),
404                    OldRHS.getValueType() };
405   CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
406   auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
407   NewLHS = Call.first;
408   NewRHS = DAG.getConstant(0, dl, RetVT);
409 
410   CCCode = getCmpLibcallCC(LC1);
411   if (ShouldInvertCC) {
412     assert(RetVT.isInteger());
413     CCCode = getSetCCInverse(CCCode, RetVT);
414   }
415 
416   if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
417     // Update Chain.
418     Chain = Call.second;
419   } else {
420     EVT SetCCVT =
421         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
422     SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
423     auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
424     CCCode = getCmpLibcallCC(LC2);
425     if (ShouldInvertCC)
426       CCCode = getSetCCInverse(CCCode, RetVT);
427     NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
428     if (Chain)
429       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
430                           Call2.second);
431     NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
432                          Tmp.getValueType(), Tmp, NewLHS);
433     NewRHS = SDValue();
434   }
435 }
436 
437 /// Return the entry encoding for a jump table in the current function. The
438 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
439 unsigned TargetLowering::getJumpTableEncoding() const {
440   // In non-pic modes, just use the address of a block.
441   if (!isPositionIndependent())
442     return MachineJumpTableInfo::EK_BlockAddress;
443 
444   // In PIC mode, if the target supports a GPRel32 directive, use it.
445   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
446     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
447 
448   // Otherwise, use a label difference.
449   return MachineJumpTableInfo::EK_LabelDifference32;
450 }
451 
452 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
453                                                  SelectionDAG &DAG) const {
454   // If our PIC model is GP relative, use the global offset table as the base.
455   unsigned JTEncoding = getJumpTableEncoding();
456 
457   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
458       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
459     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
460 
461   return Table;
462 }
463 
464 /// This returns the relocation base for the given PIC jumptable, the same as
465 /// getPICJumpTableRelocBase, but as an MCExpr.
466 const MCExpr *
467 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
468                                              unsigned JTI,MCContext &Ctx) const{
469   // The normal PIC reloc base is the label at the start of the jump table.
470   return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
471 }
472 
473 bool
474 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
475   const TargetMachine &TM = getTargetMachine();
476   const GlobalValue *GV = GA->getGlobal();
477 
478   // If the address is not even local to this DSO we will have to load it from
479   // a got and then add the offset.
480   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
481     return false;
482 
483   // If the code is position independent we will have to add a base register.
484   if (isPositionIndependent())
485     return false;
486 
487   // Otherwise we can do it.
488   return true;
489 }
490 
491 //===----------------------------------------------------------------------===//
492 //  Optimization Methods
493 //===----------------------------------------------------------------------===//
494 
495 /// If the specified instruction has a constant integer operand and there are
496 /// bits set in that constant that are not demanded, then clear those bits and
497 /// return true.
498 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
499                                             const APInt &DemandedBits,
500                                             const APInt &DemandedElts,
501                                             TargetLoweringOpt &TLO) const {
502   SDLoc DL(Op);
503   unsigned Opcode = Op.getOpcode();
504 
505   // Do target-specific constant optimization.
506   if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
507     return TLO.New.getNode();
508 
509   // FIXME: ISD::SELECT, ISD::SELECT_CC
510   switch (Opcode) {
511   default:
512     break;
513   case ISD::XOR:
514   case ISD::AND:
515   case ISD::OR: {
516     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
517     if (!Op1C || Op1C->isOpaque())
518       return false;
519 
520     // If this is a 'not' op, don't touch it because that's a canonical form.
521     const APInt &C = Op1C->getAPIntValue();
522     if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
523       return false;
524 
525     if (!C.isSubsetOf(DemandedBits)) {
526       EVT VT = Op.getValueType();
527       SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
528       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
529       return TLO.CombineTo(Op, NewOp);
530     }
531 
532     break;
533   }
534   }
535 
536   return false;
537 }
538 
539 bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
540                                             const APInt &DemandedBits,
541                                             TargetLoweringOpt &TLO) const {
542   EVT VT = Op.getValueType();
543   APInt DemandedElts = VT.isVector()
544                            ? APInt::getAllOnes(VT.getVectorNumElements())
545                            : APInt(1, 1);
546   return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
547 }
548 
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ANY_EXTEND for the widening cast (the node
/// emitted below), but it could be generalized for targets with other types
/// of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  // Round the starting width up to a power of two, then widen by doubling
  // until either a profitable type is found or the full width is reached.
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      // Perform the operation in the narrow type and widen the result back.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
595 
596 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
597                                           DAGCombinerInfo &DCI) const {
598   SelectionDAG &DAG = DCI.DAG;
599   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
600                         !DCI.isBeforeLegalizeOps());
601   KnownBits Known;
602 
603   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
604   if (Simplified) {
605     DCI.AddToWorklist(Op.getNode());
606     DCI.CommitTargetLoweringOpt(TLO);
607   }
608   return Simplified;
609 }
610 
611 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
612                                           const APInt &DemandedElts,
613                                           DAGCombinerInfo &DCI) const {
614   SelectionDAG &DAG = DCI.DAG;
615   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
616                         !DCI.isBeforeLegalizeOps());
617   KnownBits Known;
618 
619   bool Simplified =
620       SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
621   if (Simplified) {
622     DCI.AddToWorklist(Op.getNode());
623     DCI.CommitTargetLoweringOpt(TLO);
624   }
625   return Simplified;
626 }
627 
628 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
629                                           KnownBits &Known,
630                                           TargetLoweringOpt &TLO,
631                                           unsigned Depth,
632                                           bool AssumeSingleUse) const {
633   EVT VT = Op.getValueType();
634 
635   // TODO: We can probably do more work on calculating the known bits and
636   // simplifying the operations for scalable vectors, but for now we just
637   // bail out.
638   if (VT.isScalableVector()) {
639     // Pretend we don't know anything for now.
640     Known = KnownBits(DemandedBits.getBitWidth());
641     return false;
642   }
643 
644   APInt DemandedElts = VT.isVector()
645                            ? APInt::getAllOnes(VT.getVectorNumElements())
646                            : APInt(1, 1);
647   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
648                               AssumeSingleUse);
649 }
650 
651 // TODO: Can we merge SelectionDAG::GetDemandedBits into this?
652 // TODO: Under what circumstances can we create nodes? Constant folding?
653 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
654     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
655     SelectionDAG &DAG, unsigned Depth) const {
656   // Limit search depth.
657   if (Depth >= SelectionDAG::MaxRecursionDepth)
658     return SDValue();
659 
660   // Ignore UNDEFs.
661   if (Op.isUndef())
662     return SDValue();
663 
664   // Not demanding any bits/elts from Op.
665   if (DemandedBits == 0 || DemandedElts == 0)
666     return DAG.getUNDEF(Op.getValueType());
667 
668   bool IsLE = DAG.getDataLayout().isLittleEndian();
669   unsigned NumElts = DemandedElts.getBitWidth();
670   unsigned BitWidth = DemandedBits.getBitWidth();
671   KnownBits LHSKnown, RHSKnown;
672   switch (Op.getOpcode()) {
673   case ISD::BITCAST: {
674     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
675     EVT SrcVT = Src.getValueType();
676     EVT DstVT = Op.getValueType();
677     if (SrcVT == DstVT)
678       return Src;
679 
680     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
681     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
682     if (NumSrcEltBits == NumDstEltBits)
683       if (SDValue V = SimplifyMultipleUseDemandedBits(
684               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
685         return DAG.getBitcast(DstVT, V);
686 
687     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
688       unsigned Scale = NumDstEltBits / NumSrcEltBits;
689       unsigned NumSrcElts = SrcVT.getVectorNumElements();
690       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
691       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
692       for (unsigned i = 0; i != Scale; ++i) {
693         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
694         unsigned BitOffset = EltOffset * NumSrcEltBits;
695         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
696         if (!Sub.isZero()) {
697           DemandedSrcBits |= Sub;
698           for (unsigned j = 0; j != NumElts; ++j)
699             if (DemandedElts[j])
700               DemandedSrcElts.setBit((j * Scale) + i);
701         }
702       }
703 
704       if (SDValue V = SimplifyMultipleUseDemandedBits(
705               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
706         return DAG.getBitcast(DstVT, V);
707     }
708 
709     // TODO - bigendian once we have test coverage.
710     if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
711       unsigned Scale = NumSrcEltBits / NumDstEltBits;
712       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
713       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
714       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
715       for (unsigned i = 0; i != NumElts; ++i)
716         if (DemandedElts[i]) {
717           unsigned Offset = (i % Scale) * NumDstEltBits;
718           DemandedSrcBits.insertBits(DemandedBits, Offset);
719           DemandedSrcElts.setBit(i / Scale);
720         }
721 
722       if (SDValue V = SimplifyMultipleUseDemandedBits(
723               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
724         return DAG.getBitcast(DstVT, V);
725     }
726 
727     break;
728   }
729   case ISD::AND: {
730     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
731     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
732 
733     // If all of the demanded bits are known 1 on one side, return the other.
734     // These bits cannot contribute to the result of the 'and' in this
735     // context.
736     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
737       return Op.getOperand(0);
738     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
739       return Op.getOperand(1);
740     break;
741   }
742   case ISD::OR: {
743     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
744     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
745 
746     // If all of the demanded bits are known zero on one side, return the
747     // other.  These bits cannot contribute to the result of the 'or' in this
748     // context.
749     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
750       return Op.getOperand(0);
751     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
752       return Op.getOperand(1);
753     break;
754   }
755   case ISD::XOR: {
756     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
757     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
758 
759     // If all of the demanded bits are known zero on one side, return the
760     // other.
761     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
762       return Op.getOperand(0);
763     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
764       return Op.getOperand(1);
765     break;
766   }
767   case ISD::SHL: {
768     // If we are only demanding sign bits then we can use the shift source
769     // directly.
770     if (const APInt *MaxSA =
771             DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
772       SDValue Op0 = Op.getOperand(0);
773       unsigned ShAmt = MaxSA->getZExtValue();
774       unsigned NumSignBits =
775           DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
776       unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
777       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
778         return Op0;
779     }
780     break;
781   }
782   case ISD::SETCC: {
783     SDValue Op0 = Op.getOperand(0);
784     SDValue Op1 = Op.getOperand(1);
785     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
786     // If (1) we only need the sign-bit, (2) the setcc operands are the same
787     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
788     // -1, we may be able to bypass the setcc.
789     if (DemandedBits.isSignMask() &&
790         Op0.getScalarValueSizeInBits() == BitWidth &&
791         getBooleanContents(Op0.getValueType()) ==
792             BooleanContent::ZeroOrNegativeOneBooleanContent) {
793       // If we're testing X < 0, then this compare isn't needed - just use X!
794       // FIXME: We're limiting to integer types here, but this should also work
795       // if we don't care about FP signed-zero. The use of SETLT with FP means
796       // that we don't care about NaNs.
797       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
798           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
799         return Op0;
800     }
801     break;
802   }
803   case ISD::SIGN_EXTEND_INREG: {
804     // If none of the extended bits are demanded, eliminate the sextinreg.
805     SDValue Op0 = Op.getOperand(0);
806     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
807     unsigned ExBits = ExVT.getScalarSizeInBits();
808     if (DemandedBits.getActiveBits() <= ExBits)
809       return Op0;
810     // If the input is already sign extended, just drop the extension.
811     unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
812     if (NumSignBits >= (BitWidth - ExBits + 1))
813       return Op0;
814     break;
815   }
816   case ISD::ANY_EXTEND_VECTOR_INREG:
817   case ISD::SIGN_EXTEND_VECTOR_INREG:
818   case ISD::ZERO_EXTEND_VECTOR_INREG: {
819     // If we only want the lowest element and none of extended bits, then we can
820     // return the bitcasted source vector.
821     SDValue Src = Op.getOperand(0);
822     EVT SrcVT = Src.getValueType();
823     EVT DstVT = Op.getValueType();
824     if (IsLE && DemandedElts == 1 &&
825         DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
826         DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
827       return DAG.getBitcast(DstVT, Src);
828     }
829     break;
830   }
831   case ISD::INSERT_VECTOR_ELT: {
832     // If we don't demand the inserted element, return the base vector.
833     SDValue Vec = Op.getOperand(0);
834     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
835     EVT VecVT = Vec.getValueType();
836     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
837         !DemandedElts[CIdx->getZExtValue()])
838       return Vec;
839     break;
840   }
841   case ISD::INSERT_SUBVECTOR: {
842     SDValue Vec = Op.getOperand(0);
843     SDValue Sub = Op.getOperand(1);
844     uint64_t Idx = Op.getConstantOperandVal(2);
845     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
846     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
847     // If we don't demand the inserted subvector, return the base vector.
848     if (DemandedSubElts == 0)
849       return Vec;
850     // If this simply widens the lowest subvector, see if we can do it earlier.
851     if (Idx == 0 && Vec.isUndef()) {
852       if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
853               Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
854         return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
855                            Op.getOperand(0), NewSub, Op.getOperand(2));
856     }
857     break;
858   }
859   case ISD::VECTOR_SHUFFLE: {
860     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
861 
862     // If all the demanded elts are from one operand and are inline,
863     // then we can use the operand directly.
864     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
865     for (unsigned i = 0; i != NumElts; ++i) {
866       int M = ShuffleMask[i];
867       if (M < 0 || !DemandedElts[i])
868         continue;
869       AllUndef = false;
870       IdentityLHS &= (M == (int)i);
871       IdentityRHS &= ((M - NumElts) == i);
872     }
873 
874     if (AllUndef)
875       return DAG.getUNDEF(Op.getValueType());
876     if (IdentityLHS)
877       return Op.getOperand(0);
878     if (IdentityRHS)
879       return Op.getOperand(1);
880     break;
881   }
882   default:
883     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
884       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
885               Op, DemandedBits, DemandedElts, DAG, Depth))
886         return V;
887     break;
888   }
889   return SDValue();
890 }
891 
892 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
893     SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
894     unsigned Depth) const {
895   EVT VT = Op.getValueType();
896   APInt DemandedElts = VT.isVector()
897                            ? APInt::getAllOnes(VT.getVectorNumElements())
898                            : APInt(1, 1);
899   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
900                                          Depth);
901 }
902 
903 SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
904     SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
905     unsigned Depth) const {
906   APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
907   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
908                                          Depth);
909 }
910 
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
//      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// The root must be an SRL/SRA by exactly 1 of an ADD. The averaged values are
// recovered by truncating the add operands to the narrowest power-of-2 type
// that still holds all their significant bits (per known sign/zero bits), the
// AVG node is built in that narrow type, and the result is re-extended to the
// original type. Returns the replacement node, or SDValue() if no legal AVG
// pattern was matched.
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  // Given the three operands of the nested add forms above, find the one that
  // is the constant +1 and bind the remaining two as the values to average.
  // NOTE: on success this mutates the captured ExtOpA/ExtOpB.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op2;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      return true;
    }
    return false;
  };
  // A matched +1 means a rounding (ceil) average; the short-circuit order
  // matters because MatchOperands rebinds ExtOpA/ExtOpB as a side effect.
  // If neither operand is a suitable inner add, this is the floor form.
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  // Number of redundant high bits in the operands (sign copies or zeros);
  // used below to compute how narrow the AVG can be made.
  // NOTE: this local shadows the llvm::KnownBits type within this function.
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  // The add can consume one sign bit, so only min-1 sign bits survive it.
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  // Decide between a signed and unsigned average, preferring the variant that
  // proves more redundant high bits (and hence a narrower AVG type).
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // A logical shift of a value whose sign bit is not demanded behaves like
    // an arithmetic shift, so a signed average is still valid here.
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  // Clamp to at least 8 bits so we only ever form byte-or-wider AVG types.
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
    return SDValue();

  // Truncate both operands into the narrow type, average there, then extend
  // the result (sign- or zero-extend to match the chosen AVG flavor).
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
                  DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
  return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
                     ResultAVG);
}
1036 
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in TLO.Old and TLO.New. Otherwise, analyze the
/// expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits in
/// the OriginalDemandedBits and OriginalDemandedElts.
1044 bool TargetLowering::SimplifyDemandedBits(
1045     SDValue Op, const APInt &OriginalDemandedBits,
1046     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1047     unsigned Depth, bool AssumeSingleUse) const {
1048   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1049   assert(Op.getScalarValueSizeInBits() == BitWidth &&
1050          "Mask size mismatches value type size!");
1051 
1052   // Don't know anything.
1053   Known = KnownBits(BitWidth);
1054 
1055   // TODO: We can probably do more work on calculating the known bits and
1056   // simplifying the operations for scalable vectors, but for now we just
1057   // bail out.
1058   if (Op.getValueType().isScalableVector())
1059     return false;
1060 
1061   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1062   unsigned NumElts = OriginalDemandedElts.getBitWidth();
1063   assert((!Op.getValueType().isVector() ||
1064           NumElts == Op.getValueType().getVectorNumElements()) &&
1065          "Unexpected vector size");
1066 
1067   APInt DemandedBits = OriginalDemandedBits;
1068   APInt DemandedElts = OriginalDemandedElts;
1069   SDLoc dl(Op);
1070   auto &DL = TLO.DAG.getDataLayout();
1071 
1072   // Undef operand.
1073   if (Op.isUndef())
1074     return false;
1075 
1076   if (Op.getOpcode() == ISD::Constant) {
1077     // We know all of the bits for a constant!
1078     Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
1079     return false;
1080   }
1081 
1082   if (Op.getOpcode() == ISD::ConstantFP) {
1083     // We know all of the bits for a floating point constant!
1084     Known = KnownBits::makeConstant(
1085         cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1086     return false;
1087   }
1088 
1089   // Other users may use these bits.
1090   EVT VT = Op.getValueType();
1091   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
1092     if (Depth != 0) {
1093       // If not at the root, Just compute the Known bits to
1094       // simplify things downstream.
1095       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1096       return false;
1097     }
1098     // If this is the root being simplified, allow it to have multiple uses,
1099     // just set the DemandedBits/Elts to all bits.
1100     DemandedBits = APInt::getAllOnes(BitWidth);
1101     DemandedElts = APInt::getAllOnes(NumElts);
1102   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1103     // Not demanding any bits/elts from Op.
1104     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1105   } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1106     // Limit search depth.
1107     return false;
1108   }
1109 
1110   KnownBits Known2;
1111   switch (Op.getOpcode()) {
1112   case ISD::TargetConstant:
1113     llvm_unreachable("Can't simplify this node");
1114   case ISD::SCALAR_TO_VECTOR: {
1115     if (!DemandedElts[0])
1116       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1117 
1118     KnownBits SrcKnown;
1119     SDValue Src = Op.getOperand(0);
1120     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1121     APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
1122     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1123       return true;
1124 
1125     // Upper elements are undef, so only get the knownbits if we just demand
1126     // the bottom element.
1127     if (DemandedElts == 1)
1128       Known = SrcKnown.anyextOrTrunc(BitWidth);
1129     break;
1130   }
1131   case ISD::BUILD_VECTOR:
1132     // Collect the known bits that are shared by every demanded element.
1133     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1134     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1135     return false; // Don't fall through, will infinitely loop.
1136   case ISD::LOAD: {
1137     auto *LD = cast<LoadSDNode>(Op);
1138     if (getTargetConstantFromLoad(LD)) {
1139       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1140       return false; // Don't fall through, will infinitely loop.
1141     }
1142     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1143       // If this is a ZEXTLoad and we are looking at the loaded value.
1144       EVT MemVT = LD->getMemoryVT();
1145       unsigned MemBits = MemVT.getScalarSizeInBits();
1146       Known.Zero.setBitsFrom(MemBits);
1147       return false; // Don't fall through, will infinitely loop.
1148     }
1149     break;
1150   }
1151   case ISD::INSERT_VECTOR_ELT: {
1152     SDValue Vec = Op.getOperand(0);
1153     SDValue Scl = Op.getOperand(1);
1154     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1155     EVT VecVT = Vec.getValueType();
1156 
1157     // If index isn't constant, assume we need all vector elements AND the
1158     // inserted element.
1159     APInt DemandedVecElts(DemandedElts);
1160     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1161       unsigned Idx = CIdx->getZExtValue();
1162       DemandedVecElts.clearBit(Idx);
1163 
1164       // Inserted element is not required.
1165       if (!DemandedElts[Idx])
1166         return TLO.CombineTo(Op, Vec);
1167     }
1168 
1169     KnownBits KnownScl;
1170     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1171     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1172     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1173       return true;
1174 
1175     Known = KnownScl.anyextOrTrunc(BitWidth);
1176 
1177     KnownBits KnownVec;
1178     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1179                              Depth + 1))
1180       return true;
1181 
1182     if (!!DemandedVecElts)
1183       Known = KnownBits::commonBits(Known, KnownVec);
1184 
1185     return false;
1186   }
1187   case ISD::INSERT_SUBVECTOR: {
1188     // Demand any elements from the subvector and the remainder from the src its
1189     // inserted into.
1190     SDValue Src = Op.getOperand(0);
1191     SDValue Sub = Op.getOperand(1);
1192     uint64_t Idx = Op.getConstantOperandVal(2);
1193     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1194     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1195     APInt DemandedSrcElts = DemandedElts;
1196     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1197 
1198     KnownBits KnownSub, KnownSrc;
1199     if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1200                              Depth + 1))
1201       return true;
1202     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1203                              Depth + 1))
1204       return true;
1205 
1206     Known.Zero.setAllBits();
1207     Known.One.setAllBits();
1208     if (!!DemandedSubElts)
1209       Known = KnownBits::commonBits(Known, KnownSub);
1210     if (!!DemandedSrcElts)
1211       Known = KnownBits::commonBits(Known, KnownSrc);
1212 
1213     // Attempt to avoid multi-use src if we don't need anything from it.
1214     if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1215         !DemandedSrcElts.isAllOnes()) {
1216       SDValue NewSub = SimplifyMultipleUseDemandedBits(
1217           Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1218       SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1219           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1220       if (NewSub || NewSrc) {
1221         NewSub = NewSub ? NewSub : Sub;
1222         NewSrc = NewSrc ? NewSrc : Src;
1223         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1224                                         Op.getOperand(2));
1225         return TLO.CombineTo(Op, NewOp);
1226       }
1227     }
1228     break;
1229   }
1230   case ISD::EXTRACT_SUBVECTOR: {
1231     // Offset the demanded elts by the subvector index.
1232     SDValue Src = Op.getOperand(0);
1233     if (Src.getValueType().isScalableVector())
1234       break;
1235     uint64_t Idx = Op.getConstantOperandVal(1);
1236     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1237     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
1238 
1239     if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1240                              Depth + 1))
1241       return true;
1242 
1243     // Attempt to avoid multi-use src if we don't need anything from it.
1244     if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1245       SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1246           Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1247       if (DemandedSrc) {
1248         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1249                                         Op.getOperand(1));
1250         return TLO.CombineTo(Op, NewOp);
1251       }
1252     }
1253     break;
1254   }
1255   case ISD::CONCAT_VECTORS: {
1256     Known.Zero.setAllBits();
1257     Known.One.setAllBits();
1258     EVT SubVT = Op.getOperand(0).getValueType();
1259     unsigned NumSubVecs = Op.getNumOperands();
1260     unsigned NumSubElts = SubVT.getVectorNumElements();
1261     for (unsigned i = 0; i != NumSubVecs; ++i) {
1262       APInt DemandedSubElts =
1263           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1264       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1265                                Known2, TLO, Depth + 1))
1266         return true;
1267       // Known bits are shared by every demanded subvector element.
1268       if (!!DemandedSubElts)
1269         Known = KnownBits::commonBits(Known, Known2);
1270     }
1271     break;
1272   }
1273   case ISD::VECTOR_SHUFFLE: {
1274     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1275 
1276     // Collect demanded elements from shuffle operands..
1277     APInt DemandedLHS(NumElts, 0);
1278     APInt DemandedRHS(NumElts, 0);
1279     for (unsigned i = 0; i != NumElts; ++i) {
1280       if (!DemandedElts[i])
1281         continue;
1282       int M = ShuffleMask[i];
1283       if (M < 0) {
1284         // For UNDEF elements, we don't know anything about the common state of
1285         // the shuffle result.
1286         DemandedLHS.clearAllBits();
1287         DemandedRHS.clearAllBits();
1288         break;
1289       }
1290       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
1291       if (M < (int)NumElts)
1292         DemandedLHS.setBit(M);
1293       else
1294         DemandedRHS.setBit(M - NumElts);
1295     }
1296 
1297     if (!!DemandedLHS || !!DemandedRHS) {
1298       SDValue Op0 = Op.getOperand(0);
1299       SDValue Op1 = Op.getOperand(1);
1300 
1301       Known.Zero.setAllBits();
1302       Known.One.setAllBits();
1303       if (!!DemandedLHS) {
1304         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1305                                  Depth + 1))
1306           return true;
1307         Known = KnownBits::commonBits(Known, Known2);
1308       }
1309       if (!!DemandedRHS) {
1310         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1311                                  Depth + 1))
1312           return true;
1313         Known = KnownBits::commonBits(Known, Known2);
1314       }
1315 
1316       // Attempt to avoid multi-use ops if we don't need anything from them.
1317       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1318           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1319       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1320           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1321       if (DemandedOp0 || DemandedOp1) {
1322         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1323         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1324         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1325         return TLO.CombineTo(Op, NewOp);
1326       }
1327     }
1328     break;
1329   }
1330   case ISD::AND: {
1331     SDValue Op0 = Op.getOperand(0);
1332     SDValue Op1 = Op.getOperand(1);
1333 
1334     // If the RHS is a constant, check to see if the LHS would be zero without
1335     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1336     // simplify the LHS, here we're using information from the LHS to simplify
1337     // the RHS.
1338     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1339       // Do not increment Depth here; that can cause an infinite loop.
1340       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1341       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1342       if ((LHSKnown.Zero & DemandedBits) ==
1343           (~RHSC->getAPIntValue() & DemandedBits))
1344         return TLO.CombineTo(Op, Op0);
1345 
1346       // If any of the set bits in the RHS are known zero on the LHS, shrink
1347       // the constant.
1348       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1349                                  DemandedElts, TLO))
1350         return true;
1351 
1352       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1353       // constant, but if this 'and' is only clearing bits that were just set by
1354       // the xor, then this 'and' can be eliminated by shrinking the mask of
1355       // the xor. For example, for a 32-bit X:
1356       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1357       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1358           LHSKnown.One == ~RHSC->getAPIntValue()) {
1359         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1360         return TLO.CombineTo(Op, Xor);
1361       }
1362     }
1363 
1364     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1365                              Depth + 1))
1366       return true;
1367     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1368     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1369                              Known2, TLO, Depth + 1))
1370       return true;
1371     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1372 
1373     // Attempt to avoid multi-use ops if we don't need anything from them.
1374     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1375       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1376           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1377       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1378           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1379       if (DemandedOp0 || DemandedOp1) {
1380         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1381         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1382         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1383         return TLO.CombineTo(Op, NewOp);
1384       }
1385     }
1386 
1387     // If all of the demanded bits are known one on one side, return the other.
1388     // These bits cannot contribute to the result of the 'and'.
1389     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1390       return TLO.CombineTo(Op, Op0);
1391     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1392       return TLO.CombineTo(Op, Op1);
1393     // If all of the demanded bits in the inputs are known zeros, return zero.
1394     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1395       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1396     // If the RHS is a constant, see if we can simplify it.
1397     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1398                                TLO))
1399       return true;
1400     // If the operation can be done in a smaller type, do so.
1401     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1402       return true;
1403 
1404     Known &= Known2;
1405     break;
1406   }
1407   case ISD::OR: {
1408     SDValue Op0 = Op.getOperand(0);
1409     SDValue Op1 = Op.getOperand(1);
1410 
1411     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1412                              Depth + 1))
1413       return true;
1414     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1415     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1416                              Known2, TLO, Depth + 1))
1417       return true;
1418     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1419 
1420     // Attempt to avoid multi-use ops if we don't need anything from them.
1421     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1422       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1423           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1424       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1425           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1426       if (DemandedOp0 || DemandedOp1) {
1427         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1428         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1429         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1430         return TLO.CombineTo(Op, NewOp);
1431       }
1432     }
1433 
1434     // If all of the demanded bits are known zero on one side, return the other.
1435     // These bits cannot contribute to the result of the 'or'.
1436     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1437       return TLO.CombineTo(Op, Op0);
1438     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1439       return TLO.CombineTo(Op, Op1);
1440     // If the RHS is a constant, see if we can simplify it.
1441     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1442       return true;
1443     // If the operation can be done in a smaller type, do so.
1444     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1445       return true;
1446 
1447     Known |= Known2;
1448     break;
1449   }
1450   case ISD::XOR: {
1451     SDValue Op0 = Op.getOperand(0);
1452     SDValue Op1 = Op.getOperand(1);
1453 
1454     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1455                              Depth + 1))
1456       return true;
1457     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1458     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1459                              Depth + 1))
1460       return true;
1461     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1462 
1463     // Attempt to avoid multi-use ops if we don't need anything from them.
1464     if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1465       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1466           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1467       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1468           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1469       if (DemandedOp0 || DemandedOp1) {
1470         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1471         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1472         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1473         return TLO.CombineTo(Op, NewOp);
1474       }
1475     }
1476 
1477     // If all of the demanded bits are known zero on one side, return the other.
1478     // These bits cannot contribute to the result of the 'xor'.
1479     if (DemandedBits.isSubsetOf(Known.Zero))
1480       return TLO.CombineTo(Op, Op0);
1481     if (DemandedBits.isSubsetOf(Known2.Zero))
1482       return TLO.CombineTo(Op, Op1);
1483     // If the operation can be done in a smaller type, do so.
1484     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1485       return true;
1486 
1487     // If all of the unknown bits are known to be zero on one side or the other
1488     // turn this into an *inclusive* or.
1489     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1490     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1491       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1492 
1493     ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
1494     if (C) {
1495       // If one side is a constant, and all of the set bits in the constant are
1496       // also known set on the other side, turn this into an AND, as we know
1497       // the bits will be cleared.
1498       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1499       // NB: it is okay if more bits are known than are requested
1500       if (C->getAPIntValue() == Known2.One) {
1501         SDValue ANDC =
1502             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1503         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1504       }
1505 
1506       // If the RHS is a constant, see if we can change it. Don't alter a -1
1507       // constant because that's a 'not' op, and that is better for combining
1508       // and codegen.
1509       if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1510         // We're flipping all demanded bits. Flip the undemanded bits too.
1511         SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1512         return TLO.CombineTo(Op, New);
1513       }
1514     }
1515 
1516     // If we can't turn this into a 'not', try to shrink the constant.
1517     if (!C || !C->isAllOnes())
1518       if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1519         return true;
1520 
1521     Known ^= Known2;
1522     break;
1523   }
1524   case ISD::SELECT:
1525     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1526                              Depth + 1))
1527       return true;
1528     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1529                              Depth + 1))
1530       return true;
1531     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1532     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1533 
1534     // If the operands are constants, see if we can simplify them.
1535     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1536       return true;
1537 
1538     // Only known if known in both the LHS and RHS.
1539     Known = KnownBits::commonBits(Known, Known2);
1540     break;
1541   case ISD::SELECT_CC:
1542     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1543                              Depth + 1))
1544       return true;
1545     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1546                              Depth + 1))
1547       return true;
1548     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1549     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1550 
1551     // If the operands are constants, see if we can simplify them.
1552     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1553       return true;
1554 
1555     // Only known if known in both the LHS and RHS.
1556     Known = KnownBits::commonBits(Known, Known2);
1557     break;
1558   case ISD::SETCC: {
1559     SDValue Op0 = Op.getOperand(0);
1560     SDValue Op1 = Op.getOperand(1);
1561     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1562     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1563     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1564     // -1, we may be able to bypass the setcc.
1565     if (DemandedBits.isSignMask() &&
1566         Op0.getScalarValueSizeInBits() == BitWidth &&
1567         getBooleanContents(Op0.getValueType()) ==
1568             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1569       // If we're testing X < 0, then this compare isn't needed - just use X!
1570       // FIXME: We're limiting to integer types here, but this should also work
1571       // if we don't care about FP signed-zero. The use of SETLT with FP means
1572       // that we don't care about NaNs.
1573       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1574           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1575         return TLO.CombineTo(Op, Op0);
1576 
1577       // TODO: Should we check for other forms of sign-bit comparisons?
1578       // Examples: X <= -1, X >= 0
1579     }
1580     if (getBooleanContents(Op0.getValueType()) ==
1581             TargetLowering::ZeroOrOneBooleanContent &&
1582         BitWidth > 1)
1583       Known.Zero.setBitsFrom(1);
1584     break;
1585   }
1586   case ISD::SHL: {
1587     SDValue Op0 = Op.getOperand(0);
1588     SDValue Op1 = Op.getOperand(1);
1589     EVT ShiftVT = Op1.getValueType();
1590 
1591     if (const APInt *SA =
1592             TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1593       unsigned ShAmt = SA->getZExtValue();
1594       if (ShAmt == 0)
1595         return TLO.CombineTo(Op, Op0);
1596 
1597       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1598       // single shift.  We can do this if the bottom bits (which are shifted
1599       // out) are never demanded.
1600       // TODO - support non-uniform vector amounts.
1601       if (Op0.getOpcode() == ISD::SRL) {
1602         if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1603           if (const APInt *SA2 =
1604                   TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1605             unsigned C1 = SA2->getZExtValue();
1606             unsigned Opc = ISD::SHL;
1607             int Diff = ShAmt - C1;
1608             if (Diff < 0) {
1609               Diff = -Diff;
1610               Opc = ISD::SRL;
1611             }
1612             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1613             return TLO.CombineTo(
1614                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1615           }
1616         }
1617       }
1618 
1619       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1620       // are not demanded. This will likely allow the anyext to be folded away.
1621       // TODO - support non-uniform vector amounts.
1622       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1623         SDValue InnerOp = Op0.getOperand(0);
1624         EVT InnerVT = InnerOp.getValueType();
1625         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1626         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1627             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1628           EVT ShTy = getShiftAmountTy(InnerVT, DL);
1629           if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1630             ShTy = InnerVT;
1631           SDValue NarrowShl =
1632               TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1633                               TLO.DAG.getConstant(ShAmt, dl, ShTy));
1634           return TLO.CombineTo(
1635               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1636         }
1637 
1638         // Repeat the SHL optimization above in cases where an extension
1639         // intervenes: (shl (anyext (shr x, c1)), c2) to
1640         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1641         // aren't demanded (as above) and that the shifted upper c1 bits of
1642         // x aren't demanded.
1643         // TODO - support non-uniform vector amounts.
1644         if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1645             InnerOp.hasOneUse()) {
1646           if (const APInt *SA2 =
1647                   TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1648             unsigned InnerShAmt = SA2->getZExtValue();
1649             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1650                 DemandedBits.getActiveBits() <=
1651                     (InnerBits - InnerShAmt + ShAmt) &&
1652                 DemandedBits.countTrailingZeros() >= ShAmt) {
1653               SDValue NewSA =
1654                   TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1655               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1656                                                InnerOp.getOperand(0));
1657               return TLO.CombineTo(
1658                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1659             }
1660           }
1661         }
1662       }
1663 
1664       APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1665       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1666                                Depth + 1))
1667         return true;
1668       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1669       Known.Zero <<= ShAmt;
1670       Known.One <<= ShAmt;
1671       // low bits known zero.
1672       Known.Zero.setLowBits(ShAmt);
1673 
1674       // Try shrinking the operation as long as the shift amount will still be
1675       // in range.
1676       if ((ShAmt < DemandedBits.getActiveBits()) &&
1677           ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1678         return true;
1679     }
1680 
1681     // If we are only demanding sign bits then we can use the shift source
1682     // directly.
1683     if (const APInt *MaxSA =
1684             TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1685       unsigned ShAmt = MaxSA->getZExtValue();
1686       unsigned NumSignBits =
1687           TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1688       unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1689       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1690         return TLO.CombineTo(Op, Op0);
1691     }
1692     break;
1693   }
1694   case ISD::SRL: {
1695     SDValue Op0 = Op.getOperand(0);
1696     SDValue Op1 = Op.getOperand(1);
1697     EVT ShiftVT = Op1.getValueType();
1698 
1699     // Try to match AVG patterns.
1700     if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1701                                         DemandedElts, Depth + 1))
1702       return TLO.CombineTo(Op, AVG);
1703 
1704     if (const APInt *SA =
1705             TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1706       unsigned ShAmt = SA->getZExtValue();
1707       if (ShAmt == 0)
1708         return TLO.CombineTo(Op, Op0);
1709 
1710       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1711       // single shift.  We can do this if the top bits (which are shifted out)
1712       // are never demanded.
1713       // TODO - support non-uniform vector amounts.
1714       if (Op0.getOpcode() == ISD::SHL) {
1715         if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1716           if (const APInt *SA2 =
1717                   TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1718             unsigned C1 = SA2->getZExtValue();
1719             unsigned Opc = ISD::SRL;
1720             int Diff = ShAmt - C1;
1721             if (Diff < 0) {
1722               Diff = -Diff;
1723               Opc = ISD::SHL;
1724             }
1725             SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1726             return TLO.CombineTo(
1727                 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1728           }
1729         }
1730       }
1731 
1732       APInt InDemandedMask = (DemandedBits << ShAmt);
1733 
1734       // If the shift is exact, then it does demand the low bits (and knows that
1735       // they are zero).
1736       if (Op->getFlags().hasExact())
1737         InDemandedMask.setLowBits(ShAmt);
1738 
1739       // Compute the new bits that are at the top now.
1740       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1741                                Depth + 1))
1742         return true;
1743       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1744       Known.Zero.lshrInPlace(ShAmt);
1745       Known.One.lshrInPlace(ShAmt);
1746       // High bits known zero.
1747       Known.Zero.setHighBits(ShAmt);
1748     }
1749     break;
1750   }
1751   case ISD::SRA: {
1752     SDValue Op0 = Op.getOperand(0);
1753     SDValue Op1 = Op.getOperand(1);
1754     EVT ShiftVT = Op1.getValueType();
1755 
1756     // If we only want bits that already match the signbit then we don't need
1757     // to shift.
1758     unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1759     if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1760         NumHiDemandedBits)
1761       return TLO.CombineTo(Op, Op0);
1762 
1763     // If this is an arithmetic shift right and only the low-bit is set, we can
1764     // always convert this into a logical shr, even if the shift amount is
1765     // variable.  The low bit of the shift cannot be an input sign bit unless
1766     // the shift amount is >= the size of the datatype, which is undefined.
1767     if (DemandedBits.isOne())
1768       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1769 
1770     // Try to match AVG patterns.
1771     if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1772                                         DemandedElts, Depth + 1))
1773       return TLO.CombineTo(Op, AVG);
1774 
1775     if (const APInt *SA =
1776             TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1777       unsigned ShAmt = SA->getZExtValue();
1778       if (ShAmt == 0)
1779         return TLO.CombineTo(Op, Op0);
1780 
1781       APInt InDemandedMask = (DemandedBits << ShAmt);
1782 
1783       // If the shift is exact, then it does demand the low bits (and knows that
1784       // they are zero).
1785       if (Op->getFlags().hasExact())
1786         InDemandedMask.setLowBits(ShAmt);
1787 
1788       // If any of the demanded bits are produced by the sign extension, we also
1789       // demand the input sign bit.
1790       if (DemandedBits.countLeadingZeros() < ShAmt)
1791         InDemandedMask.setSignBit();
1792 
1793       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1794                                Depth + 1))
1795         return true;
1796       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1797       Known.Zero.lshrInPlace(ShAmt);
1798       Known.One.lshrInPlace(ShAmt);
1799 
1800       // If the input sign bit is known to be zero, or if none of the top bits
1801       // are demanded, turn this into an unsigned shift right.
1802       if (Known.Zero[BitWidth - ShAmt - 1] ||
1803           DemandedBits.countLeadingZeros() >= ShAmt) {
1804         SDNodeFlags Flags;
1805         Flags.setExact(Op->getFlags().hasExact());
1806         return TLO.CombineTo(
1807             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1808       }
1809 
1810       int Log2 = DemandedBits.exactLogBase2();
1811       if (Log2 >= 0) {
1812         // The bit must come from the sign.
1813         SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1814         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1815       }
1816 
1817       if (Known.One[BitWidth - ShAmt - 1])
1818         // New bits are known one.
1819         Known.One.setHighBits(ShAmt);
1820 
1821       // Attempt to avoid multi-use ops if we don't need anything from them.
1822       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1823         SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1824             Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1825         if (DemandedOp0) {
1826           SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1827           return TLO.CombineTo(Op, NewOp);
1828         }
1829       }
1830     }
1831     break;
1832   }
1833   case ISD::FSHL:
1834   case ISD::FSHR: {
1835     SDValue Op0 = Op.getOperand(0);
1836     SDValue Op1 = Op.getOperand(1);
1837     SDValue Op2 = Op.getOperand(2);
1838     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1839 
1840     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1841       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1842 
1843       // For fshl, 0-shift returns the 1st arg.
1844       // For fshr, 0-shift returns the 2nd arg.
1845       if (Amt == 0) {
1846         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1847                                  Known, TLO, Depth + 1))
1848           return true;
1849         break;
1850       }
1851 
1852       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1853       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1854       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1855       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1856       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1857                                Depth + 1))
1858         return true;
1859       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1860                                Depth + 1))
1861         return true;
1862 
1863       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1864       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1865       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1866       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1867       Known.One |= Known2.One;
1868       Known.Zero |= Known2.Zero;
1869     }
1870 
1871     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
1872     if (isPowerOf2_32(BitWidth)) {
1873       APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
1874       if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
1875                                Known2, TLO, Depth + 1))
1876         return true;
1877     }
1878     break;
1879   }
1880   case ISD::ROTL:
1881   case ISD::ROTR: {
1882     SDValue Op0 = Op.getOperand(0);
1883     SDValue Op1 = Op.getOperand(1);
1884     bool IsROTL = (Op.getOpcode() == ISD::ROTL);
1885 
1886     // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
1887     if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
1888       return TLO.CombineTo(Op, Op0);
1889 
1890     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1891       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1892       unsigned RevAmt = BitWidth - Amt;
1893 
1894       // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
1895       // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
1896       APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
1897       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1898                                Depth + 1))
1899         return true;
1900 
1901       // rot*(x, 0) --> x
1902       if (Amt == 0)
1903         return TLO.CombineTo(Op, Op0);
1904 
1905       // See if we don't demand either half of the rotated bits.
1906       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
1907           DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
1908         Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
1909         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
1910       }
1911       if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
1912           DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
1913         Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
1914         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1915       }
1916     }
1917 
1918     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
1919     if (isPowerOf2_32(BitWidth)) {
1920       APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
1921       if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
1922                                Depth + 1))
1923         return true;
1924     }
1925     break;
1926   }
1927   case ISD::UMIN: {
1928     // Check if one arg is always less than (or equal) to the other arg.
1929     SDValue Op0 = Op.getOperand(0);
1930     SDValue Op1 = Op.getOperand(1);
1931     KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
1932     KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1933     Known = KnownBits::umin(Known0, Known1);
1934     if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
1935       return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
1936     if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
1937       return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
1938     break;
1939   }
1940   case ISD::UMAX: {
1941     // Check if one arg is always greater than (or equal) to the other arg.
1942     SDValue Op0 = Op.getOperand(0);
1943     SDValue Op1 = Op.getOperand(1);
1944     KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
1945     KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1946     Known = KnownBits::umax(Known0, Known1);
1947     if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
1948       return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
1949     if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
1950       return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
1951     break;
1952   }
1953   case ISD::BITREVERSE: {
1954     SDValue Src = Op.getOperand(0);
1955     APInt DemandedSrcBits = DemandedBits.reverseBits();
1956     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1957                              Depth + 1))
1958       return true;
1959     Known.One = Known2.One.reverseBits();
1960     Known.Zero = Known2.Zero.reverseBits();
1961     break;
1962   }
1963   case ISD::BSWAP: {
1964     SDValue Src = Op.getOperand(0);
1965 
1966     // If the only bits demanded come from one byte of the bswap result,
1967     // just shift the input byte into position to eliminate the bswap.
1968     unsigned NLZ = DemandedBits.countLeadingZeros();
1969     unsigned NTZ = DemandedBits.countTrailingZeros();
1970 
1971     // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
1972     // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
1973     // have 14 leading zeros, round to 8.
1974     NLZ = alignDown(NLZ, 8);
1975     NTZ = alignDown(NTZ, 8);
1976     // If we need exactly one byte, we can do this transformation.
1977     if (BitWidth - NLZ - NTZ == 8) {
1978       // Replace this with either a left or right shift to get the byte into
1979       // the right place.
1980       unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
1981       if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
1982         EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
1983         unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
1984         SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
1985         SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
1986         return TLO.CombineTo(Op, NewOp);
1987       }
1988     }
1989 
1990     APInt DemandedSrcBits = DemandedBits.byteSwap();
1991     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1992                              Depth + 1))
1993       return true;
1994     Known.One = Known2.One.byteSwap();
1995     Known.Zero = Known2.Zero.byteSwap();
1996     break;
1997   }
1998   case ISD::CTPOP: {
1999     // If only 1 bit is demanded, replace with PARITY as long as we're before
2000     // op legalization.
2001     // FIXME: Limit to scalars for now.
2002     if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2003       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2004                                                Op.getOperand(0)));
2005 
2006     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2007     break;
2008   }
2009   case ISD::SIGN_EXTEND_INREG: {
2010     SDValue Op0 = Op.getOperand(0);
2011     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2012     unsigned ExVTBits = ExVT.getScalarSizeInBits();
2013 
2014     // If we only care about the highest bit, don't bother shifting right.
2015     if (DemandedBits.isSignMask()) {
2016       unsigned MinSignedBits =
2017           TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2018       bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2019       // However if the input is already sign extended we expect the sign
2020       // extension to be dropped altogether later and do not simplify.
2021       if (!AlreadySignExtended) {
2022         // Compute the correct shift amount type, which must be getShiftAmountTy
2023         // for scalar types after legalization.
2024         SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2025                                                getShiftAmountTy(VT, DL));
2026         return TLO.CombineTo(Op,
2027                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2028       }
2029     }
2030 
2031     // If none of the extended bits are demanded, eliminate the sextinreg.
2032     if (DemandedBits.getActiveBits() <= ExVTBits)
2033       return TLO.CombineTo(Op, Op0);
2034 
2035     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2036 
2037     // Since the sign extended bits are demanded, we know that the sign
2038     // bit is demanded.
2039     InputDemandedBits.setBit(ExVTBits - 1);
2040 
2041     if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
2042       return true;
2043     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2044 
2045     // If the sign bit of the input is known set or clear, then we know the
2046     // top bits of the result.
2047 
2048     // If the input sign bit is known zero, convert this into a zero extension.
2049     if (Known.Zero[ExVTBits - 1])
2050       return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2051 
2052     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2053     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2054       Known.One.setBitsFrom(ExVTBits);
2055       Known.Zero &= Mask;
2056     } else { // Input sign bit unknown
2057       Known.Zero &= Mask;
2058       Known.One &= Mask;
2059     }
2060     break;
2061   }
2062   case ISD::BUILD_PAIR: {
2063     EVT HalfVT = Op.getOperand(0).getValueType();
2064     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2065 
2066     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2067     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2068 
2069     KnownBits KnownLo, KnownHi;
2070 
2071     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2072       return true;
2073 
2074     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2075       return true;
2076 
2077     Known.Zero = KnownLo.Zero.zext(BitWidth) |
2078                  KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
2079 
2080     Known.One = KnownLo.One.zext(BitWidth) |
2081                 KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
2082     break;
2083   }
2084   case ISD::ZERO_EXTEND:
2085   case ISD::ZERO_EXTEND_VECTOR_INREG: {
2086     SDValue Src = Op.getOperand(0);
2087     EVT SrcVT = Src.getValueType();
2088     unsigned InBits = SrcVT.getScalarSizeInBits();
2089     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2090     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2091 
2092     // If none of the top bits are demanded, convert this into an any_extend.
2093     if (DemandedBits.getActiveBits() <= InBits) {
2094       // If we only need the non-extended bits of the bottom element
2095       // then we can just bitcast to the result.
2096       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2097           VT.getSizeInBits() == SrcVT.getSizeInBits())
2098         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2099 
2100       unsigned Opc =
2101           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2102       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2103         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2104     }
2105 
2106     APInt InDemandedBits = DemandedBits.trunc(InBits);
2107     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
2108     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2109                              Depth + 1))
2110       return true;
2111     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2112     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2113     Known = Known.zext(BitWidth);
2114 
2115     // Attempt to avoid multi-use ops if we don't need anything from them.
2116     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2117             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2118       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2119     break;
2120   }
2121   case ISD::SIGN_EXTEND:
2122   case ISD::SIGN_EXTEND_VECTOR_INREG: {
2123     SDValue Src = Op.getOperand(0);
2124     EVT SrcVT = Src.getValueType();
2125     unsigned InBits = SrcVT.getScalarSizeInBits();
2126     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2127     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2128 
2129     // If none of the top bits are demanded, convert this into an any_extend.
2130     if (DemandedBits.getActiveBits() <= InBits) {
2131       // If we only need the non-extended bits of the bottom element
2132       // then we can just bitcast to the result.
2133       if (IsLE && IsVecInReg && DemandedElts == 1 &&
2134           VT.getSizeInBits() == SrcVT.getSizeInBits())
2135         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2136 
2137       unsigned Opc =
2138           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2139       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2140         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2141     }
2142 
2143     APInt InDemandedBits = DemandedBits.trunc(InBits);
2144     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
2145 
2146     // Since some of the sign extended bits are demanded, we know that the sign
2147     // bit is demanded.
2148     InDemandedBits.setBit(InBits - 1);
2149 
2150     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2151                              Depth + 1))
2152       return true;
2153     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2154     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2155 
2156     // If the sign bit is known one, the top bits match.
2157     Known = Known.sext(BitWidth);
2158 
2159     // If the sign bit is known zero, convert this to a zero extend.
2160     if (Known.isNonNegative()) {
2161       unsigned Opc =
2162           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2163       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2164         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2165     }
2166 
2167     // Attempt to avoid multi-use ops if we don't need anything from them.
2168     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2169             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2170       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2171     break;
2172   }
2173   case ISD::ANY_EXTEND:
2174   case ISD::ANY_EXTEND_VECTOR_INREG: {
2175     SDValue Src = Op.getOperand(0);
2176     EVT SrcVT = Src.getValueType();
2177     unsigned InBits = SrcVT.getScalarSizeInBits();
2178     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2179     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2180 
2181     // If we only need the bottom element then we can just bitcast.
2182     // TODO: Handle ANY_EXTEND?
2183     if (IsLE && IsVecInReg && DemandedElts == 1 &&
2184         VT.getSizeInBits() == SrcVT.getSizeInBits())
2185       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2186 
2187     APInt InDemandedBits = DemandedBits.trunc(InBits);
2188     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
2189     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2190                              Depth + 1))
2191       return true;
2192     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2193     assert(Known.getBitWidth() == InBits && "Src width has changed?");
2194     Known = Known.anyext(BitWidth);
2195 
2196     // Attempt to avoid multi-use ops if we don't need anything from them.
2197     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2198             Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2199       return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2200     break;
2201   }
2202   case ISD::TRUNCATE: {
2203     SDValue Src = Op.getOperand(0);
2204 
2205     // Simplify the input, using demanded bit information, and compute the known
2206     // zero/one bits live out.
2207     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2208     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2209     if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2210                              Depth + 1))
2211       return true;
2212     Known = Known.trunc(BitWidth);
2213 
2214     // Attempt to avoid multi-use ops if we don't need anything from them.
2215     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2216             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2217       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2218 
2219     // If the input is only used by this truncate, see if we can shrink it based
2220     // on the known demanded bits.
2221     if (Src.getNode()->hasOneUse()) {
2222       switch (Src.getOpcode()) {
2223       default:
2224         break;
2225       case ISD::SRL:
2226         // Shrink SRL by a constant if none of the high bits shifted in are
2227         // demanded.
2228         if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2229           // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2230           // undesirable.
2231           break;
2232 
2233         const APInt *ShAmtC =
2234             TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2235         if (!ShAmtC || ShAmtC->uge(BitWidth))
2236           break;
2237         uint64_t ShVal = ShAmtC->getZExtValue();
2238 
2239         APInt HighBits =
2240             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2241         HighBits.lshrInPlace(ShVal);
2242         HighBits = HighBits.trunc(BitWidth);
2243 
2244         if (!(HighBits & DemandedBits)) {
2245           // None of the shifted in bits are needed.  Add a truncate of the
2246           // shift input, then shift it.
2247           SDValue NewShAmt = TLO.DAG.getConstant(
2248               ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2249           SDValue NewTrunc =
2250               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2251           return TLO.CombineTo(
2252               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2253         }
2254         break;
2255       }
2256     }
2257 
2258     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2259     break;
2260   }
2261   case ISD::AssertZext: {
2262     // AssertZext demands all of the high bits, plus any of the low bits
2263     // demanded by its users.
2264     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2265     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2266     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2267                              TLO, Depth + 1))
2268       return true;
2269     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2270 
2271     Known.Zero |= ~InMask;
2272     break;
2273   }
2274   case ISD::EXTRACT_VECTOR_ELT: {
2275     SDValue Src = Op.getOperand(0);
2276     SDValue Idx = Op.getOperand(1);
2277     ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2278     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2279 
2280     if (SrcEltCnt.isScalable())
2281       return false;
2282 
2283     // Demand the bits from every vector element without a constant index.
2284     unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2285     APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2286     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2287       if (CIdx->getAPIntValue().ult(NumSrcElts))
2288         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2289 
2290     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2291     // anything about the extended bits.
2292     APInt DemandedSrcBits = DemandedBits;
2293     if (BitWidth > EltBitWidth)
2294       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2295 
2296     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2297                              Depth + 1))
2298       return true;
2299 
2300     // Attempt to avoid multi-use ops if we don't need anything from them.
2301     if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2302       if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2303               Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2304         SDValue NewOp =
2305             TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2306         return TLO.CombineTo(Op, NewOp);
2307       }
2308     }
2309 
2310     Known = Known2;
2311     if (BitWidth > EltBitWidth)
2312       Known = Known.anyext(BitWidth);
2313     break;
2314   }
2315   case ISD::BITCAST: {
2316     SDValue Src = Op.getOperand(0);
2317     EVT SrcVT = Src.getValueType();
2318     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2319 
2320     // If this is an FP->Int bitcast and if the sign bit is the only
2321     // thing demanded, turn this into a FGETSIGN.
2322     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2323         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2324         SrcVT.isFloatingPoint()) {
2325       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2326       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2327       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2328           SrcVT != MVT::f128) {
2329         // Cannot eliminate/lower SHL for f128 yet.
2330         EVT Ty = OpVTLegal ? VT : MVT::i32;
2331         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2332         // place.  We expect the SHL to be eliminated by other optimizations.
2333         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2334         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2335         if (!OpVTLegal && OpVTSizeInBits > 32)
2336           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2337         unsigned ShVal = Op.getValueSizeInBits() - 1;
2338         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2339         return TLO.CombineTo(Op,
2340                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2341       }
2342     }
2343 
2344     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2345     // Demand the elt/bit if any of the original elts/bits are demanded.
2346     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2347       unsigned Scale = BitWidth / NumSrcEltBits;
2348       unsigned NumSrcElts = SrcVT.getVectorNumElements();
2349       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2350       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2351       for (unsigned i = 0; i != Scale; ++i) {
2352         unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2353         unsigned BitOffset = EltOffset * NumSrcEltBits;
2354         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2355         if (!Sub.isZero()) {
2356           DemandedSrcBits |= Sub;
2357           for (unsigned j = 0; j != NumElts; ++j)
2358             if (DemandedElts[j])
2359               DemandedSrcElts.setBit((j * Scale) + i);
2360         }
2361       }
2362 
2363       APInt KnownSrcUndef, KnownSrcZero;
2364       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2365                                      KnownSrcZero, TLO, Depth + 1))
2366         return true;
2367 
2368       KnownBits KnownSrcBits;
2369       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2370                                KnownSrcBits, TLO, Depth + 1))
2371         return true;
2372     } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2373       // TODO - bigendian once we have test coverage.
2374       unsigned Scale = NumSrcEltBits / BitWidth;
2375       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2376       APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2377       APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2378       for (unsigned i = 0; i != NumElts; ++i)
2379         if (DemandedElts[i]) {
2380           unsigned Offset = (i % Scale) * BitWidth;
2381           DemandedSrcBits.insertBits(DemandedBits, Offset);
2382           DemandedSrcElts.setBit(i / Scale);
2383         }
2384 
2385       if (SrcVT.isVector()) {
2386         APInt KnownSrcUndef, KnownSrcZero;
2387         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2388                                        KnownSrcZero, TLO, Depth + 1))
2389           return true;
2390       }
2391 
2392       KnownBits KnownSrcBits;
2393       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2394                                KnownSrcBits, TLO, Depth + 1))
2395         return true;
2396     }
2397 
2398     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2399     // recursive call where Known may be useful to the caller.
2400     if (Depth > 0) {
2401       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2402       return false;
2403     }
2404     break;
2405   }
2406   case ISD::MUL:
2407     if (DemandedBits.isPowerOf2()) {
2408       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2409       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2410       // odd (has LSB set), then the left-shifted low bit of X is the answer.
2411       unsigned CTZ = DemandedBits.countTrailingZeros();
2412       ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2413       if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
2414         EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2415         SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2416         SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2417         return TLO.CombineTo(Op, Shl);
2418       }
2419     }
2420     // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2421     // X * X is odd iff X is odd.
2422     // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2423     if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2424       SDValue One = TLO.DAG.getConstant(1, dl, VT);
2425       SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2426       return TLO.CombineTo(Op, And1);
2427     }
2428     LLVM_FALLTHROUGH;
2429   case ISD::ADD:
2430   case ISD::SUB: {
2431     // Add, Sub, and Mul don't demand any bits in positions beyond that
2432     // of the highest bit demanded of them.
2433     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2434     SDNodeFlags Flags = Op.getNode()->getFlags();
2435     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2436     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2437     if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2438                              Depth + 1) ||
2439         SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2440                              Depth + 1) ||
2441         // See if the operation should be performed at a smaller bit width.
2442         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2443       if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2444         // Disable the nsw and nuw flags. We can no longer guarantee that we
2445         // won't wrap after simplification.
2446         Flags.setNoSignedWrap(false);
2447         Flags.setNoUnsignedWrap(false);
2448         SDValue NewOp =
2449             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2450         return TLO.CombineTo(Op, NewOp);
2451       }
2452       return true;
2453     }
2454 
2455     // Attempt to avoid multi-use ops if we don't need anything from them.
2456     if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2457       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2458           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2459       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2460           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2461       if (DemandedOp0 || DemandedOp1) {
2462         Flags.setNoSignedWrap(false);
2463         Flags.setNoUnsignedWrap(false);
2464         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2465         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2466         SDValue NewOp =
2467             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2468         return TLO.CombineTo(Op, NewOp);
2469       }
2470     }
2471 
2472     // If we have a constant operand, we may be able to turn it into -1 if we
2473     // do not demand the high bits. This can make the constant smaller to
2474     // encode, allow more general folding, or match specialized instruction
2475     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2476     // is probably not useful (and could be detrimental).
2477     ConstantSDNode *C = isConstOrConstSplat(Op1);
2478     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2479     if (C && !C->isAllOnes() && !C->isOne() &&
2480         (C->getAPIntValue() | HighMask).isAllOnes()) {
2481       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2482       // Disable the nsw and nuw flags. We can no longer guarantee that we
2483       // won't wrap after simplification.
2484       Flags.setNoSignedWrap(false);
2485       Flags.setNoUnsignedWrap(false);
2486       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2487       return TLO.CombineTo(Op, NewOp);
2488     }
2489 
2490     // Match a multiply with a disguised negated-power-of-2 and convert to a
2491     // an equivalent shift-left amount.
2492     // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2493     auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2494       if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2495         return 0;
2496 
2497       // Don't touch opaque constants. Also, ignore zero and power-of-2
2498       // multiplies. Those will get folded later.
2499       ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2500       if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2501           !MulC->getAPIntValue().isPowerOf2()) {
2502         APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2503         if (UnmaskedC.isNegatedPowerOf2())
2504           return (-UnmaskedC).logBase2();
2505       }
2506       return 0;
2507     };
2508 
2509     auto foldMul = [&](SDValue X, SDValue Y, unsigned ShlAmt) {
2510       EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2511       SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2512       SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2513       SDValue Sub = TLO.DAG.getNode(ISD::SUB, dl, VT, Y, Shl);
2514       return TLO.CombineTo(Op, Sub);
2515     };
2516 
2517     if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2518       if (Op.getOpcode() == ISD::ADD) {
2519         // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2520         if (unsigned ShAmt = getShiftLeftAmt(Op0))
2521           return foldMul(Op0.getOperand(0), Op1, ShAmt);
2522         // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2523         if (unsigned ShAmt = getShiftLeftAmt(Op1))
2524           return foldMul(Op1.getOperand(0), Op0, ShAmt);
2525         // TODO:
2526         // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2527       }
2528     }
2529 
2530     LLVM_FALLTHROUGH;
2531   }
2532   default:
2533     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2534       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2535                                             Known, TLO, Depth))
2536         return true;
2537       break;
2538     }
2539 
2540     // Just use computeKnownBits to compute output bits.
2541     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2542     break;
2543   }
2544 
2545   // If we know the value of all of the demanded bits, return this as a
2546   // constant.
2547   if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2548     // Avoid folding to a constant if any OpaqueConstant is involved.
2549     const SDNode *N = Op.getNode();
2550     for (SDNode *Op :
2551          llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2552       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2553         if (C->isOpaque())
2554           return false;
2555     }
2556     if (VT.isInteger())
2557       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2558     if (VT.isFloatingPoint())
2559       return TLO.CombineTo(
2560           Op,
2561           TLO.DAG.getConstantFP(
2562               APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2563   }
2564 
2565   return false;
2566 }
2567 
2568 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2569                                                 const APInt &DemandedElts,
2570                                                 DAGCombinerInfo &DCI) const {
2571   SelectionDAG &DAG = DCI.DAG;
2572   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2573                         !DCI.isBeforeLegalizeOps());
2574 
2575   APInt KnownUndef, KnownZero;
2576   bool Simplified =
2577       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2578   if (Simplified) {
2579     DCI.AddToWorklist(Op.getNode());
2580     DCI.CommitTargetLoweringOpt(TLO);
2581   }
2582 
2583   return Simplified;
2584 }
2585 
2586 /// Given a vector binary operation and known undefined elements for each input
2587 /// operand, compute whether each element of the output is undefined.
2588 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2589                                          const APInt &UndefOp0,
2590                                          const APInt &UndefOp1) {
2591   EVT VT = BO.getValueType();
2592   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2593          "Vector binop only");
2594 
2595   EVT EltVT = VT.getVectorElementType();
2596   unsigned NumElts = VT.getVectorNumElements();
2597   assert(UndefOp0.getBitWidth() == NumElts &&
2598          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2599 
2600   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2601                                    const APInt &UndefVals) {
2602     if (UndefVals[Index])
2603       return DAG.getUNDEF(EltVT);
2604 
2605     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2606       // Try hard to make sure that the getNode() call is not creating temporary
2607       // nodes. Ignore opaque integers because they do not constant fold.
2608       SDValue Elt = BV->getOperand(Index);
2609       auto *C = dyn_cast<ConstantSDNode>(Elt);
2610       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2611         return Elt;
2612     }
2613 
2614     return SDValue();
2615   };
2616 
2617   APInt KnownUndef = APInt::getZero(NumElts);
2618   for (unsigned i = 0; i != NumElts; ++i) {
2619     // If both inputs for this element are either constant or undef and match
2620     // the element type, compute the constant/undef result for this element of
2621     // the vector.
2622     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2623     // not handle FP constants. The code within getNode() should be refactored
2624     // to avoid the danger of creating a bogus temporary node here.
2625     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2626     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2627     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2628       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2629         KnownUndef.setBit(i);
2630   }
2631   return KnownUndef;
2632 }
2633 
2634 bool TargetLowering::SimplifyDemandedVectorElts(
2635     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2636     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2637     bool AssumeSingleUse) const {
2638   EVT VT = Op.getValueType();
2639   unsigned Opcode = Op.getOpcode();
2640   APInt DemandedElts = OriginalDemandedElts;
2641   unsigned NumElts = DemandedElts.getBitWidth();
2642   assert(VT.isVector() && "Expected vector op");
2643 
2644   KnownUndef = KnownZero = APInt::getZero(NumElts);
2645 
2646   const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2647   if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2648     return false;
2649 
2650   // TODO: For now we assume we know nothing about scalable vectors.
2651   if (VT.isScalableVector())
2652     return false;
2653 
2654   assert(VT.getVectorNumElements() == NumElts &&
2655          "Mask size mismatches value type element count!");
2656 
2657   // Undef operand.
2658   if (Op.isUndef()) {
2659     KnownUndef.setAllBits();
2660     return false;
2661   }
2662 
2663   // If Op has other users, assume that all elements are needed.
2664   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2665     DemandedElts.setAllBits();
2666 
2667   // Not demanding any elements from Op.
2668   if (DemandedElts == 0) {
2669     KnownUndef.setAllBits();
2670     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2671   }
2672 
2673   // Limit search depth.
2674   if (Depth >= SelectionDAG::MaxRecursionDepth)
2675     return false;
2676 
2677   SDLoc DL(Op);
2678   unsigned EltSizeInBits = VT.getScalarSizeInBits();
2679   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2680 
2681   // Helper for demanding the specified elements and all the bits of both binary
2682   // operands.
2683   auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2684     SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2685                                                            TLO.DAG, Depth + 1);
2686     SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2687                                                            TLO.DAG, Depth + 1);
2688     if (NewOp0 || NewOp1) {
2689       SDValue NewOp = TLO.DAG.getNode(
2690           Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2691       return TLO.CombineTo(Op, NewOp);
2692     }
2693     return false;
2694   };
2695 
2696   switch (Opcode) {
2697   case ISD::SCALAR_TO_VECTOR: {
2698     if (!DemandedElts[0]) {
2699       KnownUndef.setAllBits();
2700       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2701     }
2702     SDValue ScalarSrc = Op.getOperand(0);
2703     if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2704       SDValue Src = ScalarSrc.getOperand(0);
2705       SDValue Idx = ScalarSrc.getOperand(1);
2706       EVT SrcVT = Src.getValueType();
2707 
2708       ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
2709 
2710       if (SrcEltCnt.isScalable())
2711         return false;
2712 
2713       unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2714       if (isNullConstant(Idx)) {
2715         APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
2716         APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
2717         APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
2718         if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2719                                        TLO, Depth + 1))
2720           return true;
2721       }
2722     }
2723     KnownUndef.setHighBits(NumElts - 1);
2724     break;
2725   }
2726   case ISD::BITCAST: {
2727     SDValue Src = Op.getOperand(0);
2728     EVT SrcVT = Src.getValueType();
2729 
2730     // We only handle vectors here.
2731     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2732     if (!SrcVT.isVector())
2733       break;
2734 
2735     // Fast handling of 'identity' bitcasts.
2736     unsigned NumSrcElts = SrcVT.getVectorNumElements();
2737     if (NumSrcElts == NumElts)
2738       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2739                                         KnownZero, TLO, Depth + 1);
2740 
2741     APInt SrcDemandedElts, SrcZero, SrcUndef;
2742 
2743     // Bitcast from 'large element' src vector to 'small element' vector, we
2744     // must demand a source element if any DemandedElt maps to it.
2745     if ((NumElts % NumSrcElts) == 0) {
2746       unsigned Scale = NumElts / NumSrcElts;
2747       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2748       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2749                                      TLO, Depth + 1))
2750         return true;
2751 
2752       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2753       // of the large element.
2754       // TODO - bigendian once we have test coverage.
2755       if (IsLE) {
2756         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2757         APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
2758         for (unsigned i = 0; i != NumElts; ++i)
2759           if (DemandedElts[i]) {
2760             unsigned Ofs = (i % Scale) * EltSizeInBits;
2761             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2762           }
2763 
2764         KnownBits Known;
2765         if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2766                                  TLO, Depth + 1))
2767           return true;
2768 
2769         // The bitcast has split each wide element into a number of
2770         // narrow subelements. We have just computed the Known bits
2771         // for wide elements. See if element splitting results in
2772         // some subelements being zero. Only for demanded elements!
2773         for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
2774           if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
2775                    .isAllOnes())
2776             continue;
2777           for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
2778             unsigned Elt = Scale * SrcElt + SubElt;
2779             if (DemandedElts[Elt])
2780               KnownZero.setBit(Elt);
2781           }
2782         }
2783       }
2784 
2785       // If the src element is zero/undef then all the output elements will be -
2786       // only demanded elements are guaranteed to be correct.
2787       for (unsigned i = 0; i != NumSrcElts; ++i) {
2788         if (SrcDemandedElts[i]) {
2789           if (SrcZero[i])
2790             KnownZero.setBits(i * Scale, (i + 1) * Scale);
2791           if (SrcUndef[i])
2792             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2793         }
2794       }
2795     }
2796 
2797     // Bitcast from 'small element' src vector to 'large element' vector, we
2798     // demand all smaller source elements covered by the larger demanded element
2799     // of this vector.
2800     if ((NumSrcElts % NumElts) == 0) {
2801       unsigned Scale = NumSrcElts / NumElts;
2802       SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2803       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2804                                      TLO, Depth + 1))
2805         return true;
2806 
2807       // If all the src elements covering an output element are zero/undef, then
2808       // the output element will be as well, assuming it was demanded.
2809       for (unsigned i = 0; i != NumElts; ++i) {
2810         if (DemandedElts[i]) {
2811           if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
2812             KnownZero.setBit(i);
2813           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
2814             KnownUndef.setBit(i);
2815         }
2816       }
2817     }
2818     break;
2819   }
2820   case ISD::BUILD_VECTOR: {
2821     // Check all elements and simplify any unused elements with UNDEF.
2822     if (!DemandedElts.isAllOnes()) {
2823       // Don't simplify BROADCASTS.
2824       if (llvm::any_of(Op->op_values(),
2825                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2826         SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2827         bool Updated = false;
2828         for (unsigned i = 0; i != NumElts; ++i) {
2829           if (!DemandedElts[i] && !Ops[i].isUndef()) {
2830             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2831             KnownUndef.setBit(i);
2832             Updated = true;
2833           }
2834         }
2835         if (Updated)
2836           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2837       }
2838     }
2839     for (unsigned i = 0; i != NumElts; ++i) {
2840       SDValue SrcOp = Op.getOperand(i);
2841       if (SrcOp.isUndef()) {
2842         KnownUndef.setBit(i);
2843       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2844                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
2845         KnownZero.setBit(i);
2846       }
2847     }
2848     break;
2849   }
2850   case ISD::CONCAT_VECTORS: {
2851     EVT SubVT = Op.getOperand(0).getValueType();
2852     unsigned NumSubVecs = Op.getNumOperands();
2853     unsigned NumSubElts = SubVT.getVectorNumElements();
2854     for (unsigned i = 0; i != NumSubVecs; ++i) {
2855       SDValue SubOp = Op.getOperand(i);
2856       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2857       APInt SubUndef, SubZero;
2858       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2859                                      Depth + 1))
2860         return true;
2861       KnownUndef.insertBits(SubUndef, i * NumSubElts);
2862       KnownZero.insertBits(SubZero, i * NumSubElts);
2863     }
2864     break;
2865   }
2866   case ISD::INSERT_SUBVECTOR: {
2867     // Demand any elements from the subvector and the remainder from the src its
2868     // inserted into.
2869     SDValue Src = Op.getOperand(0);
2870     SDValue Sub = Op.getOperand(1);
2871     uint64_t Idx = Op.getConstantOperandVal(2);
2872     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
2873     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
2874     APInt DemandedSrcElts = DemandedElts;
2875     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
2876 
2877     APInt SubUndef, SubZero;
2878     if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
2879                                    Depth + 1))
2880       return true;
2881 
2882     // If none of the src operand elements are demanded, replace it with undef.
2883     if (!DemandedSrcElts && !Src.isUndef())
2884       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2885                                                TLO.DAG.getUNDEF(VT), Sub,
2886                                                Op.getOperand(2)));
2887 
2888     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
2889                                    TLO, Depth + 1))
2890       return true;
2891     KnownUndef.insertBits(SubUndef, Idx);
2892     KnownZero.insertBits(SubZero, Idx);
2893 
2894     // Attempt to avoid multi-use ops if we don't need anything from them.
2895     if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
2896       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2897           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2898       SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
2899           Sub, DemandedSubElts, TLO.DAG, Depth + 1);
2900       if (NewSrc || NewSub) {
2901         NewSrc = NewSrc ? NewSrc : Src;
2902         NewSub = NewSub ? NewSub : Sub;
2903         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2904                                         NewSub, Op.getOperand(2));
2905         return TLO.CombineTo(Op, NewOp);
2906       }
2907     }
2908     break;
2909   }
2910   case ISD::EXTRACT_SUBVECTOR: {
2911     // Offset the demanded elts by the subvector index.
2912     SDValue Src = Op.getOperand(0);
2913     if (Src.getValueType().isScalableVector())
2914       break;
2915     uint64_t Idx = Op.getConstantOperandVal(1);
2916     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2917     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
2918 
2919     APInt SrcUndef, SrcZero;
2920     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2921                                    Depth + 1))
2922       return true;
2923     KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2924     KnownZero = SrcZero.extractBits(NumElts, Idx);
2925 
2926     // Attempt to avoid multi-use ops if we don't need anything from them.
2927     if (!DemandedElts.isAllOnes()) {
2928       SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
2929           Src, DemandedSrcElts, TLO.DAG, Depth + 1);
2930       if (NewSrc) {
2931         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
2932                                         Op.getOperand(1));
2933         return TLO.CombineTo(Op, NewOp);
2934       }
2935     }
2936     break;
2937   }
2938   case ISD::INSERT_VECTOR_ELT: {
2939     SDValue Vec = Op.getOperand(0);
2940     SDValue Scl = Op.getOperand(1);
2941     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2942 
2943     // For a legal, constant insertion index, if we don't need this insertion
2944     // then strip it, else remove it from the demanded elts.
2945     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
2946       unsigned Idx = CIdx->getZExtValue();
2947       if (!DemandedElts[Idx])
2948         return TLO.CombineTo(Op, Vec);
2949 
2950       APInt DemandedVecElts(DemandedElts);
2951       DemandedVecElts.clearBit(Idx);
2952       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2953                                      KnownZero, TLO, Depth + 1))
2954         return true;
2955 
2956       KnownUndef.setBitVal(Idx, Scl.isUndef());
2957 
2958       KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
2959       break;
2960     }
2961 
2962     APInt VecUndef, VecZero;
2963     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2964                                    Depth + 1))
2965       return true;
2966     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2967     break;
2968   }
2969   case ISD::VSELECT: {
2970     // Try to transform the select condition based on the current demanded
2971     // elements.
2972     // TODO: If a condition element is undef, we can choose from one arm of the
2973     //       select (and if one arm is undef, then we can propagate that to the
2974     //       result).
2975     // TODO - add support for constant vselect masks (see IR version of this).
2976     APInt UnusedUndef, UnusedZero;
2977     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2978                                    UnusedZero, TLO, Depth + 1))
2979       return true;
2980 
2981     // See if we can simplify either vselect operand.
2982     APInt DemandedLHS(DemandedElts);
2983     APInt DemandedRHS(DemandedElts);
2984     APInt UndefLHS, ZeroLHS;
2985     APInt UndefRHS, ZeroRHS;
2986     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2987                                    ZeroLHS, TLO, Depth + 1))
2988       return true;
2989     if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
2990                                    ZeroRHS, TLO, Depth + 1))
2991       return true;
2992 
2993     KnownUndef = UndefLHS & UndefRHS;
2994     KnownZero = ZeroLHS & ZeroRHS;
2995     break;
2996   }
2997   case ISD::VECTOR_SHUFFLE: {
2998     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
2999 
3000     // Collect demanded elements from shuffle operands..
3001     APInt DemandedLHS(NumElts, 0);
3002     APInt DemandedRHS(NumElts, 0);
3003     for (unsigned i = 0; i != NumElts; ++i) {
3004       int M = ShuffleMask[i];
3005       if (M < 0 || !DemandedElts[i])
3006         continue;
3007       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3008       if (M < (int)NumElts)
3009         DemandedLHS.setBit(M);
3010       else
3011         DemandedRHS.setBit(M - NumElts);
3012     }
3013 
3014     // See if we can simplify either shuffle operand.
3015     APInt UndefLHS, ZeroLHS;
3016     APInt UndefRHS, ZeroRHS;
3017     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
3018                                    ZeroLHS, TLO, Depth + 1))
3019       return true;
3020     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
3021                                    ZeroRHS, TLO, Depth + 1))
3022       return true;
3023 
3024     // Simplify mask using undef elements from LHS/RHS.
3025     bool Updated = false;
3026     bool IdentityLHS = true, IdentityRHS = true;
3027     SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
3028     for (unsigned i = 0; i != NumElts; ++i) {
3029       int &M = NewMask[i];
3030       if (M < 0)
3031         continue;
3032       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3033           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3034         Updated = true;
3035         M = -1;
3036       }
3037       IdentityLHS &= (M < 0) || (M == (int)i);
3038       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3039     }
3040 
3041     // Update legal shuffle masks based on demanded elements if it won't reduce
3042     // to Identity which can cause premature removal of the shuffle mask.
3043     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3044       SDValue LegalShuffle =
3045           buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
3046                                   NewMask, TLO.DAG);
3047       if (LegalShuffle)
3048         return TLO.CombineTo(Op, LegalShuffle);
3049     }
3050 
3051     // Propagate undef/zero elements from LHS/RHS.
3052     for (unsigned i = 0; i != NumElts; ++i) {
3053       int M = ShuffleMask[i];
3054       if (M < 0) {
3055         KnownUndef.setBit(i);
3056       } else if (M < (int)NumElts) {
3057         if (UndefLHS[M])
3058           KnownUndef.setBit(i);
3059         if (ZeroLHS[M])
3060           KnownZero.setBit(i);
3061       } else {
3062         if (UndefRHS[M - NumElts])
3063           KnownUndef.setBit(i);
3064         if (ZeroRHS[M - NumElts])
3065           KnownZero.setBit(i);
3066       }
3067     }
3068     break;
3069   }
3070   case ISD::ANY_EXTEND_VECTOR_INREG:
3071   case ISD::SIGN_EXTEND_VECTOR_INREG:
3072   case ISD::ZERO_EXTEND_VECTOR_INREG: {
3073     APInt SrcUndef, SrcZero;
3074     SDValue Src = Op.getOperand(0);
3075     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3076     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
3077     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3078                                    Depth + 1))
3079       return true;
3080     KnownZero = SrcZero.zextOrTrunc(NumElts);
3081     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3082 
3083     if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3084         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3085         DemandedSrcElts == 1) {
3086       // aext - if we just need the bottom element then we can bitcast.
3087       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3088     }
3089 
3090     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3091       // zext(undef) upper bits are guaranteed to be zero.
3092       if (DemandedElts.isSubsetOf(KnownUndef))
3093         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3094       KnownUndef.clearAllBits();
3095 
3096       // zext - if we just need the bottom element then we can mask:
3097       // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3098       if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3099           Op->isOnlyUserOf(Src.getNode()) &&
3100           Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3101         SDLoc DL(Op);
3102         EVT SrcVT = Src.getValueType();
3103         EVT SrcSVT = SrcVT.getScalarType();
3104         SmallVector<SDValue> MaskElts;
3105         MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3106         MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3107         SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3108         if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3109                 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3110           Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3111           return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3112         }
3113       }
3114     }
3115     break;
3116   }
3117 
3118   // TODO: There are more binop opcodes that could be handled here - MIN,
3119   // MAX, saturated math, etc.
3120   case ISD::ADD: {
3121     SDValue Op0 = Op.getOperand(0);
3122     SDValue Op1 = Op.getOperand(1);
3123     if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3124       APInt UndefLHS, ZeroLHS;
3125       if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3126                                      Depth + 1, /*AssumeSingleUse*/ true))
3127         return true;
3128     }
3129     LLVM_FALLTHROUGH;
3130   }
3131   case ISD::OR:
3132   case ISD::XOR:
3133   case ISD::SUB:
3134   case ISD::FADD:
3135   case ISD::FSUB:
3136   case ISD::FMUL:
3137   case ISD::FDIV:
3138   case ISD::FREM: {
3139     SDValue Op0 = Op.getOperand(0);
3140     SDValue Op1 = Op.getOperand(1);
3141 
3142     APInt UndefRHS, ZeroRHS;
3143     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3144                                    Depth + 1))
3145       return true;
3146     APInt UndefLHS, ZeroLHS;
3147     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3148                                    Depth + 1))
3149       return true;
3150 
3151     KnownZero = ZeroLHS & ZeroRHS;
3152     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3153 
3154     // Attempt to avoid multi-use ops if we don't need anything from them.
3155     // TODO - use KnownUndef to relax the demandedelts?
3156     if (!DemandedElts.isAllOnes())
3157       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3158         return true;
3159     break;
3160   }
3161   case ISD::SHL:
3162   case ISD::SRL:
3163   case ISD::SRA:
3164   case ISD::ROTL:
3165   case ISD::ROTR: {
3166     SDValue Op0 = Op.getOperand(0);
3167     SDValue Op1 = Op.getOperand(1);
3168 
3169     APInt UndefRHS, ZeroRHS;
3170     if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3171                                    Depth + 1))
3172       return true;
3173     APInt UndefLHS, ZeroLHS;
3174     if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3175                                    Depth + 1))
3176       return true;
3177 
3178     KnownZero = ZeroLHS;
3179     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3180 
3181     // Attempt to avoid multi-use ops if we don't need anything from them.
3182     // TODO - use KnownUndef to relax the demandedelts?
3183     if (!DemandedElts.isAllOnes())
3184       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3185         return true;
3186     break;
3187   }
3188   case ISD::MUL:
3189   case ISD::AND: {
3190     SDValue Op0 = Op.getOperand(0);
3191     SDValue Op1 = Op.getOperand(1);
3192 
3193     APInt SrcUndef, SrcZero;
3194     if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3195                                    Depth + 1))
3196       return true;
3197     if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3198                                    TLO, Depth + 1))
3199       return true;
3200 
3201     // If either side has a zero element, then the result element is zero, even
3202     // if the other is an UNDEF.
3203     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3204     // and then handle 'and' nodes with the rest of the binop opcodes.
3205     KnownZero |= SrcZero;
3206     KnownUndef &= SrcUndef;
3207     KnownUndef &= ~KnownZero;
3208 
3209     // Attempt to avoid multi-use ops if we don't need anything from them.
3210     // TODO - use KnownUndef to relax the demandedelts?
3211     if (!DemandedElts.isAllOnes())
3212       if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3213         return true;
3214     break;
3215   }
3216   case ISD::TRUNCATE:
3217   case ISD::SIGN_EXTEND:
3218   case ISD::ZERO_EXTEND:
3219     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3220                                    KnownZero, TLO, Depth + 1))
3221       return true;
3222 
3223     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3224       // zext(undef) upper bits are guaranteed to be zero.
3225       if (DemandedElts.isSubsetOf(KnownUndef))
3226         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3227       KnownUndef.clearAllBits();
3228     }
3229     break;
3230   default: {
3231     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3232       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3233                                                   KnownZero, TLO, Depth))
3234         return true;
3235     } else {
3236       KnownBits Known;
3237       APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3238       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3239                                TLO, Depth, AssumeSingleUse))
3240         return true;
3241     }
3242     break;
3243   }
3244   }
3245   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3246 
3247   // Constant fold all undef cases.
3248   // TODO: Handle zero cases as well.
3249   if (DemandedElts.isSubsetOf(KnownUndef))
3250     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3251 
3252   return false;
3253 }
3254 
3255 /// Determine which of the bits specified in Mask are known to be either zero or
3256 /// one and return them in the Known.
3257 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3258                                                    KnownBits &Known,
3259                                                    const APInt &DemandedElts,
3260                                                    const SelectionDAG &DAG,
3261                                                    unsigned Depth) const {
3262   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3263           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3264           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3265           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3266          "Should use MaskedValueIsZero if you don't know whether Op"
3267          " is a target node!");
3268   Known.resetAll();
3269 }
3270 
/// GlobalISel hook: compute known bits for the value in register \p R defined
/// by a target-specific generic instruction. The base implementation knows
/// nothing about target instructions, so it conservatively reports that no
/// bits are known; targets override this to expose more information.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
3277 
3278 void TargetLowering::computeKnownBitsForFrameIndex(
3279   const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3280   // The low bits are known zero if the pointer is aligned.
3281   Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3282 }
3283 
/// GlobalISel hook: compute the known alignment of the value in register \p R
/// defined by a target-specific instruction. The base implementation
/// conservatively returns Align(1) (no alignment known); targets override
/// this to report stronger guarantees.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  return Align(1);
}
3289 
3290 /// This method can be implemented by targets that want to expose additional
3291 /// information about sign bits to the DAG Combiner.
3292 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3293                                                          const APInt &,
3294                                                          const SelectionDAG &,
3295                                                          unsigned Depth) const {
3296   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3297           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3298           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3299           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3300          "Should use ComputeNumSignBits if you don't know whether Op"
3301          " is a target node!");
3302   return 1;
3303 }
3304 
/// GlobalISel hook: report the number of known sign bits for the value in
/// register \p R defined by a target-specific instruction. The base
/// implementation conservatively answers 1 (only the sign bit itself);
/// targets override this to expose more sign bits.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
3310 
3311 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3312     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3313     TargetLoweringOpt &TLO, unsigned Depth) const {
3314   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3315           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3316           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3317           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3318          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3319          " is a target node!");
3320   return false;
3321 }
3322 
3323 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3324     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3325     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3326   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3327           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3328           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3329           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3330          "Should use SimplifyDemandedBits if you don't know whether Op"
3331          " is a target node!");
3332   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3333   return false;
3334 }
3335 
3336 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3337     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3338     SelectionDAG &DAG, unsigned Depth) const {
3339   assert(
3340       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3341        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3342        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3343        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3344       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3345       " is a target node!");
3346   return SDValue();
3347 }
3348 
3349 SDValue
3350 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3351                                         SDValue N1, MutableArrayRef<int> Mask,
3352                                         SelectionDAG &DAG) const {
3353   bool LegalMask = isShuffleMaskLegal(Mask, VT);
3354   if (!LegalMask) {
3355     std::swap(N0, N1);
3356     ShuffleVectorSDNode::commuteMask(Mask);
3357     LegalMask = isShuffleMaskLegal(Mask, VT);
3358   }
3359 
3360   if (!LegalMask)
3361     return SDValue();
3362 
3363   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3364 }
3365 
/// Hook allowing targets to map a target-specific load node back to the
/// constant it loads (e.g. from a constant pool). The base implementation
/// recognizes nothing and returns nullptr.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3369 
3370 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3371     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3372     bool PoisonOnly, unsigned Depth) const {
3373   assert(
3374       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3375        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3376        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3377        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3378       "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3379       " is a target node!");
3380   return false;
3381 }
3382 
3383 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3384                                                   const SelectionDAG &DAG,
3385                                                   bool SNaN,
3386                                                   unsigned Depth) const {
3387   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3388           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3389           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3390           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3391          "Should use isKnownNeverNaN if you don't know whether Op"
3392          " is a target node!");
3393   return false;
3394 }
3395 
3396 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3397                                                const APInt &DemandedElts,
3398                                                APInt &UndefElts,
3399                                                unsigned Depth) const {
3400   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3401           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3402           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3403           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3404          "Should use isSplatValue if you don't know whether Op"
3405          " is a target node!");
3406   return false;
3407 }
3408 
3409 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3410 // work with truncating build vectors and vectors with elements of less than
3411 // 8 bits.
3412 bool TargetLowering::isConstTrueVal(SDValue N) const {
3413   if (!N)
3414     return false;
3415 
3416   unsigned EltWidth;
3417   APInt CVal;
3418   if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3419                                                /*AllowTruncation=*/true)) {
3420     CVal = CN->getAPIntValue();
3421     EltWidth = N.getValueType().getScalarSizeInBits();
3422   } else
3423     return false;
3424 
3425   // If this is a truncating splat, truncate the splat value.
3426   // Otherwise, we may fail to match the expected values below.
3427   if (EltWidth < CVal.getBitWidth())
3428     CVal = CVal.trunc(EltWidth);
3429 
3430   switch (getBooleanContents(N.getValueType())) {
3431   case UndefinedBooleanContent:
3432     return CVal[0];
3433   case ZeroOrOneBooleanContent:
3434     return CVal.isOne();
3435   case ZeroOrNegativeOneBooleanContent:
3436     return CVal.isAllOnes();
3437   }
3438 
3439   llvm_unreachable("Invalid boolean contents");
3440 }
3441 
3442 bool TargetLowering::isConstFalseVal(SDValue N) const {
3443   if (!N)
3444     return false;
3445 
3446   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3447   if (!CN) {
3448     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3449     if (!BV)
3450       return false;
3451 
3452     // Only interested in constant splats, we don't care about undef
3453     // elements in identifying boolean constants and getConstantSplatNode
3454     // returns NULL if all ops are undef;
3455     CN = BV->getConstantSplatNode();
3456     if (!CN)
3457       return false;
3458   }
3459 
3460   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3461     return !CN->getAPIntValue()[0];
3462 
3463   return CN->isZero();
3464 }
3465 
3466 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3467                                        bool SExt) const {
3468   if (VT == MVT::i1)
3469     return N->isOne();
3470 
3471   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3472   switch (Cnt) {
3473   case TargetLowering::ZeroOrOneBooleanContent:
3474     // An extended value of 1 is always true, unless its original type is i1,
3475     // in which case it will be sign extended to -1.
3476     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3477   case TargetLowering::UndefinedBooleanContent:
3478   case TargetLowering::ZeroOrNegativeOneBooleanContent:
3479     return N->isAllOnes() && SExt;
3480   }
3481   llvm_unreachable("Unexpected enumeration.");
3482 }
3483 
3484 /// This helper function of SimplifySetCC tries to optimize the comparison when
3485 /// either operand of the SetCC node is a bitwise-and instruction.
3486 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3487                                          ISD::CondCode Cond, const SDLoc &DL,
3488                                          DAGCombinerInfo &DCI) const {
3489   if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3490     std::swap(N0, N1);
3491 
3492   SelectionDAG &DAG = DCI.DAG;
3493   EVT OpVT = N0.getValueType();
3494   if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3495       (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3496     return SDValue();
3497 
3498   // (X & Y) != 0 --> zextOrTrunc(X & Y)
3499   // iff everything but LSB is known zero:
3500   if (Cond == ISD::SETNE && isNullConstant(N1) &&
3501       (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
3502        getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3503     unsigned NumEltBits = OpVT.getScalarSizeInBits();
3504     APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3505     if (DAG.MaskedValueIsZero(N0, UpperBits))
3506       return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3507   }
3508 
3509   // Match these patterns in any of their permutations:
3510   // (X & Y) == Y
3511   // (X & Y) != Y
3512   SDValue X, Y;
3513   if (N0.getOperand(0) == N1) {
3514     X = N0.getOperand(1);
3515     Y = N0.getOperand(0);
3516   } else if (N0.getOperand(1) == N1) {
3517     X = N0.getOperand(0);
3518     Y = N0.getOperand(1);
3519   } else {
3520     return SDValue();
3521   }
3522 
3523   SDValue Zero = DAG.getConstant(0, DL, OpVT);
3524   if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3525     // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3526     // Note that where Y is variable and is known to have at most one bit set
3527     // (for example, if it is Z & 1) we cannot do this; the expressions are not
3528     // equivalent when Y == 0.
3529     assert(OpVT.isInteger());
3530     Cond = ISD::getSetCCInverse(Cond, OpVT);
3531     if (DCI.isBeforeLegalizeOps() ||
3532         isCondCodeLegal(Cond, N0.getSimpleValueType()))
3533       return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3534   } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3535     // If the target supports an 'and-not' or 'and-complement' logic operation,
3536     // try to use that to make a comparison operation more efficient.
3537     // But don't do this transform if the mask is a single bit because there are
3538     // more efficient ways to deal with that case (for example, 'bt' on x86 or
3539     // 'rlwinm' on PPC).
3540 
3541     // Bail out if the compare operand that we want to turn into a zero is
3542     // already a zero (otherwise, infinite loop).
3543     auto *YConst = dyn_cast<ConstantSDNode>(Y);
3544     if (YConst && YConst->isZero())
3545       return SDValue();
3546 
3547     // Transform this into: ~X & Y == 0.
3548     SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3549     SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3550     return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3551   }
3552 
3553   return SDValue();
3554 }
3555 
3556 /// There are multiple IR patterns that could be checking whether certain
3557 /// truncation of a signed number would be lossy or not. The pattern which is
3558 /// best at IR level, may not lower optimally. Thus, we want to unfold it.
3559 /// We are looking for the following pattern: (KeptBits is a constant)
3560 ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3561 /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
3562 /// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
3563 /// We will unfold it into the natural trunc+sext pattern:
3564 ///   ((%x << C) a>> C) dstcond %x
3565 /// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
3566 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3567     EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3568     const SDLoc &DL) const {
3569   // We must be comparing with a constant.
3570   ConstantSDNode *C1;
3571   if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3572     return SDValue();
3573 
3574   // N0 should be:  add %x, (1 << (KeptBits-1))
3575   if (N0->getOpcode() != ISD::ADD)
3576     return SDValue();
3577 
3578   // And we must be 'add'ing a constant.
3579   ConstantSDNode *C01;
3580   if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3581     return SDValue();
3582 
3583   SDValue X = N0->getOperand(0);
3584   EVT XVT = X.getValueType();
3585 
3586   // Validate constants ...
3587 
3588   APInt I1 = C1->getAPIntValue();
3589 
3590   ISD::CondCode NewCond;
3591   if (Cond == ISD::CondCode::SETULT) {
3592     NewCond = ISD::CondCode::SETEQ;
3593   } else if (Cond == ISD::CondCode::SETULE) {
3594     NewCond = ISD::CondCode::SETEQ;
3595     // But need to 'canonicalize' the constant.
3596     I1 += 1;
3597   } else if (Cond == ISD::CondCode::SETUGT) {
3598     NewCond = ISD::CondCode::SETNE;
3599     // But need to 'canonicalize' the constant.
3600     I1 += 1;
3601   } else if (Cond == ISD::CondCode::SETUGE) {
3602     NewCond = ISD::CondCode::SETNE;
3603   } else
3604     return SDValue();
3605 
3606   APInt I01 = C01->getAPIntValue();
3607 
3608   auto checkConstants = [&I1, &I01]() -> bool {
3609     // Both of them must be power-of-two, and the constant from setcc is bigger.
3610     return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3611   };
3612 
3613   if (checkConstants()) {
3614     // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
3615   } else {
3616     // What if we invert constants? (and the target predicate)
3617     I1.negate();
3618     I01.negate();
3619     assert(XVT.isInteger());
3620     NewCond = getSetCCInverse(NewCond, XVT);
3621     if (!checkConstants())
3622       return SDValue();
3623     // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
3624   }
3625 
3626   // They are power-of-two, so which bit is set?
3627   const unsigned KeptBits = I1.logBase2();
3628   const unsigned KeptBitsMinusOne = I01.logBase2();
3629 
3630   // Magic!
3631   if (KeptBits != (KeptBitsMinusOne + 1))
3632     return SDValue();
3633   assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3634 
3635   // We don't want to do this in every single case.
3636   SelectionDAG &DAG = DCI.DAG;
3637   if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
3638           XVT, KeptBits))
3639     return SDValue();
3640 
3641   const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3642   assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3643 
3644   // Unfold into:  ((%x << C) a>> C) cond %x
3645   // Where 'cond' will be either 'eq' or 'ne'.
3646   SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3647   SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3648   SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3649   SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3650 
3651   return T2;
3652 }
3653 
3654 // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
3655 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3656     EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3657     DAGCombinerInfo &DCI, const SDLoc &DL) const {
3658   assert(isConstOrConstSplat(N1C) &&
3659          isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
3660          "Should be a comparison with 0.");
3661   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3662          "Valid only for [in]equality comparisons.");
3663 
3664   unsigned NewShiftOpcode;
3665   SDValue X, C, Y;
3666 
3667   SelectionDAG &DAG = DCI.DAG;
3668   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3669 
3670   // Look for '(C l>>/<< Y)'.
3671   auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3672     // The shift should be one-use.
3673     if (!V.hasOneUse())
3674       return false;
3675     unsigned OldShiftOpcode = V.getOpcode();
3676     switch (OldShiftOpcode) {
3677     case ISD::SHL:
3678       NewShiftOpcode = ISD::SRL;
3679       break;
3680     case ISD::SRL:
3681       NewShiftOpcode = ISD::SHL;
3682       break;
3683     default:
3684       return false; // must be a logical shift.
3685     }
3686     // We should be shifting a constant.
3687     // FIXME: best to use isConstantOrConstantVector().
3688     C = V.getOperand(0);
3689     ConstantSDNode *CC =
3690         isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3691     if (!CC)
3692       return false;
3693     Y = V.getOperand(1);
3694 
3695     ConstantSDNode *XC =
3696         isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3697     return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3698         X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3699   };
3700 
3701   // LHS of comparison should be an one-use 'and'.
3702   if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3703     return SDValue();
3704 
3705   X = N0.getOperand(0);
3706   SDValue Mask = N0.getOperand(1);
3707 
3708   // 'and' is commutative!
3709   if (!Match(Mask)) {
3710     std::swap(X, Mask);
3711     if (!Match(Mask))
3712       return SDValue();
3713   }
3714 
3715   EVT VT = X.getValueType();
3716 
3717   // Produce:
3718   // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3719   SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3720   SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3721   SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3722   return T2;
3723 }
3724 
3725 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3726 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3727 /// handle the commuted versions of these patterns.
3728 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3729                                            ISD::CondCode Cond, const SDLoc &DL,
3730                                            DAGCombinerInfo &DCI) const {
3731   unsigned BOpcode = N0.getOpcode();
3732   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3733          "Unexpected binop");
3734   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3735 
3736   // (X + Y) == X --> Y == 0
3737   // (X - Y) == X --> Y == 0
3738   // (X ^ Y) == X --> Y == 0
3739   SelectionDAG &DAG = DCI.DAG;
3740   EVT OpVT = N0.getValueType();
3741   SDValue X = N0.getOperand(0);
3742   SDValue Y = N0.getOperand(1);
3743   if (X == N1)
3744     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3745 
3746   if (Y != N1)
3747     return SDValue();
3748 
3749   // (X + Y) == Y --> X == 0
3750   // (X ^ Y) == Y --> X == 0
3751   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3752     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3753 
3754   // The shift would not be valid if the operands are boolean (i1).
3755   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3756     return SDValue();
3757 
3758   // (X - Y) == Y --> X == Y << 1
3759   EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3760                                  !DCI.isBeforeLegalize());
3761   SDValue One = DAG.getConstant(1, DL, ShiftVT);
3762   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3763   if (!DCI.isCalledByLegalizer())
3764     DCI.AddToWorklist(YShl1.getNode());
3765   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3766 }
3767 
// Simplify a setcc whose LHS is (possibly a truncate of) a single-use CTPOP,
// compared against the constant C1, by rewriting it in terms of the
// popcounted value itself using the "x & (x - 1)" clear-lowest-set-bit trick.
// Returns a null SDValue when no simplification applies.
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  // Only a single-use popcount is worth rewriting.
  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // If this is a vector CTPOP, keep the CTPOP if it is legal.
  // TODO: Should we check if CTPOP is legal(or custom) for scalars?
  if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
    return SDValue();

  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Each clear-lowest-bit pass adds nodes; let the target cap how many
    // passes are acceptable before we give up on the transform.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    // Number of times to clear the lowest set bit before the zero test:
    // u< C needs C-1 passes, u> C needs C passes.
    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      // Result & (Result - 1) clears the lowest set bit of Result.
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // If ctpop is not supported, expand a power-of-2 comparison based on it.
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // For scalars, keep CTPOP if it is legal or custom.
    if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
      return SDValue();
    // This is based on X86's custom lowering for CTPOP which produces more
    // instructions than the expansion here.

    // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
    // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
    SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
    SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
    SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
    unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
    return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
  }

  return SDValue();
}
3835 
3836 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
3837                                    ISD::CondCode Cond, const SDLoc &dl,
3838                                    SelectionDAG &DAG) {
3839   if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
3840     return SDValue();
3841 
3842   auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
3843   if (!C1 || !(C1->isZero() || C1->isAllOnes()))
3844     return SDValue();
3845 
3846   auto getRotateSource = [](SDValue X) {
3847     if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
3848       return X.getOperand(0);
3849     return SDValue();
3850   };
3851 
3852   // Peek through a rotated value compared against 0 or -1:
3853   // (rot X, Y) == 0/-1 --> X == 0/-1
3854   // (rot X, Y) != 0/-1 --> X != 0/-1
3855   if (SDValue R = getRotateSource(N0))
3856     return DAG.getSetCC(dl, VT, R, N1, Cond);
3857 
3858   // Peek through an 'or' of a rotated value compared against 0:
3859   // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
3860   // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
3861   //
3862   // TODO: Add the 'and' with -1 sibling.
3863   // TODO: Recurse through a series of 'or' ops to find the rotate.
3864   EVT OpVT = N0.getValueType();
3865   if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
3866     if (SDValue R = getRotateSource(N0.getOperand(0))) {
3867       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
3868       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3869     }
3870     if (SDValue R = getRotateSource(N0.getOperand(1))) {
3871       SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
3872       return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3873     }
3874   }
3875 
3876   return SDValue();
3877 }
3878 
// Fold an [in]equality comparison of a single-use funnel shift against zero.
// Because we are only testing whether any bit is set, we can replace the
// funnel shift with a plain shift + 'or' of its operands.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &dl,
                                        SelectionDAG &DAG) {
  // If we are testing for all-bits-clear, we might be able to do that with
  // less shifting since bit-order does not matter.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  // The RHS must be a (splat) zero constant (undef elements allowed).
  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !C1->isZero())
    return SDValue();

  // The LHS must be a single-use funnel shift.
  if (!N0.hasOneUse() ||
      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
    return SDValue();

  // The shift amount must be a constant strictly less than the bit width.
  unsigned BitWidth = N0.getScalarValueSizeInBits();
  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
    return SDValue();

  // Canonicalize fshr as fshl to reduce pattern-matching.
  unsigned ShAmt = ShAmtC->getZExtValue();
  if (N0.getOpcode() == ISD::FSHR)
    ShAmt = BitWidth - ShAmt;

  // Match an 'or' with a specific operand 'Other' in either commuted variant.
  // On success, X is the operand equal to 'Other' and Y is the other operand.
  SDValue X, Y;
  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
      return false;
    if (Or.getOperand(0) == Other) {
      X = Or.getOperand(0);
      Y = Or.getOperand(1);
      return true;
    }
    if (Or.getOperand(1) == Other) {
      X = Or.getOperand(1);
      Y = Or.getOperand(0);
      return true;
    }
    return false;
  };

  EVT OpVT = N0.getValueType();
  EVT ShAmtVT = N0.getOperand(2).getValueType();
  SDValue F0 = N0.getOperand(0);
  SDValue F1 = N0.getOperand(1);
  if (matchOr(F0, F1)) {
    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }
  if (matchOr(F1, F0)) {
    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }

  return SDValue();
}
3944 
3945 /// Try to simplify a setcc built with the specified operands and cc. If it is
3946 /// unable to simplify it, return a null SDValue.
3947 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3948                                       ISD::CondCode Cond, bool foldBooleans,
3949                                       DAGCombinerInfo &DCI,
3950                                       const SDLoc &dl) const {
3951   SelectionDAG &DAG = DCI.DAG;
3952   const DataLayout &Layout = DAG.getDataLayout();
3953   EVT OpVT = N0.getValueType();
3954 
3955   // Constant fold or commute setcc.
3956   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3957     return Fold;
3958 
3959   bool N0ConstOrSplat =
3960       isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
3961   bool N1ConstOrSplat =
3962       isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
3963 
3964   // Ensure that the constant occurs on the RHS and fold constant comparisons.
3965   // TODO: Handle non-splat vector constants. All undef causes trouble.
3966   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
3967   // infinite loop here when we encounter one.
3968   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3969   if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
3970       (DCI.isBeforeLegalizeOps() ||
3971        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3972     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3973 
3974   // If we have a subtract with the same 2 non-constant operands as this setcc
3975   // -- but in reverse order -- then try to commute the operands of this setcc
3976   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3977   // instruction on some targets.
3978   if (!N0ConstOrSplat && !N1ConstOrSplat &&
3979       (DCI.isBeforeLegalizeOps() ||
3980        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3981       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
3982       !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
3983     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3984 
3985   if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
3986     return V;
3987 
3988   if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
3989     return V;
3990 
3991   if (auto *N1C = isConstOrConstSplat(N1)) {
3992     const APInt &C1 = N1C->getAPIntValue();
3993 
3994     // Optimize some CTPOP cases.
3995     if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
3996       return V;
3997 
3998     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3999     // equality comparison, then we're just comparing whether X itself is
4000     // zero.
4001     if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4002         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4003         isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4004       if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4005         if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4006             ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4007           if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4008             // (srl (ctlz x), 5) == 0  -> X != 0
4009             // (srl (ctlz x), 5) != 1  -> X != 0
4010             Cond = ISD::SETNE;
4011           } else {
4012             // (srl (ctlz x), 5) != 0  -> X == 0
4013             // (srl (ctlz x), 5) == 1  -> X == 0
4014             Cond = ISD::SETEQ;
4015           }
4016           SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4017           return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4018                               Cond);
4019         }
4020       }
4021     }
4022   }
4023 
4024   // FIXME: Support vectors.
4025   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4026     const APInt &C1 = N1C->getAPIntValue();
4027 
4028     // (zext x) == C --> x == (trunc C)
4029     // (sext x) == C --> x == (trunc C)
4030     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4031         DCI.isBeforeLegalize() && N0->hasOneUse()) {
4032       unsigned MinBits = N0.getValueSizeInBits();
4033       SDValue PreExt;
4034       bool Signed = false;
4035       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4036         // ZExt
4037         MinBits = N0->getOperand(0).getValueSizeInBits();
4038         PreExt = N0->getOperand(0);
4039       } else if (N0->getOpcode() == ISD::AND) {
4040         // DAGCombine turns costly ZExts into ANDs
4041         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4042           if ((C->getAPIntValue()+1).isPowerOf2()) {
4043             MinBits = C->getAPIntValue().countTrailingOnes();
4044             PreExt = N0->getOperand(0);
4045           }
4046       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4047         // SExt
4048         MinBits = N0->getOperand(0).getValueSizeInBits();
4049         PreExt = N0->getOperand(0);
4050         Signed = true;
4051       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4052         // ZEXTLOAD / SEXTLOAD
4053         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4054           MinBits = LN0->getMemoryVT().getSizeInBits();
4055           PreExt = N0;
4056         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4057           Signed = true;
4058           MinBits = LN0->getMemoryVT().getSizeInBits();
4059           PreExt = N0;
4060         }
4061       }
4062 
4063       // Figure out how many bits we need to preserve this constant.
4064       unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();
4065 
4066       // Make sure we're not losing bits from the constant.
4067       if (MinBits > 0 &&
4068           MinBits < C1.getBitWidth() &&
4069           MinBits >= ReqdBits) {
4070         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4071         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4072           // Will get folded away.
4073           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4074           if (MinBits == 1 && C1 == 1)
4075             // Invert the condition.
4076             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4077                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4078           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4079           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4080         }
4081 
4082         // If truncating the setcc operands is not desirable, we can still
4083         // simplify the expression in some cases:
4084         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4085         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4086         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4087         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4088         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4089         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4090         SDValue TopSetCC = N0->getOperand(0);
4091         unsigned N0Opc = N0->getOpcode();
4092         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4093         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4094             TopSetCC.getOpcode() == ISD::SETCC &&
4095             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4096             (isConstFalseVal(N1) ||
4097              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4098 
4099           bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4100                          (!N1C->isZero() && Cond == ISD::SETNE);
4101 
4102           if (!Inverse)
4103             return TopSetCC;
4104 
4105           ISD::CondCode InvCond = ISD::getSetCCInverse(
4106               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4107               TopSetCC.getOperand(0).getValueType());
4108           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4109                                       TopSetCC.getOperand(1),
4110                                       InvCond);
4111         }
4112       }
4113     }
4114 
4115     // If the LHS is '(and load, const)', the RHS is 0, the test is for
4116     // equality or unsigned, and all 1 bits of the const are in the same
4117     // partial word, see if we can shorten the load.
4118     if (DCI.isBeforeLegalize() &&
4119         !ISD::isSignedIntSetCC(Cond) &&
4120         N0.getOpcode() == ISD::AND && C1 == 0 &&
4121         N0.getNode()->hasOneUse() &&
4122         isa<LoadSDNode>(N0.getOperand(0)) &&
4123         N0.getOperand(0).getNode()->hasOneUse() &&
4124         isa<ConstantSDNode>(N0.getOperand(1))) {
4125       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4126       APInt bestMask;
4127       unsigned bestWidth = 0, bestOffset = 0;
4128       if (Lod->isSimple() && Lod->isUnindexed()) {
4129         unsigned origWidth = N0.getValueSizeInBits();
4130         unsigned maskWidth = origWidth;
4131         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4132         // 8 bits, but have to be careful...
4133         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4134           origWidth = Lod->getMemoryVT().getSizeInBits();
4135         const APInt &Mask = N0.getConstantOperandAPInt(1);
4136         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4137           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4138           for (unsigned offset=0; offset<origWidth/width; offset++) {
4139             if (Mask.isSubsetOf(newMask)) {
4140               if (Layout.isLittleEndian())
4141                 bestOffset = (uint64_t)offset * (width/8);
4142               else
4143                 bestOffset = (origWidth/width - offset - 1) * (width/8);
4144               bestMask = Mask.lshr(offset * (width/8) * 8);
4145               bestWidth = width;
4146               break;
4147             }
4148             newMask <<= width;
4149           }
4150         }
4151       }
4152       if (bestWidth) {
4153         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4154         if (newVT.isRound() &&
4155             shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4156           SDValue Ptr = Lod->getBasePtr();
4157           if (bestOffset != 0)
4158             Ptr =
4159                 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
4160           SDValue NewLoad =
4161               DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4162                           Lod->getPointerInfo().getWithOffset(bestOffset),
4163                           Lod->getOriginalAlign());
4164           return DAG.getSetCC(dl, VT,
4165                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4166                                       DAG.getConstant(bestMask.trunc(bestWidth),
4167                                                       dl, newVT)),
4168                               DAG.getConstant(0LL, dl, newVT), Cond);
4169         }
4170       }
4171     }
4172 
4173     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4174     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4175       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4176 
4177       // If the comparison constant has bits in the upper part, the
4178       // zero-extended value could never match.
4179       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4180                                               C1.getBitWidth() - InSize))) {
4181         switch (Cond) {
4182         case ISD::SETUGT:
4183         case ISD::SETUGE:
4184         case ISD::SETEQ:
4185           return DAG.getConstant(0, dl, VT);
4186         case ISD::SETULT:
4187         case ISD::SETULE:
4188         case ISD::SETNE:
4189           return DAG.getConstant(1, dl, VT);
4190         case ISD::SETGT:
4191         case ISD::SETGE:
4192           // True if the sign bit of C1 is set.
4193           return DAG.getConstant(C1.isNegative(), dl, VT);
4194         case ISD::SETLT:
4195         case ISD::SETLE:
4196           // True if the sign bit of C1 isn't set.
4197           return DAG.getConstant(C1.isNonNegative(), dl, VT);
4198         default:
4199           break;
4200         }
4201       }
4202 
4203       // Otherwise, we can perform the comparison with the low bits.
4204       switch (Cond) {
4205       case ISD::SETEQ:
4206       case ISD::SETNE:
4207       case ISD::SETUGT:
4208       case ISD::SETUGE:
4209       case ISD::SETULT:
4210       case ISD::SETULE: {
4211         EVT newVT = N0.getOperand(0).getValueType();
4212         if (DCI.isBeforeLegalizeOps() ||
4213             (isOperationLegal(ISD::SETCC, newVT) &&
4214              isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4215           EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4216           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4217 
4218           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4219                                           NewConst, Cond);
4220           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4221         }
4222         break;
4223       }
4224       default:
4225         break; // todo, be more careful with signed comparisons
4226       }
4227     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4228                (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4229                !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4230                                       OpVT)) {
4231       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4232       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4233       EVT ExtDstTy = N0.getValueType();
4234       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4235 
4236       // If the constant doesn't fit into the number of bits for the source of
4237       // the sign extension, it is impossible for both sides to be equal.
4238       if (C1.getMinSignedBits() > ExtSrcTyBits)
4239         return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4240 
4241       assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4242              ExtDstTy != ExtSrcTy && "Unexpected types!");
4243       APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4244       SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4245                                    DAG.getConstant(Imm, dl, ExtDstTy));
4246       if (!DCI.isCalledByLegalizer())
4247         DCI.AddToWorklist(ZextOp.getNode());
4248       // Otherwise, make this a use of a zext.
4249       return DAG.getSetCC(dl, VT, ZextOp,
4250                           DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4251     } else if ((N1C->isZero() || N1C->isOne()) &&
4252                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4253       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
4254       if (N0.getOpcode() == ISD::SETCC &&
4255           isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4256           (N0.getValueType() == MVT::i1 ||
4257            getBooleanContents(N0.getOperand(0).getValueType()) ==
4258                        ZeroOrOneBooleanContent)) {
4259         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4260         if (TrueWhenTrue)
4261           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4262         // Invert the condition.
4263         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4264         CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4265         if (DCI.isBeforeLegalizeOps() ||
4266             isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4267           return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4268       }
4269 
4270       if ((N0.getOpcode() == ISD::XOR ||
4271            (N0.getOpcode() == ISD::AND &&
4272             N0.getOperand(0).getOpcode() == ISD::XOR &&
4273             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4274           isOneConstant(N0.getOperand(1))) {
4275         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4276         // can only do this if the top bits are known zero.
4277         unsigned BitWidth = N0.getValueSizeInBits();
4278         if (DAG.MaskedValueIsZero(N0,
4279                                   APInt::getHighBitsSet(BitWidth,
4280                                                         BitWidth-1))) {
4281           // Okay, get the un-inverted input value.
4282           SDValue Val;
4283           if (N0.getOpcode() == ISD::XOR) {
4284             Val = N0.getOperand(0);
4285           } else {
4286             assert(N0.getOpcode() == ISD::AND &&
4287                     N0.getOperand(0).getOpcode() == ISD::XOR);
4288             // ((X^1)&1)^1 -> X & 1
4289             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4290                               N0.getOperand(0).getOperand(0),
4291                               N0.getOperand(1));
4292           }
4293 
4294           return DAG.getSetCC(dl, VT, Val, N1,
4295                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4296         }
4297       } else if (N1C->isOne()) {
4298         SDValue Op0 = N0;
4299         if (Op0.getOpcode() == ISD::TRUNCATE)
4300           Op0 = Op0.getOperand(0);
4301 
4302         if ((Op0.getOpcode() == ISD::XOR) &&
4303             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4304             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4305           SDValue XorLHS = Op0.getOperand(0);
4306           SDValue XorRHS = Op0.getOperand(1);
4307           // Ensure that the input setccs return an i1 type or 0/1 value.
4308           if (Op0.getValueType() == MVT::i1 ||
4309               (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4310                       ZeroOrOneBooleanContent &&
4311                getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4312                         ZeroOrOneBooleanContent)) {
4313             // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4314             Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4315             return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4316           }
4317         }
4318         if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4319           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4320           if (Op0.getValueType().bitsGT(VT))
4321             Op0 = DAG.getNode(ISD::AND, dl, VT,
4322                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4323                           DAG.getConstant(1, dl, VT));
4324           else if (Op0.getValueType().bitsLT(VT))
4325             Op0 = DAG.getNode(ISD::AND, dl, VT,
4326                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4327                         DAG.getConstant(1, dl, VT));
4328 
4329           return DAG.getSetCC(dl, VT, Op0,
4330                               DAG.getConstant(0, dl, Op0.getValueType()),
4331                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4332         }
4333         if (Op0.getOpcode() == ISD::AssertZext &&
4334             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4335           return DAG.getSetCC(dl, VT, Op0,
4336                               DAG.getConstant(0, dl, Op0.getValueType()),
4337                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4338       }
4339     }
4340 
4341     // Given:
4342     //   icmp eq/ne (urem %x, %y), 0
4343     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4344     //   icmp eq/ne %x, 0
4345     if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4346         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4347       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4348       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4349       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4350         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4351     }
4352 
4353     // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4354     //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4355     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4356         N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4357         N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4358         N1C && N1C->isAllOnes()) {
4359       return DAG.getSetCC(dl, VT, N0.getOperand(0),
4360                           DAG.getConstant(0, dl, OpVT),
4361                           Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4362     }
4363 
4364     if (SDValue V =
4365             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4366       return V;
4367   }
4368 
4369   // These simplifications apply to splat vectors as well.
4370   // TODO: Handle more splat vector cases.
4371   if (auto *N1C = isConstOrConstSplat(N1)) {
4372     const APInt &C1 = N1C->getAPIntValue();
4373 
4374     APInt MinVal, MaxVal;
4375     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4376     if (ISD::isSignedIntSetCC(Cond)) {
4377       MinVal = APInt::getSignedMinValue(OperandBitSize);
4378       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4379     } else {
4380       MinVal = APInt::getMinValue(OperandBitSize);
4381       MaxVal = APInt::getMaxValue(OperandBitSize);
4382     }
4383 
4384     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4385     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4386       // X >= MIN --> true
4387       if (C1 == MinVal)
4388         return DAG.getBoolConstant(true, dl, VT, OpVT);
4389 
4390       if (!VT.isVector()) { // TODO: Support this for vectors.
4391         // X >= C0 --> X > (C0 - 1)
4392         APInt C = C1 - 1;
4393         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4394         if ((DCI.isBeforeLegalizeOps() ||
4395              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4396             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4397                                   isLegalICmpImmediate(C.getSExtValue())))) {
4398           return DAG.getSetCC(dl, VT, N0,
4399                               DAG.getConstant(C, dl, N1.getValueType()),
4400                               NewCC);
4401         }
4402       }
4403     }
4404 
4405     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4406       // X <= MAX --> true
4407       if (C1 == MaxVal)
4408         return DAG.getBoolConstant(true, dl, VT, OpVT);
4409 
4410       // X <= C0 --> X < (C0 + 1)
4411       if (!VT.isVector()) { // TODO: Support this for vectors.
4412         APInt C = C1 + 1;
4413         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4414         if ((DCI.isBeforeLegalizeOps() ||
4415              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4416             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4417                                   isLegalICmpImmediate(C.getSExtValue())))) {
4418           return DAG.getSetCC(dl, VT, N0,
4419                               DAG.getConstant(C, dl, N1.getValueType()),
4420                               NewCC);
4421         }
4422       }
4423     }
4424 
4425     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4426       if (C1 == MinVal)
4427         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4428 
4429       // TODO: Support this for vectors after legalize ops.
4430       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4431         // Canonicalize setlt X, Max --> setne X, Max
4432         if (C1 == MaxVal)
4433           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4434 
4435         // If we have setult X, 1, turn it into seteq X, 0
4436         if (C1 == MinVal+1)
4437           return DAG.getSetCC(dl, VT, N0,
4438                               DAG.getConstant(MinVal, dl, N0.getValueType()),
4439                               ISD::SETEQ);
4440       }
4441     }
4442 
4443     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4444       if (C1 == MaxVal)
4445         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4446 
4447       // TODO: Support this for vectors after legalize ops.
4448       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4449         // Canonicalize setgt X, Min --> setne X, Min
4450         if (C1 == MinVal)
4451           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4452 
4453         // If we have setugt X, Max-1, turn it into seteq X, Max
4454         if (C1 == MaxVal-1)
4455           return DAG.getSetCC(dl, VT, N0,
4456                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
4457                               ISD::SETEQ);
4458       }
4459     }
4460 
4461     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4462       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
4463       if (C1.isZero())
4464         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4465                 VT, N0, N1, Cond, DCI, dl))
4466           return CC;
4467 
4468       // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4469       // For example, when high 32-bits of i64 X are known clear:
4470       // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
4471       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
4472       bool CmpZero = N1C->getAPIntValue().isZero();
4473       bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
4474       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4475         // Match or(lo,shl(hi,bw/2)) pattern.
4476         auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4477           unsigned EltBits = V.getScalarValueSizeInBits();
4478           if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4479             return false;
4480           SDValue LHS = V.getOperand(0);
4481           SDValue RHS = V.getOperand(1);
4482           APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
4483           // Unshifted element must have zero upperbits.
4484           if (RHS.getOpcode() == ISD::SHL &&
4485               isa<ConstantSDNode>(RHS.getOperand(1)) &&
4486               RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4487               DAG.MaskedValueIsZero(LHS, HiBits)) {
4488             Lo = LHS;
4489             Hi = RHS.getOperand(0);
4490             return true;
4491           }
4492           if (LHS.getOpcode() == ISD::SHL &&
4493               isa<ConstantSDNode>(LHS.getOperand(1)) &&
4494               LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4495               DAG.MaskedValueIsZero(RHS, HiBits)) {
4496             Lo = RHS;
4497             Hi = LHS.getOperand(0);
4498             return true;
4499           }
4500           return false;
4501         };
4502 
4503         auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
4504           unsigned EltBits = N0.getScalarValueSizeInBits();
4505           unsigned HalfBits = EltBits / 2;
4506           APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
4507           SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
4508           SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
4509           SDValue NewN0 =
4510               DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
4511           SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
4512           return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
4513         };
4514 
4515         SDValue Lo, Hi;
4516         if (IsConcat(N0, Lo, Hi))
4517           return MergeConcat(Lo, Hi);
4518 
4519         if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4520           SDValue Lo0, Lo1, Hi0, Hi1;
4521           if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
4522               IsConcat(N0.getOperand(1), Lo1, Hi1)) {
4523             return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
4524                                DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
4525           }
4526         }
4527       }
4528     }
4529 
4530     // If we have "setcc X, C0", check to see if we can shrink the immediate
4531     // by changing cc.
4532     // TODO: Support this for vectors after legalize ops.
4533     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4534       // SETUGT X, SINTMAX  -> SETLT X, 0
4535       // SETUGE X, SINTMIN -> SETLT X, 0
4536       if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
4537           (Cond == ISD::SETUGE && C1.isMinSignedValue()))
4538         return DAG.getSetCC(dl, VT, N0,
4539                             DAG.getConstant(0, dl, N1.getValueType()),
4540                             ISD::SETLT);
4541 
4542       // SETULT X, SINTMIN  -> SETGT X, -1
4543       // SETULE X, SINTMAX  -> SETGT X, -1
4544       if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
4545           (Cond == ISD::SETULE && C1.isMaxSignedValue()))
4546         return DAG.getSetCC(dl, VT, N0,
4547                             DAG.getAllOnesConstant(dl, N1.getValueType()),
4548                             ISD::SETGT);
4549     }
4550   }
4551 
4552   // Back to non-vector simplifications.
4553   // TODO: Can we do these for vector splats?
4554   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4555     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4556     const APInt &C1 = N1C->getAPIntValue();
4557     EVT ShValTy = N0.getValueType();
4558 
4559     // Fold bit comparisons when we can. This will result in an
4560     // incorrect value when boolean false is negative one, unless
4561     // the bitsize is 1 in which case the false value is the same
4562     // in practice regardless of the representation.
4563     if ((VT.getSizeInBits() == 1 ||
4564          getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4565         (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4566         (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
4567         N0.getOpcode() == ISD::AND) {
4568       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4569         EVT ShiftTy =
4570             getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4571         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
4572           // Perform the xform if the AND RHS is a single bit.
4573           unsigned ShCt = AndRHS->getAPIntValue().logBase2();
4574           if (AndRHS->getAPIntValue().isPowerOf2() &&
4575               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4576             return DAG.getNode(ISD::TRUNCATE, dl, VT,
4577                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4578                                            DAG.getConstant(ShCt, dl, ShiftTy)));
4579           }
4580         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
4581           // (X & 8) == 8  -->  (X & 8) >> 3
4582           // Perform the xform if C1 is a single bit.
4583           unsigned ShCt = C1.logBase2();
4584           if (C1.isPowerOf2() &&
4585               !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
4586             return DAG.getNode(ISD::TRUNCATE, dl, VT,
4587                                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4588                                            DAG.getConstant(ShCt, dl, ShiftTy)));
4589           }
4590         }
4591       }
4592     }
4593 
4594     if (C1.getMinSignedBits() <= 64 &&
4595         !isLegalICmpImmediate(C1.getSExtValue())) {
4596       EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
4597       // (X & -256) == 256 -> (X >> 8) == 1
4598       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4599           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4600         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4601           const APInt &AndRHSC = AndRHS->getAPIntValue();
4602           if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
4603             unsigned ShiftBits = AndRHSC.countTrailingZeros();
4604             if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4605               SDValue Shift =
4606                 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
4607                             DAG.getConstant(ShiftBits, dl, ShiftTy));
4608               SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
4609               return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
4610             }
4611           }
4612         }
4613       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
4614                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
4615         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
4616         // X <  0x100000000 -> (X >> 32) <  1
4617         // X >= 0x100000000 -> (X >> 32) >= 1
4618         // X <= 0x0ffffffff -> (X >> 32) <  1
4619         // X >  0x0ffffffff -> (X >> 32) >= 1
4620         unsigned ShiftBits;
4621         APInt NewC = C1;
4622         ISD::CondCode NewCond = Cond;
4623         if (AdjOne) {
4624           ShiftBits = C1.countTrailingOnes();
4625           NewC = NewC + 1;
4626           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4627         } else {
4628           ShiftBits = C1.countTrailingZeros();
4629         }
4630         NewC.lshrInPlace(ShiftBits);
4631         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
4632             isLegalICmpImmediate(NewC.getSExtValue()) &&
4633             !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
4634           SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
4635                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
4636           SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
4637           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
4638         }
4639       }
4640     }
4641   }
4642 
4643   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
4644     auto *CFP = cast<ConstantFPSDNode>(N1);
4645     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
4646 
4647     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
4648     // constant if knowing that the operand is non-nan is enough.  We prefer to
4649     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
4650     // materialize 0.0.
4651     if (Cond == ISD::SETO || Cond == ISD::SETUO)
4652       return DAG.getSetCC(dl, VT, N0, N0, Cond);
4653 
4654     // setcc (fneg x), C -> setcc swap(pred) x, -C
4655     if (N0.getOpcode() == ISD::FNEG) {
4656       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
4657       if (DCI.isBeforeLegalizeOps() ||
4658           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
4659         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
4660         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
4661       }
4662     }
4663 
4664     // If the condition is not legal, see if we can find an equivalent one
4665     // which is legal.
4666     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
4667       // If the comparison was an awkward floating-point == or != and one of
4668       // the comparison operands is infinity or negative infinity, convert the
4669       // condition to a less-awkward <= or >=.
4670       if (CFP->getValueAPF().isInfinity()) {
4671         bool IsNegInf = CFP->getValueAPF().isNegative();
4672         ISD::CondCode NewCond = ISD::SETCC_INVALID;
4673         switch (Cond) {
4674         case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
4675         case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
4676         case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
4677         case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
4678         default: break;
4679         }
4680         if (NewCond != ISD::SETCC_INVALID &&
4681             isCondCodeLegal(NewCond, N0.getSimpleValueType()))
4682           return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4683       }
4684     }
4685   }
4686 
4687   if (N0 == N1) {
4688     // The sext(setcc()) => setcc() optimization relies on the appropriate
4689     // constant being emitted.
4690     assert(!N0.getValueType().isInteger() &&
4691            "Integer types should be handled by FoldSetCC");
4692 
4693     bool EqTrue = ISD::isTrueWhenEqual(Cond);
4694     unsigned UOF = ISD::getUnorderedFlavor(Cond);
4695     if (UOF == 2) // FP operators that are undefined on NaNs.
4696       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4697     if (UOF == unsigned(EqTrue))
4698       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
4699     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
4700     // if it is not already.
4701     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
4702     if (NewCond != Cond &&
4703         (DCI.isBeforeLegalizeOps() ||
4704                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
4705       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
4706   }
4707 
4708   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4709       N0.getValueType().isInteger()) {
4710     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
4711         N0.getOpcode() == ISD::XOR) {
4712       // Simplify (X+Y) == (X+Z) -->  Y == Z
4713       if (N0.getOpcode() == N1.getOpcode()) {
4714         if (N0.getOperand(0) == N1.getOperand(0))
4715           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
4716         if (N0.getOperand(1) == N1.getOperand(1))
4717           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
4718         if (isCommutativeBinOp(N0.getOpcode())) {
4719           // If X op Y == Y op X, try other combinations.
4720           if (N0.getOperand(0) == N1.getOperand(1))
4721             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
4722                                 Cond);
4723           if (N0.getOperand(1) == N1.getOperand(0))
4724             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
4725                                 Cond);
4726         }
4727       }
4728 
4729       // If RHS is a legal immediate value for a compare instruction, we need
4730       // to be careful about increasing register pressure needlessly.
4731       bool LegalRHSImm = false;
4732 
4733       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
4734         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4735           // Turn (X+C1) == C2 --> X == C2-C1
4736           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
4737             return DAG.getSetCC(
4738                 dl, VT, N0.getOperand(0),
4739                 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
4740                                 dl, N0.getValueType()),
4741                 Cond);
4742 
4743           // Turn (X^C1) == C2 --> X == C1^C2
4744           if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
4745             return DAG.getSetCC(
4746                 dl, VT, N0.getOperand(0),
4747                 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
4748                                 dl, N0.getValueType()),
4749                 Cond);
4750         }
4751 
4752         // Turn (C1-X) == C2 --> X == C1-C2
4753         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
4754           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
4755             return DAG.getSetCC(
4756                 dl, VT, N0.getOperand(1),
4757                 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
4758                                 dl, N0.getValueType()),
4759                 Cond);
4760 
4761         // Could RHSC fold directly into a compare?
4762         if (RHSC->getValueType(0).getSizeInBits() <= 64)
4763           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
4764       }
4765 
4766       // (X+Y) == X --> Y == 0 and similar folds.
4767       // Don't do this if X is an immediate that can fold into a cmp
4768       // instruction and X+Y has other uses. It could be an induction variable
4769       // chain, and the transform would increase register pressure.
4770       if (!LegalRHSImm || N0.hasOneUse())
4771         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
4772           return V;
4773     }
4774 
4775     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
4776         N1.getOpcode() == ISD::XOR)
4777       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
4778         return V;
4779 
4780     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
4781       return V;
4782   }
4783 
4784   // Fold remainder of division by a constant.
4785   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
4786       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4787     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4788 
4789     // When division is cheap or optimizing for minimum size,
4790     // fall through to DIVREM creation by skipping this fold.
4791     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
4792       if (N0.getOpcode() == ISD::UREM) {
4793         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
4794           return Folded;
4795       } else if (N0.getOpcode() == ISD::SREM) {
4796         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
4797           return Folded;
4798       }
4799     }
4800   }
4801 
4802   // Fold away ALL boolean setcc's.
4803   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
4804     SDValue Temp;
4805     switch (Cond) {
4806     default: llvm_unreachable("Unknown integer setcc!");
4807     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
4808       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4809       N0 = DAG.getNOT(dl, Temp, OpVT);
4810       if (!DCI.isCalledByLegalizer())
4811         DCI.AddToWorklist(Temp.getNode());
4812       break;
4813     case ISD::SETNE:  // X != Y   -->  (X^Y)
4814       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
4815       break;
4816     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
4817     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
4818       Temp = DAG.getNOT(dl, N0, OpVT);
4819       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
4820       if (!DCI.isCalledByLegalizer())
4821         DCI.AddToWorklist(Temp.getNode());
4822       break;
4823     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
4824     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
4825       Temp = DAG.getNOT(dl, N1, OpVT);
4826       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
4827       if (!DCI.isCalledByLegalizer())
4828         DCI.AddToWorklist(Temp.getNode());
4829       break;
4830     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
4831     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
4832       Temp = DAG.getNOT(dl, N0, OpVT);
4833       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
4834       if (!DCI.isCalledByLegalizer())
4835         DCI.AddToWorklist(Temp.getNode());
4836       break;
4837     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
4838     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
4839       Temp = DAG.getNOT(dl, N1, OpVT);
4840       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
4841       break;
4842     }
4843     if (VT.getScalarType() != MVT::i1) {
4844       if (!DCI.isCalledByLegalizer())
4845         DCI.AddToWorklist(N0.getNode());
4846       // FIXME: If running after legalize, we probably can't do this.
4847       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
4848       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
4849     }
4850     return N0;
4851   }
4852 
4853   // Could not fold it.
4854   return SDValue();
4855 }
4856 
4857 /// Returns true (and the GlobalValue and the offset) if the node is a
4858 /// GlobalAddress + offset.
4859 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
4860                                     int64_t &Offset) const {
4861 
4862   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
4863 
4864   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
4865     GA = GASD->getGlobal();
4866     Offset += GASD->getOffset();
4867     return true;
4868   }
4869 
4870   if (N->getOpcode() == ISD::ADD) {
4871     SDValue N1 = N->getOperand(0);
4872     SDValue N2 = N->getOperand(1);
4873     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
4874       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
4875         Offset += V->getSExtValue();
4876         return true;
4877       }
4878     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
4879       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
4880         Offset += V->getSExtValue();
4881         return true;
4882       }
4883     }
4884   }
4885 
4886   return false;
4887 }
4888 
4889 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
4890                                           DAGCombinerInfo &DCI) const {
4891   // Default implementation: no optimization.
4892   return SDValue();
4893 }
4894 
4895 //===----------------------------------------------------------------------===//
4896 //  Inline Assembler Implementation Methods
4897 //===----------------------------------------------------------------------===//
4898 
4899 TargetLowering::ConstraintType
4900 TargetLowering::getConstraintType(StringRef Constraint) const {
4901   unsigned S = Constraint.size();
4902 
4903   if (S == 1) {
4904     switch (Constraint[0]) {
4905     default: break;
4906     case 'r':
4907       return C_RegisterClass;
4908     case 'm': // memory
4909     case 'o': // offsetable
4910     case 'V': // not offsetable
4911       return C_Memory;
4912     case 'p': // Address.
4913       return C_Address;
4914     case 'n': // Simple Integer
4915     case 'E': // Floating Point Constant
4916     case 'F': // Floating Point Constant
4917       return C_Immediate;
4918     case 'i': // Simple Integer or Relocatable Constant
4919     case 's': // Relocatable Constant
4920     case 'X': // Allow ANY value.
4921     case 'I': // Target registers.
4922     case 'J':
4923     case 'K':
4924     case 'L':
4925     case 'M':
4926     case 'N':
4927     case 'O':
4928     case 'P':
4929     case '<':
4930     case '>':
4931       return C_Other;
4932     }
4933   }
4934 
4935   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4936     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4937       return C_Memory;
4938     return C_Register;
4939   }
4940   return C_Unknown;
4941 }
4942 
4943 /// Try to replace an X constraint, which matches anything, with another that
4944 /// has more specific requirements based on the type of the corresponding
4945 /// operand.
4946 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4947   if (ConstraintVT.isInteger())
4948     return "r";
4949   if (ConstraintVT.isFloatingPoint())
4950     return "f"; // works for many targets
4951   return nullptr;
4952 }
4953 
4954 SDValue TargetLowering::LowerAsmOutputForConstraint(
4955     SDValue &Chain, SDValue &Flag, const SDLoc &DL,
4956     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
4957   return SDValue();
4958 }
4959 
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    ConstantSDNode *C;
    // Running constant displacement accumulated while peeling ADD/SUB nodes
    // below; folded into whichever terminal node we eventually emit.
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // 's' requires a relocatable symbol, so a plain constant is rejected
      // for it; 'X', 'i', and 'n' all accept one.
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        // i1 values extend per the target's boolean representation; everything
        // else is sign-extended to match gcc's printing convention.
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // 'n' demands a simple integer, so symbolic operands are only accepted
      // for the other letters.
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          // Fold the accumulated displacement into the global's own offset.
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one ADD/SUB layer with a constant operand and keep walking
      // toward the symbolic root.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return; // Neither operand is a foldable constant: give up.
        // SUB negates the constant's contribution to the displacement.
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      return; // Unrecognized node: add nothing to Ops.
    }
    break;
  }
  }
}
5037 
5038 std::pair<unsigned, const TargetRegisterClass *>
5039 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5040                                              StringRef Constraint,
5041                                              MVT VT) const {
5042   if (Constraint.empty() || Constraint[0] != '{')
5043     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5044   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5045 
5046   // Remove the braces from around the name.
5047   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5048 
5049   std::pair<unsigned, const TargetRegisterClass *> R =
5050       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5051 
5052   // Figure out which register class contains this reg.
5053   for (const TargetRegisterClass *RC : RI->regclasses()) {
5054     // If none of the value types for this register class are valid, we
5055     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5056     if (!isLegalRC(*RI, *RC))
5057       continue;
5058 
5059     for (const MCPhysReg &PR : *RC) {
5060       if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5061         std::pair<unsigned, const TargetRegisterClass *> S =
5062             std::make_pair(PR, RC);
5063 
5064         // If this register class has the requested value type, return it,
5065         // otherwise keep searching and return the first class found
5066         // if no other is found which explicitly has the requested type.
5067         if (RI->isTypeLegalForClass(*RC, VT))
5068           return S;
5069         if (!R.second)
5070           R = S;
5071       }
5072     }
5073   }
5074 
5075   return R;
5076 }
5077 
5078 //===----------------------------------------------------------------------===//
5079 // Constraint Selection.
5080 
5081 /// Return true of this is an input operand that is a matching constraint like
5082 /// "4".
5083 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5084   assert(!ConstraintCode.empty() && "No known constraint!");
5085   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5086 }
5087 
5088 /// If this is an input matching constraint, this method returns the output
5089 /// operand it matches.
5090 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5091   assert(!ConstraintCode.empty() && "No known constraint!");
5092   return atoi(ConstraintCode.c_str());
5093 }
5094 
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument (the pointer to store
      // through); their value type is derived further below from the
      // argument's elementtype attribute.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        // Multiple outputs are returned as a struct; pick out the element
        // type for this result number.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Derive ConstraintVT for operands that consume a call argument
    // (indirect outputs and all inputs).
    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // For indirect operands the interesting type is the pointee type,
        // which is recorded in the elementtype parameter attribute.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        // Only power-of-two sizes up to 128 bits can be modeled by a single
        // integer; anything else is left as MVT::Other.
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        // Model pointers as integers of pointer width for constraint
        // matching purposes.
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }

      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          // One invalid operand invalidates the whole alternative.
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Tied operands whose VTs differ are only acceptable when both map
        // to the same register class and agree on integer-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5280 
5281 /// Return an integer indicating how general CT is.
5282 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
5283   switch (CT) {
5284   case TargetLowering::C_Immediate:
5285   case TargetLowering::C_Other:
5286   case TargetLowering::C_Unknown:
5287     return 0;
5288   case TargetLowering::C_Register:
5289     return 1;
5290   case TargetLowering::C_RegisterClass:
5291     return 2;
5292   case TargetLowering::C_Memory:
5293   case TargetLowering::C_Address:
5294     return 3;
5295   }
5296   llvm_unreachable("Invalid constraint type");
5297 }
5298 
5299 /// Examine constraint type and operand type and determine a weight value.
5300 /// This object must already have been set up with the operand type
5301 /// and the current alternative constraint selected.
5302 TargetLowering::ConstraintWeight
5303   TargetLowering::getMultipleConstraintMatchWeight(
5304     AsmOperandInfo &info, int maIndex) const {
5305   InlineAsm::ConstraintCodeVector *rCodes;
5306   if (maIndex >= (int)info.multipleAlternatives.size())
5307     rCodes = &info.Codes;
5308   else
5309     rCodes = &info.multipleAlternatives[maIndex].Codes;
5310   ConstraintWeight BestWeight = CW_Invalid;
5311 
5312   // Loop over the options, keeping track of the most general one.
5313   for (const std::string &rCode : *rCodes) {
5314     ConstraintWeight weight =
5315         getSingleConstraintMatchWeight(info, rCode.c_str());
5316     if (weight > BestWeight)
5317       BestWeight = weight;
5318   }
5319 
5320   return BestWeight;
5321 }
5322 
5323 /// Examine constraint type and operand type and determine a weight value.
5324 /// This object must already have been set up with the operand type
5325 /// and the current alternative constraint selected.
5326 TargetLowering::ConstraintWeight
5327   TargetLowering::getSingleConstraintMatchWeight(
5328     AsmOperandInfo &info, const char *constraint) const {
5329   ConstraintWeight weight = CW_Invalid;
5330   Value *CallOperandVal = info.CallOperandVal;
5331     // If we don't have a value, we can't do a match,
5332     // but allow it at the lowest weight.
5333   if (!CallOperandVal)
5334     return CW_Default;
5335   // Look at the constraint type.
5336   switch (*constraint) {
5337     case 'i': // immediate integer.
5338     case 'n': // immediate integer with a known value.
5339       if (isa<ConstantInt>(CallOperandVal))
5340         weight = CW_Constant;
5341       break;
5342     case 's': // non-explicit intregal immediate.
5343       if (isa<GlobalValue>(CallOperandVal))
5344         weight = CW_Constant;
5345       break;
5346     case 'E': // immediate float if host format.
5347     case 'F': // immediate float.
5348       if (isa<ConstantFP>(CallOperandVal))
5349         weight = CW_Constant;
5350       break;
5351     case '<': // memory operand with autodecrement.
5352     case '>': // memory operand with autoincrement.
5353     case 'm': // memory operand.
5354     case 'o': // offsettable memory operand
5355     case 'V': // non-offsettable memory operand
5356       weight = CW_Memory;
5357       break;
5358     case 'r': // general register.
5359     case 'g': // general register, memory operand or immediate integer.
5360               // note: Clang converts "g" to "imr".
5361       if (CallOperandVal->getType()->isIntegerTy())
5362         weight = CW_Register;
5363       break;
5364     case 'X': // any operand.
5365   default:
5366     weight = CW_Default;
5367     break;
5368   }
5369   return weight;
5370 }
5371 
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
      TLI.getConstraintType(OpInfo.Codes[i]);

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
                               CType == TargetLowering::C_RegisterClass))
      continue;

    // If this is an 'other' or 'immediate' constraint, see if the operand is
    // valid for it. For example, on X86 we might have an 'rI' constraint. If
    // the operand is an integer in the range [0..31] we want to use I (saving a
    // load of a register), otherwise we must use 'r'.
    if ((CType == TargetLowering::C_Other ||
         CType == TargetLowering::C_Immediate) && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      // Probe the target: if lowering produces any operands, the operand
      // satisfies this constraint and we take it immediately (rule 1 above).
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
                                       ResultOps, *DAG);
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
      // Otherwise fall through: an unsatisfied 'other' constraint still
      // competes on generality (at the lowest rank) below.
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation.  This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  // Note: if every option was skipped above, BestIdx stays 0 and the first
  // code is chosen with BestType == C_Unknown.
  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}
5446 
5447 /// Determines the constraint code and constraint type to use for the specific
5448 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
5449 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
5450                                             SDValue Op,
5451                                             SelectionDAG *DAG) const {
5452   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
5453 
5454   // Single-letter constraints ('r') are very common.
5455   if (OpInfo.Codes.size() == 1) {
5456     OpInfo.ConstraintCode = OpInfo.Codes[0];
5457     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
5458   } else {
5459     ChooseConstraint(OpInfo, *this, Op, DAG);
5460   }
5461 
5462   // 'X' matches anything.
5463   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
5464     // Constants are handled elsewhere.  For Functions, the type here is the
5465     // type of the result, which is not what we want to look at; leave them
5466     // alone.
5467     Value *v = OpInfo.CallOperandVal;
5468     if (isa<ConstantInt>(v) || isa<Function>(v)) {
5469       return;
5470     }
5471 
5472     if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
5473       OpInfo.ConstraintCode = "i";
5474       return;
5475     }
5476 
5477     // Otherwise, try to resolve it to something we know about by looking at
5478     // the actual operand type.
5479     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
5480       OpInfo.ConstraintCode = Repl;
5481       OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
5482     }
5483   }
5484 }
5485 
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Handles scalars, fixed build-vectors, and splatted scalable vectors.
/// Returns a null SDValue if any divisor element is zero or non-constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // Set when any divisor element is even; we then need a pre-shift.
  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per-element pattern: factor out the power of two, then compute the
  // multiplicative inverse of the remaining odd divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    // Each iteration doubles the number of correct low-order bits, so this
    // converges in O(log bitwidth) steps; Divisor is odd here, so the
    // inverse mod 2^BitWidth exists.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Reassemble the per-element shifts/factors into operands matching the
  // shape of the divisor (build-vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    // The shift is exact because the division itself is exact.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
5553 
5554 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
5555                               SelectionDAG &DAG,
5556                               SmallVectorImpl<SDNode *> &Created) const {
5557   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5558   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5559   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
5560     return SDValue(N, 0); // Lower SDIV as SDIV
5561   return SDValue();
5562 }
5563 
5564 SDValue
5565 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
5566                               SelectionDAG &DAG,
5567                               SmallVectorImpl<SDNode *> &Created) const {
5568   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5569   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5570   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
5571     return SDValue(N, 0); // Lower SREM as SREM
5572   return SDValue();
5573 }
5574 
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// Returns a null SDValue when the transform is not profitable/possible.
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Widened type used for the multiply when VT is illegal but will be
  // promoted; only set on that path below.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    // The promoted type must be at least double-width so the full product
    // fits, and must have a legal MUL.
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  // Per-element magic parameters, gathered by BuildSDIVPattern below.
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Reassemble the per-element values into operands matching the shape of
  // the divisor (build-vector, splat, or scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      // Shift the full product down to recover the high half, then truncate.
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      // Result 1 of SMUL_LOHI is the high half of the product.
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    // No way to compute the high half of the product for this type.
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  // Factor is -1, 0 or +1 per element, so this is a no-op when it is 0.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  // ShiftMask is 0 for the +/-1 divisor case (no correction) and -1
  // (all-ones) otherwise, so the AND keeps or kills the sign-bit correction.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
5722 
5723 /// Given an ISD::UDIV node expressing a divide by constant,
5724 /// return a DAG expression to select that will generate the same value by
5725 /// multiplying by a magic number.
5726 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
5727 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
5728                                   bool IsAfterLegalization,
5729                                   SmallVectorImpl<SDNode *> &Created) const {
5730   SDLoc dl(N);
5731   EVT VT = N->getValueType(0);
5732   EVT SVT = VT.getScalarType();
5733   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5734   EVT ShSVT = ShVT.getScalarType();
5735   unsigned EltBits = VT.getScalarSizeInBits();
5736   EVT MulVT;
5737 
5738   // Check to see if we can do this.
5739   // FIXME: We should be more aggressive here.
5740   if (!isTypeLegal(VT)) {
5741     // Limit this to simple scalars for now.
5742     if (VT.isVector() || !VT.isSimple())
5743       return SDValue();
5744 
5745     // If this type will be promoted to a large enough type with a legal
5746     // multiply operation, we can go ahead and do this transform.
5747     if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
5748       return SDValue();
5749 
5750     MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
5751     if (MulVT.getSizeInBits() < (2 * EltBits) ||
5752         !isOperationLegal(ISD::MUL, MulVT))
5753       return SDValue();
5754   }
5755 
5756   bool UseNPQ = false;
5757   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5758 
5759   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
5760     if (C->isZero())
5761       return false;
5762     // FIXME: We should use a narrower constant when the upper
5763     // bits are known to be zero.
5764     const APInt& Divisor = C->getAPIntValue();
5765     UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);
5766     unsigned PreShift = 0, PostShift = 0;
5767 
5768     // If the divisor is even, we can avoid using the expensive fixup by
5769     // shifting the divided value upfront.
5770     if (magics.IsAdd != 0 && !Divisor[0]) {
5771       PreShift = Divisor.countTrailingZeros();
5772       // Get magic number for the shifted divisor.
5773       magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
5774       assert(magics.IsAdd == 0 && "Should use cheap fixup now");
5775     }
5776 
5777     APInt Magic = magics.Magic;
5778 
5779     unsigned SelNPQ;
5780     if (magics.IsAdd == 0 || Divisor.isOne()) {
5781       assert(magics.ShiftAmount < Divisor.getBitWidth() &&
5782              "We shouldn't generate an undefined shift!");
5783       PostShift = magics.ShiftAmount;
5784       SelNPQ = false;
5785     } else {
5786       PostShift = magics.ShiftAmount - 1;
5787       SelNPQ = true;
5788     }
5789 
5790     PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
5791     MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
5792     NPQFactors.push_back(
5793         DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5794                                : APInt::getZero(EltBits),
5795                         dl, SVT));
5796     PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
5797     UseNPQ |= SelNPQ;
5798     return true;
5799   };
5800 
5801   SDValue N0 = N->getOperand(0);
5802   SDValue N1 = N->getOperand(1);
5803 
5804   // Collect the shifts/magic values from each element.
5805   if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
5806     return SDValue();
5807 
5808   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
5809   if (N1.getOpcode() == ISD::BUILD_VECTOR) {
5810     PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
5811     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
5812     NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
5813     PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
5814   } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
5815     assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
5816            NPQFactors.size() == 1 && PostShifts.size() == 1 &&
5817            "Expected matchUnaryPredicate to return one for scalable vectors");
5818     PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
5819     MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
5820     NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
5821     PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
5822   } else {
5823     assert(isa<ConstantSDNode>(N1) && "Expected a constant");
5824     PreShift = PreShifts[0];
5825     MagicFactor = MagicFactors[0];
5826     PostShift = PostShifts[0];
5827   }
5828 
5829   SDValue Q = N0;
5830   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
5831   Created.push_back(Q.getNode());
5832 
5833   // FIXME: We should support doing a MUL in a wider type.
5834   auto GetMULHU = [&](SDValue X, SDValue Y) {
5835     // If the type isn't legal, use a wider mul of the the type calculated
5836     // earlier.
5837     if (!isTypeLegal(VT)) {
5838       X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
5839       Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
5840       Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
5841       Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
5842                       DAG.getShiftAmountConstant(EltBits, MulVT, dl));
5843       return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
5844     }
5845 
5846     if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
5847       return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
5848     if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
5849       SDValue LoHi =
5850           DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
5851       return SDValue(LoHi.getNode(), 1);
5852     }
5853     return SDValue(); // No mulhu or equivalent
5854   };
5855 
5856   // Multiply the numerator (operand 0) by the magic value.
5857   Q = GetMULHU(Q, MagicFactor);
5858   if (!Q)
5859     return SDValue();
5860 
5861   Created.push_back(Q.getNode());
5862 
5863   if (UseNPQ) {
5864     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
5865     Created.push_back(NPQ.getNode());
5866 
5867     // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5868     // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
5869     if (VT.isVector())
5870       NPQ = GetMULHU(NPQ, NPQFactor);
5871     else
5872       NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
5873 
5874     Created.push_back(NPQ.getNode());
5875 
5876     Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
5877     Created.push_back(Q.getNode());
5878   }
5879 
5880   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
5881   Created.push_back(Q.getNode());
5882 
5883   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5884 
5885   SDValue One = DAG.getConstant(1, dl, VT);
5886   SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
5887   return DAG.getSelect(dl, VT, IsOne, N0, Q);
5888 }
5889 
5890 /// If all values in Values that *don't* match the predicate are same 'splat'
5891 /// value, then replace all values with that splat value.
5892 /// Else, if AlternativeReplacement was provided, then replace all values that
5893 /// do match predicate with AlternativeReplacement value.
5894 static void
5895 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
5896                           std::function<bool(SDValue)> Predicate,
5897                           SDValue AlternativeReplacement = SDValue()) {
5898   SDValue Replacement;
5899   // Is there a value for which the Predicate does *NOT* match? What is it?
5900   auto SplatValue = llvm::find_if_not(Values, Predicate);
5901   if (SplatValue != Values.end()) {
5902     // Does Values consist only of SplatValue's and values matching Predicate?
5903     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
5904           return Value == *SplatValue || Predicate(Value);
5905         })) // Then we shall replace values matching predicate with SplatValue.
5906       Replacement = *SplatValue;
5907   }
5908   if (!Replacement) {
5909     // Oops, we did not find the "baseline" splat value.
5910     if (!AlternativeReplacement)
5911       return; // Nothing to do.
5912     // Let's replace with provided value then.
5913     Replacement = AlternativeReplacement;
5914   }
5915   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
5916 }
5917 
5918 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
5919 /// where the divisor is constant and the comparison target is zero,
5920 /// return a DAG expression that will generate the same comparison result
5921 /// using only multiplications, additions and shifts/rotations.
5922 /// Ref: "Hacker's Delight" 10-17.
5923 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
5924                                         SDValue CompTargetNode,
5925                                         ISD::CondCode Cond,
5926                                         DAGCombinerInfo &DCI,
5927                                         const SDLoc &DL) const {
5928   SmallVector<SDNode *, 5> Built;
5929   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5930                                          DCI, DL, Built)) {
5931     for (SDNode *N : Built)
5932       DCI.AddToWorklist(N);
5933     return Folded;
5934   }
5935 
5936   return SDValue();
5937 }
5938 
/// Worker for buildUREMEqFold: performs the actual transformation and records
/// every newly created node in \p Created (the caller adds them to the
/// combiner worklist). Returns an empty SDValue if the fold does not apply.
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // Per-vector-lane bookkeeping, accumulated by BuildUREMPattern below.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  // Computes P, K and Q for one (divisor, comparison-constant) lane pair.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnes(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Materialize the per-lane constants as scalars, BUILD_VECTORs, or
  // SPLAT_VECTORs depending on the shape of the divisor node.
  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops – legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the note above VSELECT above.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
6160 
6161 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6162 /// where the divisor is constant and the comparison target is zero,
6163 /// return a DAG expression that will generate the same comparison result
6164 /// using only multiplications, additions and shifts/rotations.
6165 /// Ref: "Hacker's Delight" 10-17.
6166 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6167                                         SDValue CompTargetNode,
6168                                         ISD::CondCode Cond,
6169                                         DAGCombinerInfo &DCI,
6170                                         const SDLoc &DL) const {
6171   SmallVector<SDNode *, 7> Built;
6172   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6173                                          DCI, DL, Built)) {
6174     assert(Built.size() <= 7 && "Max size prediction failed.");
6175     for (SDNode *N : Built)
6176       DCI.AddToWorklist(N);
6177     return Folded;
6178   }
6179 
6180   return SDValue();
6181 }
6182 
/// Worker for buildSREMEqFold: performs the actual transformation and records
/// every newly created node in \p Created (the caller adds them to the
/// combiner worklist). Returns an empty SDValue if the fold does not apply.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If we are after ops legalization, and MUL is unavailable, we can not
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isZero())
    return SDValue();

  // Per-vector-lane bookkeeping, accumulated by BuildSREMPattern below.
  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Computes P, A, K and Q for a single constant divisor lane.
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`

    // INT_MIN stays negative after negation; such lanes get special handling
    // at the end of this function.
    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOne();
    AllDivisorsAreOnes &= D.isOne();

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Materialize the per-lane constants as scalars, BUILD_VECTORs, or
  // SPLAT_VECTORs depending on the shape of the divisor node.
  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops – legalization has a hard time producing good code for the code that
  // follows.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
                                MaskedIsZero, Fold);

  return Blended;
}
6435 
6436 bool TargetLowering::
6437 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
6438   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
6439     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
6440                                 "be a constant integer");
6441     return true;
6442   }
6443 
6444   return false;
6445 }
6446 
6447 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
6448                                          const DenormalMode &Mode) const {
6449   SDLoc DL(Op);
6450   EVT VT = Op.getValueType();
6451   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6452   SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
6453   // Testing it with denormal inputs to avoid wrong estimate.
6454   if (Mode.Input == DenormalMode::IEEE) {
6455     // This is specifically a check for the handling of denormal inputs,
6456     // not the result.
6457 
6458     // Test = fabs(X) < SmallestNormal
6459     const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
6460     APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
6461     SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
6462     SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
6463     return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
6464   }
6465   // Test = X == 0.0
6466   return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
6467 }
6468 
/// Try to rewrite \p Op as its negation without emitting an explicit FNEG,
/// recursing into operands up to SelectionDAG::MaxRecursionDepth.
/// \p Cost is an out-parameter reporting whether the negated form is cheaper
/// than, equal to, or more expensive than inserting an FNEG. Returns a null
/// SDValue when no negated form could be built under the legality constraints
/// (\p LegalOps) and size constraints (\p OptForSize).
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Helper to drop a speculatively-built negated operand that ended up
  // unused; without this, failed candidates would leak dead nodes into the
  // DAG.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Negate every (non-undef) lane of the constant vector.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // -(X + Y) is not (-X) + (-Y) folded into one operand unless signed
    // zeros may be ignored (e.g. -(0.0 + 0.0) would otherwise become +0.0).
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    // Negating either operand of a multiply/divide negates the result, so
    // pick whichever operand is cheaper to negate.
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    // -(X*Y + Z) requires negating Z plus one of X/Y; without
    // no-signed-zeros this is not a valid rewrite.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes with these unary operations: fneg(op(X)) == op(fneg X).
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // FP_ROUND carries an extra truncation-flag operand that must be kept.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  }

  return SDValue();
}
6737 
6738 //===----------------------------------------------------------------------===//
6739 // Legalization Utilities
6740 //===----------------------------------------------------------------------===//
6741 
/// Expand a MUL/UMUL_LOHI/SMUL_LOHI of type \p VT into operations on the
/// half-width type \p HiLoVT, appending the half-width result pieces
/// (low-to-high) to \p Result. The optional \p LL/\p LH/\p RL/\p RH are
/// pre-split halves of LHS/RHS; when absent they are synthesized with
/// TRUNCATE/SRL. Returns false if the target offers none of the required
/// half-width multiply operations (subject to \p Kind).
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // MulExpansionKind::Always forces expansion even through illegal ops;
  // otherwise only use what the target supports on the half-width type.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Build a half-width Lo/Hi product of L and R, preferring the fused
  // *MUL_LOHI form, falling back to separate MUL + MULH.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Synthesize the low halves if the caller didn't provide them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // The *MUL_LOHI upper half is known zero in this case.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
      DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  // Synthesize the high halves (shift down, then truncate) if needed.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Plain MUL: only the low OuterBitSize bits are needed, so the cross
    // products contribute to the high half without carry tracking.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Prefer the glue-based carry chain (ADDC/ADDE) when the target has it,
  // otherwise use the explicit-carry ADDCARRY form.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Propagate the carry from the previous addition into the top product.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Signed case: if a high half is negative, the unsigned cross product
    // over-counted by the other operand's low half; subtract it back out.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
6917 
6918 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
6919                                SelectionDAG &DAG, MulExpansionKind Kind,
6920                                SDValue LL, SDValue LH, SDValue RL,
6921                                SDValue RH) const {
6922   SmallVector<SDValue, 2> Result;
6923   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
6924                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
6925                            DAG, Kind, LL, LH, RL, RH);
6926   if (Ok) {
6927     assert(Result.size() == 2);
6928     Lo = Result[0];
6929     Hi = Result[1];
6930   }
6931   return Ok;
6932 }
6933 
6934 // Check that (every element of) Z is undef or not an exact multiple of BW.
6935 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
6936   return ISD::matchUnaryPredicate(
6937       Z,
6938       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
6939       true);
6940 }
6941 
/// Expand an FSHL/FSHR node into supported shift/logic operations, or into
/// the reversed funnel-shift opcode when that one is better supported.
/// Returns a null SDValue if a vector expansion would need unsupported ops.
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);

  // For vectors, bail out unless all the pieces of the generic expansion are
  // available; scalar types can always be expanded.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
    } else {
      // Z may be a multiple of BW, where negation alone is wrong; pre-shift
      // the sources by one and use ~Z (== BW-1-Z mod BW) instead:
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // C may be zero, so BW - C could equal BW (an out-of-range shift).
    // Split the inverse shift into ">> 1 >> (BW - 1 - C)" which is always
    // in range:
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
7026 
7027 // TODO: Merge with expandFunnelShift.
// TODO: Merge with expandFunnelShift.
/// Expand a ROTL/ROTR node into shifts and logic, or into the reversed
/// rotate opcode when that one is better supported. \p AllowVectorOps
/// permits the generic expansion on vectors even when the individual
/// shift/logic ops are not marked legal. Returns a null SDValue on failure.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is more supported, use it.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    // rot(l|r) x, c  ==  rot(r|l) x, -c  (amount taken mod the bit width).
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
  }

  // Unless explicitly allowed, only expand vectors when all the required
  // component operations are available.
  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return SDValue();

  // ShOpc shifts in the rotate direction; HsOpc recovers the wrapped bits.
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // Non-power-of-2 width: the back-shift amount w - (c % w) could equal w
    // (out of range), so split it as "shift 1, then w - 1 - (c % w)".
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
7082 
/// Expand an SHL_PARTS/SRA_PARTS/SRL_PARTS node (a double-width shift over a
/// Lo/Hi pair) into FSHL/FSHR plus selects, writing the result halves into
/// \p Lo and \p Hi.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 is the value shifted in from "outside" the pair on a large shift:
  // the sign bits for SRA, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2: the "far" half built with a funnel shift across both parts.
  // Tmp3: the "near" half built with a plain shift of a single part.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
7134 
/// Expand an FP_TO_SINT node in software via integer bit manipulation,
/// storing the expansion in \p Result. Currently only handles the
/// non-strict f32 -> i64 case; returns false for everything else.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  // Strict FP nodes carry the chain as operand 0; the source follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field masks: 8 exponent bits starting at bit
  // 23, bias 127, 23 mantissa bits.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as raw bits to pick the fields apart.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign becomes an all-ones or all-zeros i64 via arithmetic shift.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Significand with the implicit leading 1 (0x00800000) restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand by 2^(Exponent - 23): shift left when the exponent
  // exceeds the mantissa width, right otherwise.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // Values with a negative unbiased exponent have magnitude < 1 -> 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
7205 
/// Expand FP_TO_UINT / STRICT_FP_TO_UINT in terms of FP_TO_SINT.  On success
/// sets \p Result (and \p Chain for strict nodes) and returns true; returns
/// false when the target lacks the operations the expansion would need.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict nodes carry the chain in operand 0; the FP source follows it.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst holds the (float) destination signmask, 2^(DstBits-1).  Values below
  // it fit in the signed range directly; values at or above it need to be
  // offset into the signed range first.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Signaling compare so an unordered (NaN) input raises the exception the
    // strict conversion is required to raise.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    // The setcc result width for SrcVT and DstVT may differ; re-extend the
    // predicate for use with the destination-typed select below.
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      // Thread the chain through FSUB -> FP_TO_SINT so exception order is
      // preserved.
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // XOR with the signmask is equivalent to adding it here: the sint result
    // of the offset value has a clear sign bit.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
7307 
7308 bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
7309                                       SDValue &Chain,
7310                                       SelectionDAG &DAG) const {
7311   // This transform is not correct for converting 0 when rounding mode is set
7312   // to round toward negative infinity which will produce -0.0. So disable under
7313   // strictfp.
7314   if (Node->isStrictFPOpcode())
7315     return false;
7316 
7317   SDValue Src = Node->getOperand(0);
7318   EVT SrcVT = Src.getValueType();
7319   EVT DstVT = Node->getValueType(0);
7320 
7321   if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
7322     return false;
7323 
7324   // Only expand vector types if we have the appropriate vector bit operations.
7325   if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
7326                            !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
7327                            !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
7328                            !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
7329                            !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
7330     return false;
7331 
7332   SDLoc dl(SDValue(Node, 0));
7333   EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
7334 
7335   // Implementation of unsigned i64 to f64 following the algorithm in
7336   // __floatundidf in compiler_rt.  This implementation performs rounding
7337   // correctly in all rounding modes with the exception of converting 0
7338   // when rounding toward negative infinity. In that case the fsub will produce
7339   // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
7340   SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
7341   SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
7342       BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
7343   SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
7344   SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
7345   SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
7346 
7347   SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
7348   SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
7349   SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
7350   SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
7351   SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
7352   SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
7353   SDValue HiSub =
7354       DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
7355   Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
7356   return true;
7357 }
7358 
/// Lower FMINNUM/FMAXNUM to a compare+select when the no-NaNs flag makes the
/// NaN-propagation part of the fminnum/fmaxnum semantics irrelevant.  Returns
/// an empty SDValue (no lowering) when the flag is absent.
SDValue
TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
                                               SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
          Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
         "Wrong opcode");

  if (Node->getFlags().hasNoNaNs()) {
    // NOTE(review): for the STRICT_* opcodes admitted by the assert above,
    // this picks SETGT even for STRICT_FMINNUM, and operand 0 of a strict
    // node is the chain rather than the first value operand — presumably
    // callers only reach here with the non-strict opcodes; confirm before
    // relying on the strict path.
    ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
    SDValue Op1 = Node->getOperand(0);
    SDValue Op2 = Node->getOperand(1);
    SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
    // Copy FMF flags, but always set the no-signed-zeros flag
    // as this is implied by the FMINNUM/FMAXNUM semantics.
    SDNodeFlags Flags = Node->getFlags();
    Flags.setNoSignedZeros(true);
    SelCC->setFlags(Flags);
    return SelCC;
  }

  return SDValue();
}
7382 
7383 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
7384                                               SelectionDAG &DAG) const {
7385   SDLoc dl(Node);
7386   unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
7387     ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
7388   EVT VT = Node->getValueType(0);
7389 
7390   if (VT.isScalableVector())
7391     report_fatal_error(
7392         "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
7393 
7394   if (isOperationLegalOrCustom(NewOp, VT)) {
7395     SDValue Quiet0 = Node->getOperand(0);
7396     SDValue Quiet1 = Node->getOperand(1);
7397 
7398     if (!Node->getFlags().hasNoNaNs()) {
7399       // Insert canonicalizes if it's possible we need to quiet to get correct
7400       // sNaN behavior.
7401       if (!DAG.isKnownNeverSNaN(Quiet0)) {
7402         Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
7403                              Node->getFlags());
7404       }
7405       if (!DAG.isKnownNeverSNaN(Quiet1)) {
7406         Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
7407                              Node->getFlags());
7408       }
7409     }
7410 
7411     return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
7412   }
7413 
7414   // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
7415   // instead if there are no NaNs.
7416   if (Node->getFlags().hasNoNaNs()) {
7417     unsigned IEEE2018Op =
7418         Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
7419     if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
7420       return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
7421                          Node->getOperand(1), Node->getFlags());
7422     }
7423   }
7424 
7425   if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
7426     return SelCC;
7427 
7428   return SDValue();
7429 }
7430 
7431 SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
7432                                          unsigned Test, SDNodeFlags Flags,
7433                                          const SDLoc &DL,
7434                                          SelectionDAG &DAG) const {
7435   EVT OperandVT = Op.getValueType();
7436   assert(OperandVT.isFloatingPoint());
7437 
7438   // Degenerated cases.
7439   if (Test == 0 || (Test & fcAllFlags) == fcAllFlags)
7440     return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
7441 
7442   // PPC double double is a pair of doubles, of which the higher part determines
7443   // the value class.
7444   if (OperandVT == MVT::ppcf128) {
7445     Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
7446                      DAG.getConstant(1, DL, MVT::i32));
7447     OperandVT = MVT::f64;
7448   }
7449 
7450   // Some checks may be represented as inversion of simpler check, for example
7451   // "inf|normal|subnormal|zero" => !"nan".
7452   bool IsInverted = false;
7453   if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) {
7454     IsInverted = true;
7455     Test = InvertedCheck;
7456   }
7457 
7458   // Floating-point type properties.
7459   EVT ScalarFloatVT = OperandVT.getScalarType();
7460   const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
7461   const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
7462   bool IsF80 = (ScalarFloatVT == MVT::f80);
7463 
7464   // Some checks can be implemented using float comparisons, if floating point
7465   // exceptions are ignored.
7466   if (Flags.hasNoFPExcept() &&
7467       isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
7468     if (Test == fcZero)
7469       return DAG.getSetCC(DL, ResultVT, Op,
7470                           DAG.getConstantFP(0.0, DL, OperandVT),
7471                           IsInverted ? ISD::SETUNE : ISD::SETOEQ);
7472     if (Test == fcNan)
7473       return DAG.getSetCC(DL, ResultVT, Op, Op,
7474                           IsInverted ? ISD::SETO : ISD::SETUO);
7475   }
7476 
7477   // In the general case use integer operations.
7478   unsigned BitSize = OperandVT.getScalarSizeInBits();
7479   EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
7480   if (OperandVT.isVector())
7481     IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
7482                              OperandVT.getVectorElementCount());
7483   SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
7484 
7485   // Various masks.
7486   APInt SignBit = APInt::getSignMask(BitSize);
7487   APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
7488   APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
7489   const unsigned ExplicitIntBitInF80 = 63;
7490   APInt ExpMask = Inf;
7491   if (IsF80)
7492     ExpMask.clearBit(ExplicitIntBitInF80);
7493   APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
7494   APInt QNaNBitMask =
7495       APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
7496   APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits());
7497 
7498   SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
7499   SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
7500   SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
7501   SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
7502   SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
7503   SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
7504 
7505   SDValue Res;
7506   const auto appendResult = [&](SDValue PartialRes) {
7507     if (PartialRes) {
7508       if (Res)
7509         Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
7510       else
7511         Res = PartialRes;
7512     }
7513   };
7514 
7515   SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
7516   const auto getIntBitIsSet = [&]() -> SDValue {
7517     if (!IntBitIsSetV) {
7518       APInt IntBitMask(BitSize, 0);
7519       IntBitMask.setBit(ExplicitIntBitInF80);
7520       SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
7521       SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
7522       IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
7523     }
7524     return IntBitIsSetV;
7525   };
7526 
7527   // Split the value into sign bit and absolute value.
7528   SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
7529   SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
7530                                DAG.getConstant(0.0, DL, IntVT), ISD::SETLT);
7531 
7532   // Tests that involve more than one class should be processed first.
7533   SDValue PartialRes;
7534 
7535   if (IsF80)
7536     ; // Detect finite numbers of f80 by checking individual classes because
7537       // they have different settings of the explicit integer bit.
7538   else if ((Test & fcFinite) == fcFinite) {
7539     // finite(V) ==> abs(V) < exp_mask
7540     PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
7541     Test &= ~fcFinite;
7542   } else if ((Test & fcFinite) == fcPosFinite) {
7543     // finite(V) && V > 0 ==> V < exp_mask
7544     PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
7545     Test &= ~fcPosFinite;
7546   } else if ((Test & fcFinite) == fcNegFinite) {
7547     // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
7548     PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
7549     PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
7550     Test &= ~fcNegFinite;
7551   }
7552   appendResult(PartialRes);
7553 
7554   // Check for individual classes.
7555 
7556   if (unsigned PartialCheck = Test & fcZero) {
7557     if (PartialCheck == fcPosZero)
7558       PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
7559     else if (PartialCheck == fcZero)
7560       PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
7561     else // ISD::fcNegZero
7562       PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
7563     appendResult(PartialRes);
7564   }
7565 
7566   if (unsigned PartialCheck = Test & fcInf) {
7567     if (PartialCheck == fcPosInf)
7568       PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
7569     else if (PartialCheck == fcInf)
7570       PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
7571     else { // ISD::fcNegInf
7572       APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
7573       SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
7574       PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
7575     }
7576     appendResult(PartialRes);
7577   }
7578 
7579   if (unsigned PartialCheck = Test & fcNan) {
7580     APInt InfWithQnanBit = Inf | QNaNBitMask;
7581     SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
7582     if (PartialCheck == fcNan) {
7583       // isnan(V) ==> abs(V) > int(inf)
7584       PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
7585       if (IsF80) {
7586         // Recognize unsupported values as NaNs for compatibility with glibc.
7587         // In them (exp(V)==0) == int_bit.
7588         SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
7589         SDValue ExpIsZero =
7590             DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
7591         SDValue IsPseudo =
7592             DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
7593         PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
7594       }
7595     } else if (PartialCheck == fcQNan) {
7596       // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
7597       PartialRes =
7598           DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
7599     } else { // ISD::fcSNan
7600       // issignaling(V) ==> abs(V) > unsigned(Inf) &&
7601       //                    abs(V) < (unsigned(Inf) | quiet_bit)
7602       SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
7603       SDValue IsNotQnan =
7604           DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
7605       PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
7606     }
7607     appendResult(PartialRes);
7608   }
7609 
7610   if (unsigned PartialCheck = Test & fcSubnormal) {
7611     // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
7612     // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
7613     SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
7614     SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
7615     SDValue VMinusOneV =
7616         DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
7617     PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
7618     if (PartialCheck == fcNegSubnormal)
7619       PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
7620     appendResult(PartialRes);
7621   }
7622 
7623   if (unsigned PartialCheck = Test & fcNormal) {
7624     // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
7625     APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
7626     SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
7627     SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
7628     APInt ExpLimit = ExpMask - ExpLSB;
7629     SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
7630     PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
7631     if (PartialCheck == fcNegNormal)
7632       PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
7633     else if (PartialCheck == fcPosNormal) {
7634       SDValue PosSignV =
7635           DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
7636       PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
7637     }
7638     if (IsF80)
7639       PartialRes =
7640           DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
7641     appendResult(PartialRes);
7642   }
7643 
7644   if (!Res)
7645     return DAG.getConstant(IsInverted, DL, ResultVT);
7646   if (IsInverted)
7647     Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
7648   return Res;
7649 }
7650 
7651 // Only expand vector types if we have the appropriate vector bit operations.
7652 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
7653   assert(VT.isVector() && "Expected vector type");
7654   unsigned Len = VT.getScalarSizeInBits();
7655   return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
7656          TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
7657          TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
7658          (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
7659          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
7660 }
7661 
7662 SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
7663   SDLoc dl(Node);
7664   EVT VT = Node->getValueType(0);
7665   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7666   SDValue Op = Node->getOperand(0);
7667   unsigned Len = VT.getScalarSizeInBits();
7668   assert(VT.isInteger() && "CTPOP not implemented for this type.");
7669 
7670   // TODO: Add support for irregular type lengths.
7671   if (!(Len <= 128 && Len % 8 == 0))
7672     return SDValue();
7673 
7674   // Only expand vector types if we have the appropriate vector bit operations.
7675   if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
7676     return SDValue();
7677 
7678   // This is the "best" algorithm from
7679   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
7680   SDValue Mask55 =
7681       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
7682   SDValue Mask33 =
7683       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
7684   SDValue Mask0F =
7685       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
7686   SDValue Mask01 =
7687       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
7688 
7689   // v = v - ((v >> 1) & 0x55555555...)
7690   Op = DAG.getNode(ISD::SUB, dl, VT, Op,
7691                    DAG.getNode(ISD::AND, dl, VT,
7692                                DAG.getNode(ISD::SRL, dl, VT, Op,
7693                                            DAG.getConstant(1, dl, ShVT)),
7694                                Mask55));
7695   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
7696   Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
7697                    DAG.getNode(ISD::AND, dl, VT,
7698                                DAG.getNode(ISD::SRL, dl, VT, Op,
7699                                            DAG.getConstant(2, dl, ShVT)),
7700                                Mask33));
7701   // v = (v + (v >> 4)) & 0x0F0F0F0F...
7702   Op = DAG.getNode(ISD::AND, dl, VT,
7703                    DAG.getNode(ISD::ADD, dl, VT, Op,
7704                                DAG.getNode(ISD::SRL, dl, VT, Op,
7705                                            DAG.getConstant(4, dl, ShVT))),
7706                    Mask0F);
7707   // v = (v * 0x01010101...) >> (Len - 8)
7708   if (Len > 8)
7709     Op =
7710         DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
7711                     DAG.getConstant(Len - 8, dl, ShVT));
7712 
7713   return Op;
7714 }
7715 
7716 SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
7717   SDLoc dl(Node);
7718   EVT VT = Node->getValueType(0);
7719   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7720   SDValue Op = Node->getOperand(0);
7721   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
7722 
7723   // If the non-ZERO_UNDEF version is supported we can use that instead.
7724   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
7725       isOperationLegalOrCustom(ISD::CTLZ, VT))
7726     return DAG.getNode(ISD::CTLZ, dl, VT, Op);
7727 
7728   // If the ZERO_UNDEF version is supported use that and handle the zero case.
7729   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
7730     EVT SetCCVT =
7731         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7732     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
7733     SDValue Zero = DAG.getConstant(0, dl, VT);
7734     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
7735     return DAG.getSelect(dl, VT, SrcIsZero,
7736                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
7737   }
7738 
7739   // Only expand vector types if we have the appropriate vector bit operations.
7740   // This includes the operations needed to expand CTPOP if it isn't supported.
7741   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
7742                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
7743                          !canExpandVectorCTPOP(*this, VT)) ||
7744                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
7745                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
7746     return SDValue();
7747 
7748   // for now, we do this:
7749   // x = x | (x >> 1);
7750   // x = x | (x >> 2);
7751   // ...
7752   // x = x | (x >>16);
7753   // x = x | (x >>32); // for 64-bit input
7754   // return popcount(~x);
7755   //
7756   // Ref: "Hacker's Delight" by Henry Warren
7757   for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
7758     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
7759     Op = DAG.getNode(ISD::OR, dl, VT, Op,
7760                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
7761   }
7762   Op = DAG.getNOT(dl, Op, VT);
7763   return DAG.getNode(ISD::CTPOP, dl, VT, Op);
7764 }
7765 
7766 SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
7767   SDLoc dl(Node);
7768   EVT VT = Node->getValueType(0);
7769   SDValue Op = Node->getOperand(0);
7770   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
7771 
7772   // If the non-ZERO_UNDEF version is supported we can use that instead.
7773   if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
7774       isOperationLegalOrCustom(ISD::CTTZ, VT))
7775     return DAG.getNode(ISD::CTTZ, dl, VT, Op);
7776 
7777   // If the ZERO_UNDEF version is supported use that and handle the zero case.
7778   if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
7779     EVT SetCCVT =
7780         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7781     SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
7782     SDValue Zero = DAG.getConstant(0, dl, VT);
7783     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
7784     return DAG.getSelect(dl, VT, SrcIsZero,
7785                          DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
7786   }
7787 
7788   // Only expand vector types if we have the appropriate vector bit operations.
7789   // This includes the operations needed to expand CTPOP if it isn't supported.
7790   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
7791                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
7792                          !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
7793                          !canExpandVectorCTPOP(*this, VT)) ||
7794                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
7795                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
7796                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
7797     return SDValue();
7798 
7799   // for now, we use: { return popcount(~x & (x - 1)); }
7800   // unless the target has ctlz but not ctpop, in which case we use:
7801   // { return 32 - nlz(~x & (x-1)); }
7802   // Ref: "Hacker's Delight" by Henry Warren
7803   SDValue Tmp = DAG.getNode(
7804       ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
7805       DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
7806 
7807   // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
7808   if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
7809     return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
7810                        DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
7811   }
7812 
7813   return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
7814 }
7815 
7816 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
7817                                   bool IsNegative) const {
7818   SDLoc dl(N);
7819   EVT VT = N->getValueType(0);
7820   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7821   SDValue Op = N->getOperand(0);
7822 
7823   // abs(x) -> smax(x,sub(0,x))
7824   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
7825       isOperationLegal(ISD::SMAX, VT)) {
7826     SDValue Zero = DAG.getConstant(0, dl, VT);
7827     return DAG.getNode(ISD::SMAX, dl, VT, Op,
7828                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7829   }
7830 
7831   // abs(x) -> umin(x,sub(0,x))
7832   if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
7833       isOperationLegal(ISD::UMIN, VT)) {
7834     SDValue Zero = DAG.getConstant(0, dl, VT);
7835     return DAG.getNode(ISD::UMIN, dl, VT, Op,
7836                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7837   }
7838 
7839   // 0 - abs(x) -> smin(x, sub(0,x))
7840   if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
7841       isOperationLegal(ISD::SMIN, VT)) {
7842     SDValue Zero = DAG.getConstant(0, dl, VT);
7843     return DAG.getNode(ISD::SMIN, dl, VT, Op,
7844                        DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
7845   }
7846 
7847   // Only expand vector types if we have the appropriate vector operations.
7848   if (VT.isVector() &&
7849       (!isOperationLegalOrCustom(ISD::SRA, VT) ||
7850        (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
7851        (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
7852        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
7853     return SDValue();
7854 
7855   SDValue Shift =
7856       DAG.getNode(ISD::SRA, dl, VT, Op,
7857                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
7858   SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
7859 
7860   // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
7861   if (!IsNegative)
7862     return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
7863 
7864   // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
7865   return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
7866 }
7867 
/// Expand ISD::BSWAP into shifts, masks and ORs.  Returns an empty SDValue
/// for non-simple types and for scalar widths other than 16/32/64 bits.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // Shift each byte to its mirrored position (Tmp4..Tmp1 = bytes 0..3 of
    // the result), mask off the bits that crossed into neighboring bytes,
    // then OR the four lanes back together pairwise.
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(0xFF0000, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Same scheme as i32 with eight byte lanes: Tmp8..Tmp1 hold bytes 0..7
    // moved to their mirrored positions; mask the six middle lanes and OR
    // everything together as a balanced tree.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
                       DAG.getConstant(255ULL<<48, dl, VT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
                       DAG.getConstant(255ULL<<40, dl, VT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
                       DAG.getConstant(255ULL<<32, dl, VT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8 , dl, VT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
7925 
/// Expand ISD::BITREVERSE into shifts, masks and ORs.
///
/// For power-of-two scalar sizes of at least 8 bits, the bytes are first
/// reversed with a BSWAP (when wider than one byte) and then the bits inside
/// each byte are reversed in three swap steps — nibbles, bit-pairs, single
/// bits — using masks splatted across every byte. Otherwise each bit is
/// moved to its mirrored position individually.
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Bit-at-a-time fallback: shift bit I into its mirrored position J,
  // isolate it, and OR it into the accumulated result.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    // Keep only bit J of the shifted value.
    APInt Shift(Sz, 1);
    Shift <<= J;
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
7987 
/// Split a vector load into one scalar load per element.
///
/// Returns a (value, chain) pair: the loaded vector rebuilt from the scalar
/// pieces, and the chain covering all component loads. Aborts (fatal error)
/// on scalable vectors, which cannot be scalarized here.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    // LoadVT is the byte-rounded store size; SrcIntVT is the exact number of
    // bits the vector occupies in memory.
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 occupies the most significant bits
      // of the integer, so mirror the shift index.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
                                     LoadVT, SL, /*LegalTypes=*/false);
      // Shift the element down to bit 0, mask it out, and truncate to the
      // element type.
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Re-apply the extension the original extending load would have done.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one (possibly extending) scalar load per
  // element at consecutive byte offsets.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  // The loads are independent of each other; merge their chains.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
8078 
/// Split a vector store into one scalar store per element.
///
/// Returns the chain of the resulting stores (a TokenFactor, or a single
/// integer store for vectors with non-byte-sized elements). Aborts (fatal
/// error) on scalable vectors, which cannot be scalarized here.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    // Accumulate every (truncated) element into the right bit position of
    // one integer, then store that integer in a single operation.
    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most significant bits,
      // so mirror the shift index.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The stores are independent of each other; merge their chains.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
8155 
/// Expand a load whose alignment the target cannot handle natively.
///
/// Returns a (value, chain) pair. Three strategies are used:
///  - FP/vector types with a legal same-sized integer type are loaded as
///    that integer and bitcast back (or the load is scalarized when the
///    integer load itself is unavailable for a vector type);
///  - otherwise FP/vector values are staged byte-by-register through an
///    aligned stack slot and reloaded with the original load semantics;
///  - plain integers are split into two half-width loads that are shifted
///    and OR'd together.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a trailing partial register is copied too.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  On little-endian the low half lives at
  // the base address; on big-endian the high half does.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: Result = (Hi << NumBits) | Lo
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
8307 
/// Expand a store whose alignment the target cannot handle natively,
/// returning the chain of the resulting stores.
///
/// Mirrors expandUnalignedLoad: FP/vector values with a legal same-sized
/// integer type are bitcast to that integer and stored (or the store is
/// scalarized when the integer store is unavailable for a vector type);
/// otherwise FP/vector values are staged through an aligned stack slot and
/// copied out register-by-register; plain integers are split into two
/// half-width truncating stores.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a trailing partial register is copied too.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts.  On little-endian the low half goes at the base
  // address; on big-endian the high half does.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
8432 
8433 SDValue
8434 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
8435                                        const SDLoc &DL, EVT DataVT,
8436                                        SelectionDAG &DAG,
8437                                        bool IsCompressedMemory) const {
8438   SDValue Increment;
8439   EVT AddrVT = Addr.getValueType();
8440   EVT MaskVT = Mask.getValueType();
8441   assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
8442          "Incompatible types of Data and Mask");
8443   if (IsCompressedMemory) {
8444     if (DataVT.isScalableVector())
8445       report_fatal_error(
8446           "Cannot currently handle compressed memory with scalable vectors");
8447     // Incrementing the pointer according to number of '1's in the mask.
8448     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
8449     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
8450     if (MaskIntVT.getSizeInBits() < 32) {
8451       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
8452       MaskIntVT = MVT::i32;
8453     }
8454 
8455     // Count '1's with POPCNT.
8456     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
8457     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
8458     // Scale is an element size in bytes.
8459     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
8460                                     AddrVT);
8461     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
8462   } else if (DataVT.isScalableVector()) {
8463     Increment = DAG.getVScale(DL, AddrVT,
8464                               APInt(AddrVT.getFixedSizeInBits(),
8465                                     DataVT.getStoreSize().getKnownMinSize()));
8466   } else
8467     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
8468 
8469   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
8470 }
8471 
8472 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
8473                                        EVT VecVT, const SDLoc &dl,
8474                                        ElementCount SubEC) {
8475   assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
8476          "Cannot index a scalable vector within a fixed-width vector");
8477 
8478   unsigned NElts = VecVT.getVectorMinNumElements();
8479   unsigned NumSubElts = SubEC.getKnownMinValue();
8480   EVT IdxVT = Idx.getValueType();
8481 
8482   if (VecVT.isScalableVector() && !SubEC.isScalable()) {
8483     // If this is a constant index and we know the value plus the number of the
8484     // elements in the subvector minus one is less than the minimum number of
8485     // elements then it's safe to return Idx.
8486     if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
8487       if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
8488         return Idx;
8489     SDValue VS =
8490         DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
8491     unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
8492     SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
8493                               DAG.getConstant(NumSubElts, dl, IdxVT));
8494     return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
8495   }
8496   if (isPowerOf2_32(NElts) && NumSubElts == 1) {
8497     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
8498     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
8499                        DAG.getConstant(Imm, dl, IdxVT));
8500   }
8501   unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
8502   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
8503                      DAG.getConstant(MaxIndex, dl, IdxVT));
8504 }
8505 
8506 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
8507                                                 SDValue VecPtr, EVT VecVT,
8508                                                 SDValue Index) const {
8509   return getVectorSubVecPointer(
8510       DAG, VecPtr, VecVT,
8511       EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
8512       Index);
8513 }
8514 
/// Compute the address of the SubVecVT-sized subvector that starts at
/// element `Index` of the vector at VecPtr: VecPtr + ClampedIndex * EltSize.
/// The index is clamped so the whole subvector access stays within the
/// bounds of VecVT.
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  // For a scalable subvector, the byte offset of subvector `Index` scales
  // with vscale, so multiply the index by vscale first.
  EVT IdxVT = Index.getValueType();
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
8544 
8545 //===----------------------------------------------------------------------===//
8546 // Implementation of Emulated TLS Model
8547 //===----------------------------------------------------------------------===//
8548 
/// Lower a TLS global address under the emulated TLS model. The address is
/// obtained by calling __emutls_get_address with the address of the
/// corresponding "__emutls_v.<name>" control variable, which must already
/// exist in the module (asserted below).
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  // Look up the "__emutls_v.<name>" control variable and pass its address
  // as the sole argument of the call.
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  // Return only the call's result value; the chain is discarded here.
  return CallResult.first;
}
8585 
8586 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
8587                                                 SelectionDAG &DAG) const {
8588   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
8589   if (!isCtlzFast())
8590     return SDValue();
8591   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8592   SDLoc dl(Op);
8593   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
8594     if (C->isZero() && CC == ISD::SETEQ) {
8595       EVT VT = Op.getOperand(0).getValueType();
8596       SDValue Zext = Op.getOperand(0);
8597       if (VT.bitsLT(MVT::i32)) {
8598         VT = MVT::i32;
8599         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
8600       }
8601       unsigned Log2b = Log2_32(VT.getSizeInBits());
8602       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
8603       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
8604                                 DAG.getConstant(Log2b, dl, MVT::i32));
8605       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
8606     }
8607   }
8608   return SDValue();
8609 }
8610 
8611 // Convert redundant addressing modes (e.g. scaling is redundant
8612 // when accessing bytes).
8613 ISD::MemIndexType
8614 TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
8615                                       SDValue Offsets) const {
8616   bool IsScaledIndex =
8617       (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
8618   bool IsSignedIndex =
8619       (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
8620 
8621   // Scaling is unimportant for bytes, canonicalize to unscaled.
8622   if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)
8623     return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
8624 
8625   return IndexType;
8626 }
8627 
8628 SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
8629   SDValue Op0 = Node->getOperand(0);
8630   SDValue Op1 = Node->getOperand(1);
8631   EVT VT = Op0.getValueType();
8632   unsigned Opcode = Node->getOpcode();
8633   SDLoc DL(Node);
8634 
8635   // umin(x,y) -> sub(x,usubsat(x,y))
8636   if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
8637       isOperationLegal(ISD::USUBSAT, VT)) {
8638     return DAG.getNode(ISD::SUB, DL, VT, Op0,
8639                        DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
8640   }
8641 
8642   // umax(x,y) -> add(x,usubsat(y,x))
8643   if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
8644       isOperationLegal(ISD::USUBSAT, VT)) {
8645     return DAG.getNode(ISD::ADD, DL, VT, Op0,
8646                        DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
8647   }
8648 
8649   // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
8650   ISD::CondCode CC;
8651   switch (Opcode) {
8652   default: llvm_unreachable("How did we get here?");
8653   case ISD::SMAX: CC = ISD::SETGT; break;
8654   case ISD::SMIN: CC = ISD::SETLT; break;
8655   case ISD::UMAX: CC = ISD::SETUGT; break;
8656   case ISD::UMIN: CC = ISD::SETULT; break;
8657   }
8658 
8659   // FIXME: Should really try to split the vector in case it's legal on a
8660   // subvector.
8661   if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8662     return DAG.UnrollVectorOp(Node);
8663 
8664   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8665   SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
8666   return DAG.getSelect(DL, VT, Cond, Op0, Op1);
8667 }
8668 
/// Expand [SU](ADD|SUB)SAT into legal operations. Unsigned cases prefer
/// umax/umin based rewrites; otherwise the arithmetic is done via the
/// corresponding overflow-reporting node and the saturation constant is
/// selected when overflow is signalled.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map the saturating opcode to its overflow-reporting counterpart.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Compute the result together with an overflow flag.
  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // With all-ones booleans the overflow flag sign-extends to a full
      // mask, so an OR saturates without needing a select:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Likewise, AND with the inverted mask clamps underflow to zero:
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  // Signed case. On overflow the wrapped SumDiff has the opposite sign of
  // the mathematical result, so (SumDiff >> (BW-1)) is 0 or all-ones and
  // XORing it with SignedMin produces SatMin or SatMax as appropriate.
  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
8753 
8754 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
8755   unsigned Opcode = Node->getOpcode();
8756   bool IsSigned = Opcode == ISD::SSHLSAT;
8757   SDValue LHS = Node->getOperand(0);
8758   SDValue RHS = Node->getOperand(1);
8759   EVT VT = LHS.getValueType();
8760   SDLoc dl(Node);
8761 
8762   assert((Node->getOpcode() == ISD::SSHLSAT ||
8763           Node->getOpcode() == ISD::USHLSAT) &&
8764           "Expected a SHLSAT opcode");
8765   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
8766   assert(VT.isInteger() && "Expected operands to be integers");
8767 
8768   // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
8769 
8770   unsigned BW = VT.getScalarSizeInBits();
8771   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
8772   SDValue Orig =
8773       DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
8774 
8775   SDValue SatVal;
8776   if (IsSigned) {
8777     SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
8778     SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
8779     SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
8780                              SatMin, SatMax, ISD::SETLT);
8781   } else {
8782     SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
8783   }
8784   Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
8785 
8786   return Result;
8787 }
8788 
/// Expand a [US]MULFIX[SAT] node: compute the double-width product of the
/// scaled operands (via MUL_LOHI or MULH where available) and shift it right
/// by the scale, saturating on overflow for the SAT variants. Returns
/// SDValue() only for vector types whose wide multiply cannot be formed.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  // With a zero scale this is a plain (possibly saturating) multiply, so
  // prefer the direct MUL / [SU]MULO expansions when they are available.
  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      // Unsigned overflow always saturates to the maximum value.
      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    // No way to form the wide product for this vector type; give up and let
    // the caller handle it.
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With a zero scale the low half is the result, so overflow is simply
    // Hi differing from the sign-extension of Lo.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
8931 
/// Expand a [US]DIVFIX[SAT] node into a regular integer division by
/// pre-shifting the operands so the fixed-point scale is absorbed without
/// widening the type; the signed path additionally rounds the quotient
/// towards negative infinity. Returns SDValue() when the operands do not
/// have enough headroom for the in-type shift.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer shifting the LHS up; shift the RHS down only for whatever part of
  // the scale the LHS cannot absorb.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    // The quotient is negative exactly when the operand signs differ; in
    // that case a nonzero remainder means truncation rounded the wrong way.
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
9018 
/// Expand UADDO/USUBO into an ADD/SUB plus an explicit overflow check,
/// preferring ADDCARRY/SUBCARRY with a zero carry-in when legal. The result
/// and overflow flag are returned via \p Result and \p Overflow.
void TargetLowering::expandUADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::UADDO;

  // If ADD/SUBCARRY is legal, use that instead.
  unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
                                    { LHS, RHS, CarryIn });
    Result = SDValue(NodeCarry.getNode(), 0);
    Overflow = SDValue(NodeCarry.getNode(), 1);
    return;
  }

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                            LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT SetCCType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  SDValue SetCC;
  if (IsAdd && isOneConstant(RHS)) {
    // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
    // the live range of X. We assume comparing with 0 is cheap.
    // The general case (X + C) < C is not necessarily beneficial. Although we
    // reduce the live range of X, we may introduce the materialization of
    // constant C.
    SetCC =
        DAG.getSetCC(dl, SetCCType, Result,
                     DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
  } else {
    // Unsigned add overflows iff the result is less than either operand;
    // unsigned sub overflows (borrows) iff the result is greater than LHS.
    ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
    SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
  }
  // Convert the setcc result to the boolean type expected by the node.
  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}
9059 
/// Expand SADDO/SSUBO into an ADD/SUB plus an explicit signed-overflow
/// check, using SADDSAT/SSUBSAT for the check when legal. The result and
/// overflow flag are returned via \p Result and \p Overflow.
void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                            LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegal(OpcSat, LHS.getValueType())) {
    // Overflow happened exactly when the saturating result differs from the
    // wrapping result.
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);

  // Overflow iff the two conditions above disagree (XOR).
  Overflow = DAG.getBoolExtOrTrunc(
      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
      ResultType, ResultType);
}
9099 
/// Expand [SU]MULO into a multiply plus an overflow check. Tries, in order:
/// a shift-based form for power-of-two constants, MULH/MUL_LOHI, a widened
/// MUL in a legal double-width type, and finally a MUL_I* libcall for scalar
/// types. Returns false only for vectors that none of these can handle.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting the result back does not reproduce LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // General case: compute the full double-width product and inspect the
  // high half for overflow.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Opcode triples indexed by signedness: {high-half mul, lo/hi pair mul,
  // extension used for the widened multiply}.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Multiply in the wide type and split the product into halves.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getFixedSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
        HiLHS = DAG.getConstant(0, dl, VT);
        HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  // Overflow occurred iff the high half is not the sign-extension of the
  // low half (signed) or is nonzero (unsigned).
  Result = BottomHalf;
  if (isSigned) {
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
9243 
9244 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
9245   SDLoc dl(Node);
9246   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
9247   SDValue Op = Node->getOperand(0);
9248   EVT VT = Op.getValueType();
9249 
9250   if (VT.isScalableVector())
9251     report_fatal_error(
9252         "Expanding reductions for scalable vectors is undefined.");
9253 
9254   // Try to use a shuffle reduction for power of two vectors.
9255   if (VT.isPow2VectorType()) {
9256     while (VT.getVectorNumElements() > 1) {
9257       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
9258       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
9259         break;
9260 
9261       SDValue Lo, Hi;
9262       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
9263       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
9264       VT = HalfVT;
9265     }
9266   }
9267 
9268   EVT EltVT = VT.getVectorElementType();
9269   unsigned NumElts = VT.getVectorNumElements();
9270 
9271   SmallVector<SDValue, 8> Ops;
9272   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
9273 
9274   SDValue Res = Ops[0];
9275   for (unsigned i = 1; i < NumElts; i++)
9276     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
9277 
9278   // Result type may be wider than element type.
9279   if (EltVT != Node->getValueType(0))
9280     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
9281   return Res;
9282 }
9283 
9284 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
9285   SDLoc dl(Node);
9286   SDValue AccOp = Node->getOperand(0);
9287   SDValue VecOp = Node->getOperand(1);
9288   SDNodeFlags Flags = Node->getFlags();
9289 
9290   EVT VT = VecOp.getValueType();
9291   EVT EltVT = VT.getVectorElementType();
9292 
9293   if (VT.isScalableVector())
9294     report_fatal_error(
9295         "Expanding reductions for scalable vectors is undefined.");
9296 
9297   unsigned NumElts = VT.getVectorNumElements();
9298 
9299   SmallVector<SDValue, 8> Ops;
9300   DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
9301 
9302   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
9303 
9304   SDValue Res = AccOp;
9305   for (unsigned i = 0; i < NumElts; i++)
9306     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
9307 
9308   return Res;
9309 }
9310 
9311 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
9312                                SelectionDAG &DAG) const {
9313   EVT VT = Node->getValueType(0);
9314   SDLoc dl(Node);
9315   bool isSigned = Node->getOpcode() == ISD::SREM;
9316   unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
9317   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
9318   SDValue Dividend = Node->getOperand(0);
9319   SDValue Divisor = Node->getOperand(1);
9320   if (isOperationLegalOrCustom(DivRemOpc, VT)) {
9321     SDVTList VTs = DAG.getVTList(VT, VT);
9322     Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
9323     return true;
9324   }
9325   if (isOperationLegalOrCustom(DivOpc, VT)) {
9326     // X % Y -> X-X/Y*Y
9327     SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
9328     SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
9329     Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
9330     return true;
9331   }
9332   return false;
9333 }
9334 
/// Expand FP_TO_SINT_SAT / FP_TO_UINT_SAT into either a clamp (FMAXNUM +
/// FMINNUM) followed by a plain FP_TO_SINT/FP_TO_UINT, or into a direct
/// conversion patched up with compare+select pairs for the out-of-range and
/// NaN cases. The saturation width is carried in operand 1 as a VTSDNode.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values. The bounds are those of the SatWidth-bit integer
  // range, widened to DstWidth so they can be materialized as DstVT constants.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  // Convert the integer bounds to SrcVT's FP semantics, rounding toward zero
  // so the FP bound never lies outside the integer range.
  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  // If neither conversion was inexact, the FP bounds are exactly the integer
  // bounds, and a simple FP clamp before conversion is sufficient.
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
                           ISD::CondCode::SETULT);
  // If Src OGT MaxFloat, select MaxInt.
  Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
                           ISD::CondCode::SETOGT);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
9437 
/// Expand ISD::VECTOR_SPLICE for scalable vectors by going through the
/// stack: store V1 and V2 contiguously into a stack slot sized for
/// CONCAT_VECTORS(V1, V2), then reload VT from the byte offset selected by
/// the splice immediate.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  // Negative Imm selects the last -Imm elements of V1 followed by elements
  // of V2; non-negative Imm starts the result Imm elements into V1.
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // Stack slot holds both vectors back to back: 2x the element count of VT.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  // sizeof(V1) is scalable, so the offset is vscale * known-min store size.
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  // Clamp only when TrailingElts can exceed the (scalable) length of V1;
  // UMIN against vscale * sizeof(V1) keeps the load inside the slot.
  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes = DAG.getVScale(
        DL, PtrVT,
        APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
9510 
/// Legalize the condition code of a (VP_)SETCC whose CC is not legal for
/// \p OpVT. On success, LHS/RHS/CC are rewritten in place (CC/RHS may be
/// cleared when the result was fully materialized into LHS) and true is
/// returned; \p NeedInvert tells the caller the rewritten comparison must be
/// logically negated. Returns false if the condition was already legal.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try the cheap rewrites: swap the operands, invert the condition,
    // or both, looking for a legal-or-custom condition code.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Otherwise split the comparison into two simpler comparisons (CC1, CC2)
    // combined with a logical opcode (AND/OR).
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      // SETUO is the negation of SETO; expand as SETO and invert the result.
      NeedInvert = true;
      LLVM_FALLTHROUGH;
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      LLVM_FALLTHROUGH;
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // Strip the U/O bit and force the "don't care about ordering" form
        // of the comparison for CC1.
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      LLVM_FALLTHROUGH;
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    // Emit the two sub-comparisons and combine them.
    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    // For strict FP nodes, merge the two output chains.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    // Result is fully formed in LHS; clear RHS/CC to signal this to callers.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
9664